Python - Read/Write JSON Files


Key Insights

• Python’s `json` module provides `load()`/`loads()` for reading and `dump()`/`dumps()` for writing JSON data, with built-in type conversion between Python objects and JSON format

• Always use context managers (`with` statements) when working with JSON files to ensure proper file handling and automatic resource cleanup

• Handle encoding issues explicitly with UTF-8 and implement proper error handling for malformed JSON using try-except blocks with `JSONDecodeError`

Reading JSON Files

The json.load() function reads JSON data directly from a file object and converts it to Python data structures. Use this when working with JSON files on disk.

import json

# Basic file reading
with open('config.json', 'r', encoding='utf-8') as file:
    data = json.load(file)
    print(data)

For JSON strings already in memory, use json.loads() instead:

import json

json_string = '{"name": "Alice", "age": 30, "active": true}'
data = json.loads(json_string)
print(data['name'])  # Output: Alice
print(type(data))    # Output: <class 'dict'>
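
Both load() and loads() also accept parsing hooks. For example, when float precision matters (prices, currency), the documented parse_float parameter lets you parse numbers as Decimal instead of binary floats; a short sketch:

```python
import json
from decimal import Decimal

# Parse every JSON float through Decimal to avoid binary rounding
data = json.loads('{"price": 19.99}', parse_float=Decimal)
print(type(data['price']))  # <class 'decimal.Decimal'>
print(data['price'])        # 19.99
```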

JSON to Python Type Mapping

Understanding type conversion is critical when working with JSON data. Python’s json module automatically maps JSON types to Python equivalents:

import json

json_data = '''
{
    "string": "text",
    "number_int": 42,
    "number_float": 3.14,
    "boolean": true,
    "null_value": null,
    "array": [1, 2, 3],
    "object": {"nested": "value"}
}
'''

data = json.loads(json_data)

print(type(data['string']))       # <class 'str'>
print(type(data['number_int']))   # <class 'int'>
print(type(data['number_float'])) # <class 'float'>
print(type(data['boolean']))      # <class 'bool'>
print(type(data['null_value']))   # <class 'NoneType'>
print(type(data['array']))        # <class 'list'>
print(type(data['object']))       # <class 'dict'>
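
The mapping also runs in reverse when serializing, with two quirks worth knowing: tuples become JSON arrays, and non-string dictionary keys are coerced to strings. A quick sketch:

```python
import json

# Tuples serialize as arrays; the int key 1 becomes the string "1"
out = json.dumps({'point': (1, 2), 1: 'one'})
print(out)  # {"point": [1, 2], "1": "one"}

# Round-tripping therefore does not restore the original types
back = json.loads(out)
print(type(back['point']))  # <class 'list'>
```

This means a dumps/loads round trip is lossy for tuples and non-string keys, which matters when JSON is used as a cache or persistence format.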

Writing JSON Files

The json.dump() function writes Python objects to a file, while json.dumps() converts objects to JSON strings.

import json

data = {
    'users': [
        {'id': 1, 'name': 'John', 'roles': ['admin', 'user']},
        {'id': 2, 'name': 'Jane', 'roles': ['user']}
    ],
    'timestamp': '2024-01-15T10:30:00Z'
}

# Write to file
with open('output.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, indent=2)

# Convert to string
json_string = json.dumps(data, indent=2)
print(json_string)

The indent parameter formats the output for readability. For the most compact output (e.g., network payloads), omit it and tighten the separators:

# Compact format (production)
with open('output.json', 'w', encoding='utf-8') as file:
    json.dump(data, file, separators=(',', ':'))

Error Handling

Always implement error handling when reading JSON files. Malformed JSON or missing files will raise exceptions:

import json
from json.decoder import JSONDecodeError

def read_json_safe(filepath):
    try:
        with open(filepath, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"File not found: {filepath}")
        return None
    except JSONDecodeError as e:
        print(f"Invalid JSON in {filepath}: {e.msg} at line {e.lineno}, column {e.colno}")
        return None
    except Exception as e:
        print(f"Unexpected error reading {filepath}: {str(e)}")
        return None

# Usage
data = read_json_safe('config.json')
if data is not None:  # an empty dict or list is valid JSON but falsy
    print("Successfully loaded:", data)

Working with Custom Objects

Python objects require custom serialization. Implement this using the default parameter or by extending JSONEncoder:

import json
from datetime import datetime
from decimal import Decimal

class User:
    def __init__(self, name, created_at, balance):
        self.name = name
        self.created_at = created_at
        self.balance = balance

# Method 1: Using default parameter
def serialize_custom(obj):
    if isinstance(obj, datetime):
        return obj.isoformat()
    if isinstance(obj, Decimal):
        return float(obj)
    if isinstance(obj, User):
        return {
            'name': obj.name,
            'created_at': obj.created_at,
            'balance': obj.balance
        }
    raise TypeError(f"Object of type {type(obj)} is not JSON serializable")

user = User("Alice", datetime(2024, 1, 15), Decimal('1000.50'))
json_output = json.dumps(user, default=serialize_custom, indent=2)
print(json_output)

# Method 2: Custom encoder class
class CustomEncoder(json.JSONEncoder):
    def default(self, obj):
        if isinstance(obj, datetime):
            return obj.isoformat()
        if isinstance(obj, Decimal):
            return float(obj)
        if isinstance(obj, User):
            return obj.__dict__
        return super().default(obj)

json_output = json.dumps(user, cls=CustomEncoder, indent=2)
print(json_output)
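
Decoding back into custom objects is the mirror image, via the object_hook parameter. One common pattern (a sketch; the '__type__' marker field is a convention invented here, not a json-module feature) dispatches on a type tag:

```python
import json

class User:
    def __init__(self, name):
        self.name = name

# '__type__' is our own marker convention for tagged objects
def decode_user(d):
    if d.get('__type__') == 'User':
        return User(d['name'])
    return d

json_input = '{"__type__": "User", "name": "Alice"}'
user = json.loads(json_input, object_hook=decode_user)
print(type(user).__name__)  # User
print(user.name)            # Alice
```

object_hook is called once for every JSON object decoded, innermost first, so nested tagged objects are reconstructed automatically.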

Reading Large JSON Files

For large JSON files, use streaming or process data in chunks to avoid memory issues:

import json

# Stream processing for large arrays
def process_large_json_array(filepath):
    with open(filepath, 'r', encoding='utf-8') as file:
        # Skip the opening bracket (assumes the file starts with '[')
        file.read(1)
        
        decoder = json.JSONDecoder()
        buffer = ''
        
        for chunk in iter(lambda: file.read(4096), ''):
            buffer += chunk
            while buffer:
                buffer = buffer.lstrip()
                if not buffer or buffer[0] == ']':
                    break
                    
                try:
                    obj, idx = decoder.raw_decode(buffer)
                    yield obj
                    # Drop the decoded object, then any whitespace and
                    # the separating comma (whitespace may precede it)
                    buffer = buffer[idx:].lstrip()
                    if buffer.startswith(','):
                        buffer = buffer[1:]
                except json.JSONDecodeError:
                    # Incomplete object; read the next chunk
                    break

# Usage
for record in process_large_json_array('large_data.json'):
    # Process each record individually
    print(record['id'])

For line-delimited JSON (JSONL), process line by line:

import json

def read_jsonl(filepath):
    with open(filepath, 'r', encoding='utf-8') as file:
        for line in file:
            if line.strip():
                yield json.loads(line)

# Usage
for record in read_jsonl('data.jsonl'):
    print(record)
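
Writing JSONL is the mirror operation: one compact JSON object per line. A minimal sketch (write_jsonl is a helper name invented here):

```python
import json

def write_jsonl(filepath, records):
    with open(filepath, 'w', encoding='utf-8') as file:
        for record in records:
            # One compact object per line; keep non-ASCII readable
            file.write(json.dumps(record, ensure_ascii=False) + '\n')

write_jsonl('data.jsonl', [{'id': 1}, {'id': 2}])
```

Because each line is independent, JSONL files can be appended to safely and processed with constant memory.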

Advanced Formatting Options

Control JSON output formatting with additional parameters:

import json

data = {
    'name': 'Test User',
    'email': 'test@example.com',
    'tags': ['python', 'json', 'tutorial'],
    'metadata': {'created': '2024-01-15', 'updated': '2024-01-16'}
}

# Sort keys alphabetically
sorted_json = json.dumps(data, indent=2, sort_keys=True)

# Ensure ASCII encoding (escape non-ASCII characters)
ascii_json = json.dumps({'name': 'José'}, ensure_ascii=True)
print(ascii_json)  # {"name": "Jos\u00e9"}

# Preserve non-ASCII characters
utf8_json = json.dumps({'name': 'José'}, ensure_ascii=False)
print(utf8_json)  # {"name": "José"}

# Custom separators for compact output
compact_json = json.dumps(data, separators=(',', ':'))

# Allow NaN and Infinity (non-standard JSON)
special_values = {'value': float('nan'), 'infinity': float('inf')}
json_with_special = json.dumps(special_values, allow_nan=True)
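
The resulting NaN and Infinity tokens are not valid per the JSON specification, and strict parsers in other languages will reject them. Passing allow_nan=False makes dumps() fail fast instead; a sketch:

```python
import json

# Strict mode: raise instead of emitting non-standard NaN/Infinity
try:
    json.dumps({'value': float('nan')}, allow_nan=False)
    emitted = True
except ValueError as e:
    emitted = False
    print(f"Rejected: {e}")
```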

Updating JSON Files

Read, modify, and write back JSON data atomically:

import json
import os
from tempfile import NamedTemporaryFile

def update_json_file(filepath, update_func):
    # Read existing data
    with open(filepath, 'r', encoding='utf-8') as file:
        data = json.load(file)
    
    # Apply updates
    data = update_func(data)
    
    # Write to a temporary file first, created in the target directory
    # so os.replace never crosses filesystems
    temp_file = NamedTemporaryFile(
        mode='w', delete=False, encoding='utf-8',
        dir=os.path.dirname(os.path.abspath(filepath)))
    try:
        json.dump(data, temp_file, indent=2)
        temp_file.close()
        
        # Replace original file atomically
        os.replace(temp_file.name, filepath)
    except Exception as e:
        os.unlink(temp_file.name)
        raise e

# Usage
def add_user(data):
    data['users'].append({'id': 3, 'name': 'Bob'})
    return data

update_json_file('users.json', add_user)

This approach ensures data integrity by writing to a temporary file first, preventing corruption if the write operation fails midway.
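
If you also need durability against power loss, a common extension of the same pattern (a sketch; write_json_durable is a helper name invented here) is to flush and fsync the temp file before the rename:

```python
import json
import os
from tempfile import NamedTemporaryFile

def write_json_durable(filepath, data):
    # Temp file in the target directory keeps os.replace atomic
    dirpath = os.path.dirname(os.path.abspath(filepath))
    with NamedTemporaryFile('w', dir=dirpath, delete=False,
                            encoding='utf-8') as tmp:
        json.dump(data, tmp, indent=2)
        tmp.flush()
        os.fsync(tmp.fileno())  # force bytes to disk before renaming
    os.replace(tmp.name, filepath)

write_json_durable('settings.json', {'version': 1})
```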
