Python - Read/Write JSON Files
• Python's built-in `json` module handles reading and writing JSON files with automatic conversion between Python objects and JSON types — this guide covers the core APIs, error handling, custom serialization, and streaming large files
Key Insights
• Python’s json module provides load()/loads() for reading and dump()/dumps() for writing JSON data with built-in type conversion between Python objects and JSON format
• Always use context managers (with statements) when working with JSON files to ensure proper file handling and automatic resource cleanup
• Handle encoding issues explicitly with UTF-8 and implement proper error handling for malformed JSON using try-except blocks with JSONDecodeError
Reading JSON Files
The json.load() function reads JSON data directly from a file object and converts it to Python data structures. Use this when working with JSON files on disk.
import json
# Basic file reading
with open('config.json', 'r', encoding='utf-8') as file:
data = json.load(file)
print(data)
For JSON strings already in memory, use json.loads() instead:
import json
json_string = '{"name": "Alice", "age": 30, "active": true}'
data = json.loads(json_string)
print(data['name']) # Output: Alice
print(type(data)) # Output: <class 'dict'>
JSON to Python Type Mapping
Understanding type conversion is critical when working with JSON data. Python’s json module automatically maps JSON types to Python equivalents:
import json
json_data = '''
{
"string": "text",
"number_int": 42,
"number_float": 3.14,
"boolean": true,
"null_value": null,
"array": [1, 2, 3],
"object": {"nested": "value"}
}
'''
data = json.loads(json_data)
print(type(data['string'])) # <class 'str'>
print(type(data['number_int'])) # <class 'int'>
print(type(data['number_float'])) # <class 'float'>
print(type(data['boolean'])) # <class 'bool'>
print(type(data['null_value'])) # <class 'NoneType'>
print(type(data['array'])) # <class 'list'>
print(type(data['object'])) # <class 'dict'>
Writing JSON Files
The json.dump() function writes Python objects to a file, while json.dumps() converts objects to JSON strings.
import json
data = {
'users': [
{'id': 1, 'name': 'John', 'roles': ['admin', 'user']},
{'id': 2, 'name': 'Jane', 'roles': ['user']}
],
'timestamp': '2024-01-15T10:30:00Z'
}
# Write to file
with open('output.json', 'w', encoding='utf-8') as file:
json.dump(data, file, indent=2)
# Convert to string
json_string = json.dumps(data, indent=2)
print(json_string)
The indent parameter formats the output for readability. In production, omit it for compact output:
# Compact format (production)
with open('output.json', 'w', encoding='utf-8') as file:
json.dump(data, file, separators=(',', ':'))
Error Handling
Always implement error handling when reading JSON files. Malformed JSON or missing files will raise exceptions:
import json
from json.decoder import JSONDecodeError
def read_json_safe(filepath):
    """Read and parse a JSON file, returning None on any failure.

    Instead of raising, a diagnostic message is printed for missing
    files, malformed JSON, and any other I/O problem, so callers can
    simply check the result for None.
    """
    try:
        with open(filepath, 'r', encoding='utf-8') as fh:
            return json.load(fh)
    except FileNotFoundError:
        print(f"File not found: {filepath}")
    except JSONDecodeError as e:
        print(f"Invalid JSON in {filepath}: {e.msg} at line {e.lineno}, column {e.colno}")
    except Exception as e:
        print(f"Unexpected error reading {filepath}: {str(e)}")
    # All error paths fall through to a single None return.
    return None
# Usage
data = read_json_safe('config.json')
if data:
print("Successfully loaded:", data)
Working with Custom Objects
Python objects require custom serialization. Implement this using the default parameter or by extending JSONEncoder:
import json
from datetime import datetime
from decimal import Decimal
class User:
    """Plain record describing an account holder.

    Attributes are stored verbatim; JSON serialization is handled
    externally (see serialize_custom / CustomEncoder).
    """

    def __init__(self, name, created_at, balance):
        self.name, self.created_at, self.balance = name, created_at, balance
# Method 1: Using default parameter
def serialize_custom(obj):
    """Fallback serializer to pass as ``default=`` to json.dumps.

    Converts datetime -> ISO-8601 string, Decimal -> float, and User ->
    a plain dict; anything else is rejected with TypeError, mirroring
    the json module's behavior for unknown types.
    """
    if isinstance(obj, datetime):
        return obj.isoformat()
    elif isinstance(obj, Decimal):
        return float(obj)
    elif isinstance(obj, User):
        # Nested datetime/Decimal attribute values are re-dispatched to
        # this same hook by json.dumps, so no recursion is needed here.
        return {
            'name': obj.name,
            'created_at': obj.created_at,
            'balance': obj.balance,
        }
    raise TypeError(f"Object of type {type(obj)} is not JSON serializable")
user = User("Alice", datetime(2024, 1, 15), Decimal('1000.50'))
json_output = json.dumps(user, default=serialize_custom, indent=2)
print(json_output)
# Method 2: Custom encoder class
class CustomEncoder(json.JSONEncoder):
    """JSONEncoder subclass that understands datetime, Decimal and User."""

    def default(self, obj):
        # ISO-8601 text for timestamps.
        if isinstance(obj, datetime):
            return obj.isoformat()
        # float() is lossy for very precise Decimals, but matches the
        # default-parameter approach used elsewhere in this tutorial.
        if isinstance(obj, Decimal):
            return float(obj)
        # A User's attribute dict is JSON-compatible once the nested
        # datetime/Decimal values above are converted recursively.
        if isinstance(obj, User):
            return vars(obj)
        # Unknown type: let the base class raise its standard TypeError.
        return super().default(obj)
json_output = json.dumps(user, cls=CustomEncoder, indent=2)
print(json_output)
Reading Large JSON Files
For large JSON files, use streaming or process data in chunks to avoid memory issues:
import json
# Stream processing for large arrays
def process_large_json_array(filepath):
    """Incrementally yield the elements of a top-level JSON array.

    Reads the file in 4 KiB chunks and decodes one element at a time
    with JSONDecoder.raw_decode, so the whole array is never held in
    memory at once.

    The original version did ``file.read(1)`` to skip the opening
    bracket, which breaks whenever the file starts with whitespace or a
    newline (raw_decode then parses the ENTIRE array as one object).
    This version scans past leading whitespace and validates the '['.

    Args:
        filepath: path to a file whose top-level value is a JSON array.

    Yields:
        Each decoded element of the array, in document order.

    Raises:
        ValueError: if the file's first non-whitespace character is not '['.
    """
    decoder = json.JSONDecoder()
    with open(filepath, 'r', encoding='utf-8') as file:
        buffer = ''
        started = False
        for chunk in iter(lambda: file.read(4096), ''):
            buffer += chunk
            if not started:
                # Tolerate whitespace/newlines before the opening bracket.
                buffer = buffer.lstrip()
                if not buffer:
                    continue
                if buffer[0] != '[':
                    raise ValueError(f"Expected JSON array in {filepath}")
                buffer = buffer[1:]
                started = True
            while True:
                # Drop the separating comma and any surrounding whitespace.
                buffer = buffer.lstrip().lstrip(',').lstrip()
                if not buffer:
                    break
                if buffer[0] == ']':
                    # End of the array: stop reading the rest of the file.
                    return
                try:
                    obj, end = decoder.raw_decode(buffer)
                except json.JSONDecodeError:
                    # Element is split across chunks; read more first.
                    break
                yield obj
                buffer = buffer[end:]
# Usage
for record in process_large_json_array('large_data.json'):
# Process each record individually
print(record['id'])
For line-delimited JSON (JSONL), process line by line:
def read_jsonl(filepath):
    """Yield one parsed object per non-blank line of a JSON-Lines file."""
    with open(filepath, 'r', encoding='utf-8') as fh:
        # Blank lines (whitespace only) are skipped, not errors.
        yield from (json.loads(raw) for raw in fh if raw.strip())
# Usage
for record in read_jsonl('data.jsonl'):
print(record)
Advanced Formatting Options
Control JSON output formatting with additional parameters:
import json

data = {
    'name': 'Test User',
    'email': 'test@example.com',
    'tags': ['python', 'json', 'tutorial'],
    'metadata': {'created': '2024-01-15', 'updated': '2024-01-16'}
}

# Sort keys alphabetically for stable, diff-friendly output
sorted_json = json.dumps(data, indent=2, sort_keys=True)

# Escape non-ASCII characters (ensure_ascii=True is already the default)
ascii_json = json.dumps({'name': 'José'}, ensure_ascii=True)
print(ascii_json)  # {"name": "Jos\u00e9"}

# Preserve non-ASCII characters as literal UTF-8 text
utf8_json = json.dumps({'name': 'José'}, ensure_ascii=False)
print(utf8_json)  # {"name": "José"}

# Custom separators for compact output (no space after ',' or ':')
compact_json = json.dumps(data, separators=(',', ':'))

# Emit NaN/Infinity literals (allow_nan=True is the default, but the
# result is NOT strict JSON — some parsers will reject it)
special_values = {'value': float('nan'), 'infinity': float('inf')}
json_with_special = json.dumps(special_values, allow_nan=True)
Updating JSON Files
Read, modify, and write back JSON data atomically:
import json
import os
from tempfile import NamedTemporaryFile
def update_json_file(filepath, update_func):
    """Atomically apply ``update_func`` to the JSON document at ``filepath``.

    The document is loaded, transformed by ``update_func(data) -> data``,
    written to a temporary file, and swapped in with os.replace, so
    readers never observe a half-written file. On failure the original
    file is left untouched and the temporary file is removed.

    Args:
        filepath: path of the JSON file to update (must already exist).
        update_func: callable taking the parsed data and returning the
            (possibly same, mutated) data to write back.
    """
    # Read existing data.
    with open(filepath, 'r', encoding='utf-8') as file:
        data = json.load(file)
    # Apply updates.
    data = update_func(data)
    # The temp file must live on the SAME filesystem as the target:
    # the default system temp dir can be a different mount, and
    # os.replace() raises OSError on cross-device renames.
    directory = os.path.dirname(os.path.abspath(filepath))
    temp_file = NamedTemporaryFile(mode='w', encoding='utf-8',
                                   dir=directory, delete=False)
    try:
        json.dump(data, temp_file, indent=2)
        temp_file.close()
        # Atomic on both POSIX and Windows.
        os.replace(temp_file.name, filepath)
    except Exception:
        # Close before unlink (Windows cannot delete an open file),
        # then re-raise with the original traceback intact.
        temp_file.close()
        os.unlink(temp_file.name)
        raise
# Usage
def add_user(data):
    """Example update hook: append the demo user Bob (id 3) in place."""
    new_user = {'id': 3, 'name': 'Bob'}
    data['users'].append(new_user)
    return data
update_json_file('users.json', add_user)
This approach ensures data integrity by writing to a temporary file first, preventing corruption if the write operation fails midway.