Python - Sort List of Dictionaries | Application Architect

Key Insights

Python offers multiple approaches to sort lists of dictionaries: sorted() with key parameter, itemgetter() for performance, and lambda functions for complex sorting logic
Multi-level sorting uses either tuple-based keys or chained sort() calls applied from the least to the most significant key, each with its own reverse setting
Custom sorting handles edge cases like missing keys, None values, and case-insensitive string comparisons through defensive key functions

Basic Sorting with Lambda Functions

The most straightforward approach uses the sorted() function with a lambda expression to specify which dictionary key to sort by.

users = [
    {'name': 'Alice', 'age': 30},
    {'name': 'Bob', 'age': 25},
    {'name': 'Charlie', 'age': 35}
]

# Sort by age (ascending); the lambda picks the value each dict is compared by
sorted_users = sorted(users, key=lambda x: x['age'])
print(sorted_users)
# [{'name': 'Bob', 'age': 25}, {'name': 'Alice', 'age': 30}, {'name': 'Charlie', 'age': 35}]

# Sort by name (descending)
sorted_users = sorted(users, key=lambda x: x['name'], reverse=True)
print(sorted_users)
# [{'name': 'Charlie', 'age': 35}, {'name': 'Bob', 'age': 25}, {'name': 'Alice', 'age': 30}]

The sorted() function returns a new list without modifying the original. For in-place sorting, use the list.sort() method:

# list.sort() reorders the existing list object and returns None
users.sort(key=lambda user: user['age'])
# users is now sorted in-place

Performance Optimization with itemgetter

For production code where performance matters, operator.itemgetter() provides faster execution than lambda functions.

from operator import itemgetter

products = [
    {'name': 'Laptop', 'price': 1200, 'stock': 5},
    {'name': 'Mouse', 'price': 25, 'stock': 150},
    {'name': 'Keyboard', 'price': 80, 'stock': 45}
]

# itemgetter('price') builds a callable that returns item['price'];
# it is passed to sorted() as the key function.
by_price = itemgetter('price')
sorted_products = sorted(products, key=by_price)
print(sorted_products)
# [{'name': 'Mouse', 'price': 25, ...}, {'name': 'Keyboard', 'price': 80, ...}, ...]

A benchmark like the following typically shows itemgetter() to be roughly 20-30% faster than a lambda for simple key access, although the exact margin varies with the Python version and hardware:

import timeit

# Shared setup for both measurements: timeit runs it once per call and does
# not include it in the reported time.
setup = """
from operator import itemgetter
data = [{'value': i} for i in range(1000)]
"""

lambda_time = timeit.timeit(
    'sorted(data, key=lambda x: x["value"])',
    setup=setup,
    number=10000
)

# `setup` already imports itemgetter, so both statements are timed against
# exactly the same setup code.
itemgetter_time = timeit.timeit(
    'sorted(data, key=itemgetter("value"))',
    setup=setup,
    number=10000
)

print(f"Lambda: {lambda_time:.4f}s")
print(f"itemgetter: {itemgetter_time:.4f}s")

Multi-Level Sorting

Sort by multiple keys using tuples in the key function. Python compares tuples element-by-element.

employees = [
    {'name': 'Alice', 'department': 'Engineering', 'salary': 90000},
    {'name': 'Bob', 'department': 'Engineering', 'salary': 85000},
    {'name': 'Charlie', 'department': 'Sales', 'salary': 70000},
    {'name': 'Diana', 'department': 'Sales', 'salary': 75000}
]

# Sort by department (ascending), then salary (descending).
# Negating the numeric salary reverses its order inside the tuple key.
sorted_employees = sorted(
    employees,
    key=lambda row: (row['department'], -row['salary']),
)

for emp in sorted_employees:
    name, department, salary = emp['name'], emp['department'], emp['salary']
    print(f"{name}: {department} - ${salary}")
# Alice: Engineering - $90000
# Bob: Engineering - $85000
# Diana: Sales - $75000
# Charlie: Sales - $70000

For mixed ascending/descending order on non-numeric values (which cannot simply be negated), chain stable sorts from the least to the most significant key, or use a key that reverses comparisons:

from operator import itemgetter

# Method 1: Multiple sort calls (reverse order of priority)
# Python's sort is stable, so the second (primary) sort keeps the salary
# order produced by the first sort within each department.
employees.sort(key=itemgetter('salary'), reverse=True)
employees.sort(key=itemgetter('department'))


# Method 2: Custom tuple with reverse wrapper
class ReverseOrder:
    """Wrap a value so that it compares in the opposite order of the value."""

    __slots__ = ('value',)

    def __init__(self, value):
        self.value = value

    def __eq__(self, other):
        # Tuple comparison uses == to find the first differing element.
        return self.value == other.value

    def __lt__(self, other):
        # Inverted on purpose: the wrapper holding the larger value sorts first.
        return self.value > other.value


# Department ascending, salary descending -- the same order as Method 1.
# The wrapper only needs == and < on the wrapped value, so it also works
# for values that cannot be negated, such as strings.
sorted_employees = sorted(
    employees,
    key=lambda x: (x['department'], ReverseOrder(x['salary']))
)

Handling Missing Keys and None Values

Real-world data often contains missing or None values. Defensive key functions prevent KeyError exceptions.

data = [
    {'name': 'Product A', 'rating': 4.5},
    {'name': 'Product B'},  # missing rating
    {'name': 'Product C', 'rating': None},
    {'name': 'Product D', 'rating': 3.8}
]

# Safe sorting with default value for missing keys:
# .get() covers an absent 'rating', the trailing `or 0` covers an explicit None.
sorted_data = sorted(
    data,
    key=lambda product: product.get('rating', 0) or 0,
)

for item in sorted_data:
    shown_rating = item.get('rating', 'N/A')
    print(f"{item['name']}: {shown_rating}")
# Product B: N/A
# Product C: None
# Product D: 3.8
# Product A: 4.5

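The `or 0` handles None values by treating them as 0 (any other falsy rating, such as 0.0, also becomes 0). For more control over how missing and None values are ranked, use a helper function: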

def safe_sort_key(item, key_name, default=0, none_value=0):
    """Return a sortable value for ``item[key_name]``.

    ``default`` is used when the key is absent from ``item``; ``none_value``
    replaces a looked-up value that is ``None`` (this includes a ``None``
    passed in as ``default``).
    """
    found = item.get(key_name, default)
    if found is None:
        return none_value
    return found

# Missing and None ratings both map to -inf, so in this ascending sort they
# come before every real rating.
lowest = float('-inf')
sorted_data = sorted(
    data,
    key=lambda row: safe_sort_key(row, 'rating', default=lowest, none_value=lowest),
)

Case-Insensitive String Sorting

String sorting is case-sensitive by default, so uppercase letters sort before lowercase ones. Normalize the case in the key function for alphabetical sorting:

companies = [
    {'name': 'apple Inc.'},
    {'name': 'Microsoft'},
    {'name': 'Amazon'},
    {'name': 'google LLC'}
]

# Case-sensitive (default): uppercase letters compare lower than lowercase ones
sorted_companies = sorted(companies, key=lambda x: x['name'])
print([c['name'] for c in sorted_companies])
# ['Amazon', 'Microsoft', 'apple Inc.', 'google LLC']

# Case-insensitive: casefold() is the str method intended for caseless
# comparison; it behaves like lower() for ASCII and also handles characters
# such as the German 'ß'.
sorted_companies = sorted(companies, key=lambda x: x['name'].casefold())
print([c['name'] for c in sorted_companies])
# ['Amazon', 'apple Inc.', 'google LLC', 'Microsoft']

Complex Sorting Logic

Combine multiple conditions and transformations in the key function:

from datetime import datetime

transactions = [
    {'id': 1, 'amount': 100, 'date': '2024-01-15', 'status': 'pending'},
    {'id': 2, 'amount': 200, 'date': '2024-01-10', 'status': 'completed'},
    {'id': 3, 'amount': 150, 'date': '2024-01-12', 'status': 'completed'},
    {'id': 4, 'amount': 300, 'date': '2024-01-15', 'status': 'pending'}
]

# Lower rank sorts first; built once instead of on every key call.
_STATUS_PRIORITY = {'completed': 0, 'pending': 1, 'failed': 2}


# Sort by status (completed first), then date (newest first), then amount (highest first)
def sort_key(transaction):
    """Build the sort key for one transaction dict.

    Args:
        transaction: dict with a 'status' string, a 'date' string in
            'YYYY-MM-DD' form and a numeric 'amount'.

    Returns:
        A 3-tuple (status rank, negated day number, negated amount).
        Statuses not in the priority table get rank 999 and sort last.

    Raises:
        KeyError: if 'status', 'date' or 'amount' is missing.
        ValueError: if 'date' does not match '%Y-%m-%d'.
    """
    day = datetime.strptime(transaction['date'], '%Y-%m-%d')
    return (
        _STATUS_PRIORITY.get(transaction['status'], 999),
        # toordinal() is a plain day count: unlike timestamp() on a naive
        # datetime it does not depend on the local timezone. Negated so that
        # newer dates sort first.
        -day.toordinal(),
        -transaction['amount'],
    )


sorted_transactions = sorted(transactions, key=sort_key)

for t in sorted_transactions:
    print(f"ID {t['id']}: {t['status']} - ${t['amount']} on {t['date']}")
# ID 3: completed - $150 on 2024-01-12
# ID 2: completed - $200 on 2024-01-10
# ID 4: pending - $300 on 2024-01-15
# ID 1: pending - $100 on 2024-01-15

Sorting with Custom Objects

When dictionaries contain nested structures or values that require type conversion, do the lookup or conversion inside the key function:

from decimal import Decimal

financial_data = [
    {'ticker': 'AAPL', 'price': Decimal('150.25'), 'volume': 1000000},
    {'ticker': 'GOOGL', 'price': Decimal('140.50'), 'volume': 800000},
    {'ticker': 'MSFT', 'price': Decimal('380.75'), 'volume': 1200000}
]


def price_to_volume(stock):
    """Return the stock's price divided by its volume, as a float."""
    return float(stock['price']) / stock['volume']


# Sort by price-to-volume ratio, largest ratio first
sorted_data = sorted(financial_data, key=price_to_volume, reverse=True)

for stock in sorted_data:
    ratio = price_to_volume(stock)
    print(f"{stock['ticker']}: {ratio:.10f}")

For nested dictionaries:

users = [
    {'name': 'Alice', 'profile': {'age': 30, 'city': 'NYC'}},
    {'name': 'Bob', 'profile': {'age': 25, 'city': 'LA'}},
    {'name': 'Charlie', 'profile': {'age': 35, 'city': 'Chicago'}}
]

# Sort by nested age value (raises KeyError if 'profile' or 'age' is absent)
sorted_users = sorted(users, key=lambda x: x['profile']['age'])


# Safe nested access
def profile_age(user):
    """Return user['profile']['age'], or 0 when either level is missing or None."""
    # `or {}` also covers a 'profile' key that is present but set to None,
    # which .get('profile', {}) alone would pass through unchanged.
    profile = user.get('profile') or {}
    return profile.get('age') or 0


sorted_users = sorted(users, key=profile_age)

Stable Sorting Guarantees

Python’s sort is stable, meaning equal elements maintain their original relative order. Exploit this for complex multi-level sorts:

records = [
    {'type': 'A', 'priority': 1, 'timestamp': 100},
    {'type': 'B', 'priority': 1, 'timestamp': 95},
    {'type': 'A', 'priority': 2, 'timestamp': 90},
    {'type': 'B', 'priority': 1, 'timestamp': 105}
]

# Pass 1 (secondary criterion): put everything in timestamp order
records.sort(key=lambda rec: rec['timestamp'])
# Pass 2 (primary criterion): the stable sort keeps the timestamp order
# among records that share a priority
records.sort(key=lambda rec: rec['priority'])

for r in records:
    kind, priority, moment = r['type'], r['priority'], r['timestamp']
    print(f"Type {kind}, Priority {priority}, Time {moment}")

This technique provides cleaner code than complex tuple-based keys when dealing with three or more sort criteria.