Python - Collections Module (Counter, deque, OrderedDict)

Key Insights

Counter provides efficient frequency counting with arithmetic operations, outperforming manual dictionary approaches for tallying elements and finding common items
deque implements O(1) append and pop operations from both ends, making it superior to lists for queue implementations and sliding window algorithms
OrderedDict maintains insertion order with move_to_end() and popitem() methods that regular dicts lack, essential for LRU caches and ordered data structures

Counter: Frequency Counting Made Simple

Counter is a dict subclass designed for counting hashable objects. It stores elements as keys and their counts as values, with several methods that make frequency analysis trivial.

from collections import Counter

# Basic counting
words = ['apple', 'banana', 'apple', 'cherry', 'banana', 'apple']
counter = Counter(words)
print(counter)  # Counter({'apple': 3, 'banana': 2, 'cherry': 1})

# Most common elements
print(counter.most_common(2))  # [('apple', 3), ('banana', 2)]

# Access counts (returns 0 for missing keys, unlike regular dict)
print(counter['apple'])    # 3
print(counter['orange'])   # 0

Counter supports arithmetic operations that make complex frequency manipulations straightforward:

# Combining counters
inventory1 = Counter({'apples': 5, 'oranges': 3})
inventory2 = Counter({'apples': 2, 'bananas': 4})

# Addition
total = inventory1 + inventory2
print(total)  # Counter({'apples': 7, 'bananas': 4, 'oranges': 3})

# Subtraction (keeps only positive counts)
diff = inventory1 - inventory2
print(diff)  # Counter({'apples': 3, 'oranges': 3})

# Intersection (minimum of corresponding counts)
common = inventory1 & inventory2
print(common)  # Counter({'apples': 2})

# Union (maximum of corresponding counts)
union = inventory1 | inventory2
print(union)  # Counter({'apples': 5, 'bananas': 4, 'oranges': 3})

Real-world application for finding anagrams:

def are_anagrams(str1, str2):
    return Counter(str1.lower()) == Counter(str2.lower())

print(are_anagrams("listen", "silent"))  # True
print(are_anagrams("hello", "world"))    # False

# Find character frequency difference
def char_difference(str1, str2):
    return Counter(str1) - Counter(str2)

print(char_difference("programming", "gaming"))
# Counter({'p': 1, 'r': 2, 'o': 1})

deque: Double-Ended Queue Operations

deque (pronounced “deck”) provides O(1) time complexity for append and pop operations from both ends, unlike lists which have O(n) complexity for operations at the beginning.

from collections import deque

# Creating and basic operations
dq = deque([1, 2, 3, 4, 5])

# Add elements
dq.append(6)        # Add to right: deque([1, 2, 3, 4, 5, 6])
dq.appendleft(0)    # Add to left: deque([0, 1, 2, 3, 4, 5, 6])

# Remove elements
dq.pop()            # Remove from right: returns 6
dq.popleft()        # Remove from left: returns 0

# Rotation
dq.rotate(2)        # Rotate right by 2
dq.rotate(-1)       # Rotate left by 1

print(dq)

Implementing a bounded queue with maxlen:

# Fixed-size queue automatically discards old elements
recent_logs = deque(maxlen=5)

for i in range(10):
    recent_logs.append(f"Log entry {i}")
    print(recent_logs)

# Final state keeps only last 5 entries
# deque(['Log entry 5', 'Log entry 6', 'Log entry 7', 
#        'Log entry 8', 'Log entry 9'], maxlen=5)

Practical example - sliding window maximum:

def sliding_window_max(nums, k):
    """Find maximum in each sliding window of size k"""
    if not nums or k == 0:
        return []
    
    dq = deque()  # Store indices
    result = []
    
    for i, num in enumerate(nums):
        # Remove indices outside current window
        while dq and dq[0] < i - k + 1:
            dq.popleft()
        
        # Remove indices with smaller values
        while dq and nums[dq[-1]] < num:
            dq.pop()
        
        dq.append(i)
        
        # Add to result when window is full
        if i >= k - 1:
            result.append(nums[dq[0]])
    
    return result

nums = [1, 3, -1, -3, 5, 3, 6, 7]
print(sliding_window_max(nums, 3))  # [3, 3, 5, 5, 6, 7]

Implementing a simple task scheduler:

class TaskScheduler:
    def __init__(self):
        self.tasks = deque()
    
    def add_task(self, task, priority='normal'):
        if priority == 'high':
            self.tasks.appendleft(task)
        else:
            self.tasks.append(task)
    
    def process_next(self):
        if self.tasks:
            return self.tasks.popleft()
        return None

scheduler = TaskScheduler()
scheduler.add_task("Regular task 1")
scheduler.add_task("Urgent task", priority='high')
scheduler.add_task("Regular task 2")

print(scheduler.process_next())  # "Urgent task"
print(scheduler.process_next())  # "Regular task 1"

OrderedDict: Order-Preserving Dictionary

While regular dicts maintain insertion order since Python 3.7, OrderedDict provides additional methods specifically for order manipulation and is more explicit about order guarantees.

from collections import OrderedDict

# Basic usage
od = OrderedDict()
od['first'] = 1
od['second'] = 2
od['third'] = 3

# Order is preserved
print(list(od.keys()))  # ['first', 'second', 'third']

# Move to end
od.move_to_end('first')
print(list(od.keys()))  # ['second', 'third', 'first']

# Move to beginning
od.move_to_end('first', last=False)
print(list(od.keys()))  # ['first', 'second', 'third']

# Pop from end or beginning
last_item = od.popitem(last=True)   # ('third', 3)
first_item = od.popitem(last=False) # ('first', 1)

Implementing an LRU (Least Recently Used) cache:

class LRUCache:
    def __init__(self, capacity):
        self.cache = OrderedDict()
        self.capacity = capacity
    
    def get(self, key):
        if key not in self.cache:
            return -1
        # Move to end to mark as recently used
        self.cache.move_to_end(key)
        return self.cache[key]
    
    def put(self, key, value):
        if key in self.cache:
            self.cache.move_to_end(key)
        self.cache[key] = value
        if len(self.cache) > self.capacity:
            # Remove least recently used (first item)
            self.cache.popitem(last=False)

# Usage
cache = LRUCache(3)
cache.put(1, 'one')
cache.put(2, 'two')
cache.put(3, 'three')
print(cache.get(1))      # 'one' - moves to end
cache.put(4, 'four')     # Removes key 2 (least recently used)
print(cache.get(2))      # -1 (not found)

Maintaining a sorted dictionary by value:

def sort_dict_by_value(d):
    """Sort dictionary by value while maintaining order"""
    return OrderedDict(sorted(d.items(), key=lambda x: x[1]))

scores = {'Alice': 85, 'Bob': 92, 'Charlie': 78, 'David': 95}
sorted_scores = sort_dict_by_value(scores)
print(sorted_scores)
# OrderedDict([('Charlie', 78), ('Alice', 85), ('Bob', 92), ('David', 95)])

# Get top N performers
def top_n(od, n):
    return OrderedDict(list(od.items())[-n:])

print(top_n(sorted_scores, 2))
# OrderedDict([('Bob', 92), ('David', 95)])

Equality comparison difference:

# Regular dicts: order doesn't matter for equality
dict1 = {'a': 1, 'b': 2}
dict2 = {'b': 2, 'a': 1}
print(dict1 == dict2)  # True

# OrderedDict: order matters
od1 = OrderedDict([('a', 1), ('b', 2)])
od2 = OrderedDict([('b', 2), ('a', 1)])
print(od1 == od2)  # False

Performance Considerations

Understanding when to use each collection type impacts application performance:

import time
from collections import deque, Counter

# List vs deque for left operations
def benchmark_left_ops(n=10000):
    # List
    start = time.time()
    lst = []
    for i in range(n):
        lst.insert(0, i)
    list_time = time.time() - start
    
    # deque
    start = time.time()
    dq = deque()
    for i in range(n):
        dq.appendleft(i)
    deque_time = time.time() - start
    
    print(f"List: {list_time:.4f}s")
    print(f"deque: {deque_time:.4f}s")
    print(f"deque is {list_time/deque_time:.1f}x faster")

benchmark_left_ops()
# Typical output:
# List: 0.0234s
# deque: 0.0012s
# deque is 19.5x faster

Counter vs manual counting:

# Manual counting
def manual_count(items):
    counts = {}
    for item in items:
        counts[item] = counts.get(item, 0) + 1
    return counts

# Counter
def counter_count(items):
    return Counter(items)

# Both work, but Counter provides additional methods
# and is optimized for counting operations

The collections module provides specialized data structures that solve common programming problems efficiently. Counter simplifies frequency analysis, deque enables efficient queue operations, and OrderedDict maintains explicit ordering with manipulation methods. Choose the right tool based on your specific requirements for optimal performance and code clarity.

Counter: Frequency Counting Made Simple

deque: Double-Ended Queue Operations

OrderedDict: Order-Preserving Dictionary

Performance Considerations

Liked this? There's more.