Python - Collections Module (Counter, deque, OrderedDict)
Counter is a dict subclass designed for counting hashable objects. It stores elements as keys and their counts as values, with several methods that make frequency analysis trivial.
Key Insights
- Counter provides efficient frequency counting with arithmetic operations, outperforming manual dictionary approaches for tallying elements and finding common items
- deque implements O(1) append and pop operations from both ends, making it superior to lists for queue implementations and sliding window algorithms
- OrderedDict maintains insertion order with move_to_end() and popitem() methods that regular dicts lack, essential for LRU caches and ordered data structures
Counter: Frequency Counting Made Simple
Counter is a dict subclass designed for counting hashable objects. It stores elements as keys and their counts as values, with several methods that make frequency analysis trivial.
from collections import Counter
# Basic counting
words = ['apple', 'banana', 'apple', 'cherry', 'banana', 'apple']
counter = Counter(words)
print(counter) # Counter({'apple': 3, 'banana': 2, 'cherry': 1})
# Most common elements
print(counter.most_common(2)) # [('apple', 3), ('banana', 2)]
# Access counts (returns 0 for missing keys, unlike regular dict)
print(counter['apple']) # 3
print(counter['orange']) # 0
Counter supports arithmetic operations that make complex frequency manipulations straightforward:
# Combining counters
inventory1 = Counter({'apples': 5, 'oranges': 3})
inventory2 = Counter({'apples': 2, 'bananas': 4})
# Addition
total = inventory1 + inventory2
print(total) # Counter({'apples': 7, 'bananas': 4, 'oranges': 3})
# Subtraction (keeps only positive counts)
diff = inventory1 - inventory2
print(diff) # Counter({'apples': 3, 'oranges': 3})
# Intersection (minimum of corresponding counts)
common = inventory1 & inventory2
print(common) # Counter({'apples': 2})
# Union (maximum of corresponding counts)
union = inventory1 | inventory2
print(union) # Counter({'apples': 5, 'bananas': 4, 'oranges': 3})
Real-world application for finding anagrams:
def are_anagrams(str1, str2):
return Counter(str1.lower()) == Counter(str2.lower())
print(are_anagrams("listen", "silent")) # True
print(are_anagrams("hello", "world")) # False
# Find character frequency difference
def char_difference(str1, str2):
return Counter(str1) - Counter(str2)
print(char_difference("programming", "gaming"))
# Counter({'p': 1, 'r': 2, 'o': 1})
deque: Double-Ended Queue Operations
deque (pronounced “deck”) provides O(1) time complexity for append and pop operations from both ends, unlike lists which have O(n) complexity for operations at the beginning.
from collections import deque
# Creating and basic operations
dq = deque([1, 2, 3, 4, 5])
# Add elements
dq.append(6) # Add to right: deque([1, 2, 3, 4, 5, 6])
dq.appendleft(0) # Add to left: deque([0, 1, 2, 3, 4, 5, 6])
# Remove elements
dq.pop() # Remove from right: returns 6
dq.popleft() # Remove from left: returns 0
# Rotation
dq.rotate(2) # Rotate right by 2
dq.rotate(-1) # Rotate left by 1
print(dq)
Implementing a bounded queue with maxlen:
# Fixed-size queue automatically discards old elements
recent_logs = deque(maxlen=5)
for i in range(10):
recent_logs.append(f"Log entry {i}")
print(recent_logs)
# Final state keeps only last 5 entries
# deque(['Log entry 5', 'Log entry 6', 'Log entry 7',
# 'Log entry 8', 'Log entry 9'], maxlen=5)
Practical example - sliding window maximum:
def sliding_window_max(nums, k):
"""Find maximum in each sliding window of size k"""
if not nums or k == 0:
return []
dq = deque() # Store indices
result = []
for i, num in enumerate(nums):
# Remove indices outside current window
while dq and dq[0] < i - k + 1:
dq.popleft()
# Remove indices with smaller values
while dq and nums[dq[-1]] < num:
dq.pop()
dq.append(i)
# Add to result when window is full
if i >= k - 1:
result.append(nums[dq[0]])
return result
nums = [1, 3, -1, -3, 5, 3, 6, 7]
print(sliding_window_max(nums, 3)) # [3, 3, 5, 5, 6, 7]
Implementing a simple task scheduler:
class TaskScheduler:
def __init__(self):
self.tasks = deque()
def add_task(self, task, priority='normal'):
if priority == 'high':
self.tasks.appendleft(task)
else:
self.tasks.append(task)
def process_next(self):
if self.tasks:
return self.tasks.popleft()
return None
scheduler = TaskScheduler()
scheduler.add_task("Regular task 1")
scheduler.add_task("Urgent task", priority='high')
scheduler.add_task("Regular task 2")
print(scheduler.process_next()) # "Urgent task"
print(scheduler.process_next()) # "Regular task 1"
OrderedDict: Order-Preserving Dictionary
While regular dicts maintain insertion order since Python 3.7, OrderedDict provides additional methods specifically for order manipulation and is more explicit about order guarantees.
from collections import OrderedDict
# Basic usage
od = OrderedDict()
od['first'] = 1
od['second'] = 2
od['third'] = 3
# Order is preserved
print(list(od.keys())) # ['first', 'second', 'third']
# Move to end
od.move_to_end('first')
print(list(od.keys())) # ['second', 'third', 'first']
# Move to beginning
od.move_to_end('first', last=False)
print(list(od.keys())) # ['first', 'second', 'third']
# Pop from end or beginning
last_item = od.popitem(last=True) # ('third', 3)
first_item = od.popitem(last=False) # ('first', 1)
Implementing an LRU (Least Recently Used) cache:
class LRUCache:
def __init__(self, capacity):
self.cache = OrderedDict()
self.capacity = capacity
def get(self, key):
if key not in self.cache:
return -1
# Move to end to mark as recently used
self.cache.move_to_end(key)
return self.cache[key]
def put(self, key, value):
if key in self.cache:
self.cache.move_to_end(key)
self.cache[key] = value
if len(self.cache) > self.capacity:
# Remove least recently used (first item)
self.cache.popitem(last=False)
# Usage
cache = LRUCache(3)
cache.put(1, 'one')
cache.put(2, 'two')
cache.put(3, 'three')
print(cache.get(1)) # 'one' - moves to end
cache.put(4, 'four') # Removes key 2 (least recently used)
print(cache.get(2)) # -1 (not found)
Maintaining a sorted dictionary by value:
def sort_dict_by_value(d):
"""Sort dictionary by value while maintaining order"""
return OrderedDict(sorted(d.items(), key=lambda x: x[1]))
scores = {'Alice': 85, 'Bob': 92, 'Charlie': 78, 'David': 95}
sorted_scores = sort_dict_by_value(scores)
print(sorted_scores)
# OrderedDict([('Charlie', 78), ('Alice', 85), ('Bob', 92), ('David', 95)])
# Get top N performers
def top_n(od, n):
return OrderedDict(list(od.items())[-n:])
print(top_n(sorted_scores, 2))
# OrderedDict([('Bob', 92), ('David', 95)])
Equality comparison difference:
# Regular dicts: order doesn't matter for equality
dict1 = {'a': 1, 'b': 2}
dict2 = {'b': 2, 'a': 1}
print(dict1 == dict2) # True
# OrderedDict: order matters
od1 = OrderedDict([('a', 1), ('b', 2)])
od2 = OrderedDict([('b', 2), ('a', 1)])
print(od1 == od2) # False
Performance Considerations
Understanding when to use each collection type impacts application performance:
import time
from collections import deque, Counter
# List vs deque for left operations
def benchmark_left_ops(n=10000):
# List
start = time.time()
lst = []
for i in range(n):
lst.insert(0, i)
list_time = time.time() - start
# deque
start = time.time()
dq = deque()
for i in range(n):
dq.appendleft(i)
deque_time = time.time() - start
print(f"List: {list_time:.4f}s")
print(f"deque: {deque_time:.4f}s")
print(f"deque is {list_time/deque_time:.1f}x faster")
benchmark_left_ops()
# Typical output:
# List: 0.0234s
# deque: 0.0012s
# deque is 19.5x faster
Counter vs manual counting:
# Manual counting
def manual_count(items):
counts = {}
for item in items:
counts[item] = counts.get(item, 0) + 1
return counts
# Counter
def counter_count(items):
return Counter(items)
# Both work, but Counter provides additional methods
# and is optimized for counting operations
The collections module provides specialized data structures that solve common programming problems efficiently. Counter simplifies frequency analysis, deque enables efficient queue operations, and OrderedDict maintains explicit ordering with manipulation methods. Choose the right tool based on your specific requirements for optimal performance and code clarity.