Python - List Files in Directory
This guide covers the main ways to list directory contents in Python — from simple one-directory listings, through pattern matching and recursive traversal, to retrieving file metadata and handling errors safely.
Key Insights
- Python provides multiple methods to list directory contents: `os.listdir()` for simple listings, `pathlib.Path` for a modern object-oriented approach, and `os.walk()` for recursive traversal.
- Use `glob` patterns and filtering techniques to target specific file types, exclude hidden files, or retrieve files matching complex patterns.
- Combine directory listing with file metadata retrieval using `os.stat()` or `Path.stat()` to build powerful file management utilities.
Basic Directory Listing with os.listdir()
The os.listdir() function returns a list of all entries in a directory as strings. This is the most straightforward approach for simple directory listings.
import os
# List all items in current directory
items = os.listdir('.')
print(items)
# List items in specific directory
items = os.listdir('/path/to/directory')
for item in items:
print(item)
The function returns both files and directories without distinction. To filter only files:
import os
directory = '/path/to/directory'
files = [f for f in os.listdir(directory)
if os.path.isfile(os.path.join(directory, f))]
print(files)
Note that os.listdir() returns relative names, not full paths. Always use os.path.join() when performing operations on the results.
Modern Approach with pathlib
The pathlib module provides an object-oriented interface for filesystem operations. It’s the recommended approach for Python 3.4+.
from pathlib import Path
# List all items in directory
directory = Path('/path/to/directory')
items = list(directory.iterdir())
# Filter only files
files = [f for f in directory.iterdir() if f.is_file()]
# Filter only directories
dirs = [d for d in directory.iterdir() if d.is_dir()]
# Get file names as strings
file_names = [f.name for f in directory.iterdir() if f.is_file()]
The Path object provides convenient methods and properties:
from pathlib import Path
directory = Path('/path/to/directory')
for item in directory.iterdir():
print(f"Name: {item.name}")
print(f"Full path: {item.absolute()}")
print(f"Is file: {item.is_file()}")
print(f"Is directory: {item.is_dir()}")
print(f"Suffix: {item.suffix}")
print("---")
Pattern Matching with glob
The glob module enables pattern-based file selection using Unix-style wildcards.
import glob
# Find all Python files
python_files = glob.glob('/path/to/directory/*.py')
# Find files recursively
all_python_files = glob.glob('/path/to/directory/**/*.py', recursive=True)
# Find files with multiple extensions
patterns = ['*.py', '*.txt', '*.json']
files = []
for pattern in patterns:
files.extend(glob.glob(f'/path/to/directory/{pattern}'))
Using pathlib with glob patterns:
from pathlib import Path
directory = Path('/path/to/directory')
# Find all Python files
python_files = list(directory.glob('*.py'))
# Recursive search
all_python_files = list(directory.rglob('*.py'))
# Complex patterns
log_files = list(directory.glob('**/*.log'))
Recursive Directory Traversal with os.walk()
For walking entire directory trees, os.walk() provides a powerful generator-based approach.
import os
for root, dirs, files in os.walk('/path/to/directory'):
print(f"Current directory: {root}")
print(f"Subdirectories: {dirs}")
print(f"Files: {files}")
print("---")
Practical example - find all Python files in a project:
import os
def find_python_files(directory):
    """Return the full path of every ``.py`` file found under *directory*.

    Walks the tree rooted at *directory* recursively and collects each
    matching file as ``os.path.join(dirpath, filename)``.
    """
    matches = []
    for dirpath, _subdirs, filenames in os.walk(directory):
        matches.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.endswith('.py')
        )
    return matches
files = find_python_files('/path/to/project')
for f in files:
print(f)
Control traversal behavior by modifying the dirs list in-place:
import os
for root, dirs, files in os.walk('/path/to/directory'):
# Skip hidden directories and __pycache__
dirs[:] = [d for d in dirs if not d.startswith('.') and d != '__pycache__']
for file in files:
print(os.path.join(root, file))
Filtering and Sorting Files
Implement custom filtering logic based on file attributes:
from pathlib import Path
import time
directory = Path('/path/to/directory')
# Files modified in last 24 hours
now = time.time()
recent_files = [
f for f in directory.iterdir()
if f.is_file() and (now - f.stat().st_mtime) < 86400
]
# Files larger than 1MB
large_files = [
f for f in directory.iterdir()
if f.is_file() and f.stat().st_size > 1024 * 1024
]
# Sort by modification time
sorted_files = sorted(
directory.iterdir(),
key=lambda f: f.stat().st_mtime,
reverse=True
)
Exclude hidden files and system files:
from pathlib import Path
directory = Path('/path/to/directory')
visible_files = [
f for f in directory.iterdir()
if f.is_file() and not f.name.startswith('.')
]
Retrieving File Metadata
Combine directory listing with metadata retrieval for comprehensive file information:
from pathlib import Path
from datetime import datetime
directory = Path('/path/to/directory')
for file in directory.iterdir():
if file.is_file():
stats = file.stat()
print(f"File: {file.name}")
print(f"Size: {stats.st_size} bytes")
print(f"Modified: {datetime.fromtimestamp(stats.st_mtime)}")
print(f"Created: {datetime.fromtimestamp(stats.st_ctime)}")
print("---")
Create a file inventory with detailed information:
from pathlib import Path
from datetime import datetime
def get_file_inventory(directory):
    """Build a detailed inventory of every file under *directory*.

    Recursively scans *directory* and returns a list of dicts, one per
    regular file, with keys: ``path``, ``name``, ``size``, ``extension``,
    ``modified`` (a ``datetime``), and ``is_hidden`` (dot-prefixed name).
    """
    records = []
    for entry in Path(directory).rglob('*'):
        if not entry.is_file():
            continue  # skip directories, symlinked dirs, etc.
        info = entry.stat()
        records.append({
            'path': str(entry),
            'name': entry.name,
            'size': info.st_size,
            'extension': entry.suffix,
            'modified': datetime.fromtimestamp(info.st_mtime),
            'is_hidden': entry.name.startswith('.'),
        })
    return records
# Usage
inventory = get_file_inventory('/path/to/directory')
for item in inventory:
print(f"{item['name']}: {item['size']} bytes")
Performance Considerations
For large directories, use generators instead of lists to reduce memory consumption:
from pathlib import Path
def iter_python_files(directory):
    """Lazily yield each ``*.py`` path under *directory*, recursively.

    A generator keeps memory flat on huge trees: paths are produced one
    at a time rather than collected into a list.
    """
    yield from Path(directory).rglob('*.py')
# Process files one at a time
for py_file in iter_python_files('/large/project'):
# Process file without loading all paths into memory
print(py_file)
Use os.scandir() for better performance with large directories:
import os
with os.scandir('/path/to/directory') as entries:
for entry in entries:
if entry.is_file():
print(f"{entry.name}: {entry.stat().st_size} bytes")
The os.scandir() function returns DirEntry objects that cache file metadata, avoiding redundant system calls when checking file types or retrieving statistics.
Error Handling
Always implement proper error handling for directory operations:
from pathlib import Path
def safe_list_files(directory):
    """Return the regular files in *directory*, or ``[]`` on any filesystem error.

    Prints a diagnostic message instead of raising: missing paths,
    non-directories, and permission problems all result in an empty list.

    Args:
        directory: Path (string or path-like) of the directory to list.

    Returns:
        List of ``Path`` objects for the regular files, or ``[]`` on error.
    """
    path = Path(directory)
    try:
        if not path.exists():
            raise FileNotFoundError(f"Directory not found: {directory}")
        if not path.is_dir():
            raise NotADirectoryError(f"Not a directory: {directory}")
        return [f for f in path.iterdir() if f.is_file()]
    except PermissionError:
        print(f"Permission denied: {directory}")
        return []
    # OSError (not a bare Exception) still covers the two errors raised
    # above plus genuine filesystem failures, without hiding programming
    # bugs such as TypeError or AttributeError.
    except OSError as e:
        print(f"Error accessing directory: {e}")
        return []
files = safe_list_files('/path/to/directory')
This comprehensive approach to listing files in Python provides the tools needed for everything from simple directory listings to complex file management utilities. Choose the method that best fits your use case: os.listdir() for simplicity, pathlib for modern Python code, or os.walk() for recursive operations.