Python - List Files in Directory

The `os.listdir()` function returns a list of all entries in a directory as strings. This is the most straightforward approach for simple directory listings.

Key Insights

  • Python provides multiple methods to list directory contents: os.listdir() for simple listings, pathlib.Path for a modern object-oriented approach, and os.walk() for recursive traversal
  • Use glob patterns and filtering techniques to target specific file types, exclude hidden files, or retrieve files matching complex patterns
  • Combine directory listing with file metadata retrieval using os.stat() or Path.stat() to build powerful file management utilities

Basic Directory Listing with os.listdir()

The os.listdir() function returns a list of all entries in a directory as strings. This is the most straightforward approach for simple directory listings.

import os

# Names of everything in the current working directory, as one list
items = os.listdir('.')
print(items)

# Names inside a specific directory, printed one per line
items = os.listdir('/path/to/directory')
if items:
    print('\n'.join(items))

The function returns both files and directories without distinction. To filter only files:

import os

directory = '/path/to/directory'

# os.listdir() yields bare names, so join each one with the directory
# before asking whether it is a regular file.
files = []
for name in os.listdir(directory):
    if os.path.isfile(os.path.join(directory, name)):
        files.append(name)
print(files)

Note that os.listdir() returns relative names, not full paths. Always use os.path.join() when performing operations on the results.

Modern Approach with pathlib

The pathlib module provides an object-oriented interface for filesystem operations. It’s the recommended approach for Python 3.4+.

from pathlib import Path

# Directory whose direct children are listed below
directory = Path('/path/to/directory')

# Every entry, files and subdirectories alike
items = [*directory.iterdir()]

# Regular files only
files = list(filter(Path.is_file, directory.iterdir()))

# Subdirectories only
dirs = list(filter(Path.is_dir, directory.iterdir()))

# Plain string names of the regular files
file_names = [entry.name for entry in filter(Path.is_file, directory.iterdir())]

The Path object provides convenient methods and properties:

from pathlib import Path

directory = Path('/path/to/directory')

# Label/getter pairs, in the order they are printed for each entry
fields = (
    ("Name", lambda entry: entry.name),
    ("Full path", Path.absolute),
    ("Is file", Path.is_file),
    ("Is directory", Path.is_dir),
    ("Suffix", lambda entry: entry.suffix),
)

for item in directory.iterdir():
    for label, getter in fields:
        print(f"{label}: {getter(item)}")
    # Separator between entries
    print("---")

Pattern Matching with glob

The glob module enables pattern-based file selection using Unix-style wildcards.

import glob

# Python files directly inside the directory
python_files = glob.glob('/path/to/directory/*.py')

# Python files at any depth ('**' only recurses when recursive=True)
all_python_files = glob.glob('/path/to/directory/**/*.py', recursive=True)

# Several extensions: run one glob per pattern and flatten the results
patterns = ['*.py', '*.txt', '*.json']
files = [
    match
    for pattern in patterns
    for match in glob.glob(f'/path/to/directory/{pattern}')
]

Using pathlib with glob patterns:

from pathlib import Path

directory = Path('/path/to/directory')

# Python files directly inside the directory
python_files = [*directory.glob('*.py')]

# Python files at any depth below the directory
all_python_files = [*directory.rglob('*.py')]

# A '**/' prefix makes glob() search every subdirectory as well
log_files = [*directory.glob('**/*.log')]

Recursive Directory Traversal with os.walk()

For walking entire directory trees, os.walk() provides a powerful generator-based approach.

import os

# os.walk() yields one (directory, subdirectory names, file names) triple
# for every directory in the tree.
for current, subdir_names, file_names in os.walk('/path/to/directory'):
    print(
        f"Current directory: {current}",
        f"Subdirectories: {subdir_names}",
        f"Files: {file_names}",
        "---",
        sep="\n",
    )

Practical example - find all Python files in a project:

import os

def find_python_files(directory):
    """Return the paths of all files ending in ``.py`` beneath *directory*.

    The tree is traversed with ``os.walk``; each result is the walked
    directory joined with the file name, in traversal order.
    """
    return [
        os.path.join(parent, name)
        for parent, _subdirs, names in os.walk(directory)
        for name in names
        if name.endswith('.py')
    ]

# Collect the matches, then print one path per line
files = find_python_files('/path/to/project')
if files:
    print('\n'.join(files))

Control traversal behavior by modifying the dirs list in-place:

import os

for parent, subdirs, names in os.walk('/path/to/directory'):
    # Slice-assign so the very list os.walk() handed out is pruned:
    # hidden directories and __pycache__ are then never descended into.
    subdirs[:] = [
        sub for sub in subdirs
        if not (sub.startswith('.') or sub == '__pycache__')
    ]

    for name in names:
        print(os.path.join(parent, name))

Filtering and Sorting Files

Implement custom filtering logic based on file attributes:

from pathlib import Path
import time

directory = Path('/path/to/directory')

# Regular files whose modification time is less than 24 hours old
now = time.time()
recent_files = []
for entry in directory.iterdir():
    if entry.is_file() and (now - entry.stat().st_mtime) < 24 * 60 * 60:
        recent_files.append(entry)

# Regular files bigger than 1 MiB (1024 * 1024 bytes)
large_files = []
for entry in directory.iterdir():
    if entry.is_file() and entry.stat().st_size > 1024 * 1024:
        large_files.append(entry)

# All entries, most recently modified first
sorted_files = list(directory.iterdir())
sorted_files.sort(key=lambda entry: entry.stat().st_mtime, reverse=True)

Exclude hidden files (dotfiles such as .gitignore or .DS_Store, whose names start with a dot):

from pathlib import Path

directory = Path('/path/to/directory')

# Keep regular files whose name does not begin with a dot
visible_files = []
for entry in directory.iterdir():
    if not entry.is_file():
        continue
    if entry.name.startswith('.'):
        continue
    visible_files.append(entry)

Retrieving File Metadata

Combine directory listing with metadata retrieval for comprehensive file information:

from pathlib import Path
from datetime import datetime
import sys

directory = Path('/path/to/directory')

# Print name, size and timestamps for every regular file in the directory.
for file in directory.iterdir():
    if file.is_file():
        stats = file.stat()
        print(f"File: {file.name}")
        print(f"Size: {stats.st_size} bytes")
        print(f"Modified: {datetime.fromtimestamp(stats.st_mtime)}")

        # st_ctime is NOT the creation time on Unix: there it is the time of
        # the last metadata change. Prefer st_birthtime where the platform
        # provides it; treat st_ctime as creation time only on Windows.
        created = getattr(stats, 'st_birthtime', None)
        if created is None and sys.platform == 'win32':
            created = stats.st_ctime

        if created is not None:
            print(f"Created: {datetime.fromtimestamp(created)}")
        else:
            # No creation time available: label st_ctime for what it is.
            print(f"Metadata changed: {datetime.fromtimestamp(stats.st_ctime)}")
        print("---")

Create a file inventory with detailed information:

from pathlib import Path
from datetime import datetime

def get_file_inventory(directory):
    """Return one metadata dict per regular file found beneath *directory*.

    The tree is searched recursively with ``Path.rglob('*')``. Each dict
    holds the keys ``path``, ``name``, ``size``, ``extension``,
    ``modified`` (a ``datetime`` built from ``st_mtime``) and
    ``is_hidden`` (whether the name starts with a dot).
    """
    def describe(entry):
        # A single stat() call supplies both the size and the timestamp.
        info = entry.stat()
        return {
            'path': str(entry),
            'name': entry.name,
            'size': info.st_size,
            'extension': entry.suffix,
            'modified': datetime.fromtimestamp(info.st_mtime),
            'is_hidden': entry.name.startswith('.'),
        }

    return [
        describe(entry)
        for entry in Path(directory).rglob('*')
        if entry.is_file()
    ]

# Usage: build the inventory, then report each file's name and size
inventory = get_file_inventory('/path/to/directory')
for record in inventory:
    name, size = record['name'], record['size']
    print(f"{name}: {size} bytes")

Performance Considerations

For large directories, use generators instead of lists to reduce memory consumption:

from pathlib import Path

def iter_python_files(directory):
    """Lazily yield every path matching ``*.py`` beneath *directory*.

    Paths are produced one at a time from ``Path.rglob``, so no list of
    results is built.
    """
    yield from Path(directory).rglob('*.py')

# Consume the generator lazily: each path is handled as soon as it is
# produced, without collecting the results into a list first.
python_sources = iter_python_files('/large/project')
for source_path in python_sources:
    print(source_path)

Use os.scandir() for better performance with large directories:

import os

# The with-block closes the scandir iterator when the loop finishes.
with os.scandir('/path/to/directory') as entries:
    for entry in entries:
        if not entry.is_file():
            continue
        size = entry.stat().st_size
        print(f"{entry.name}: {size} bytes")

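The os.scandir() function returns an iterator of DirEntry objects that cache the file-type and stat information they obtain. is_file() and is_dir() usually need no extra system call (except for symbolic links), and repeated stat() calls on the same entry reuse the cached result. On Unix the first stat() call on an entry still costs one system call; on Windows it is usually free as well.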

Error Handling

Always implement proper error handling for directory operations:

from pathlib import Path

def safe_list_files(directory):
    """Return the regular files directly inside *directory*, never raising.

    Returns a list of ``Path`` objects. If the path does not exist, is
    not a directory, cannot be read, or any other error occurs, a
    message is printed and an empty list is returned.
    """
    try:
        target = Path(directory)
        if not target.exists():
            raise FileNotFoundError(f"Directory not found: {directory}")
        if not target.is_dir():
            raise NotADirectoryError(f"Not a directory: {directory}")

        # Materialise inside the try so errors raised while iterating
        # are handled below as well.
        return list(filter(Path.is_file, target.iterdir()))
    except PermissionError:
        print(f"Permission denied: {directory}")
    except Exception as e:
        # Also receives the two errors raised above.
        print(f"Error accessing directory: {e}")
    return []

# An empty list comes back if the directory is missing or unreadable
files = safe_list_files(directory='/path/to/directory')

This comprehensive approach to listing files in Python provides the tools needed for everything from simple directory listings to complex file management utilities. Choose the method that best fits your use case: os.listdir() for simplicity, pathlib for modern Python code, or os.walk() for recursive operations.

Liked this? There's more.

Every week: one practical technique, explained simply, with code you can use immediately.