Python - Copy/Move/Rename Files (shutil)

The `shutil` module offers three primary copy functions, each with different metadata preservation guarantees.

Key Insights

  • Python’s shutil module provides high-level file operations that handle edge cases and platform differences automatically, making it superior to manual os module operations for copying, moving, and renaming files
  • Understanding the difference between copy(), copy2(), and copyfile() is critical—each preserves different metadata levels and has distinct performance characteristics
  • File operations require proper error handling and permission checks; using context managers and atomic operations prevents data corruption and partial transfers

Basic File Copying Operations

The shutil module offers three primary copy functions, each with different metadata preservation guarantees.

import shutil
import os
from pathlib import Path

# copy() - Copies file content and permission bits.
# The destination may be a file name or an existing directory.
shutil.copy('source.txt', 'destination.txt')

# copy2() - Like copy(), but also attempts to preserve file metadata
# such as the access and modification timestamps.
shutil.copy2('source.txt', 'destination_with_metadata.txt')

# copyfile() - Copies only file content (fastest, no metadata).
# The destination must be a complete file name, not a directory.
shutil.copyfile('source.txt', 'destination_content_only.txt')

The practical difference matters for backups and archival systems. Use copy2() when you need to preserve modification times for incremental backup systems:

import time
from datetime import datetime

def backup_if_modified(source, backup_dir, last_backup_time):
    """Copy ``source`` into ``backup_dir`` if it changed since the last backup.

    Args:
        source: Path of the file to back up.
        backup_dir: Directory that receives the copy. It is created
            (including missing parents) when a backup is actually made.
        last_backup_time: POSIX timestamp (seconds since the epoch) of the
            previous backup.

    Returns:
        True if the file was copied, False if it has not been modified
        after ``last_backup_time``.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
    """
    source_path = Path(source)

    # Stat once and let the failure report a missing file, instead of an
    # exists() check followed by a second, race-prone stat().
    try:
        source_mtime = source_path.stat().st_mtime
    except (FileNotFoundError, NotADirectoryError):
        raise FileNotFoundError(f"{source} does not exist") from None

    if source_mtime <= last_backup_time:
        return False

    # A missing backup directory would otherwise make copy2() fail.
    target_dir = Path(backup_dir)
    target_dir.mkdir(parents=True, exist_ok=True)

    # copy2() preserves the modification time on the copy.
    shutil.copy2(source, target_dir / source_path.name)
    print(f"Backed up {source} (modified: {datetime.fromtimestamp(source_mtime)})")
    return True

# Usage: back up config.json only if it was modified within the last
# 24 hours (86400 seconds).
last_backup = time.time() - 86400  # 24 hours ago
backup_if_modified('config.json', '/backups', last_backup)

Copying Directory Trees

For directory operations, copytree() recursively copies entire directory structures with filtering capabilities.

import shutil
from pathlib import Path

# Basic directory copy.
# With default arguments, copytree() raises FileExistsError if
# 'destination_dir' already exists.
shutil.copytree('source_dir', 'destination_dir')

# Copy with ignore patterns.
# ignore_patterns() takes glob-style patterns; any file or directory whose
# name matches one of them is skipped.
shutil.copytree(
    'project',
    'project_backup',
    ignore=shutil.ignore_patterns('*.pyc', '__pycache__', '.git', '*.log')
)

# Custom ignore function for complex filtering
def ignore_large_files(directory, files):
    """Return the names in ``files`` that are regular files larger than 10MB.

    The signature matches the ``ignore`` callable of shutil.copytree():
    ``directory`` is the directory being visited and ``files`` the names
    inside it. Names that are not regular files are never reported.
    """
    size_limit = 10 * 1024 * 1024
    base = Path(directory)

    def _exceeds_limit(name):
        candidate = base / name
        return candidate.is_file() and candidate.stat().st_size > size_limit

    return [name for name in files if _exceeds_limit(name)]

# copytree() calls the ignore function for every directory it visits and
# skips the names that function returns.
shutil.copytree('data', 'data_backup', ignore=ignore_large_files)

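By default, copytree() raises FileExistsError if the destination directory already exists. Python 3.8 added the dirs_exist_ok parameter: with dirs_exist_ok=True the copy proceeds into the existing directory, overwriting any files that share a name with a source file: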

# Merge into an existing directory instead of failing with FileExistsError.
# Files already present in the destination under the same name are overwritten.
shutil.copytree(
    'source_dir',
    'existing_destination',
    dirs_exist_ok=True
)

# Practical example: Incremental directory sync
def sync_directories(source, destination, overwrite_newer=False):
    """Copy the tree at ``source`` into ``destination``, merging with existing content.

    Unless ``overwrite_newer`` is true, a destination file whose
    modification time is later than the source file's is left untouched.
    All other files are copied with shutil.copy2().
    """
    def _copy_unless_destination_newer(src, dst):
        target = Path(dst)
        keep_existing = (
            not overwrite_newer
            and target.exists()
            and Path(src).stat().st_mtime < target.stat().st_mtime
        )
        if not keep_existing:
            shutil.copy2(src, dst)

    shutil.copytree(
        source,
        destination,
        dirs_exist_ok=True,
        copy_function=_copy_unless_destination_newer,
    )

# Copy 'source' into 'destination'; files that are newer in the destination
# are left untouched because overwrite_newer is False.
sync_directories('source', 'destination', overwrite_newer=False)

Moving and Renaming Files

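shutil.move() handles both moving and renaming. When source and destination are on the same filesystem it simply renames the file; when they are on different filesystems it copies the file to the destination and then deletes the source.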

import shutil
from pathlib import Path

# Simple rename (same directory)
shutil.move('old_name.txt', 'new_name.txt')

# Move to different directory
shutil.move('file.txt', '/new/location/file.txt')

# Move into an existing directory: the file keeps its name, so it ends up at
# /archive/report.pdf. There is no automatic rename - shutil.move() raises
# shutil.Error if /archive/report.pdf already exists.
destination = Path('/archive')
destination.mkdir(exist_ok=True)
shutil.move('report.pdf', destination)

# Cross-filesystem move (copies then deletes source)
shutil.move('/mnt/drive1/large_file.dat', '/mnt/drive2/large_file.dat')

Here’s a robust file organization system using move():

import shutil
from pathlib import Path
from datetime import datetime

def organize_downloads(source_dir, organize_by='extension'):
    """Sort the files directly inside ``source_dir`` into subdirectories.

    Args:
        source_dir: Directory whose files are organized. Subdirectories are
            left where they are.
        organize_by: ``'extension'`` groups files by suffix (files without
            one go to ``no_extension``); ``'date'`` groups them by
            modification month in ``YYYY-MM`` folders.

    Raises:
        ValueError: If ``organize_by`` is neither ``'extension'`` nor
            ``'date'``. This is checked before any file is touched.
    """
    # Validate up front so a bad argument fails even for an empty directory.
    if organize_by not in ('extension', 'date'):
        raise ValueError("organize_by must be 'extension' or 'date'")

    source = Path(source_dir)

    # Snapshot the files first: the loop creates folders and moves entries
    # inside this same directory, which must not happen mid-iteration.
    files = [entry for entry in source.iterdir() if entry.is_file()]

    for item in files:
        if organize_by == 'extension':
            folder_name = item.suffix[1:] or 'no_extension'
        else:
            modified = datetime.fromtimestamp(item.stat().st_mtime)
            folder_name = modified.strftime('%Y-%m')

        dest_dir = source / folder_name
        dest_dir.mkdir(exist_ok=True)
        dest_path = dest_dir / item.name

        # Handle naming conflicts by appending _1, _2, ... to the stem.
        counter = 1
        while dest_path.exists():
            dest_path = dest_dir / f"{item.stem}_{counter}{item.suffix}"
            counter += 1

        shutil.move(str(item), str(dest_path))
        print(f"Moved {item.name} -> {dest_path}")

# Sort the files in the Downloads folder into one subfolder per file extension.
organize_downloads('/home/user/Downloads', organize_by='extension')

Error Handling and Atomic Operations

File operations can fail due to permissions, disk space, or concurrent access. Implement proper error handling:

import shutil
import tempfile
from pathlib import Path

def safe_copy_with_verification(source, destination):
    """Copy ``source`` to ``destination`` via a temporary file and a final rename.

    The data is first copied to a temporary file in the destination's
    directory, its size is compared with the source, and only then is it
    renamed over ``destination``. On any failure the temporary file is
    removed and ``destination`` is left as it was.

    Args:
        source: Path of the file to copy.
        destination: Path of the resulting file.

    Returns:
        True when the copy completed.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
        IOError: If there is not enough free space, or if copying,
            verification or the final rename fails. In the latter case the
            original exception is attached as ``__cause__``.
    """
    source_path = Path(source)
    dest_path = Path(destination)

    if not source_path.exists():
        raise FileNotFoundError(f"Source {source} not found")

    # Check available disk space
    source_size = source_path.stat().st_size
    dest_stat = shutil.disk_usage(dest_path.parent)

    if dest_stat.free < source_size * 1.1:  # 10% buffer
        raise IOError(f"Insufficient disk space: need {source_size}, have {dest_stat.free}")

    # The temporary file lives next to the destination so the final
    # replace() is a rename within one directory rather than a copy.
    with tempfile.NamedTemporaryFile(dir=dest_path.parent, delete=False) as tmp:
        temp_path = Path(tmp.name)

    try:
        shutil.copy2(source, temp_path)

        # Verify copy integrity (size only; cheap sanity check)
        if source_path.stat().st_size != temp_path.stat().st_size:
            raise IOError("Copy verification failed: size mismatch")

        temp_path.replace(dest_path)
    except Exception as e:
        # Cleanup on failure; missing_ok avoids an exists()/unlink() race.
        temp_path.unlink(missing_ok=True)
        # Chain explicitly so the root cause stays attached to the error.
        raise IOError(f"Copy failed: {e}") from e

    return True

# Usage with error handling.
# IOError is an alias of OSError, and FileNotFoundError and PermissionError
# are subclasses of it, so catching OSError covers all three.
try:
    safe_copy_with_verification('important.db', 'important.db.backup')
except OSError as backup_error:
    print(f"Backup failed: {backup_error}")

Permission and Ownership Handling

When copying files across systems or users, permission handling becomes critical:

import shutil
import os
import stat
from pathlib import Path

def copy_with_permissions(source, destination, preserve_owner=False):
    """Copy a file and explicitly apply the source's permission bits.

    Args:
        source: Path of the file to copy.
        destination: Target file name, or an existing directory into which
            the file is copied under its own name.
        preserve_owner: If true and the process runs as root, also copy the
            owning user and group. Ignored on platforms without
            ``os.geteuid``.
    """
    # copy2() returns the path it actually wrote. When ``destination`` is a
    # directory that is the file inside it, which is what the chmod/chown
    # below must target rather than the directory itself.
    copied_path = shutil.copy2(source, destination)

    source_stat = os.stat(source)

    # Copy permission bits (S_IMODE strips the file-type bits from st_mode)
    os.chmod(copied_path, stat.S_IMODE(source_stat.st_mode))

    # Copy ownership (requires root/sudo); os.geteuid does not exist on Windows
    geteuid = getattr(os, 'geteuid', None)
    if preserve_owner and geteuid is not None and geteuid() == 0:
        try:
            os.chown(copied_path, source_stat.st_uid, source_stat.st_gid)
        except PermissionError:
            print(f"Cannot preserve ownership for {destination}")

def make_executable_copy(source, destination):
    """Copy a file and add execute permission for user, group and others.

    Args:
        source: Path of the file to copy.
        destination: Target file name, or an existing directory into which
            the file is copied under its own name.
    """
    # copy2() returns the path it actually wrote; when ``destination`` is a
    # directory the execute bits must go on the file inside it.
    copied_path = shutil.copy2(source, destination)

    current_permissions = os.stat(copied_path).st_mode
    os.chmod(
        copied_path,
        current_permissions | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
    )

# Example: Deploy script with execution permissions
# (writing to /usr/local/bin usually requires elevated privileges)
make_executable_copy('deploy.sh', '/usr/local/bin/deploy')

Performance Optimization for Large Files

For large file operations, consider buffer sizes and progress tracking:

import shutil
from pathlib import Path

def copy_large_file_with_progress(source, destination, buffer_size=1024*1024):
    """Copy ``source`` to ``destination`` chunk by chunk, printing progress.

    The file is read in blocks of ``buffer_size`` bytes. After each block
    the percentage copied so far is rewritten on the current output line.
    Once the data is written, the source's metadata is applied to the
    destination with shutil.copystat().
    """
    total_bytes = Path(source).stat().st_size
    bytes_done = 0

    with open(source, 'rb') as reader, open(destination, 'wb') as writer:
        while block := reader.read(buffer_size):
            writer.write(block)
            bytes_done += len(block)
            percent = (bytes_done / total_bytes) * 100
            print(f"\rProgress: {percent:.1f}%", end='', flush=True)

    # Terminate the progress line, then carry over timestamps and mode.
    print()
    shutil.copystat(source, destination)

# For simple cases, shutil handles buffering efficiently.
# copyfile() copies the content only: no progress output and no metadata.
shutil.copyfile('large_video.mp4', 'backup.mp4')  # Optimized internally

The shutil module handles platform-specific optimizations automatically, including zero-copy operations on supported systems. For most applications, the built-in functions provide optimal performance without manual buffer management.

Liked this? There's more.

Every week: one practical technique, explained simply, with code you can use immediately.