Python - Copy/Move/Rename Files (shutil)
The `shutil` module offers three primary copy functions, each with different metadata preservation guarantees.
Key Insights
- Python’s `shutil` module provides high-level file operations that handle edge cases and platform differences automatically, making it superior to manual `os` module operations for copying, moving, and renaming files
- Understanding the difference between `copy()`, `copy2()`, and `copyfile()` is critical—each preserves different metadata levels and has distinct performance characteristics
- File operations require proper error handling and permission checks; using context managers and atomic operations prevents data corruption and partial transfers
Basic File Copying Operations
The shutil module offers three primary copy functions, each with different metadata preservation guarantees.
import shutil
import os
from pathlib import Path
# copy() - Copies file content and permission bits; timestamps are not preserved.
# The destination may be a file name or an existing directory.
shutil.copy('source.txt', 'destination.txt')
# copy2() - Like copy(), but also tries to preserve file metadata (e.g. timestamps)
shutil.copy2('source.txt', 'destination_with_metadata.txt')
# copyfile() - Copies only file content (fastest, no permission bits or other metadata);
# the destination must be a complete file name, not a directory.
shutil.copyfile('source.txt', 'destination_content_only.txt')
The practical difference matters for backups and archival systems. Use copy2() when you need to preserve modification times for incremental backup systems:
import time
from datetime import datetime
def backup_if_modified(source, backup_dir, last_backup_time):
    """Back up ``source`` into ``backup_dir`` if it changed since the last backup.

    The copy is made with ``shutil.copy2`` so the backup keeps the source's
    modification time.

    Args:
        source: Path of the file to back up.
        backup_dir: Directory that receives the copy. It is created
            (including missing parents) when a backup is actually needed.
        last_backup_time: POSIX timestamp (seconds since the epoch) of the
            previous backup.

    Returns:
        True if the file was copied, False if its modification time is not
        later than ``last_backup_time``.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
    """
    source_path = Path(source)
    if not source_path.exists():
        raise FileNotFoundError(f"{source} does not exist")

    source_mtime = source_path.stat().st_mtime
    if source_mtime <= last_backup_time:
        return False

    backup_root = Path(backup_dir)
    # A fresh backup location would otherwise make copy2() fail on the first run.
    backup_root.mkdir(parents=True, exist_ok=True)
    shutil.copy2(source, backup_root / source_path.name)
    print(f"Backed up {source} (modified: {datetime.fromtimestamp(source_mtime)})")
    return True
# Usage: back up config.json only if it was modified during the last day
last_backup = time.time() - 86400 # 24 hours ago (86400 seconds)
backup_if_modified('config.json', '/backups', last_backup)
Copying Directory Trees
For directory operations, copytree() recursively copies entire directory structures with filtering capabilities.
import shutil
from pathlib import Path
# Basic directory copy: by default the destination must not exist yet,
# otherwise copytree() raises FileExistsError
shutil.copytree('source_dir', 'destination_dir')
# Copy while skipping every entry whose name matches one of the glob-style patterns
shutil.copytree(
'project',
'project_backup',
ignore=shutil.ignore_patterns('*.pyc', '__pycache__', '.git', '*.log')
)
# Custom ignore function for complex filtering
def ignore_large_files(directory, files, max_bytes=10 * 1024 * 1024):
    """Return the names of regular files in ``directory`` larger than ``max_bytes``.

    The ``(directory, files)`` signature matches what ``shutil.copytree``
    passes to its ``ignore`` callback, so the function can be used there
    directly. Use ``functools.partial`` to supply a different ``max_bytes``.

    Args:
        directory: Directory currently being visited.
        files: Names of the entries inside ``directory``.
        max_bytes: Size limit in bytes; defaults to 10 MB. Files of exactly
            this size are kept.

    Returns:
        A list with the names to skip, in the same order as ``files``.
        Directories and names that do not exist are never listed.
    """
    base = Path(directory)
    return [
        name
        for name in files
        if (base / name).is_file() and (base / name).stat().st_size > max_bytes
    ]
# copytree() invokes the ignore callback for each directory it visits and
# skips the names the callback returns
shutil.copytree('data', 'data_backup', ignore=ignore_large_files)
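Python 3.8 added the dirs_exist_ok parameter. Passing dirs_exist_ok=True lets copytree() copy into a destination directory that already exists (overwriting files of the same name) instead of raising FileExistsError: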
# Merge into an existing destination instead of raising FileExistsError;
# files already present under the same name are overwritten
shutil.copytree(
'source_dir',
'existing_destination',
dirs_exist_ok=True
)
# Practical example: Incremental directory sync
def sync_directories(source, destination, overwrite_newer=False):
    """Copy the tree at ``source`` into ``destination``, merging with existing content.

    Args:
        source: Directory to copy from.
        destination: Directory to copy into; it may already exist
            (requires Python 3.8+ for ``dirs_exist_ok``).
        overwrite_newer: When False (the default), a destination file whose
            modification time is later than the source file's is left
            untouched. When True, every source file overwrites its
            destination counterpart.
    """
    def copy_function(src, dst):
        """Per-file copy hook for copytree(); returns ``dst`` like copy2()."""
        if not overwrite_newer and Path(dst).exists():
            if Path(dst).stat().st_mtime > Path(src).stat().st_mtime:
                return dst  # Destination is newer: keep it
        return shutil.copy2(src, dst)

    shutil.copytree(
        source,
        destination,
        copy_function=copy_function,
        dirs_exist_ok=True,
    )
# Usage: sync 'source' into 'destination', skipping files that are newer in the destination
sync_directories('source', 'destination', overwrite_newer=False)
Moving and Renaming Files
shutil.move() handles both moving and renaming operations intelligently, working across different filesystems.
import shutil
from pathlib import Path
# Simple rename (same directory)
shutil.move('old_name.txt', 'new_name.txt')
# Move to different directory
shutil.move('file.txt', '/new/location/file.txt')
# Move into an existing directory: the file keeps its name (/archive/report.pdf).
# Note: move() does NOT rename automatically; it raises shutil.Error if an
# entry with that name already exists inside the target directory.
destination = Path('/archive')
destination.mkdir(exist_ok=True)
shutil.move('report.pdf', destination)
# Cross-filesystem move (copies then deletes source)
shutil.move('/mnt/drive1/large_file.dat', '/mnt/drive2/large_file.dat')
Here’s a robust file organization system using move():
import shutil
from pathlib import Path
from datetime import datetime
def organize_downloads(source_dir, organize_by='extension'):
    """Sort the files directly inside ``source_dir`` into subfolders.

    Only regular files at the top level are moved; existing subdirectories
    are left alone. If the target folder already holds a file with the same
    name, ``_1``, ``_2``, ... is appended to the stem until the name is free.

    Args:
        source_dir: Directory whose files are organized in place.
        organize_by: ``'extension'`` groups files by suffix without the dot
            (``'no_extension'`` when there is none); ``'date'`` groups them
            by modification month as ``YYYY-MM`` in local time.

    Raises:
        ValueError: If ``organize_by`` is not ``'extension'`` or ``'date'``.
            This is checked before anything is moved.
    """
    if organize_by not in ('extension', 'date'):
        raise ValueError("organize_by must be 'extension' or 'date'")

    source = Path(source_dir)
    # Snapshot the listing first: the loop creates folders and moves files
    # inside the very directory being listed.
    for item in list(source.iterdir()):
        if not item.is_file():
            continue

        if organize_by == 'extension':
            folder = item.suffix[1:] or 'no_extension'
        else:
            mtime = datetime.fromtimestamp(item.stat().st_mtime)
            folder = mtime.strftime('%Y-%m')

        dest_dir = source / folder
        dest_dir.mkdir(exist_ok=True)
        dest_path = dest_dir / item.name

        # Handle naming conflicts by numbering the stem.
        counter = 1
        while dest_path.exists():
            dest_path = dest_dir / f"{item.stem}_{counter}{item.suffix}"
            counter += 1

        shutil.move(str(item), str(dest_path))
        print(f"Moved {item.name} -> {dest_path}")
# Usage: sort everything in the Downloads folder into per-extension subfolders
organize_downloads('/home/user/Downloads', organize_by='extension')
Error Handling and Atomic Operations
File operations can fail due to permissions, disk space, or concurrent access. Implement proper error handling:
import shutil
import tempfile
from pathlib import Path
def safe_copy_with_verification(source, destination):
    """Copy ``source`` to ``destination`` via a temporary file, then verify it.

    The data is first copied to a temporary file in the destination's
    directory, size-checked against the source, and only then renamed over
    ``destination``. A failed copy therefore never leaves a partial
    ``destination`` behind; the temporary file is removed on failure.

    Args:
        source: Path of the file to copy.
        destination: Path of the resulting file; its parent directory must
            already exist.

    Returns:
        True when the copy succeeded.

    Raises:
        FileNotFoundError: If ``source`` does not exist.
        IOError: If free space is insufficient, or if copying, verification
            or the final rename fails. In the latter cases the original
            error is attached as ``__cause__``.
    """
    source_path = Path(source)
    dest_path = Path(destination)

    if not source_path.exists():
        raise FileNotFoundError(f"Source {source} not found")

    # Check available disk space
    source_size = source_path.stat().st_size
    dest_stat = shutil.disk_usage(dest_path.parent)
    if dest_stat.free < source_size * 1.1:  # 10% buffer
        raise IOError(f"Insufficient disk space: need {source_size}, have {dest_stat.free}")

    # The temporary file lives next to the destination so the final
    # replace() stays on one filesystem. It is closed before copy2() writes.
    with tempfile.NamedTemporaryFile(dir=dest_path.parent, delete=False) as tmp:
        temp_path = Path(tmp.name)

    try:
        shutil.copy2(source, temp_path)

        # Verify copy integrity (size only; cheap sanity check)
        if source_path.stat().st_size != temp_path.stat().st_size:
            raise IOError("Copy verification failed: size mismatch")

        # Swap the finished copy into place in a single rename step
        temp_path.replace(dest_path)
    except Exception as e:
        # Cleanup on failure; the temp file may already be gone
        temp_path.unlink(missing_ok=True)
        raise IOError(f"Copy failed: {e}") from e
    return True
# Usage with error handling. FileNotFoundError and PermissionError are
# subclasses of OSError (IOError is an alias of it), so one clause covers all.
try:
    safe_copy_with_verification('important.db', 'important.db.backup')
except OSError as e:
    print(f"Backup failed: {e}")
Permission and Ownership Handling
When copying files across systems or users, permission handling becomes critical:
import shutil
import os
import stat
from pathlib import Path
def copy_with_permissions(source, destination, preserve_owner=False):
    """Copy ``source`` to ``destination`` and explicitly apply its permissions.

    Args:
        source: Path of the file to copy.
        destination: Target file name, or an existing directory to copy into.
        preserve_owner: When True and the process runs as root, also copy
            the source's owner and group. On platforms without
            ``os.geteuid``/``os.chown`` and for non-root users the
            ownership step is skipped.
    """
    # copy2() returns the path actually written. When ``destination`` is a
    # directory that is the file inside it, so the permission changes must
    # target that file and not the directory.
    target = shutil.copy2(source, destination)
    source_stat = os.stat(source)

    # Copy permission bits only (drop the file-type bits of st_mode)
    os.chmod(target, stat.S_IMODE(source_stat.st_mode))

    # Copy ownership (requires root/sudo)
    can_chown = hasattr(os, "geteuid") and hasattr(os, "chown")
    if preserve_owner and can_chown and os.geteuid() == 0:
        try:
            os.chown(target, source_stat.st_uid, source_stat.st_gid)
        except PermissionError:
            print(f"Cannot preserve ownership for {destination}")
def make_executable_copy(source, destination):
    """Copy ``source`` to ``destination`` and mark the copy as executable.

    Execute permission is added for user, group and others on top of the
    permission bits carried over by ``shutil.copy2``.

    Args:
        source: Path of the file to copy.
        destination: Target file name, or an existing directory to copy into.
    """
    # copy2() returns the path actually written; for a directory destination
    # that is the file inside it, which is what needs the execute bits.
    target = shutil.copy2(source, destination)
    current_permissions = os.stat(target).st_mode
    os.chmod(
        target,
        current_permissions | stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH,
    )
# Example: install deploy.sh as an executable command
# (writing to /usr/local/bin typically requires elevated privileges)
make_executable_copy('deploy.sh', '/usr/local/bin/deploy')
Performance Optimization for Large Files
For large file operations, consider buffer sizes and progress tracking:
import shutil
from pathlib import Path
def copy_large_file_with_progress(source, destination, buffer_size=1024*1024):
    """Copy ``source`` to ``destination`` in chunks, printing progress.

    Progress is written to stdout on a single line (rewritten via ``\\r``)
    after every chunk, followed by a newline when the copy is done. File
    metadata is copied afterwards with ``shutil.copystat``.

    Args:
        source: Path of the file to copy.
        destination: Path of the file to write (overwritten if present).
        buffer_size: Number of bytes read per chunk; defaults to 1 MiB.
    """
    source_size = Path(source).stat().st_size
    copied = 0

    with open(source, 'rb') as src, open(destination, 'wb') as dst:
        while chunk := src.read(buffer_size):
            dst.write(chunk)
            copied += len(chunk)
            if source_size > 0:
                progress = (copied / source_size) * 100
                print(f"\rProgress: {progress:.1f}%", end='', flush=True)
            else:
                # Some files report size 0 yet still yield data; show a
                # byte count instead of dividing by zero.
                print(f"\rProgress: {copied} bytes", end='', flush=True)

    print()  # New line after progress
    shutil.copystat(source, destination)  # Copy metadata separately
# For simple cases no manual loop is needed: copyfile() does its own buffering
# and may use platform-specific fast-copy system calls (Python 3.8+)
shutil.copyfile('large_video.mp4', 'backup.mp4') # Optimized internally
The shutil module handles platform-specific optimizations automatically, including zero-copy operations on supported systems. For most applications, the built-in functions provide optimal performance without manual buffer management.