Pandas - Read Multiple Sheets from Excel
• Use `pd.read_excel()` with the `sheet_name` parameter to read single, multiple, or all sheets from an Excel file into DataFrames or a dictionary of DataFrames
Key Insights
• Use pd.read_excel() with the sheet_name parameter to read single, multiple, or all sheets from an Excel file into DataFrames or a dictionary of DataFrames
• Reading all sheets at once with sheet_name=None returns an ordered dictionary where keys are sheet names and values are DataFrames, enabling efficient batch processing
• Combine pd.concat() with multi-sheet reading to merge data from multiple sheets into a single DataFrame when sheets share the same structure
Reading a Single Sheet
The default behavior of pd.read_excel() reads only the first sheet. You can specify a different sheet by name or index position (zero-based).
import pandas as pd
# Read the first sheet (default)
df = pd.read_excel('sales_data.xlsx')
# Read a specific sheet by name
df_q1 = pd.read_excel('sales_data.xlsx', sheet_name='Q1_Sales')
# Read a specific sheet by index (0-based)
df_second = pd.read_excel('sales_data.xlsx', sheet_name=1)
The sheet_name parameter accepts strings for sheet names or integers for positional indexing. This approach works well when you know exactly which sheet contains your target data.
Reading Multiple Specific Sheets
Pass a list of sheet names or indices to read multiple sheets simultaneously. This returns a dictionary where keys are sheet names and values are DataFrames.
import pandas as pd
# Read multiple sheets by name
sheets_dict = pd.read_excel(
'sales_data.xlsx',
sheet_name=['Q1_Sales', 'Q2_Sales', 'Q3_Sales']
)
# Access individual DataFrames
df_q1 = sheets_dict['Q1_Sales']
df_q2 = sheets_dict['Q2_Sales']
# Read multiple sheets by index
sheets_dict = pd.read_excel(
'sales_data.xlsx',
sheet_name=[0, 1, 2]
)
# Keys will be the actual sheet names
for sheet_name, df in sheets_dict.items():
print(f"{sheet_name}: {len(df)} rows")
This approach optimizes file I/O by opening the Excel file once and extracting multiple sheets in a single operation.
Reading All Sheets
Set sheet_name=None to read every sheet in the workbook. This returns an ordered dictionary containing all sheets.
import pandas as pd
# Read all sheets
all_sheets = pd.read_excel('sales_data.xlsx', sheet_name=None)
# Iterate through all sheets
for sheet_name, df in all_sheets.items():
print(f"\nSheet: {sheet_name}")
print(f"Shape: {df.shape}")
print(df.head())
# Get list of all sheet names
sheet_names = list(all_sheets.keys())
print(f"Available sheets: {sheet_names}")
The ordered dictionary preserves the sheet order from the Excel file, making it predictable when processing sheets sequentially.
Combining Multiple Sheets into One DataFrame
When sheets share identical column structures, concatenate them into a single DataFrame for unified analysis.
import pandas as pd
# Read all quarterly sheets
all_sheets = pd.read_excel('sales_data.xlsx', sheet_name=None)
# Combine all sheets vertically
combined_df = pd.concat(all_sheets.values(), ignore_index=True)
# Add source sheet identifier
frames = []
for sheet_name, df in all_sheets.items():
df['source_sheet'] = sheet_name
frames.append(df)
combined_with_source = pd.concat(frames, ignore_index=True)
print(combined_with_source.head())
The ignore_index=True parameter creates a new sequential index instead of preserving original indices from individual sheets.
Filtering Sheets During Read
Apply selection logic to read only sheets matching specific criteria.
import pandas as pd
# Read all sheets first
all_sheets = pd.read_excel('sales_data.xlsx', sheet_name=None)
# Filter sheets by name pattern
quarterly_sheets = {
name: df for name, df in all_sheets.items()
if name.startswith('Q') and 'Sales' in name
}
# Process only filtered sheets
for sheet_name, df in quarterly_sheets.items():
print(f"Processing {sheet_name}: {len(df)} records")
# Alternative: Use list comprehension to get sheet names first
sheet_names = list(all_sheets.keys())
sales_sheets = [name for name in sheet_names if 'Sales' in name]
# Read only filtered sheets
filtered_data = pd.read_excel('sales_data.xlsx', sheet_name=sales_sheets)
This pattern prevents loading unnecessary data into memory when working with large workbooks containing many sheets.
Handling Different Sheet Structures
Real-world Excel files often contain sheets with varying structures. Handle these cases with conditional logic.
import pandas as pd
all_sheets = pd.read_excel('mixed_data.xlsx', sheet_name=None)
data_sheets = {}
metadata_sheets = {}
for sheet_name, df in all_sheets.items():
# Classify sheets by structure
if 'transaction_id' in df.columns:
data_sheets[sheet_name] = df
elif 'metadata' in sheet_name.lower():
metadata_sheets[sheet_name] = df
# Process data sheets
if data_sheets:
combined_data = pd.concat(data_sheets.values(), ignore_index=True)
print(f"Combined data shape: {combined_data.shape}")
# Process metadata separately
for name, df in metadata_sheets.items():
print(f"\nMetadata from {name}:")
print(df)
This approach segregates sheets by structure, preventing errors when concatenating incompatible DataFrames.
Optimizing Performance with usecols and dtype
When reading multiple sheets from large files, specify columns and data types to reduce memory usage and improve speed.
import pandas as pd
# Define columns to read
columns_to_read = ['date', 'product_id', 'quantity', 'revenue']
# Define data types
dtype_spec = {
'product_id': 'category',
'quantity': 'int32',
'revenue': 'float32'
}
# Read multiple sheets with optimizations
sheets = pd.read_excel(
'large_sales_data.xlsx',
sheet_name=['Q1_Sales', 'Q2_Sales', 'Q3_Sales', 'Q4_Sales'],
usecols=columns_to_read,
dtype=dtype_spec,
parse_dates=['date']
)
# Combine optimized sheets
combined = pd.concat(sheets.values(), ignore_index=True)
print(f"Memory usage: {combined.memory_usage(deep=True).sum() / 1024**2:.2f} MB")
Specifying usecols skips reading unnecessary columns, while explicit dtype definitions prevent pandas from inferring types, which can be memory-intensive.
Using ExcelFile for Repeated Operations
When performing multiple read operations on the same file, use ExcelFile to avoid repeatedly opening the file.
import pandas as pd
# Open file once
with pd.ExcelFile('sales_data.xlsx') as xls:
# Get all sheet names
print(f"Available sheets: {xls.sheet_names}")
# Read specific sheets
df_q1 = pd.read_excel(xls, sheet_name='Q1_Sales')
df_q2 = pd.read_excel(xls, sheet_name='Q2_Sales')
# Read with different parameters for different sheets
df_summary = pd.read_excel(xls, sheet_name='Summary', header=2)
# Read all remaining sheets
all_sheets = {
name: pd.read_excel(xls, sheet_name=name)
for name in xls.sheet_names
if name not in ['Summary']
}
This context manager approach ensures the file is properly closed after operations complete, preventing file lock issues.
Practical Example: Quarterly Sales Analysis
Here’s a complete example combining multiple techniques to analyze quarterly sales data.
import pandas as pd
from datetime import datetime
def load_quarterly_sales(filepath):
"""Load and combine quarterly sales data from Excel."""
# Read all sheets
all_sheets = pd.read_excel(filepath, sheet_name=None)
# Filter quarterly sheets
quarterly_data = {
name: df for name, df in all_sheets.items()
if name.startswith('Q') and '_Sales' in name
}
# Add quarter identifier and combine
frames = []
for sheet_name, df in quarterly_data.items():
df = df.copy()
df['quarter'] = sheet_name.split('_')[0]
frames.append(df)
combined = pd.concat(frames, ignore_index=True)
# Data quality checks
print(f"Total records loaded: {len(combined)}")
print(f"Date range: {combined['date'].min()} to {combined['date'].max()}")
print(f"Quarters: {combined['quarter'].unique()}")
return combined
# Load and analyze
sales_df = load_quarterly_sales('sales_data.xlsx')
# Aggregate by quarter
quarterly_summary = sales_df.groupby('quarter').agg({
'revenue': 'sum',
'quantity': 'sum',
'transaction_id': 'count'
}).round(2)
print("\nQuarterly Summary:")
print(quarterly_summary)
This pattern provides a reusable function for loading multi-sheet Excel data with built-in validation and transformation logic.