Python - Read/Write Excel Files (openpyxl/xlsxwriter)

Key Insights

openpyxl handles both reading and writing Excel files with full formatting support, while xlsxwriter is write-only but offers superior performance and memory efficiency for large datasets
Reading Excel data requires understanding cell coordinates, ranges, and iterators to efficiently process thousands of rows without loading entire worksheets into memory
Writing formatted Excel reports involves styling cells, managing formulas, creating charts, and handling multiple worksheets within a single workbook

Installing Required Libraries

pip install openpyxl xlsxwriter pandas

openpyxl supports .xlsx files for both reading and writing. xlsxwriter only writes files but excels at generating large reports with complex formatting. Choose openpyxl when you need to modify existing files, xlsxwriter when creating new reports from scratch.

Reading Excel Files with openpyxl

from openpyxl import load_workbook

# Load workbook
wb = load_workbook('sales_data.xlsx')

# Access worksheet
ws = wb['Sheet1']  # By name
ws = wb.active     # Active sheet

# Read single cell
value = ws['A1'].value
value = ws.cell(row=1, column=1).value

# Read range
for row in ws['A1:C10']:
    for cell in row:
        print(cell.value, end=' ')
    print()

# Iterate all rows efficiently
for row in ws.iter_rows(min_row=2, max_row=100, min_col=1, max_col=5, values_only=True):
    product, quantity, price, date, customer = row
    print(f"{product}: {quantity} units @ ${price}")

The values_only=True parameter returns cell values directly instead of cell objects, improving performance by 3-4x when you don’t need formatting information.

Writing Excel Files with openpyxl

from openpyxl import Workbook
from openpyxl.styles import Font, PatternFill, Alignment, Border, Side
from datetime import datetime

wb = Workbook()
ws = wb.active
ws.title = "Sales Report"

# Write headers
headers = ['Product', 'Quantity', 'Unit Price', 'Total', 'Date']
ws.append(headers)

# Style headers
header_fill = PatternFill(start_color='366092', end_color='366092', fill_type='solid')
header_font = Font(bold=True, color='FFFFFF')

for cell in ws[1]:
    cell.fill = header_fill
    cell.font = header_font
    cell.alignment = Alignment(horizontal='center')

# Write data
data = [
    ['Laptop', 15, 899.99, '=B2*C2', datetime.now()],
    ['Mouse', 150, 24.99, '=B3*C3', datetime.now()],
    ['Keyboard', 75, 79.99, '=B4*C4', datetime.now()]
]

for row in data:
    ws.append(row)

# Format currency columns
for row in ws.iter_rows(min_row=2, min_col=3, max_col=4):
    for cell in row:
        cell.number_format = '$#,##0.00'

# Format date column
for cell in ws['E'][1:]:
    cell.number_format = 'MM/DD/YYYY'

# Add borders
thin_border = Border(
    left=Side(style='thin'),
    right=Side(style='thin'),
    top=Side(style='thin'),
    bottom=Side(style='thin')
)

for row in ws.iter_rows(min_row=1, max_row=ws.max_row, min_col=1, max_col=5):
    for cell in row:
        cell.border = thin_border

# Auto-adjust column widths
for column in ws.columns:
    max_length = 0
    column_letter = column[0].column_letter
    for cell in column:
        if cell.value:
            max_length = max(max_length, len(str(cell.value)))
    ws.column_dimensions[column_letter].width = max_length + 2

wb.save('formatted_report.xlsx')

High-Performance Writing with xlsxwriter

import xlsxwriter

# Create workbook
workbook = xlsxwriter.Workbook('performance_report.xlsx', {'constant_memory': True})
worksheet = workbook.add_worksheet('Data')

# Define formats
header_format = workbook.add_format({
    'bold': True,
    'bg_color': '#366092',
    'font_color': 'white',
    'align': 'center',
    'border': 1
})

currency_format = workbook.add_format({'num_format': '$#,##0.00'})
date_format = workbook.add_format({'num_format': 'mm/dd/yyyy'})

# Write headers
headers = ['Product', 'Quantity', 'Unit Price', 'Total', 'Date']
for col, header in enumerate(headers):
    worksheet.write(0, col, header, header_format)

# Write large dataset efficiently
for row in range(1, 100001):
    worksheet.write(row, 0, f'Product {row}')
    worksheet.write(row, 1, row * 10)
    worksheet.write(row, 2, 99.99, currency_format)
    worksheet.write_formula(row, 3, f'=B{row+1}*C{row+1}', currency_format)
    worksheet.write_datetime(row, 4, datetime.now(), date_format)

# Set column widths
worksheet.set_column('A:A', 15)
worksheet.set_column('B:B', 10)
worksheet.set_column('C:D', 12)
worksheet.set_column('E:E', 12)

workbook.close()

The constant_memory mode reduces memory usage by writing rows sequentially to disk. Critical for files exceeding 10,000 rows.

Adding Charts with xlsxwriter

import xlsxwriter

workbook = xlsxwriter.Workbook('charts.xlsx')
worksheet = workbook.add_worksheet()

# Sample data
data = [
    ['Month', 'Revenue', 'Expenses'],
    ['Jan', 50000, 35000],
    ['Feb', 55000, 38000],
    ['Mar', 62000, 42000],
    ['Apr', 58000, 40000],
    ['May', 67000, 45000],
    ['Jun', 71000, 48000]
]

# Write data
for row_num, row_data in enumerate(data):
    for col_num, cell_data in enumerate(row_data):
        worksheet.write(row_num, col_num, cell_data)

# Create chart
chart = workbook.add_chart({'type': 'column'})

# Configure series
chart.add_series({
    'name': '=Sheet1!$B$1',
    'categories': '=Sheet1!$A$2:$A$7',
    'values': '=Sheet1!$B$2:$B$7',
    'fill': {'color': '#4472C4'}
})

chart.add_series({
    'name': '=Sheet1!$C$1',
    'categories': '=Sheet1!$A$2:$A$7',
    'values': '=Sheet1!$C$2:$C$7',
    'fill': {'color': '#ED7D31'}
})

# Format chart
chart.set_title({'name': 'Monthly Financial Overview'})
chart.set_x_axis({'name': 'Month'})
chart.set_y_axis({'name': 'Amount ($)', 'num_format': '$#,##0'})
chart.set_style(11)

# Insert chart
worksheet.insert_chart('E2', chart, {'x_scale': 1.5, 'y_scale': 1.5})

workbook.close()

Reading and Modifying Existing Files

from openpyxl import load_workbook
from openpyxl.utils import get_column_letter

# Load existing workbook
wb = load_workbook('existing_report.xlsx')
ws = wb['Sales']

# Find last row
last_row = ws.max_row

# Append new data
new_data = ['New Product', 50, 149.99, f'=B{last_row+1}*C{last_row+1}']
ws.append(new_data)

# Update specific cell
ws['A1'] = 'Updated Sales Report'

# Insert row
ws.insert_rows(2)

# Delete row
ws.delete_rows(5)

# Copy formatting from one cell to another
source_cell = ws['B2']
target_cell = ws['B10']
target_cell.font = source_cell.font.copy()
target_cell.fill = source_cell.fill.copy()
target_cell.border = source_cell.border.copy()

wb.save('modified_report.xlsx')

Working with Multiple Sheets

from openpyxl import Workbook

wb = Workbook()

# Create sheets
summary = wb.active
summary.title = 'Summary'
details = wb.create_sheet('Details')
charts = wb.create_sheet('Charts', 0)  # Insert at position 0

# Write to different sheets
summary['A1'] = 'Total Sales'
summary['B1'] = '=SUM(Details!B:B)'

details.append(['Product', 'Amount'])
details.append(['Laptop', 1500])
details.append(['Mouse', 25])

# Copy sheet
source = wb['Details']
target = wb.copy_worksheet(source)
target.title = 'Details Backup'

# Remove sheet
wb.remove(wb['Charts'])

wb.save('multi_sheet.xlsx')

Performance Comparison and Best Practices

For reading 100,000 rows:

openpyxl with values_only=True: ~8 seconds
openpyxl with cell objects: ~25 seconds
pandas read_excel(): ~3 seconds (uses openpyxl backend)

For writing 100,000 rows:

xlsxwriter with constant_memory: ~12 seconds, 50MB RAM
xlsxwriter without optimization: ~15 seconds, 800MB RAM
openpyxl: ~35 seconds, 1.2GB RAM

Use openpyxl when: Modifying existing files, reading formatted data, working with files under 50,000 rows.

Use xlsxwriter when: Creating new reports, generating files over 50,000 rows, need advanced chart features, memory is constrained.

Use pandas when: Primary goal is data analysis, need to integrate with data processing pipelines, working with CSV/SQL alongside Excel.

Always close workbooks explicitly or use context managers to prevent file corruption and memory leaks in production environments.