Python - String startswith() and endswith()

The `startswith()` and `endswith()` methods check if a string begins or ends with specified substrings. Both methods return `True` or `False` and share identical parameter signatures.

Key Insights

  • startswith() and endswith() methods provide efficient prefix and suffix checking without regex overhead, supporting single strings, tuples of strings, and optional start/end parameters for substring matching
  • Both methods return boolean values and accept tuple arguments for checking multiple patterns simultaneously, eliminating the need for chained or conditions
  • These methods avoid regex overhead and are clearer than manual slicing for simple pattern matching; the comparison itself runs in O(k) time, where k is the length of the prefix/suffix being checked

Basic Usage and Syntax

The startswith() and endswith() methods check if a string begins or ends with specified substrings. Both methods return True or False and share identical parameter signatures.

text = "application_config.yaml"

# Prefix checks are anchored at position 0 and require an exact match.
for prefix in ("application", "config"):
    print(text.startswith(prefix))   # True, then False

# Suffix checks mirror the prefix API.
for suffix in (".yaml", ".json"):
    print(text.endswith(suffix))     # True, then False

# Both methods are case-sensitive.
url = "https://api.example.com"
print(url.startswith("HTTPS"))       # False: case differs
print(url.startswith("https"))       # True

The methods accept optional start and end parameters to check substrings within specific ranges:

path = "/var/log/application/error.log"

# Optional start/end arguments restrict the check to a slice-like window.
print(path.startswith("application", 9))     # True: match must begin at index 9
print(path.startswith("application", 9, 24)) # True: window path[9:24] still starts with it
print(path.endswith("error.log", 20))        # True: only path[20:] is considered
print(path.endswith(".log", 20, 30))         # True: suffix of path[20:30]

Checking Multiple Patterns with Tuples

Both methods accept tuples to check against multiple patterns simultaneously, returning True if any pattern matches.

filename = "report_2024.pdf"

# Passing a tuple asks "does it start/end with ANY of these?" —
# no chained `or` conditions required.
valid_prefixes = ("report", "invoice", "summary")
allowed_extensions = (".pdf", ".docx", ".txt")

if filename.startswith(valid_prefixes):
    print("Valid document type")
if filename.endswith(allowed_extensions):
    print("Supported file format")

# Practical example: URL validation
def is_secure_url(url):
    """Return True when *url* begins with an encrypted protocol scheme."""
    return url.startswith(("https://", "ftps://", "sftp://"))

urls = [
    "https://example.com",
    "http://example.com",
    "ftps://ftp.example.com",
]

for candidate in urls:
    print(f"{candidate}: {is_secure_url(candidate)}")

Output:

https://example.com: True
http://example.com: False
ftps://ftp.example.com: True

File Extension and Path Handling

These methods excel at file system operations where prefix and suffix patterns determine file types and locations.

import os

def categorize_files(file_list):
    """Bucket filenames by extension, case-insensitively.

    Returns a dict with keys 'images', 'documents', 'archives', 'config';
    filenames matching none of the known extensions are silently dropped.
    """
    buckets = {
        'images': ('.jpg', '.jpeg', '.png', '.gif', '.bmp'),
        'documents': ('.pdf', '.doc', '.docx', '.txt', '.md'),
        'archives': ('.zip', '.tar', '.gz', '.rar'),
        'config': ('.yaml', '.yml', '.json', '.toml', '.ini'),
    }
    categories = {name: [] for name in buckets}
    for filename in file_list:
        lowered = filename.lower()
        # First matching bucket wins; later buckets are not consulted.
        for name, extensions in buckets.items():
            if lowered.endswith(extensions):
                categories[name].append(filename)
                break
    return categories

files = [
    "photo.JPG", "report.pdf", "backup.zip",
    "settings.yaml", "notes.txt", "archive.tar.gz"
]

# Print each bucket on its own line.
result = categorize_files(files)
for category in result:
    print(f"{category}: {result[category]}")

For path manipulation, combine with string operations:

def is_hidden_file(path):
    """Return True for Unix-style hidden names (final path component starts with a dot)."""
    return os.path.basename(path).startswith('.')

def get_log_files(directory, app_prefix):
    """List entries in *directory* that start with *app_prefix* and end in '.log'."""
    return [
        name
        for name in os.listdir(directory)
        if name.startswith(app_prefix) and name.endswith('.log')
    ]

# Example usage
hidden_files = ['.bashrc', 'visible.txt', '.gitignore']
for name in hidden_files:
    print(f"{name}: hidden={is_hidden_file(name)}")

String Parsing and Protocol Detection

These methods are invaluable for parsing structured strings like URLs, database connection strings, and protocol identifiers.

def parse_database_url(url):
    """Identify the database type and default port from a connection URL scheme.

    Returns a dict with 'type' and, for recognized schemes, 'default_port'.
    Unrecognized schemes yield {'type': 'Unknown'}.
    """
    known_schemes = (
        ('postgresql://', 'PostgreSQL', 5432),
        ('mysql://', 'MySQL', 3306),
        ('mongodb://', 'MongoDB', 27017),
        ('redis://', 'Redis', 6379),
    )
    for scheme, db_type, port in known_schemes:
        if url.startswith(scheme):
            return {'type': db_type, 'default_port': port}
    return {'type': 'Unknown'}

# Test with different connection strings
connections = [
    "postgresql://localhost/mydb",
    "mysql://db.example.com/users",
    "mongodb://cluster.example.com/app",
]

for conn in connections:
    details = parse_database_url(conn)
    port = details.get('default_port', 'N/A')
    print(f"{conn} -> {details['type']} (port: {port})")

API versioning and content negotiation:

def get_api_version(accept_header):
    """Return the API version token ('v1'..'v3') named in *accept_header*.

    Falls back to 'v1' when no known versioned media type is present.
    """
    for version in ('v1', 'v2', 'v3'):
        if accept_header.startswith(f'application/vnd.api.{version}'):
            return version
    return 'v1'  # default

def validate_content_type(content_type):
    """Return True when *content_type* begins with a supported media type."""
    return content_type.startswith(
        ('application/json', 'application/xml', 'text/plain')
    )

# Example headers
headers = [
    'application/vnd.api.v2+json',
    'application/json',
    'text/html',
]

for header in headers:
    version = get_api_version(header)
    valid = validate_content_type(header)
    print(f"{header}: version={version}, valid={valid}")

Performance Optimization and Best Practices

These methods offer better performance than alternatives for simple pattern matching:

# Micro-benchmark: three equivalent ways to test a prefix AND a suffix.
import timeit

text = "application_configuration_file.yaml"

# Method 1: startswith/endswith
def using_methods():
    # Dedicated methods: no slicing arithmetic, no regex machinery.
    return text.startswith("application") and text.endswith(".yaml")

# Method 2: String slicing
def using_slicing():
    # The slice bounds (11 and -5) must match the literal lengths exactly,
    # which makes this form brittle compared with the methods above.
    return text[:11] == "application" and text[-5:] == ".yaml"

# Method 3: Regular expressions
import re
def using_regex():
    # re caches compiled patterns, but building a match object per call
    # still costs more than a plain prefix/suffix comparison.
    return bool(re.match(r'^application.*\.yaml$', text))

# Benchmark
methods = [
    ("startswith/endswith", using_methods),
    ("String slicing", using_slicing),
    ("Regex", using_regex)
]

# One million calls per method; absolute timings vary by machine and
# Python version, so compare the ratios rather than the raw numbers.
for name, func in methods:
    time = timeit.timeit(func, number=1000000)
    print(f"{name}: {time:.4f} seconds")

Best practices for production code:

class FileValidator:
    """Filename validator driven by optional prefix/extension whitelists.

    An empty (or None) rule set means "no restriction" for that rule; all
    matching is case-insensitive.
    """

    def __init__(self, allowed_prefixes=None, allowed_extensions=None):
        # Normalize None to an empty tuple so the checks below can rely on
        # tuple semantics of startswith/endswith.
        self.allowed_prefixes = allowed_prefixes or ()
        self.allowed_extensions = allowed_extensions or ()

    def is_valid(self, filename):
        """Return True when *filename* is non-empty and passes every configured rule."""
        if not filename:
            return False

        lowered = filename.lower()
        if self.allowed_prefixes and not lowered.startswith(self.allowed_prefixes):
            return False
        if self.allowed_extensions and not lowered.endswith(self.allowed_extensions):
            return False
        return True

    def filter_files(self, file_list):
        """Keep only the filenames that satisfy is_valid, preserving order."""
        return [name for name in file_list if self.is_valid(name)]

# Usage
validator = FileValidator(
    allowed_prefixes=('user_', 'admin_', 'system_'),
    allowed_extensions=('.csv', '.json', '.xml'),
)

files = [
    'user_data.csv', 'admin_config.json', 'report.pdf',
    'system_log.xml', 'temp_file.txt',
]

valid_files = validator.filter_files(files)
print(f"Valid files: {valid_files}")

Edge Cases and Common Pitfalls

Handle empty strings and None values appropriately:

def safe_check_prefix(text, prefix):
    """Prefix check that returns False instead of raising when either argument is None."""
    return (
        text is not None
        and prefix is not None
        and text.startswith(prefix)
    )

# Empty string behavior: "" counts as a prefix and a suffix of every string.
print("".startswith(""))      # True
print("".endswith(""))        # True
print("test".startswith(""))  # True
print("test".endswith(""))    # True

# Tuple with single element requires trailing comma
filename = "document.pdf"
# NOTE: (".pdf") is NOT a tuple — the parentheses are just grouping, so this
# is an ordinary single-string check. It succeeds and prints True; it does
# not raise. Only a trailing comma makes a 1-tuple.
print(filename.endswith((".pdf")))   # True: argument is the str ".pdf"
print(filename.endswith((".pdf",)))  # True: argument is the 1-tuple (".pdf",)

# Case sensitivity
url = "HTTPS://EXAMPLE.COM"
print(url.lower().startswith("https"))  # True - normalize first

These methods provide clean, readable, and performant solutions for prefix and suffix matching in Python applications. Use them instead of regex for simple pattern checks, and leverage tuple arguments to eliminate verbose conditional chains.

Liked this? There's more.

Every week: one practical technique, explained simply, with code you can use immediately.