SQL - String Functions Complete Reference

Key Insights

• SQL string functions enable text manipulation directly in queries, eliminating the need for post-processing in application code and improving performance by reducing data transfer • Understanding the differences between CONCAT, CONCAT_WS, and the || operator prevents unexpected NULL behavior that can corrupt query results • Pattern matching with LIKE, REGEXP, and full-text search each serve distinct use cases—choosing the right approach impacts both query performance and result accuracy

Core Concatenation Functions

String concatenation combines multiple text values into a single result. SQL provides several approaches, each with specific behaviors around NULL handling.

-- Basic CONCAT - ignores NULL values
SELECT CONCAT('John', ' ', 'Doe') AS full_name;
-- Result: 'John Doe'

SELECT CONCAT('John', NULL, 'Doe') AS full_name;
-- Result: 'JohnDoe' (NULL is ignored)

-- CONCAT_WS (Concatenate With Separator)
SELECT CONCAT_WS('-', '2024', '01', '15') AS date_string;
-- Result: '2024-01-15'

SELECT CONCAT_WS(',', 'apple', NULL, 'orange', 'banana') AS fruits;
-- Result: 'apple,orange,banana' (NULL is skipped)

-- Pipe operator (standard SQL, but NULL-sensitive)
SELECT 'John' || ' ' || 'Doe' AS full_name;
-- Result: 'John Doe'

SELECT 'John' || NULL || 'Doe' AS full_name;
-- Result: NULL (entire expression becomes NULL)

The pipe operator follows SQL standard behavior where any NULL operand produces NULL. Use CONCAT when NULL values might exist but should be treated as empty strings.

Extraction and Substring Operations

Extracting portions of strings is fundamental for parsing structured text data, handling fixed-width formats, and isolating specific components.

-- SUBSTRING with position and length
SELECT SUBSTRING('PostgreSQL Database', 1, 10) AS result;
-- Result: 'PostgreSQL'

SELECT SUBSTRING('PostgreSQL Database' FROM 12) AS result;
-- Result: 'Database'

-- LEFT and RIGHT for edge extraction
SELECT 
    LEFT('product-12345', 7) AS prefix,
    RIGHT('product-12345', 5) AS suffix;
-- Result: prefix='product', suffix='12345'

-- Practical example: parsing email addresses
SELECT 
    email,
    SUBSTRING(email, 1, POSITION('@' IN email) - 1) AS username,
    SUBSTRING(email, POSITION('@' IN email) + 1) AS domain
FROM users
WHERE email = 'john.doe@example.com';
-- username='john.doe', domain='example.com'

-- Extract using regular expressions (PostgreSQL)
SELECT SUBSTRING('Order #12345 processed', '\d+') AS order_number;
-- Result: '12345'

Case Transformation

Case conversion is essential for normalization, comparison operations, and formatting output for display purposes.

-- Basic case conversion
SELECT 
    UPPER('postgresql') AS uppercase,
    LOWER('POSTGRESQL') AS lowercase,
    INITCAP('john doe') AS titlecase;
-- Result: uppercase='POSTGRESQL', lowercase='postgresql', titlecase='John Doe'

-- Case-insensitive comparison
SELECT * FROM products
WHERE LOWER(product_name) = LOWER('Widget Pro');

-- Normalizing user input
INSERT INTO users (email, username)
VALUES (
    LOWER('John.Doe@EXAMPLE.com'),
    LOWER('JohnDoe123')
);

Trimming and Padding

Whitespace management and fixed-width formatting are critical when handling user input or generating reports.

-- Removing whitespace
SELECT 
    TRIM('  hello  ') AS trimmed,
    LTRIM('  hello  ') AS left_trimmed,
    RTRIM('  hello  ') AS right_trimmed;
-- Result: trimmed='hello', left_trimmed='hello  ', right_trimmed='  hello'

-- Trim specific characters
SELECT TRIM(BOTH '.' FROM '...example...') AS result;
-- Result: 'example'

-- Padding for fixed-width output
SELECT 
    LPAD('42', 5, '0') AS padded_left,
    RPAD('ID', 10, '-') AS padded_right;
-- Result: padded_left='00042', padded_right='ID--------'

-- Practical example: generating fixed-width records
SELECT 
    LPAD(CAST(id AS VARCHAR), 8, '0') ||
    RPAD(product_name, 30, ' ') ||
    LPAD(CAST(price AS VARCHAR), 10, ' ') AS fixed_record
FROM products;

Search and Pattern Matching

Finding substrings and matching patterns enables filtering, validation, and data extraction from unstructured text.

-- Position finding
SELECT 
    POSITION('world' IN 'hello world') AS pos,
    STRPOS('hello world', 'world') AS strpos_result;
-- Result: both return 7

-- Pattern matching with LIKE
SELECT * FROM products
WHERE product_name LIKE 'Widget%'  -- Starts with 'Widget'
   OR product_name LIKE '%Pro'     -- Ends with 'Pro'
   OR product_name LIKE '%Deluxe%'; -- Contains 'Deluxe'

-- Case-insensitive LIKE (PostgreSQL)
SELECT * FROM products
WHERE product_name ILIKE '%widget%';

-- Regular expression matching
SELECT * FROM products
WHERE product_name ~ '^[A-Z]{3}-\d{4}$';  -- Format: ABC-1234

-- Extract all matches
SELECT 
    description,
    REGEXP_MATCHES(description, '\$\d+\.?\d*', 'g') AS prices
FROM products;

Replacement and Modification

String replacement enables data cleaning, standardization, and transformation without updating source records.

-- Basic replacement
SELECT REPLACE('Hello World', 'World', 'SQL') AS result;
-- Result: 'Hello SQL'

-- Multiple replacements
SELECT 
    REPLACE(
        REPLACE(
            REPLACE(phone, '-', ''),
            '(', ''
        ),
        ')', ''
    ) AS clean_phone
FROM contacts;

-- Regular expression replacement (PostgreSQL)
SELECT REGEXP_REPLACE(
    'Product ID: ABC-123, DEF-456',
    '[A-Z]{3}-(\d{3})',
    'ID-\1',
    'g'
) AS result;
-- Result: 'Product ID: ID-123, ID-456'

-- Translating characters
SELECT TRANSLATE('hello', 'helo', 'HELO') AS result;
-- Result: 'HELLO'

Length and Measurement

String length functions account for different character encodings and are essential for validation and truncation.

-- Character length vs byte length
SELECT 
    LENGTH('hello') AS char_length,
    OCTET_LENGTH('hello') AS byte_length,
    CHAR_LENGTH('hello') AS char_length_alt;

-- UTF-8 multibyte characters
SELECT 
    LENGTH('日本語') AS char_count,
    OCTET_LENGTH('日本語') AS byte_count;
-- char_count=3, byte_count=9 (3 bytes per character)

-- Validation example
SELECT * FROM users
WHERE LENGTH(username) BETWEEN 3 AND 20
  AND LENGTH(password) >= 8;

-- Truncation with ellipsis
SELECT 
    CASE 
        WHEN LENGTH(description) > 100 
        THEN LEFT(description, 97) || '...'
        ELSE description
    END AS truncated_description
FROM products;

Splitting and Array Operations

Modern SQL databases provide functions to split delimited strings into arrays or table rows, enabling analysis of comma-separated values and tags.

-- Split string to array (PostgreSQL)
SELECT STRING_TO_ARRAY('apple,orange,banana', ',') AS fruit_array;
-- Result: {'apple','orange','banana'}

-- Split to rows
SELECT UNNEST(STRING_TO_ARRAY('tag1,tag2,tag3', ',')) AS tag;
-- Returns 3 rows: 'tag1', 'tag2', 'tag3'

-- Join array to string
SELECT ARRAY_TO_STRING(ARRAY['apple','orange','banana'], ', ') AS fruits;
-- Result: 'apple, orange, banana'

-- Practical example: tag search
SELECT product_id, product_name
FROM products
WHERE 'electronics' = ANY(STRING_TO_ARRAY(tags, ','));

Advanced Text Processing

Complex text operations combine multiple functions to solve real-world data processing challenges.

-- URL slug generation
SELECT LOWER(
    REGEXP_REPLACE(
        REGEXP_REPLACE(
            TRIM(product_name),
            '[^a-zA-Z0-9\s-]',
            '',
            'g'
        ),
        '\s+',
        '-',
        'g'
    )
) AS slug
FROM products;
-- 'Widget Pro 2000!' becomes 'widget-pro-2000'

-- Phone number formatting
SELECT 
    CONCAT(
        '(',
        SUBSTRING(phone, 1, 3),
        ') ',
        SUBSTRING(phone, 4, 3),
        '-',
        SUBSTRING(phone, 7, 4)
    ) AS formatted_phone
FROM contacts
WHERE LENGTH(REGEXP_REPLACE(phone, '\D', '', 'g')) = 10;

-- Extract domain from URL
SELECT 
    url,
    REGEXP_REPLACE(
        REGEXP_REPLACE(url, '^https?://(www\.)?', ''),
        '/.*$',
        ''
    ) AS domain
FROM websites;

String functions form the backbone of text processing in SQL. Master these operations to handle data transformation at the database layer, reducing application complexity and improving query performance. Each database system implements these functions with slight variations—consult your specific database documentation for dialect-specific features and optimizations.