NumPy - np.isfinite() and np.isreal()
import numpy as np
Key Insights
- np.isfinite() checks for finite numbers (excludes inf, -inf, and NaN), while np.isreal() checks if values have zero imaginary parts—two fundamentally different validation purposes often confused in data pipelines
- np.isfinite() is essential for numerical stability in machine learning preprocessing, filtering out problematic values before training, while np.isreal() validates complex number operations
- Combining both functions with np.where() and boolean indexing enables robust data cleaning workflows that handle edge cases in scientific computing and financial analysis
Understanding np.isfinite()
np.isfinite() returns a boolean array indicating which elements are finite numbers. A finite number is any value that is not infinity, negative infinity, or NaN (Not a Number). This function is critical when working with numerical computations that can produce undefined results.
import numpy as np

# A sample containing every "problem" value class — +inf, -inf, and NaN —
# alongside ordinary finite floats.
samples = [1.0, 2.5, np.inf, -np.inf, np.nan, 0.0, -3.14]
arr = np.array(samples)

# Element-wise finiteness test: False for inf, -inf, and NaN.
finite_mask = np.isfinite(arr)

print("Array:", arr)
print("Finite mask:", finite_mask)
print("Finite values:", arr[finite_mask])
Output:
Array: [ 1. 2.5 inf -inf nan 0. -3.14]
Finite mask: [ True True False False False True True]
Finite values: [ 1. 2.5 0. -3.14]
The function works element-wise on arrays of any dimension and returns a boolean array with the same shape as the input.
# The mask mirrors the input's shape, so the same pattern works for
# matrices (and higher-rank arrays) unchanged.
matrix = np.array(
    [
        [1.0, np.inf, 3.0],
        [np.nan, 5.0, -np.inf],
        [7.0, 8.0, 9.0],
    ]
)
finite_2d = np.isfinite(matrix)
print("2D finite mask:\n", finite_2d)
print("Count of finite values:", np.sum(finite_2d))
Understanding np.isreal()
np.isreal() checks whether array elements have zero imaginary parts. This function returns True for real numbers (including integers and floats) and False for complex numbers with non-zero imaginary components.
import numpy as np

# Mixing float and complex literals upcasts the whole array to complex128.
arr = np.array([1.0, 2 + 3j, 4.5, 0 + 0j, 5 - 2j, -7.0])

# True wherever the imaginary part is exactly zero.
real_mask = np.isreal(arr)

print("Array:", arr)
print("Real mask:", real_mask)
print("Real values:", arr[real_mask])
Output:
Array: [ 1. +0.j 2. +3.j 4.5+0.j 0. +0.j 5. -2.j -7. +0.j]
Real mask: [ True False True True False True]
Real values: [ 1. +0.j 4.5+0.j 0. +0.j -7. +0.j]
Note that np.isreal() returns True for complex numbers with zero imaginary parts (like 0+0j). The values remain complex dtype but are considered “real” by this function.
# isreal() inspects only the imaginary part; it never converts the array,
# which therefore keeps its complex128 dtype.
real_complex = np.array([1 + 0j, 2 + 0j, 3 + 0j])
print("All real?", np.all(np.isreal(real_complex)))
print("Dtype:", real_complex.dtype)  # Still complex128
Critical Differences and Common Pitfalls
These functions serve entirely different purposes. np.isfinite() validates numerical stability, while np.isreal() validates number type. Confusing them leads to bugs in data validation pipelines.
import numpy as np

# Keep these values in a plain Python list: wrapping them in np.array()
# would upcast every element to complex128, so the first column would
# print as "(1+0j)" / "(inf+0j)" / "(nan+0j)" instead of "1.0" / "inf" /
# "nan" as shown in the table below.  The isfinite/isreal results are
# identical either way; only the printed representation differs.
test_values = [
    1.0,                 # finite and real
    np.inf,              # NOT finite but real
    np.nan,              # NOT finite but real
    2 + 3j,              # finite but NOT real
    complex(np.inf, 0),  # NOT finite but real
    complex(1, np.nan),  # NOT finite and NOT real
]

print("Value\t\t\tisfinite\tisreal")
print("-" * 50)
for val in test_values:
    print(f"{str(val):20s}\t{np.isfinite(val)}\t\t{np.isreal(val)}")
Output:
Value isfinite isreal
--------------------------------------------------
1.0 True True
inf False True
nan False True
(2+3j) True False
(inf+0j) False True
(1+nanj) False False
This table reveals that infinity and NaN are considered “real” by np.isreal() because they have zero imaginary parts, but they’re not finite.
Practical Application: Data Cleaning Pipeline
Real-world data often contains corrupted values from sensor failures, division by zero, or invalid computations. Here’s a production-ready cleaning workflow.
import numpy as np
def clean_sensor_data(data):
    """Clean sensor data by removing non-finite values.

    Args:
        data: 1-D array of readings; may contain inf, -inf, or NaN.

    Returns:
        Tuple of (clean_data, valid_indices): the finite readings and
        their positions in the original `data` array.
    """
    # True wherever the reading is a usable finite number.
    valid_mask = np.isfinite(data)

    # Statistics before cleaning.
    total_points = len(data)
    valid_points = int(np.sum(valid_mask))
    invalid_points = total_points - valid_points
    # Guard the empty-input case to avoid ZeroDivisionError in the report.
    removed_pct = (100 * invalid_points / total_points) if total_points else 0.0

    # Extract clean data and remember where each reading came from.
    clean_data = data[valid_mask]
    valid_indices = np.where(valid_mask)[0]

    print("Data cleaning report:")
    print(f"  Total points: {total_points}")
    print(f"  Valid points: {valid_points}")
    print(f"  Removed: {invalid_points} ({removed_pct:.2f}%)")
    return clean_data, valid_indices
# Simulated sensor trace corrupted by overflow (inf) and dropout (NaN).
sensor_readings = np.array([23.5, 24.1, np.inf, 23.8, np.nan, 24.2, 23.9, -np.inf])

clean_readings, valid_idx = clean_sensor_data(sensor_readings)

print(f"\nClean readings: {clean_readings}")
print(f"Valid indices: {valid_idx}")
Machine Learning Preprocessing
np.isfinite() is essential in ML pipelines to prevent model training failures. Many algorithms cannot handle infinite or NaN values.
import numpy as np
def preprocess_features(X, strategy='remove'):
    """Handle non-finite values in a feature matrix before model training.

    Args:
        X: Feature matrix of shape (n_samples, n_features).
        strategy: 'remove' drops any sample (row) containing a non-finite
            value; 'impute' replaces non-finite entries with the mean of
            that column's finite values.

    Returns:
        Cleaned feature matrix (rows removed or values imputed).

    Raises:
        ValueError: If `strategy` is not 'remove' or 'impute'.
    """
    if strategy == 'remove':
        # A sample survives only if every one of its features is finite.
        valid_samples = np.all(np.isfinite(X), axis=1)
        X_clean = X[valid_samples]
        print(f"Removed {np.sum(~valid_samples)} samples with non-finite values")
    elif strategy == 'impute':
        X_clean = X.copy()
        for col in range(X.shape[1]):
            finite_mask = np.isfinite(X[:, col])
            if np.all(finite_mask):
                continue  # Nothing to impute in this column.
            if not finite_mask.any():
                # Entire column is non-finite: a mean does not exist, so
                # leave it untouched instead of writing NaN everywhere.
                print(f"Column {col} has no finite values; skipped")
                continue
            col_mean = np.mean(X[finite_mask, col])
            X_clean[~finite_mask, col] = col_mean
            print(f"Imputed {np.sum(~finite_mask)} values in column {col}")
    else:
        # Fail loudly; the original fell through to an UnboundLocalError.
        raise ValueError(f"Unknown strategy: {strategy!r}")
    return X_clean
# A small matrix with one inf and one NaN to exercise both strategies.
X = np.array(
    [
        [1.0, 2.0, 3.0],
        [4.0, np.inf, 6.0],
        [7.0, 8.0, np.nan],
        [10.0, 11.0, 12.0],
    ]
)

print("Strategy: remove")
X_removed = preprocess_features(X, strategy='remove')
print(f"Shape: {X.shape} -> {X_removed.shape}\n")

print("Strategy: impute")
X_imputed = preprocess_features(X, strategy='impute')
print(f"Imputed matrix:\n{X_imputed}")
Complex Number Validation in Signal Processing
np.isreal() is valuable when working with Fourier transforms or complex signal processing where you need to verify real-valued results.
import numpy as np
def validate_fft_symmetry(signal):
    """Round-trip a real signal through FFT/IFFT and confirm the result
    is real — both strictly via np.isreal() and within a numerical
    tolerance.  Returns the real part of the reconstructed signal.
    """
    # A real-valued input produces a conjugate-symmetric spectrum, so
    # the inverse transform should come back (numerically) real.
    spectrum = np.fft.fft(signal)
    roundtrip = np.fft.ifft(spectrum)

    # Strict check: exact zero imaginary part.
    strictly_real = np.isreal(roundtrip)

    # Lenient check: imaginary residue within floating-point noise.
    residual_imag = np.abs(np.imag(roundtrip))
    effectively_real = residual_imag < 1e-10

    print(f"All values pass isreal(): {np.all(strictly_real)}")
    print(f"Max imaginary component: {np.max(residual_imag):.2e}")
    print(f"All effectively real: {np.all(effectively_real)}")
    return roundtrip.real
# A symmetric, purely real test signal; the round trip should reproduce
# it exactly (up to floating-point noise in the imaginary part).
signal = np.array([1.0, 2.0, 3.0, 4.0, 3.0, 2.0, 1.0])
reconstructed = validate_fft_symmetry(signal)

print(f"\nOriginal: {signal}")
print(f"Reconstructed: {reconstructed}")
Combined Validation Strategy
For robust numerical applications, combine both functions to create comprehensive validation.
import numpy as np
def validate_numerical_array(arr, require_real=False):
    """Run a comprehensive numerical health check on an array.

    Args:
        arr: Input array (real or complex dtype).
        require_real: If True, additionally report real/complex counts
            and require a zero imaginary part for an element to count
            as valid.

    Returns:
        Dict with 'total_elements', 'finite_count', 'inf_count',
        'nan_count', 'finite_percentage'; plus 'real_count',
        'complex_count', 'real_percentage' when require_real is True;
        plus 'valid_count' and the boolean 'valid_mask' of elements
        passing every requested check.
    """
    size = arr.size
    # Compute each mask once and reuse it (the original recomputed
    # np.isfinite/np.isreal for every statistic).
    finite_mask = np.isfinite(arr)
    finite_count = int(np.sum(finite_mask))

    results = {
        'total_elements': size,
        'finite_count': finite_count,
        'inf_count': int(np.sum(np.isinf(arr))),
        'nan_count': int(np.sum(np.isnan(arr))),
        # Guard the empty-array case to avoid a divide-by-zero.
        'finite_percentage': 100 * finite_count / size if size else 0.0,
    }

    if require_real:
        real_mask = np.isreal(arr)
        real_count = int(np.sum(real_mask))
        results['real_count'] = real_count
        results['complex_count'] = size - real_count
        results['real_percentage'] = 100 * real_count / size if size else 0.0
        # Combined check: finite AND real.
        valid_mask = finite_mask & real_mask
    else:
        valid_mask = finite_mask

    results['valid_count'] = int(np.sum(valid_mask))
    results['valid_mask'] = valid_mask
    return results
# Exercise the validator on an array mixing inf, NaN, and complex values.
test_arr = np.array([1.0, np.inf, 2+3j, np.nan, 4.0, 5+0j])
validation = validate_numerical_array(test_arr, require_real=True)

for key, value in validation.items():
    if key == 'valid_mask':
        continue  # Skip the raw boolean mask in the printed summary.
    print(f"{key}: {value}")
This combined approach ensures data integrity across both numerical stability and type requirements, essential for production systems handling scientific or financial computations.