NumPy - np.min() and np.max()
NumPy's `np.min()` and `np.max()` functions find minimum and maximum values in arrays. Unlike Python's built-in functions, these operate on NumPy's contiguous memory blocks using optimized C implementations.
Key Insights
- np.min() and np.max() operate significantly faster than Python's built-in min() and max() for numerical arrays, with performance gains increasing proportionally to array size
- Both functions support axis-specific operations, enabling efficient row-wise or column-wise extrema calculations in multidimensional arrays without explicit loops
- The keepdims parameter preserves array dimensionality during reduction operations, critical for broadcasting in subsequent calculations
Basic Usage and Performance Characteristics
NumPy’s np.min() and np.max() functions find minimum and maximum values in arrays. Unlike Python’s built-in functions, these operate on NumPy’s contiguous memory blocks using optimized C implementations.
import numpy as np
# Small 1-D integer array for demonstrating extrema lookups.
arr = np.array([23, 15, 42, 8, 16, 37])
print(np.min(arr)) # 8
print(np.max(arr)) # 42
# Direct method calls
# ndarray.min()/max() are equivalent to the module-level functions.
print(arr.min()) # 8
print(arr.max()) # 42
The performance difference becomes substantial with larger datasets:
import time
# Python list
py_list = list(range(1000000))
# perf_counter is a high-resolution monotonic clock, suitable for timing.
start = time.perf_counter()
result = min(py_list)
py_time = time.perf_counter() - start
# NumPy array
np_arr = np.arange(1000000)
start = time.perf_counter()
result = np.min(np_arr)
np_time = time.perf_counter() - start
# NOTE: a single run is noisy; timeit averages many runs for stable numbers.
print(f"Python: {py_time:.6f}s") # ~0.015s
print(f"NumPy: {np_time:.6f}s") # ~0.0005s
print(f"Speedup: {py_time/np_time:.1f}x") # ~30x faster
Axis-Specific Operations
The axis parameter enables dimension-specific reductions, eliminating manual iteration through array dimensions.
matrix = np.array([
    [12, 45, 23, 67],
    [89, 34, 56, 12],
    [23, 78, 90, 45]
])
# Global min/max
print(np.min(matrix)) # 12
print(np.max(matrix)) # 90
# Column-wise (axis=0)
# axis=0 collapses the rows, leaving one minimum per column.
col_min = np.min(matrix, axis=0)
print(col_min) # [12 34 23 12]
# Row-wise (axis=1)
# axis=1 collapses the columns, leaving one maximum per row.
row_max = np.max(matrix, axis=1)
print(row_max) # [67 89 90]
For 3D arrays, axis selection determines which dimensions collapse:
tensor = np.random.randint(0, 100, size=(3, 4, 5))
print(tensor.shape) # (3, 4, 5)
# Reduce first dimension
result_axis0 = np.min(tensor, axis=0)
print(result_axis0.shape) # (4, 5)
# Reduce multiple axes
# A tuple of axes reduces several dimensions in a single call.
result_multi = np.max(tensor, axis=(0, 2))
print(result_multi.shape) # (4,)
# Flatten everything
# axis=None (the default) reduces over all elements to a 0-d scalar.
result_all = np.min(tensor, axis=None)
print(result_all.shape) # ()
Preserving Dimensions with keepdims
The keepdims=True parameter maintains the original number of dimensions, replacing reduced axes with size-1 dimensions. This proves essential for broadcasting operations.
data = np.array([
    [10, 20, 30],
    [40, 50, 60],
    [70, 80, 90]
])
# Without keepdims
# The reduced axis is dropped: the result is 1-D.
row_min = np.min(data, axis=1)
print(row_min.shape) # (3,)
print(row_min) # [10 40 70]
# With keepdims
# The reduced axis is kept as size 1: the result stays 2-D.
row_min_kept = np.min(data, axis=1, keepdims=True)
print(row_min_kept.shape) # (3, 1)
print(row_min_kept)
# [[10]
# [40]
# [70]]
# Broadcasting normalization
# (3, 3) - (3, 1): the per-row minimum broadcasts across each row.
normalized = data - row_min_kept
print(normalized)
# [[ 0 10 20]
# [ 0 10 20]
# [ 0 10 20]]
Without keepdims, broadcasting requires manual reshaping:
# Manual approach (error-prone)
# reshape(-1, 1) re-adds the reduced axis by hand.
row_min_manual = np.min(data, axis=1).reshape(-1, 1)
# Clean approach
# keepdims=True yields the same (3, 1) shape without an explicit reshape.
row_min_auto = np.min(data, axis=1, keepdims=True)
Handling Missing Data with nanmin and nanmax
NumPy provides np.nanmin() and np.nanmax() for arrays containing NaN values, ignoring them during computation.
data_with_nan = np.array([12.5, np.nan, 45.3, 23.1, np.nan, 67.8])
# Standard functions propagate NaN
# Any NaN poisons the comparison chain, so the result is nan.
print(np.min(data_with_nan)) # nan
print(np.max(data_with_nan)) # nan
# NaN-aware functions
print(np.nanmin(data_with_nan)) # 12.5
print(np.nanmax(data_with_nan)) # 67.8
For 2D arrays with partial missing data:
measurements = np.array([
    [12.3, np.nan, 45.6],
    [np.nan, 34.5, 23.1],
    [56.7, 78.9, np.nan]
])
# NaNs are skipped per slice; each result uses only the valid entries.
col_mins = np.nanmin(measurements, axis=0)
print(col_mins) # [12.3 34.5 23.1]
row_maxes = np.nanmax(measurements, axis=1)
print(row_maxes) # [45.6 34.5 78.9]
# Check for all-NaN slices
all_nan_col = np.array([[np.nan], [np.nan], [np.nan]])
print(np.nanmin(all_nan_col)) # nan (emits RuntimeWarning: All-NaN slice encountered)
Output Arrays and Memory Efficiency
The out parameter enables in-place operations, reducing memory allocation overhead in performance-critical loops.
large_matrix = np.random.rand(1000, 1000)
# Pre-allocated output; must match the result's shape (1000,) and dtype (float64).
result_buffer = np.empty(1000)
# Reuse buffer across iterations
for _ in range(100):
    np.min(large_matrix, axis=1, out=result_buffer)
    # Process result_buffer without creating new arrays
Combining with keepdims for cumulative operations:
# Running (cumulative) column-wise minima over the first i+1 rows.
data = np.random.rand(5, 1000)
# The out buffer must match the reduced result: np.min over axis=0 with
# keepdims=True yields shape (1, 1000), so each row slice of the buffer
# needs 1000 columns. (The original (5, 1) buffer raised a shape-mismatch
# ValueError on the first iteration.)
cumulative_min = np.empty((5, 1000))
for i in range(5):
    np.min(data[:i+1], axis=0, keepdims=True, out=cumulative_min[i:i+1])
Practical Applications
Normalization to range [0, 1]:
def normalize_minmax(arr):
    """Scale each column of *arr* to the range [0, 1].

    Normalization runs independently per column (axis=0). Columns with
    zero range (constant columns) are mapped to 0 instead of dividing by
    zero — the original implementation produced NaN plus a RuntimeWarning
    for such columns.

    Args:
        arr: Numeric array; reduced along axis 0.

    Returns:
        Float array of the same shape, min-max scaled per column.
    """
    arr_min = np.min(arr, axis=0, keepdims=True)
    arr_max = np.max(arr, axis=0, keepdims=True)
    span = arr_max - arr_min
    # Guard: a constant column has max == min; dividing by 1 leaves zeros.
    safe_span = np.where(span == 0, 1, span)
    return (arr - arr_min) / safe_span
# Each column scales independently: column 0 spans 1-100, column 1 spans 100-200.
data = np.array([[1, 200], [50, 150], [100, 100]])
normalized = normalize_minmax(data)
print(normalized)
# [[0. 1. ]
# [0.49 0.5 ]
# [1. 0. ]]
Clipping outliers:
def clip_to_percentile(arr, lower=5, upper=95):
    """Limit the values of *arr* to the [lower, upper] percentile band.

    Values below the lower percentile are raised to it and values above
    the upper percentile are lowered to it. The input is not modified.
    """
    # np.percentile accepts a sequence of q-values: one pass, two bounds.
    low_bound, high_bound = np.percentile(arr, [lower, upper])
    return np.clip(arr, low_bound, high_bound)
# 1000 samples from N(0, 10); clipping trims the extreme 5% tails on each side.
data = np.random.randn(1000) * 10
clipped = clip_to_percentile(data)
Finding extrema indices:
matrix = np.random.rand(10, 10)
# Global position
# argmin returns a flat index; unravel_index converts it to (row, col).
flat_idx = np.argmin(matrix)
min_position = np.unravel_index(flat_idx, matrix.shape)
print(f"Minimum at {min_position}: {matrix[min_position]}")
# Per-row indices
row_max_indices = np.argmax(matrix, axis=1)
row_max_values = np.max(matrix, axis=1)
for i, (idx, val) in enumerate(zip(row_max_indices, row_max_values)):
    print(f"Row {i}: max at column {idx} = {val:.4f}")
Performance Considerations
For boolean arrays, np.any() and np.all() provide optimized alternatives:
bool_arr = np.random.rand(1000000) > 0.5
# Slower
# max() must scan the whole array before comparing against True.
has_true = np.max(bool_arr) == True
# Faster
# any() can short-circuit as soon as the first True is found.
has_true = np.any(bool_arr)
For repeated operations on array slices, pre-compute views:
large_data = np.random.rand(10000, 1000)
# Inefficient
# 100 separate slice-and-reduce calls, each with Python-level overhead.
for i in range(100):
    subset_min = np.min(large_data[i*100:(i+1)*100], axis=0)
# Efficient
# One reshape to (100 chunks, 100 rows, 1000 cols) and a single reduction.
reshaped = large_data[:10000].reshape(100, 100, 1000)
batch_mins = np.min(reshaped, axis=1)
These functions form the foundation for statistical analysis, data normalization, and numerical optimization in NumPy-based applications. Understanding axis manipulation and dimension preservation enables writing vectorized code that eliminates explicit Python loops while maintaining readability.