NumPy - np.argwhere() - Find Indices of Condition

Key Insights

np.argwhere() returns indices where a condition is True, outputting a 2D array with one row per match and columns representing each dimension
Unlike np.where(), which returns a tuple of separate per-dimension index arrays, argwhere() groups the coordinates of each match into one row of a single 2D array, giving a cleaner interface for iterating over matches in multi-dimensional arrays
Performance matters: argwhere() is essentially np.transpose(np.nonzero(a)), so it does extra work to pack the per-dimension index arrays into a new (N, ndim) array, making a direct np.nonzero() call faster for large datasets when you need separate dimension arrays

Basic Syntax and Return Format

np.argwhere() searches an array for elements that satisfy a condition and returns their indices as a 2D array. Each row contains the complete index coordinates for one matching element.

import numpy as np

# 1D array
arr = np.array([10, 25, 30, 45, 50])
indices = np.argwhere(arr > 30)
print(indices)
# Output: [[3]
#          [4]]

# Access the actual indices
flat_indices = indices.flatten()
print(flat_indices)  # [3 4]
print(arr[flat_indices])  # [45 50]

The return value is always 2D, with shape (N, arr.ndim) where N is the number of matches, even for 1D input arrays. This consistency simplifies code when working with arrays of varying dimensions.

Multi-Dimensional Array Operations

The real power of argwhere() emerges with multi-dimensional arrays, where it returns the complete coordinates of each match as one row of the result.

# 2D array example
matrix = np.array([
    [1, 8, 3],
    [9, 2, 7],
    [4, 6, 5]
])

# Find elements greater than 5
indices = np.argwhere(matrix > 5)
print(indices)
# Output: [[0 1]
#          [1 0]
#          [1 2]
#          [2 1]]

# Each row is [row_index, col_index]
for idx in indices:
    row, col = idx
    print(f"matrix[{row}, {col}] = {matrix[row, col]}")
# matrix[0, 1] = 8
# matrix[1, 0] = 9
# matrix[1, 2] = 7
# matrix[2, 1] = 6

For 3D arrays, each result row contains three coordinates:

# 3D array (depth, rows, cols)
cube = np.random.randint(0, 100, size=(3, 4, 5))
print("Cube shape:", cube.shape)

# Find all values greater than 90
high_values = np.argwhere(cube > 90)
print(f"Found {len(high_values)} values > 90")

# Access using the indices
for depth, row, col in high_values:
    value = cube[depth, row, col]
    print(f"cube[{depth}, {row}, {col}] = {value}")

Comparison with np.where()

While np.where() and argwhere() solve similar problems, their output formats differ significantly.

arr = np.array([[1, 2, 3],
                [4, 5, 6],
                [7, 8, 9]])

# Using np.where()
where_result = np.where(arr > 5)
print("np.where() returns tuple of arrays:")
print(where_result)
# (array([1, 2, 2, 2]), array([2, 0, 1, 2]))

# Using np.argwhere()
argwhere_result = np.argwhere(arr > 5)
print("\nnp.argwhere() returns 2D array:")
print(argwhere_result)
# [[1 2]
#  [2 0]
#  [2 1]
#  [2 2]]

# Accessing elements
# With np.where() - use tuple unpacking
rows, cols = where_result
for r, c in zip(rows, cols):
    print(f"arr[{r}, {c}] = {arr[r, c]}")

# With np.argwhere() - direct iteration
for r, c in argwhere_result:
    print(f"arr[{r}, {c}] = {arr[r, c]}")

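Choose argwhere() when you need to iterate over matching coordinates or pass them as a single object. Use np.where() when you need the indices separated by dimension for advanced indexing: the tuple it returns can be used directly as an index (for example, arr[np.where(arr > 5)]), whereas the 2D array from argwhere() must first be split into its columns.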

Practical Use Cases

Finding Matrix Diagonal Elements

# Create a 5x5 matrix
matrix = np.arange(25).reshape(5, 5)

# Find diagonal elements (where row == col)
diagonal_indices = np.argwhere(np.eye(5, dtype=bool))
print("Diagonal indices:")
print(diagonal_indices)

# Extract diagonal values
diagonal_values = matrix[diagonal_indices[:, 0], diagonal_indices[:, 1]]
print("Diagonal values:", diagonal_values)

Detecting Outliers in Datasets

# Generate sample data with outliers
data = np.random.normal(100, 15, size=(50, 3))
data[5, 1] = 200  # Inject outlier
data[23, 0] = 20  # Inject outlier

# Define outlier threshold (3 standard deviations)
mean = np.mean(data, axis=0)
std = np.std(data, axis=0)
z_scores = np.abs((data - mean) / std)

# Find outliers
outlier_indices = np.argwhere(z_scores > 3)
print(f"Found {len(outlier_indices)} outlier(s):")

for row, col in outlier_indices:
    print(f"Row {row}, Column {col}: value = {data[row, col]:.2f}, "
          f"z-score = {z_scores[row, col]:.2f}")

Image Processing - Finding Specific Pixel Values

# Simulate a grayscale image (8-bit values)
image = np.random.randint(0, 256, size=(100, 100), dtype=np.uint8)

# Find all pure white pixels (value = 255)
white_pixels = np.argwhere(image == 255)
print(f"Found {len(white_pixels)} white pixels")

# Find pixels in a specific intensity range
dark_pixels = np.argwhere((image >= 0) & (image <= 50))
print(f"Found {len(dark_pixels)} dark pixels (0-50)")

# Create a mask from the indices
mask = np.zeros_like(image, dtype=bool)
mask[dark_pixels[:, 0], dark_pixels[:, 1]] = True
print(f"Mask coverage: {mask.sum() / mask.size * 100:.2f}%")

Complex Conditional Logic

Combine multiple conditions using boolean operators for sophisticated filtering.

# Sales data: [product_id, quantity, price]
sales = np.array([
    [101, 5, 29.99],
    [102, 15, 49.99],
    [103, 3, 19.99],
    [104, 25, 39.99],
    [105, 8, 99.99]
])

# Find high-value transactions (quantity > 10 AND price > 30)
high_value = np.argwhere((sales[:, 1] > 10) & (sales[:, 2] > 30))
print("High-value transaction rows:", high_value.flatten())
print("Details:")
print(sales[high_value.flatten()])

# Find products needing restock OR premium items
restock_or_premium = np.argwhere((sales[:, 1] < 5) | (sales[:, 2] > 80))
print("\nRestock or premium products:")
print(sales[restock_or_premium.flatten()])

Performance Considerations

For large arrays, performance differences between methods become significant.

import time

# Create large array
large_array = np.random.randint(0, 1000, size=(10000, 10000))

# Benchmark np.argwhere()
start = time.time()
result_argwhere = np.argwhere(large_array > 995)
time_argwhere = time.time() - start

# Benchmark np.nonzero()
start = time.time()
result_nonzero = np.nonzero(large_array > 995)
time_nonzero = time.time() - start

# Benchmark np.where()
start = time.time()
result_where = np.where(large_array > 995)
time_where = time.time() - start

print(f"argwhere: {time_argwhere:.4f}s - {len(result_argwhere)} matches")
print(f"nonzero:  {time_nonzero:.4f}s")
print(f"where:    {time_where:.4f}s")

np.nonzero() and np.where() are typically faster because argwhere() does extra work on top of them: it is essentially np.transpose(np.nonzero(a)), which packs the per-dimension index arrays into a new 2D array. Use argwhere() for code clarity when performance isn’t critical, or when you specifically need the coordinate format.

Edge Cases and Empty Results

Handle cases where no elements match the condition:

arr = np.array([1, 2, 3, 4, 5])

# No matches
result = np.argwhere(arr > 10)
print("Shape of empty result:", result.shape)  # (0, 1)
print("Is empty:", len(result) == 0)  # True

# Safe iteration
if len(result) > 0:
    for idx in result:
        print(f"Found: {arr[idx[0]]}")
else:
    print("No matches found")

# Using with default values
default_indices = result if len(result) > 0 else np.array([[0]])
print("Indices with default:", default_indices)

Working with Boolean Arrays Directly

Pass boolean arrays directly without explicit conditions:

# Create boolean mask
data = np.array([15, 22, 8, 31, 45, 12])
mask = np.array([True, False, True, False, True, False])

# Find True positions
true_positions = np.argwhere(mask)
print("True positions:", true_positions.flatten())
print("Masked data:", data[true_positions.flatten()])

# Combine with other operations
nan_array = np.array([1.0, np.nan, 3.0, np.nan, 5.0])
nan_positions = np.argwhere(np.isnan(nan_array))
print("NaN positions:", nan_positions.flatten())

np.argwhere() excels at converting logical conditions into actionable index coordinates, making it indispensable for data analysis, scientific computing, and array manipulation tasks where you need to locate and process specific elements based on criteria.