R - Matrices with Examples | Application Architect

Key Insights

Matrices in R are two-dimensional data structures that store elements of the same type, essential for linear algebra operations, statistical computations, and data transformations
R provides comprehensive matrix operations including creation, subsetting, mathematical operations, and specialized functions like transpose, inverse, and eigenvalue decomposition
Understanding matrix manipulation is crucial for machine learning implementations, statistical modeling, and efficient numerical computations in R

Creating Matrices

R offers multiple approaches to create matrices. The matrix() function is the most common method: it takes a vector of values and arranges them into rows and columns, filling column by column unless byrow = TRUE is set.

# Default fill order is column-major: values run down each column in turn
mat1 <- matrix(seq_len(12), nrow = 3, ncol = 4)
print(mat1)
#      [,1] [,2] [,3] [,4]
# [1,]    1    4    7   10
# [2,]    2    5    8   11
# [3,]    3    6    9   12

# byrow = TRUE switches to row-major fill: values run across each row
mat2 <- matrix(seq_len(12), nrow = 3, ncol = 4, byrow = TRUE)
print(mat2)
#      [,1] [,2] [,3] [,4]
# [1,]    1    2    3    4
# [2,]    5    6    7    8
# [3,]    9   10   11   12

# Assemble matrices out of existing vectors
vec1 <- c(1, 2, 3)
vec2 <- c(4, 5, 6)
mat4 <- cbind(vec1, vec2)  # Each vector becomes a column
mat3 <- rbind(vec1, vec2)  # Each vector becomes a row

# The row labels are taken from the names of the bound vectors
print(mat3)
#      [,1] [,2] [,3]
# vec1    1    2    3
# vec2    4    5    6

You can also create special matrices using dedicated functions:

# 4 x 4 identity matrix: ones on the diagonal, zeros everywhere else
identity_mat <- diag(nrow = 4)
print(identity_mat)

# Square matrix with the supplied values on its diagonal
diag_mat <- diag(x = c(1, 2, 3, 4))

# 3 x 3 matrix of zeros (the single value is recycled into every cell)
zero_mat <- matrix(data = 0, nrow = 3, ncol = 3)

# 2 x 5 matrix in which every cell holds the same value
ones_mat <- matrix(data = 1, nrow = 2, ncol = 5)

Matrix Indexing and Subsetting

Accessing matrix elements uses bracket notation with row and column indices. Leave an index blank to select all rows or columns.

mat <- matrix(1:20, nrow = 4, ncol = 5)

# Single element
element <- mat[2, 3]  # Row 2, Column 3
print(element)  # 10

# Entire row (comes back as a plain vector; add drop = FALSE to keep a
# 1-row matrix)
row2 <- mat[2, ]
print(row2)  # 2 6 10 14 18

# Entire column
col3 <- mat[, 3]
print(col3)  # 9 10 11 12

# Multiple rows and columns
# (stored as sub_mat rather than subset so base::subset() is not masked)
sub_mat <- mat[1:2, 3:5]
print(sub_mat)
#      [,1] [,2] [,3]
# [1,]    9   13   17
# [2,]   10   14   18

# Non-contiguous selection
selected <- mat[c(1, 3), c(2, 4)]
print(selected)
#      [,1] [,2]
# [1,]    5   13
# [2,]    7   15

# Logical indexing
mat[mat > 10]  # All elements greater than 10
mat[mat > 10] <- 0  # Replace elements > 10 with 0

Matrix Attributes and Dimensions

Understanding matrix dimensions and attributes is essential for proper manipulation.

mat <- matrix(1:12, nrow = 3, ncol = 4)

# Size and shape
dim(mat)      # 3 4 (rows, then columns)
nrow(mat)     # 3
ncol(mat)     # 4
length(mat)   # 12 (number of elements)

# Label the rows and columns
rownames(mat) <- paste0("R", seq_len(nrow(mat)))
colnames(mat) <- paste0("C", seq_len(ncol(mat)))
print(mat)

# Retrieve both sets of names as a list
dimnames(mat)

# Test whether the object is a matrix
is.matrix(mat)  # TRUE

# Flatten to a plain vector (column by column)
as.vector(mat)

Matrix Arithmetic Operations

R supports element-wise and matrix-specific mathematical operations.

mat1 <- matrix(1:9, nrow = 3)
mat2 <- matrix(9:1, nrow = 3)

# Element-wise operations (applied entry by entry)
addition <- mat1 + mat2
subtraction <- mat1 - mat2
multiplication <- mat1 * mat2  # Element-wise, NOT the matrix product
division <- mat1 / mat2

# Scalar operations (the scalar is applied to every entry)
scalar_mult <- mat1 * 3
scalar_add <- mat1 + 10

# Matrix multiplication (linear algebra)
mat_mult <- mat1 %*% mat2
print(mat_mult)

# Transpose
t_mat <- t(mat1)
print(t_mat)

# Cross product
crossprod(mat1, mat2)  # Equivalent to t(mat1) %*% mat2

# Transposed cross product (this is not the outer product)
tcrossprod(mat1, mat2)  # Equivalent to mat1 %*% t(mat2)

# Outer product of two vectors
outer(1:3, 4:6)  # Equivalent to 1:3 %o% 4:6

Advanced Matrix Operations

R provides functions for complex linear algebra operations critical for statistical computing.

# 2 x 2 example matrix, filled column by column: columns are (4, 2) and (3, 1)
A <- matrix(c(4, 2, 3, 1), nrow = 2, ncol = 2)

# Determinant: 4 * 1 - 3 * 2
det(A)  # -2

# Inverse of A
A_inv <- solve(A)
print(A_inv)
# Sanity check: A times its inverse should be the identity matrix
# (rounding hides floating-point noise)
round(A %*% A_inv, digits = 10)

# Solve the linear system Ax = b for x
b <- c(10, 5)
x <- solve(A, b)
print(x)

# Eigen decomposition: pull the values and vectors out of the result list
eigen_result <- eigen(A)
eigenvalues <- eigen_result[["values"]]
eigenvectors <- eigen_result[["vectors"]]

# Singular Value Decomposition: A = U %*% D %*% t(V)
svd_result <- svd(A)
U <- svd_result[["u"]]
D <- diag(svd_result[["d"]])  # Singular values placed on a diagonal
V <- svd_result[["v"]]

# QR decomposition: A = Q %*% R
qr_result <- qr(A)
Q <- qr.Q(qr_result)
R <- qr.R(qr_result)

# Cholesky decomposition (requires a positive definite matrix)
pos_def <- matrix(c(4, 2, 2, 3), nrow = 2, ncol = 2)
chol_result <- chol(pos_def)

Matrix Manipulation Functions

R also provides practical functions for reshaping and combining matrices.

mat <- matrix(1:12, nrow = 3)

# Grow the matrix by binding on an extra row or an extra column
new_row <- matrix(13:16, nrow = 1)
new_col <- matrix(13:15, ncol = 1)
mat_extended <- rbind(mat, new_row)
mat_extended2 <- cbind(mat, new_col)

# apply() runs a function over one margin: 1 = rows, 2 = columns
row_sums <- apply(mat, MARGIN = 1, FUN = sum)    # Sum of each row
col_means <- apply(mat, MARGIN = 2, FUN = mean)  # Mean of each column
col_sds <- apply(mat, MARGIN = 2, FUN = sd)      # SD of each column

# Faster dedicated helpers for the common sum/mean cases
rowSums(mat)
colSums(mat)
rowMeans(mat)
colMeans(mat)

# Working with the diagonal
diag(mat)                # Read the diagonal
diag(mat) <- rep(0, 3)   # Overwrite the diagonal

# Reshape in place by assigning new dimensions
dim(mat) <- c(4, 3)  # Now 4x3

Practical Example: Linear Regression

Implementing linear regression with matrix operations demonstrates a practical application of these techniques.

# Generate sample data
set.seed(123)
n <- 100
X <- cbind(1, rnorm(n), rnorm(n))  # Design matrix with intercept
true_beta <- c(2, 1.5, -0.8)
y <- X %*% true_beta + rnorm(n, sd = 0.5)

# Calculate coefficients from the normal equations: (X'X) beta = X'y
# crossprod() forms X'X and X'y without building t(X) first, and
# solve(XtX, Xty) solves the system directly instead of computing the
# inverse and then multiplying, which is cheaper and numerically safer.
XtX <- crossprod(X)
Xty <- crossprod(X, y)
beta_hat <- solve(XtX, Xty)
print(beta_hat)

# Predictions
y_pred <- X %*% beta_hat

# Residual sum of squares
# (stored as resid_vec so stats::residuals() is not masked)
resid_vec <- y - y_pred
RSS <- sum(resid_vec^2)

# R-squared
TSS <- sum((y - mean(y))^2)
R_squared <- 1 - (RSS / TSS)
print(paste("R-squared:", round(R_squared, 4)))

# Standard errors
# The variance-covariance matrix needs (X'X)^(-1) itself, so the explicit
# inverse is appropriate here; n - ncol(X) is the residual degrees of freedom.
residual_var <- RSS / (n - ncol(X))
var_covar_matrix <- residual_var * solve(XtX)
std_errors <- sqrt(diag(var_covar_matrix))
print(std_errors)

Performance Considerations

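Dense matrix operations in R are delegated to BLAS and LAPACK libraries, and linking R against an optimized BLAS implementation can speed them up further. Understanding these performance characteristics helps you write efficient code.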

# Preallocate matrices when possible
n <- 1000
# Bad: Growing matrix in loop (each rbind() builds a new, larger matrix)
system.time({
  result <- matrix(nrow = 0, ncol = 3)
  for (i in seq_len(n)) {
    result <- rbind(result, c(i, i^2, i^3))
  }
})

# Good: Preallocate
# NA_real_ makes the matrix numeric from the start; matrix(nrow, ncol) alone
# would create a logical matrix that is converted on the first assignment.
# seq_len(n) is used instead of 1:n so the loop is skipped when n is 0.
system.time({
  result <- matrix(NA_real_, nrow = n, ncol = 3)
  for (i in seq_len(n)) {
    result[i, ] <- c(i, i^2, i^3)
  }
})

# Best: Vectorized operations
system.time({
  indices <- seq_len(n)
  result <- cbind(indices, indices^2, indices^3)
})

# Use crossprod instead of t(X) %*% Y
X <- matrix(rnorm(10000), nrow = 1000)
Y <- matrix(rnorm(10000), nrow = 1000)

system.time(result1 <- t(X) %*% Y)
system.time(result2 <- crossprod(X, Y))

# Both forms compute the same product (up to floating-point tolerance)
stopifnot(isTRUE(all.equal(result1, result2)))

Matrix operations form the foundation of numerical computing in R. Master these operations to implement efficient statistical algorithms, machine learning models, and data transformations. The key is understanding when to use vectorized operations, leveraging optimized functions, and choosing appropriate matrix decompositions for your specific computational needs.