`impute_matrix()` imputes the missing `NA` values in a given matrix using a given `imputation_scheme`.
Arguments
- D
The input data matrix.
- imputation_scheme
The values to replace missing `NA` values in `D` with. Can be either:
  - A scalar numeric, indicating all `NA` values should be imputed with the same scalar numeric value;
  - A vector of length `ncol(D)`, signifying column-specific imputation, where each entry in the `imputation_scheme` vector corresponds to the imputation value for each column in `D`; or
  - A matrix of dimension `dim(D)`, indicating an observation-specific imputation scheme, where each entry in the `imputation_scheme` matrix corresponds to the imputation value for each entry in `D`.
Examples
#### ------------Imputation with a scalar------------####
# simulate a small 5x5 mixture
D <- sim_data(5, 5)$D
# corrupt the mixture with 40% missing observations
D_tilde <- sim_na(D, 0.4)$D_tilde
D_tilde
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] NA 1.123878 1.052029 0.5437246 NA
#> [2,] 1.6716886 NA 1.526285 NA 1.739935
#> [3,] 0.3776991 1.047751 1.969528 NA 1.058204
#> [4,] NA 1.054244 1.029697 1.4755161 1.138653
#> [5,] NA NA NA 0.6210926 NA
# impute missing values with 0
impute_matrix(D_tilde, 0)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0.0000000 1.123878 1.052029 0.5437246 0.000000
#> [2,] 1.6716886 0.000000 1.526285 0.0000000 1.739935
#> [3,] 0.3776991 1.047751 1.969528 0.0000000 1.058204
#> [4,] 0.0000000 1.054244 1.029697 1.4755161 1.138653
#> [5,] 0.0000000 0.000000 0.000000 0.6210926 0.000000
# impute missing values with -1
impute_matrix(D_tilde, -1)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] -1.0000000 1.123878 1.052029 0.5437246 -1.000000
#> [2,] 1.6716886 -1.000000 1.526285 -1.0000000 1.739935
#> [3,] 0.3776991 1.047751 1.969528 -1.0000000 1.058204
#> [4,] -1.0000000 1.054244 1.029697 1.4755161 1.138653
#> [5,] -1.0000000 -1.000000 -1.000000 0.6210926 -1.000000
#### ------------Imputation with a vector------------####
# impute missing values with the column-mean
impute_matrix(D_tilde, apply(D_tilde, 2, mean, na.rm = TRUE))
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 1.0246939 1.123878 1.052029 0.5437246 1.312264
#> [2,] 1.6716886 1.075291 1.526285 0.8801111 1.739935
#> [3,] 0.3776991 1.047751 1.969528 0.8801111 1.058204
#> [4,] 1.0246939 1.054244 1.029697 1.4755161 1.138653
#> [5,] 1.0246939 1.075291 1.394385 0.6210926 1.312264
# impute missing values with the column-min
impute_matrix(D_tilde, apply(D_tilde, 2, min, na.rm = TRUE))
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 0.3776991 1.123878 1.052029 0.5437246 1.058204
#> [2,] 1.6716886 1.047751 1.526285 0.5437246 1.739935
#> [3,] 0.3776991 1.047751 1.969528 0.5437246 1.058204
#> [4,] 0.3776991 1.054244 1.029697 1.4755161 1.138653
#> [5,] 0.3776991 1.047751 1.029697 0.6210926 1.058204
#### ------------Imputation with a matrix------------####
# impute missing values with random Gaussian noise
noise <- matrix(rnorm(prod(dim(D_tilde))), nrow(D_tilde), ncol(D_tilde))
impute_matrix(D_tilde, noise)
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] -0.09465904 1.1238777 1.052029 0.5437246 -1.7631631
#> [2,] 1.67168861 -0.2787888 1.526285 1.2146747 1.7399355
#> [3,] 0.37769915 1.0477507 1.969528 1.8951935 1.0582040
#> [4,] 1.30486965 1.0542439 1.029697 1.4755161 1.1386529
#> [5,] 2.28664539 -0.2842529 -1.781308 0.6210926 0.7048373
#### ------------Imputation with LOD/sqrt(2)------------####
D <- sim_data(5, 5)$D
lod_info <- sim_lod(D, q = 0.2)
D_tilde <- lod_info$D_tilde
D_tilde
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 2.453301 1.123878 1.052029 0.5437246 1.357418
#> [2,] 1.671689 2.418877 1.526285 0.7718546 1.739935
#> [3,] NA NA 1.969528 NA NA
#> [4,] 1.331306 1.054244 NA 1.4755161 1.138653
#> [5,] 1.413282 1.212585 1.191888 0.6210926 2.284681
lod <- lod_info$lod
impute_matrix(D_tilde, lod / sqrt(2))
#> [,1] [,2] [,3] [,4] [,5]
#> [1,] 2.4533007 1.1238777 1.0520287 0.5437246 1.357418
#> [2,] 1.6716886 2.4188773 1.5262852 0.7718546 1.739935
#> [3,] 0.8065153 0.7445447 1.9695275 0.3796034 0.793772
#> [4,] 1.3313064 1.0542439 0.7407385 1.4755161 1.138653
#> [5,] 1.4132823 1.2125850 1.1918875 0.6210926 2.284681