Skip to contents

impute_matrix() imputes the missing (NA) values in a given matrix according to a given imputation_scheme.

Usage

impute_matrix(D, imputation_scheme)

Arguments

D

The input data matrix.

imputation_scheme

The values with which to replace the missing (NA) values in D. Can be one of:

  • A scalar numeric, indicating all NA values should be imputed with the same scalar numeric value;

  • A vector of length ncol(D), signifying column-specific imputation, where the j-th entry of the imputation_scheme vector is the imputation value for the j-th column of D; or

  • A matrix of dimension dim(D), indicating an observation-specific imputation scheme, where each entry of the imputation_scheme matrix is the imputation value for the corresponding entry of D.

Value

The imputed matrix.

Examples

#### ------------Imputation with a scalar------------####
# simulate a small 5x5 mixture
D <- sim_data(5, 5)$D
# corrupt the mixture with 40% missing observations
D_tilde <- sim_na(D, 0.4)$D_tilde
D_tilde
#>           [,1]     [,2]     [,3]      [,4]     [,5]
#> [1,]        NA 1.123878 1.052029 0.5437246       NA
#> [2,] 1.6716886       NA 1.526285        NA 1.739935
#> [3,] 0.3776991 1.047751 1.969528        NA 1.058204
#> [4,]        NA 1.054244 1.029697 1.4755161 1.138653
#> [5,]        NA       NA       NA 0.6210926       NA
# impute missing values with 0
impute_matrix(D_tilde, 0)
#>           [,1]     [,2]     [,3]      [,4]     [,5]
#> [1,] 0.0000000 1.123878 1.052029 0.5437246 0.000000
#> [2,] 1.6716886 0.000000 1.526285 0.0000000 1.739935
#> [3,] 0.3776991 1.047751 1.969528 0.0000000 1.058204
#> [4,] 0.0000000 1.054244 1.029697 1.4755161 1.138653
#> [5,] 0.0000000 0.000000 0.000000 0.6210926 0.000000
# impute missing values with -1
impute_matrix(D_tilde, -1)
#>            [,1]      [,2]      [,3]       [,4]      [,5]
#> [1,] -1.0000000  1.123878  1.052029  0.5437246 -1.000000
#> [2,]  1.6716886 -1.000000  1.526285 -1.0000000  1.739935
#> [3,]  0.3776991  1.047751  1.969528 -1.0000000  1.058204
#> [4,] -1.0000000  1.054244  1.029697  1.4755161  1.138653
#> [5,] -1.0000000 -1.000000 -1.000000  0.6210926 -1.000000

#### ------------Imputation with a vector------------####
# impute missing values with the column-mean
impute_matrix(D_tilde, apply(D_tilde, 2, mean, na.rm = TRUE))
#>           [,1]     [,2]     [,3]      [,4]     [,5]
#> [1,] 1.0246939 1.123878 1.052029 0.5437246 1.312264
#> [2,] 1.6716886 1.075291 1.526285 0.8801111 1.739935
#> [3,] 0.3776991 1.047751 1.969528 0.8801111 1.058204
#> [4,] 1.0246939 1.054244 1.029697 1.4755161 1.138653
#> [5,] 1.0246939 1.075291 1.394385 0.6210926 1.312264
# impute missing values with the column-min
impute_matrix(D_tilde, apply(D_tilde, 2, min, na.rm = TRUE))
#>           [,1]     [,2]     [,3]      [,4]     [,5]
#> [1,] 0.3776991 1.123878 1.052029 0.5437246 1.058204
#> [2,] 1.6716886 1.047751 1.526285 0.5437246 1.739935
#> [3,] 0.3776991 1.047751 1.969528 0.5437246 1.058204
#> [4,] 0.3776991 1.054244 1.029697 1.4755161 1.138653
#> [5,] 0.3776991 1.047751 1.029697 0.6210926 1.058204

#### ------------Imputation with a matrix------------####
# impute missing values with random Gaussian noise
noise <- matrix(rnorm(prod(dim(D_tilde))), nrow(D_tilde), ncol(D_tilde))
impute_matrix(D_tilde, noise)
#>             [,1]       [,2]      [,3]      [,4]       [,5]
#> [1,] -0.09465904  1.1238777  1.052029 0.5437246 -1.7631631
#> [2,]  1.67168861 -0.2787888  1.526285 1.2146747  1.7399355
#> [3,]  0.37769915  1.0477507  1.969528 1.8951935  1.0582040
#> [4,]  1.30486965  1.0542439  1.029697 1.4755161  1.1386529
#> [5,]  2.28664539 -0.2842529 -1.781308 0.6210926  0.7048373

#### ------------Imputation with LOD/sqrt(2)------------####
D <- sim_data(5, 5)$D
lod_info <- sim_lod(D, q = 0.2)
D_tilde <- lod_info$D_tilde
D_tilde
#>          [,1]     [,2]     [,3]      [,4]     [,5]
#> [1,] 2.453301 1.123878 1.052029 0.5437246 1.357418
#> [2,] 1.671689 2.418877 1.526285 0.7718546 1.739935
#> [3,]       NA       NA 1.969528        NA       NA
#> [4,] 1.331306 1.054244       NA 1.4755161 1.138653
#> [5,] 1.413282 1.212585 1.191888 0.6210926 2.284681
lod <- lod_info$lod
impute_matrix(D_tilde, lod / sqrt(2))
#>           [,1]      [,2]      [,3]      [,4]     [,5]
#> [1,] 2.4533007 1.1238777 1.0520287 0.5437246 1.357418
#> [2,] 1.6716886 2.4188773 1.5262852 0.7718546 1.739935
#> [3,] 0.8065153 0.7445447 1.9695275 0.3796034 0.793772
#> [4,] 1.3313064 1.0542439 0.7407385 1.4755161 1.138653
#> [5,] 1.4132823 1.2125850 1.1918875 0.6210926 2.284681