Skip to contents

Fit linear model on each feature after applying decorrelation projection to response and predictors.

Usage

lm_each_eclairs(
  formula,
  data,
  X,
  ecl,
  subset,
  weights,
  na.action,
  method = "qr",
  model = TRUE,
  x = FALSE,
  y = FALSE,
  qr = TRUE,
  singular.ok = TRUE,
  contrasts = NULL,
  offset,
  ...
)

Arguments

formula

an object of class 'formula' (or one that can be coerced to that class): a symbolic description of the model to be fitted.

data

a matrix or data.frame containing the variables in the model

X

matrix or data.frame where each column stores a predictor to be evaluated by the regression model one at a time. The \(i^{th}\) model includes X[,i] as a predictor.

ecl

estimate of covariance/correlation matrix from eclairs storing \(U\), \(d_1^2\), \(\lambda\) and \(\nu\)

subset

same as for lm

weights

same as for lm

na.action

same as for lm

method

same as for lm

model

same as for lm

x

same as for lm

y

same as for lm

qr

same as for lm

singular.ok

same as for lm

contrasts

same as for lm

offset

same as for lm

...

other arguments passed to lm()

Value

data.frame with columns beta, se, tstat, pvalue storing results for regression model fit for each feature

Examples

library(Rfast)

n <- 800 # number of samples
p <- 200 # number of features

# create correlation matrix
Sigma <- autocorr.mat(p, .9)

# draw data from correlation matrix Sigma
Y <- rmvnorm(n, rep(0, p), sigma = Sigma * 5.1, seed = 1)

# eclairs decomposition
ecl <- eclairs(Y)

# simulate covariates
data <- data.frame(matrnorm(p, 2, seed = 1))
colnames(data) <- paste0("v", 1:2)

# simulate response
y <- rnorm(p)

# Simulate 1000 features to test
X <- matrnorm(p, 1000, seed = 1)
colnames(X) <- paste0("set_", seq(ncol(X)))

# Use linear model to test each feature stored as columns in X
res <- lm_each_eclairs(y ~ v1 + v2, data, X, ecl)

head(res)
#>                beta           se      tstat      pvalue
#> set_1  2.887431e+13 1.563537e+14  0.1846730 0.853676462
#> set_2 -1.659811e+14 6.478078e+14 -0.2562197 0.798049901
#> set_3 -2.181485e-01 7.886980e-02 -2.7659318 0.006218786
#> set_4 -8.510124e-02 8.776742e-02 -0.9696222 0.333429845
#> set_5  1.186028e-02 7.372858e-02  0.1608641 0.872366104
#> set_6 -6.625690e-02 7.768578e-02 -0.8528833 0.394765035

# Analysis after non-linear transform
#------------------------------------

# Apply function to transform data
f <- function(x) log(x^2 + 0.001)

# evaluate covariance of transformed data
ecl_transform <- cov_transform(ecl, f, 100)

# Use linear model to test each feature stored as columns in X
# in data transformed by f()
res2 <- lm_each_eclairs(f(y) ~ v1 + v2, data, X, ecl_transform)

head(res)
#>                beta           se      tstat      pvalue
#> set_1  2.887431e+13 1.563537e+14  0.1846730 0.853676462
#> set_2 -1.659811e+14 6.478078e+14 -0.2562197 0.798049901
#> set_3 -2.181485e-01 7.886980e-02 -2.7659318 0.006218786
#> set_4 -8.510124e-02 8.776742e-02 -0.9696222 0.333429845
#> set_5  1.186028e-02 7.372858e-02  0.1608641 0.872366104
#> set_6 -6.625690e-02 7.768578e-02 -0.8528833 0.394765035