Skip to contents

Fit regression model y ~ design + X_features[,j] for each feature j

Usage

# S4 method for class 'ANY,ANY,GenomicDataStream'
lmFitFeatures(
  y,
  design,
  data,
  weights,
  detail = 1,
  preprojection = TRUE,
  lambda = 0,
  nthreads = 1,
  verbose = TRUE,
  ...
)

Arguments

y

response vector

design

design matrix, mat or sp_mat

data

matrix or GenomicDataStream with additional features to be fit one at a time

weights

sample-level weights

detail

return model with specified level of detail. LOW (beta, se, sigSq, rdf), MEDIUM (vcov), HIGH (residuals), MOST (hatvalues)

preprojection

Default TRUE. Use preprojection of the design matrix to accelerate calculations

lambda

ridge shrinkage parameter

nthreads

number of threads. Each model is fit serially; the analysis is parallelized across features

verbose

show progress

...

other args

Value

List of parameter estimates with entries coef, se, dispersion, rdf, and others depending on the value of detail

Examples

library(GenomicDataStream)

# create response, design and weights
y <- rnorm(60)
names(y) = paste0("I", seq(60))
info = data.frame(Age = rpois(60, 40))
rownames(info) = names(y)

design <- model.matrix(~ Age, info)
w <- rep(1, 60)

# VCF file
file <- system.file("extdata", "test.vcf.gz", package = "GenomicDataStream")

# Read data into R
# then run lmFitFeatures()
gds <- GenomicDataStream(file, "DS", initialize = TRUE)
dat <- getNextChunk(gds)

res1 <- lmFitFeatures(y, design, dat$X, w)

res1
#> 		 lmFitFeatures 
#> 
#> coefs(1): x
#> features(10): 1:10000:C:A, 1:11000:T:C, ..., 1:18000:C:G, 1:19000:T:G
#> family: gaussian/identity 
#> Estimated: se, dispersion, rdf 
#> 

# Data stays at C++ level
# then run lmFitFeatures()
gds <- GenomicDataStream(file, "DS")

res2 <- lmFitFeatures(y, design, gds, w)
#> preprojection: 1

res2
#> 		 lmFitFeatures 
#> 
#> coefs(1): x
#> features(10): 1:10000:C:A, 1:11000:T:C, ..., 1:18000:C:G, 1:19000:T:G
#> family: gaussian/identity 
#> Estimated: se, dispersion, rdf 
#>