10#ifndef LINEAR_REGRESSION_H_
11#define LINEAR_REGRESSION_H_
20#include <RcppArmadillo.h>
21#include <RcppParallel.h>
58 const arma::colvec& y,
60 const double &lambda = 0,
61 const double &rdf_offset = 0,
62 LMWork *work =
nullptr,
63 const bool estimateDispersion =
true,
64 const bool scaleByDispersion =
true) {
66 int n = X.n_rows, k = X.n_cols;
69 bool alloc_local =
false;
70 if( work ==
nullptr ){
77 bool success0 = qr_econ(work->Q, work->R, X);
80 double minDiagR = abs(diagvec(work->R)).min();
82 if( success0 && (minDiagR < tol)){
87 bool success1 =
false;
90 mat A = work->R.t() * work->R;
96 success1 = solve(beta, work->R, trans(work->Q) * y, solve_opts::no_approx);
101 for( uword i=1; i<A.n_rows; ++i){
105 success1 = solve(beta, A, work->R.t() * work->Q.t() * y, arma::solve_opts::fast + arma::solve_opts::likely_sympd);
111 if( ! success0 || ! success1){
112 beta = vec(work->R.n_cols);
113 beta.fill(datum::nan);
118 double rdf = n - k - rdf_offset;
119 double dispersion = 1.0;
120 bool success2 =
true;
126 success2 = inv_sympd(work->V, A);
132 work->residuals = y - X*beta;
135 if( estimateDispersion ){
137 dispersion = dot(work->residuals, work->residuals) / rdf;
143 if( success0 && success1 && success2 ){
144 stderr = sqrt(dispersion * diagvec(work->V));
146 stderr = vec(k, fill::value(datum::nan));
147 beta.fill(datum::nan);
151 bool success = success0 && success1 && success2;
161 fit =
ModelFit( success, beta, stderr, dispersion, rdf);
165 fit =
ModelFit( success, beta, stderr, dispersion, rdf, work->V * dispersion);
169 fit =
ModelFit( success, beta, stderr, dispersion, rdf, work->V * dispersion, work->residuals);
174 vec hatvalues = diagvec(work->Q * trans(work->Q));
175 fit =
ModelFit( success, beta, stderr, dispersion, rdf, work->V * dispersion, work->residuals, hatvalues);
176 fit.setFittedValues( X*beta );
181 if( alloc_local)
delete work;
195static tuple<vec, T> preprojection(
199 const vec &weights = {}){
207 if( ! weights.is_empty() ){
210 w = vec(y.n_elem).ones();
212 arma::colvec wsqrt = sqrt(w / mean(w));
215 mat X_design_wsqrt = X_design.each_col() % wsqrt;
216 vec y_wsqrt = y % wsqrt;
221 qr_econ(Q, R, X_design_wsqrt);
224 vec beta = solve(R, trans(Q) * y_wsqrt);
225 vec y_proj = y_wsqrt - X_design_wsqrt * beta;
229 mat gamma = solve(R, trans(Q) * X_features_wsqrt);
233 if( is_same_v<
decltype(X_features_wsqrt),
decltype(X_design_wsqrt)> ){
234 X_proj = X_features_wsqrt - X_design_wsqrt * gamma;
236 X_proj = X_features_wsqrt - T(X_design_wsqrt * gamma);
239 return {y_proj, X_proj};
251static tuple<vec, T> preprojection(
253 const sp_mat &X_design,
255 const vec &weights = {}){
263 if( ! weights.is_empty() ){
266 w = vec(y.n_elem).ones();
268 arma::colvec wsqrt = sqrt(w / mean(w));
272 vec y_wsqrt = y % wsqrt;
282 sp_mat V = trans(X_design_wsqrt) * X_design_wsqrt;
283 success = spsolve(beta, V, vec(trans(X_design_wsqrt) * y_wsqrt),
"lapack");
284 if( ! success ) beta.fill(datum::nan);
285 vec y_proj = y_wsqrt - X_design_wsqrt * beta;
287 success = spsolve(gamma, V, mat(trans(X_design_wsqrt) * X_features_wsqrt),
"lapack");
288 if( ! success ) gamma.fill(datum::nan);
292 if( is_same_v<
decltype(X_features_wsqrt),
decltype(X_design_wsqrt)> ){
293 X_proj = X_features_wsqrt - X_design_wsqrt * gamma;
295 X_proj = X_features_wsqrt - T(X_design_wsqrt * gamma);
298 return {y_proj, X_proj};
316 const arma::colvec& y,
317 const arma::colvec& w = {},
319 const double &lambda = 0,
320 const double &rdf_offset = 0,
326 fit = lm( X, y, md, lambda, rdf_offset, work);
328 arma::colvec wsqrt = sqrt(w / mean(w));
329 fit = lm( X.each_col() % wsqrt, y % wsqrt, md, lambda, rdf_offset, work);
334 fit.residuals /= wsqrt;
352static vector<ModelFit> lmFitFeatures_standard(
354 const arma::mat &X_design,
355 const arma::mat &X_features,
356 const vector<string> &ids,
357 const arma::vec &weights = {},
359 const double &lambda = 0,
360 const int &nthreads = 1){
362 int n_covs = X_design.n_cols;
364 if( X_features.n_cols == 0){
365 throw invalid_argument(
"X_features has 0 columns");
368 vector<ModelFit> fitList(X_features.n_cols,
ModelFit());
371 tbb::task_arena limited_arena(nthreads);
372 limited_arena.execute([&] {
374 tbb::blocked_range<int>(0, X_features.n_cols, 100),
375 [&](
const tbb::blocked_range<int>& r){
378 arma::mat X(X_design);
379 X.insert_cols(n_covs, X_features.col(0));
381 LMWork *work = new LMWork();
384 for (int j = r.begin(); j != r.end(); ++j) {
387 X.col(n_covs) = X_features.col(j);
390 ModelFit fit = wlm(X, y, weights, md, lambda, 0, work);
415template <
typename T1,
typename T2>
419 const T2 &X_features,
420 const vector<string> &ids,
421 const arma::vec &weights = {},
423 const double &lambda = 0,
424 const int &nthreads = 1){
426 if( X_features.n_cols == 0){
427 throw invalid_argument(
"X_features has 0 columns");
437 tie(y_proj, X_proj) = preprojection(y, X_design, X_features, weights);
438 double rdf_offset = X_design.n_cols;
440 vec wsqrt = sqrt(weights);
443 tbb::task_arena limited_arena(nthreads);
444 limited_arena.execute([&] {
446 tbb::blocked_range<int>(0, X_proj.n_cols, 100),
447 [&](
const tbb::blocked_range<int>& r){
449 disable_parallel_blas();
451 LMWork *work = new LMWork();
453 for (int j = r.begin(); j != r.end(); ++j) {
456 ModelFit fit = lm(X_proj.col(j), y_proj, md, lambda, rdf_offset, work );
463 fit.residuals /= wsqrt;
487template <
typename T1,
typename T2>
491 const T2 &X_features,
492 const vector<string> &ids,
493 const arma::vec &weights = {},
495 const double &lambda = 0,
496 const bool &preprojection =
true,
497 const int &nthreads = 1){
503 fitList = lmFitFeatures_preproj(y, X_design, X_features, ids, weights, md, lambda, nthreads);
506 fitList = lmFitFeatures_standard(y, mat(X_design), mat(X_features), ids, weights, md, lambda, nthreads);
531 const vector<string> &ids,
532 const arma::mat &Weights,
533 const ModelDetail md = LOW,
534 const double &lambda = 0,
535 const int &nthreads = 1){
539 CleanData data(Y, X, Weights);
541 mat X_clean = data.get_X();
542 mat Wsqrt = sqrt(data.get_W());
543 mat Yw = data.get_Y() % Wsqrt;
546 tbb::task_arena limited_arena(nthreads);
547 limited_arena.execute([&] {
549 tbb::blocked_range<int>(0, Y.n_cols, 10),
550 [&](
const tbb::blocked_range<int>& r){
552 disable_parallel_blas();
554 for (int j = r.begin(); j != r.end(); ++j) {
558 int rdf_offset = accu(Wsqrt.col(j) == 0.0);
562 ModelFit fit = lm(X_clean.each_col() % Wsqrt.col(j),
563 Yw.col(j), md, lambda, rdf_offset);
570 fit.residuals /= Wsqrt.col(j);
571 fit.mu /= Wsqrt.col(j);
mat scaleEachCol(const mat &X, const vec &w)
Definition misc.h:16
Definition CleanData.h:17
ModelDetail
Definition ModelFit.h:26
@ MOST
Definition ModelFit.h:31
@ MEDIUM
Definition ModelFit.h:29
@ LEAST
Definition ModelFit.h:27
@ HIGH
Definition ModelFit.h:30
@ MAX
Definition ModelFit.h:32
@ LOW
Definition ModelFit.h:28
vector< ModelFit > ModelFitList
Definition ModelFit.h:393
Definition linearRegression.h:37
mat R
Definition linearRegression.h:38
vec residuals
Definition linearRegression.h:39
mat V
Definition linearRegression.h:38
LMWork()
Definition linearRegression.h:40
mat Q
Definition linearRegression.h:38