GenomicDataStream
A scalable interface between data and analysis
Loading...
Searching...
No Matches
dashSVD.h
Go to the documentation of this file.
1
2// /************************************************************
3// * @file dashSVD.h
4// * @author Gabriel Hoffman
5// * @email gabriel.hoffman@mssm.edu
6// * @brief dashSVD One algorithm
7// * Copyright (C) 2026 Gabriel Hoffman
8// *************************************************/
9
10
11// #include "GenomicDataStream.h"
12
13// using namespace Rcpp;
14// using namespace arma;
15
16// struct SVDResult {
17// mat U;
18// vec S;
19// mat V;
20// };
21
22
23// struct PCA {
24
25// PCA( mat u):
26// u(u) {}
27
28// PCA( const mat &u, const mat &v, const vec &d, const vector<string> featureIds):
29// u(u), v(v), d(d), featureIds(featureIds) {}
30
31// mat u, v;
32// vec d;
33// const vector<string> featureIds;
34// };
35
36
37// SVDResult eigSVD_internal(const mat& A, double tol = 1e-10) {
38// mat C = A.t() * A;
39
40// vec eigval;
41// mat V;
42
43// eig_sym(eigval, V, C);
44
45// eigval = reverse(eigval);
46// V = fliplr(V);
47
48// vec sigma = sqrt(clamp(eigval, 0.0, datum::inf));
49
50// vec sigma_inv = zeros<vec>(sigma.n_elem);
51// uvec keep = find(sigma > tol);
52// sigma_inv.elem(keep) = 1.0 / sigma.elem(keep);
53
54// mat U = A * V;
55// U.each_row() %= sigma_inv.t();
56
57// return {U, sigma, V};
58// }
59
60
61// inline mat mat_prod_AtAQ(const mat& A, const mat& Q) {
62// return A.t() * (A * Q);
63// }
64
65// inline mat crossprod(const mat& A, const mat& Q) {
66// return A.t() * Q;
67// }
68
69
70// // [[Rcpp::export]]
71// List dashSVD(
72// const mat& A,
73// int k,
74// int p = 3,
75// int s = 20,
76// int rand = 1,
77// bool verbose = false,
78// double tol = 1e-10
79// ) {
80// const uword M = A.n_rows;
81// const uword N = A.n_cols;
82// const uword L = static_cast<uword>(k + s);
83
84// if (k <= 0) {
85// stop("k must be positive");
86// }
87
88// if (L > N) {
89// stop("k + s must be <= ncol(A)");
90// }
91
92// mat Omg;
93
94// if (rand == 1) {
95// Omg = randn<mat>(M, L);
96// } else {
97// Omg = randu<mat>(M, L);
98// }
99
100// mat Q = crossprod(A.t(), Omg);
101
102// SVDResult e = eigSVD_internal(Q, tol);
103// Q = e.U;
104
105// double alpha = 0.0;
106
107// for (int i = 1; i <= p; ++i) {
108// if (verbose) {
109// Rcpp::Rcout << "power iteration " << i
110// << ", alpha=" << alpha << std::endl;
111// }
112
113// mat B = mat_prod_AtAQ(A, Q) - alpha * Q;
114
115// e = eigSVD_internal(B, tol);
116// Q = e.U;
117
118// if (e.S(L - 1) > alpha) {
119// alpha = (alpha + e.S(L - 1)) / 2.0;
120// }
121// }
122
123// e = eigSVD_internal(A * Q, tol);
124
125// mat U = e.U;
126// vec S = e.S;
127// mat V = Q * e.V;
128
129// const uword kk = static_cast<uword>(k);
130
131// return PCA(U.cols(0, kk - 1),
132// V.cols(0, kk - 1),
133// S.head(kk))
134// }
135
136
137
138