fastglmm
Massively scalable generalized linear mixed models
Loading...
Searching...
No Matches
misc.h
Go to the documentation of this file.
1#ifndef FASTLMM_MISC_H_
2#define FASTLMM_MISC_H_
3
4// if -D USE_R, use RcppArmadillo library
5#ifdef USE_R
6// [[Rcpp::depends(RcppParallel)]]
7#include <RcppArmadillo.h>
8#else
9#include <armadillo>
10#endif
11
12using namespace arma;
13
14
15// for each column, scale by w
16inline mat scaleEachCol(const mat &X, const vec &w){
17 return X.each_col() % w;
18}
19
20// for each column, scale by w
21inline sp_mat scaleEachCol(const sp_mat &X, const vec &w){
22
23 sp_mat M = sp_mat(X);
24 for(size_t i=0; i<X.n_cols; i++){
25 M.col(i) %= w;
26 }
27 return( M );
28}
29
30// for each row, scale by w
31inline mat scaleEachRow(const mat &X, const vec &w){
32 return w.t() % X.each_row();
33}
34
35// for each row, scale by w
36inline sp_mat scaleEachRow(const sp_mat &X, const vec &w){
37
38 sp_mat M = sp_mat(X);
39 for(size_t i=0; i<X.n_cols; i++){
40 M.col(i) *= w[i];
41 }
42 return( M );
43}
44
45
46// scale by w and s
47template <typename T>
48T scaleRowsCols(const T &X, const vec &w1, const vec &w2){
49
50 T M = T(X);
51 for(size_t i=0; i<X.n_cols; i++){
52 // M.col(i) %= w1 * w2(i);
53 // manually setting order of operations is _much_ faster
54 // since this reduces the number of multiplications
55 M.col(i) *= w2(i);
56 M.col(i) %= w1;
57 }
58 return( M );
59}
60
61
62// general case, returns false
63template <class T>
64inline bool isSpMatrix(const T &t) { return false; }
65
66 // but for sp_mat returns true
67template <>
68inline bool isSpMatrix( const sp_mat &t) { return true; }
69
70
71// Given matrix, set entries in rows idx to zero
72static void rows_to_zero( mat &X, const uvec &idx ){
73 X.rows(idx).zeros();
74}
75
76static void rows_to_zero( sp_mat &X, const uvec &idx ){
77
78 for(auto j: idx ){
79 for (auto it = X.begin_row(j); it != X.end_row(j); ++it) {
80 *it = 0.0;
81 }
82 }
83 X.clean(0.0);
84}
85
86
87// Return indices (0-based) of rows that contain at least one NaN
88static uvec rows_with_nan(const mat& M) {
89 std::vector<uword> nan_rows;
90
91 for (uword i = 0; i < M.n_rows; ++i) {
92 bool has_nan = false;
93
94 // iterate over elements in row i
95 for (mat::const_row_iterator it = M.begin_row(i);
96 it != M.end_row(i); ++it) {
97 if (std::isnan(*it)) {
98 has_nan = true;
99 break; // no need to check rest of row
100 }
101 }
102
103 if (has_nan){
104 nan_rows.push_back(i);
105 }
106 }
107
108 return conv_to<uvec>::from(nan_rows);
109}
110
111
112// Return indices (0-based) of rows in sp_mat containing NaN
113static uvec rows_with_nan(const sp_mat& M) {
114
115 std::vector<uword> nan_rows;
116
117 for (uword i = 0; i < M.n_rows; ++i) {
118 bool has_nan = false;
119
120 // iterate over stored elements in row i
121 for (sp_mat::const_row_iterator it = M.begin_row(i);
122 it != M.end_row(i); ++it){
123 if (std::isnan(*it)) {
124 has_nan = true;
125 break; // stop once a NaN is found
126 }
127 }
128
129 if (has_nan)
130 nan_rows.push_back(i);
131 }
132
133 return conv_to<uvec>::from(nan_rows);
134}
135
136
137// template <typename T>
138// const T& min(const T& a, const T& b) {
139// return (a < b) ? a : b;
140// }
141
142// template <typename T>
143// const T& max(const T& a, const T& b) {
144// return (a > b) ? a : b;
145// }
146
147static vec pmax( const vec &v, const double &value){
148
149 vec tmp(v);
150 for(int i=0; i<tmp.n_elem; i++){
151 tmp[i] = std::max(tmp[i], value);
152 }
153
154 return tmp;
155}
156
157static vec pmin( const vec &v, const double &value){
158
159 vec tmp(v);
160 for(int i=0; i<tmp.n_elem; i++){
161 tmp[i] = std::min(tmp[i], value);
162 }
163
164 return tmp;
165}
166
167#include "qnorm.h"
168static vec qnorm( const vec & v, const double &mean=0, const double &sd=1 ){
169 vec tmp(v);
170 for(int i=0; i<tmp.n_elem; i++){
171 // R implementation of qnorm
172 // tmp[i] = R::qnorm(tmp[i], mean, sd, 1, 0);
173
174 // C++ without R dependency
175 tmp[i] = qnorm_as241(v[i], mean, sd, 1, 0);
176 }
177
178 return tmp;
179}
180
181static vec y_log_y(const vec & y, const vec & mu){
182 // (y) ? (y * log(y/mu)) : 0;
183
184 // initialize to zeros
185 vec ret(y.n_elem, fill::zeros);
186
187 for(int i=0; i<y.n_elem; i++){
188 if( y[i] != 0.0){
189 ret[i] = y[i] * log(y[i]/mu[i]);
190 }
191 }
192
193 return ret;
194}
195
196
197static double wvar(const vec & x, const vec & w) {
198
199 if (x.n_elem != w.n_elem) {
200 throw invalid_argument("Data and weights vectors must have the same number of elements.");
201 }
202
203 if (x.is_empty()) {
204 return 0.0;
205 }
206
207 double sum_weights = sum(w);
208
209 if (sum_weights == 0) {
210 throw runtime_error("Sum of weights cannot be zero.");
211 }
212
213 // Calculate weighted mean
214 double weighted_mean = sum(x % w) / sum_weights;
215
216 // Calculate weighted variance
217 vec diff_sq = square(x - weighted_mean);
218 double numerator = sum(w % diff_sq);
219
220 double denominator = sum_weights - (sum(square(w)) / sum_weights);
221
222 if (denominator == 0) {
223 return 0.0;
224 }
225
226 return numerator / denominator;
227}
228
229
230// Adapted from tbb::blocked_range
231// Designed to be used when tbb is not available
232template<typename T>
234 public:
235 // constructors
237 begin_value(begin), end_value(end) {}
238
239 // get range limits
240 T begin(){ return begin_value; }
241 T end(){ return end_value; }
242 private:
243 T begin_value, end_value;
244};
245
246
247// Set OpenMP threads to 1 and disable nested parallelism
248static void disable_parallel_blas(){
249 #ifdef _OPENMP
250 #include <omp.h>
251 // set threads
252 omp_set_num_threads(1);
253 // disable nested parallelism
254 omp_set_max_active_levels(0);
255 #endif
256}
257
258// For NAN entries in Y, set matching entry in W to zero
259static void match_NAN_zeros( const mat &Y, mat &W){
260 for (arma::uword r = 0; r < Y.n_rows; ++r) {
261 for (arma::uword c = 0; c < Y.n_cols; ++c) {
262 if (std::isnan(Y(r, c))) {
263 W(r, c) = 0.0;
264 }
265 }
266 }
267}
268
269static int count_nan( const vec &v){
270 int nan_count = 0;
271 for (arma::uword i = 0; i < v.n_elem; ++i) {
272 if (std::isnan(v(i))) {
273 nan_count++;
274 }
275 }
276 return nan_count;
277}
278
279
280static uvec countResponseFilter( const mat &Y){
281
282 // # filter genes by expression
283 // keep1 <- rowSums2(countMatrix > 0) > max(2, 0.1*ncol(countMatrix))
284 // keep2 <- rowSums2(countMatrix > 1) > max(2, 0.01*ncol(countMatrix))
285 // keep <- keep1 & keep2
286
287 int nr = Y.n_rows;
288
289 uvec keep1 = find(sum(Y > 0, 0) > max(2, (int) 0.1*nr));
290 uvec keep2 = find(sum(Y > 1, 0) > max(2, (int) 0.01*nr));
291
292 return intersect(keep1, keep2);
293}
294
297template <typename T>
298static vector<T> subset( const vector<T> &v, const uvec &idx){
299
300 vector<T> res;
301 res.reserve(idx.n_elem);
302
303 for (uword i = 0; i < idx.n_elem; ++i) {
304 res.push_back( v[idx[i]] );
305 }
306
307 return res;
308}
309
312static double robust_mean( const vec &x, const double &z_cutoff){
313
314 // keep only finite values
315 vec x1 = x.elem(find_finite(x));
316
317 // if no elements retained
318 if( x1.n_elem == 0){
319 return datum::nan;
320 }
321
322 // compute z-score
323 // add 1e-15 to avoid issue with sd is zero
324 vec z = (x1 - mean(x1)) / (stddev(x1) + 1e-15);
325
326 // find indeces where abs z-score is less than cutoff
327 uvec idx = find(abs(z) < z_cutoff);
328
329 // mean of retained values
330 return mean(x1.elem(idx));
331
332}
333
334
335
336
337
338#endif
blocked_range(T begin, T end)
Definition misc.h:236
T end()
Definition misc.h:241
T begin()
Definition misc.h:240
mat scaleEachCol(const mat &X, const vec &w)
Definition misc.h:16
mat scaleEachRow(const mat &X, const vec &w)
Definition misc.h:31
bool isSpMatrix(const T &t)
Definition misc.h:64
T scaleRowsCols(const T &X, const vec &w1, const vec &w2)
Definition misc.h:48
double qnorm_as241(double p, double mean=0.0, double sd=1.0, bool lower_tail=true, bool log_p=false)
Definition qnorm.h:9