fastglmm
Massively scalable generalized linear mixed models
Loading...
Searching...
No Matches
misc.h
Go to the documentation of this file.
1#ifndef FASTLMM_MISC_H_
2#define FASTLMM_MISC_H_
3#pragma once
4
5// if -D USE_R, use RcppArmadillo library
6#ifdef USE_R
7// [[Rcpp::depends(RcppParallel)]]
8#include <RcppArmadillo.h>
9#else
10#include <armadillo>
11#endif
12
13using namespace arma;
14
15
16// for each column, scale by w
17inline mat scaleEachCol(const mat &X, const vec &w){
18 return X.each_col() % w;
19}
20
21// for each column, scale by w
22inline sp_mat scaleEachCol(const sp_mat &X, const vec &w){
23
24 sp_mat M = sp_mat(X);
25 for(size_t i=0; i<X.n_cols; i++){
26 M.col(i) %= w;
27 }
28 return( M );
29}
30
31// for each row, scale by w
32inline mat scaleEachRow(const mat &X, const vec &w){
33 return w.t() % X.each_row();
34}
35
36// for each row, scale by w
37inline sp_mat scaleEachRow(const sp_mat &X, const vec &w){
38
39 sp_mat M = sp_mat(X);
40 for(size_t i=0; i<X.n_cols; i++){
41 M.col(i) *= w[i];
42 }
43 return( M );
44}
45
46
47// scale by w and s
48template <typename T>
49T scaleRowsCols(const T &X, const vec &w1, const vec &w2){
50
51 T M = T(X);
52 for(size_t i=0; i<X.n_cols; i++){
53 // M.col(i) %= w1 * w2(i);
54 // manually setting order of operations is _much_ faster
55 // since this reduces the number of multiplications
56 M.col(i) *= w2(i);
57 M.col(i) %= w1;
58 }
59 return( M );
60}
61
62
63// general case, returns false
64template <class T>
65inline bool isSpMatrix(const T &t) { return false; }
66
67 // but for sp_mat returns true
68template <>
69inline bool isSpMatrix( const sp_mat &t) { return true; }
70
71
72// Given matrix, set entries in rows idx to zero
73static void rows_to_zero( mat &X, const uvec &idx ){
74 X.rows(idx).zeros();
75}
76
77static void rows_to_zero( sp_mat &X, const uvec &idx ){
78
79 for(auto j: idx ){
80 for (auto it = X.begin_row(j); it != X.end_row(j); ++it) {
81 *it = 0.0;
82 }
83 }
84 X.clean(0.0);
85}
86
87
88// Return indices (0-based) of rows that contain at least one NaN
89static uvec rows_with_nan(const mat& M) {
90 std::vector<uword> nan_rows;
91
92 for (uword i = 0; i < M.n_rows; ++i) {
93 bool has_nan = false;
94
95 // iterate over elements in row i
96 for (mat::const_row_iterator it = M.begin_row(i);
97 it != M.end_row(i); ++it) {
98 if (std::isnan(*it)) {
99 has_nan = true;
100 break; // no need to check rest of row
101 }
102 }
103
104 if (has_nan){
105 nan_rows.push_back(i);
106 }
107 }
108
109 return conv_to<uvec>::from(nan_rows);
110}
111
112
113// Return indices (0-based) of rows in sp_mat containing NaN
114static uvec rows_with_nan(const sp_mat& M) {
115
116 std::vector<uword> nan_rows;
117
118 for (uword i = 0; i < M.n_rows; ++i) {
119 bool has_nan = false;
120
121 // iterate over stored elements in row i
122 for (sp_mat::const_row_iterator it = M.begin_row(i);
123 it != M.end_row(i); ++it){
124 if (std::isnan(*it)) {
125 has_nan = true;
126 break; // stop once a NaN is found
127 }
128 }
129
130 if (has_nan)
131 nan_rows.push_back(i);
132 }
133
134 return conv_to<uvec>::from(nan_rows);
135}
136
137
138// template <typename T>
139// const T& min(const T& a, const T& b) {
140// return (a < b) ? a : b;
141// }
142
143// template <typename T>
144// const T& max(const T& a, const T& b) {
145// return (a > b) ? a : b;
146// }
147
148static vec pmax( const vec &v, const double &value){
149
150 vec tmp(v);
151 for(int i=0; i<tmp.n_elem; i++){
152 tmp[i] = std::max(tmp[i], value);
153 }
154
155 return tmp;
156}
157
158static vec pmin( const vec &v, const double &value){
159
160 vec tmp(v);
161 for(int i=0; i<tmp.n_elem; i++){
162 tmp[i] = std::min(tmp[i], value);
163 }
164
165 return tmp;
166}
167
168#include "qnorm.h"
169static vec qnorm( const vec & v, const double &mean=0, const double &sd=1 ){
170 vec tmp(v);
171 for(int i=0; i<tmp.n_elem; i++){
172 // R implementation of qnorm
173 // tmp[i] = R::qnorm(tmp[i], mean, sd, 1, 0);
174
175 // C++ without R dependency
176 tmp[i] = qnorm_as241(v[i], mean, sd, 1, 0);
177 }
178
179 return tmp;
180}
181
182static vec y_log_y(const vec & y, const vec & mu){
183 // (y) ? (y * log(y/mu)) : 0;
184
185 // initialize to zeros
186 vec ret(y.n_elem, fill::zeros);
187
188 for(int i=0; i<y.n_elem; i++){
189 if( y[i] != 0.0){
190 ret[i] = y[i] * log(y[i]/mu[i]);
191 }
192 }
193
194 return ret;
195}
196
197
198static double wvar(const vec & x, const vec & w) {
199
200 if (x.n_elem != w.n_elem) {
201 throw invalid_argument("Data and weights vectors must have the same number of elements.");
202 }
203
204 if (x.is_empty()) {
205 return 0.0;
206 }
207
208 double sum_weights = sum(w);
209
210 if (sum_weights == 0) {
211 throw runtime_error("Sum of weights cannot be zero.");
212 }
213
214 // Calculate weighted mean
215 double weighted_mean = sum(x % w) / sum_weights;
216
217 // Calculate weighted variance
218 vec diff_sq = square(x - weighted_mean);
219 double numerator = sum(w % diff_sq);
220
221 double denominator = sum_weights - (sum(square(w)) / sum_weights);
222
223 if (denominator == 0) {
224 return 0.0;
225 }
226
227 return numerator / denominator;
228}
229
230
231// Adapted from tbb::blocked_range
232// Designed to be used when tbb is not available
233template<typename T>
235 public:
236 // constructors
238 begin_value(begin), end_value(end) {}
239
240 // get range limits
241 T begin(){ return begin_value; }
242 T end(){ return end_value; }
243 private:
244 T begin_value, end_value;
245};
246
247
248// Set OpenMP threads to 1 and disable nested parallelism
249static void disable_parallel_blas(){
250 #ifdef _OPENMP
251 #include <omp.h>
252 // set threads
253 omp_set_num_threads(1);
254 // disable nested parallelism
255 omp_set_max_active_levels(0);
256 #endif
257}
258
259// For NAN entries in Y, set matching entry in W to zero
260static void match_NAN_zeros( const mat &Y, mat &W){
261 for (arma::uword r = 0; r < Y.n_rows; ++r) {
262 for (arma::uword c = 0; c < Y.n_cols; ++c) {
263 if (std::isnan(Y(r, c))) {
264 W(r, c) = 0.0;
265 }
266 }
267 }
268}
269
270static int count_nan( const vec &v){
271 int nan_count = 0;
272 for (arma::uword i = 0; i < v.n_elem; ++i) {
273 if (std::isnan(v(i))) {
274 nan_count++;
275 }
276 }
277 return nan_count;
278}
279
280
281static uvec countResponseFilter( const mat &Y, int n0, int n1){
282
283 // Rcpp::Rcout << "n0: " << n0 << " n1: " << n1 << std::endl;
284
285 // # filter genes by expression
286 uvec keep1 = find(sum(Y > 0, 0) > max(2, n0));
287 uvec keep2 = find(sum(Y > 1, 0) > max(2, n1));
288
289 return intersect(keep1, keep2);
290}
291
294template <typename T>
295static vector<T> subset( const vector<T> &v, const uvec &idx){
296
297 vector<T> res;
298 res.reserve(idx.n_elem);
299
300 for (uword i = 0; i < idx.n_elem; ++i) {
301 res.push_back( v[idx[i]] );
302 }
303
304 return res;
305}
306
309static double robust_mean( const vec &x, const double &z_cutoff){
310
311 // keep only finite values
312 vec x1 = x.elem(find_finite(x));
313
314 // if no elements retained
315 if( x1.n_elem == 0){
316 return datum::nan;
317 }
318
319 // compute z-score
320 // add 1e-15 to avoid issue with sd is zero
321 vec z = (x1 - mean(x1)) / (stddev(x1) + 1e-15);
322
323 // find indeces where abs z-score is less than cutoff
324 uvec idx = find(abs(z) < z_cutoff);
325
326 // mean of retained values
327 return mean(x1.elem(idx));
328
329}
330
333static bool all_integer_valued(const vec &x, double tol = 1e-12) {
334 return all(abs(x - round(x)) < tol);
335}
336
337
338
339#endif
blocked_range(T begin, T end)
Definition misc.h:237
T end()
Definition misc.h:242
T begin()
Definition misc.h:241
mat scaleEachCol(const mat &X, const vec &w)
Definition misc.h:17
mat scaleEachRow(const mat &X, const vec &w)
Definition misc.h:32
bool isSpMatrix(const T &t)
Definition misc.h:65
T scaleRowsCols(const T &X, const vec &w1, const vec &w2)
Definition misc.h:49
double qnorm_as241(double p, double mean=0.0, double sd=1.0, bool lower_tail=true, bool log_p=false)
Definition qnorm.h:9