fastglmm
Massively scalable generalized linear mixed models
Loading...
Searching...
No Matches
glm_family.h
Go to the documentation of this file.
1/***************************************************************
2 * @file glm_family.h
3 * @author Gabriel Hoffman
4 * @email gabriel.hoffman@mssm.edu
5 * @brief Define GLM link functions
6 * Copyright (C) 2024 Gabriel Hoffman
7 **************************************************************/
8
9
10#ifndef _GLM_FAMILY_H_
11#define _GLM_FAMILY_H_
12
13#include <string>
14#include <regex>
15#include "misc.h"
16
17using namespace arma;
18using namespace std;
19
20
21namespace fastglmmLib {
22
30class GLMFamily {
31 public:
33
34 virtual ~GLMFamily() {};
35 virtual vec link( const vec &mu) const {return vec(1);}
36 virtual vec linkinv( const vec &eta) const {return vec(1);}
37 virtual vec mu_eta( const vec &eta) const {return vec(1);}
38 virtual vec variance( const vec &mu) const {return vec(1);}
39
40 // compute deviance redisuals as in gaussian()$dev.resids
41 // But also need to transform as in residuals.glm():
42 // in ModelFit::setDevResids()
43 virtual vec dev_resids( const vec &y, const vec &mu, const vec &weights) const {return vec(1);}
44 virtual vec initialize( const vec &y, const vec &weights) const {return vec(1);}
45 virtual bool estimateDispersion() const {return true;}
46 virtual string family() const {return "GLMFamily";};
47 virtual bool isCountModel() const {return true;}
48 virtual void setOverdispersion( const double & value){}
49};
50
52 virtual public GLMFamily {
53
54 public:
56
58
59 vec link( const vec &mu) const {
60 return mu;
61 }
62 vec linkinv( const vec &eta) const {
63 return eta;
64 }
65 vec mu_eta( const vec &eta) const {
66 return vec(eta.n_elem, fill::ones);
67 }
68 vec variance( const vec &mu) const {
69 return vec(mu.n_elem, fill::ones);
70 }
71 vec dev_resids( const vec &y, const vec &mu, const vec &weights) const {
72 // wt * ((y - mu)^2)
73 return weights % square(y-mu);
74 }
75 vec initialize( const vec &y, const vec &weights) const {
76 return y;
77 }
78 bool estimateDispersion() const {return true;}
79 string family() const {return "GaussianIdentity";}
80 bool isCountModel() const { return false; }
81};
82
84 virtual public GLMFamily {
85
86 public:
88
90
91 vec link( const vec &mu) const {
92 return log(mu / (1-mu));
93 }
94 vec linkinv( const vec &eta) const {
95 return 1.0 / (1.0 + exp(-1.0*eta));
96 }
97 vec mu_eta( const vec &eta) const {
98 vec v = exp(-1.0*eta);
99 return v / square( 1.0 + v);
100 }
101 vec variance( const vec &mu) const {
102 return mu % (1.0 - mu);
103 }
104 vec dev_resids( const vec &y, const vec &mu, const vec &weights) const {
105
106 // 2 * rwt[i] * (y_log_y(yi, mui) + y_log_y(1 - yi, 1 - mui));
107 return 2.0 * weights % (y_log_y(y, mu) + y_log_y(1.0 - y, 1.0 - mu));
108 }
109 vec initialize( const vec &y, const vec &weights) const {
110 return (weights % y + 0.5)/(weights + 1.0);
111 }
112 bool estimateDispersion() const {return false;}
113 string family() const {return "BinomialLogit";}
114 bool isCountModel() const { return false; }
115};
116
117
119 public BinomialLogit {
120 bool estimateDispersion() const {return true;}
121 string family() const {return "QuasibinomialLogit";}
122};
123
124
126 virtual public GLMFamily {
127
128 public:
130
132
133 vec link( const vec &mu) const {
134 // qnorm(mu)
135 return qnorm(mu);
136 }
137 vec linkinv( const vec &eta) const {
138 // pnorm(eta)
139 // return normcdf( pmin(pmax(eta, -thresh), thresh) );
140 return normcdf( clamp(eta, -thresh, thresh) );
141 }
142 vec mu_eta( const vec &eta) const {
143 // pmax(dnorm(eta), .Machine$double.eps)
144 return pmax(normpdf(eta), tol);
145 }
146 vec variance( const vec &mu) const {
147 return mu % (1.0 - mu);
148 }
149 vec dev_resids( const vec &y, const vec &mu, const vec &weights) const {
150
151 // 2 * rwt[i] * (y_log_y(yi, mui) + y_log_y(1 - yi, 1 - mui));
152 return 2.0 * weights % (y_log_y(y, mu) + y_log_y(1 - y, 1 - mu));
153 }
154 vec initialize( const vec &y, const vec &weights) const {
155 return (weights % y + 0.5)/(weights + 1.0);
156 }
157 bool estimateDispersion() const {return false;}
158 string family() const {return "BinomialProbit";}
159 bool isCountModel() const { return false; }
160
161 private:
162 double tol = 2.220446e-16;
163 // double thresh = -1.0 * R::qnorm( tol, 0.0, 1.0, 1, 0 );
164 double thresh = 8.125891; // -1*qnorm(.Machine$double.eps)
165};
166
167
169 virtual public GLMFamily {
170
171 public:
173
175
176 vec link( const vec &mu) const {
177 return log(mu);
178 }
179 vec linkinv( const vec &eta) const {
180 // pmax(exp(eta), .Machine$double.eps)
181 return pmax(exp(eta), tol);
182 }
183 vec mu_eta( const vec &eta) const {
184 // pmax(exp(eta), .Machine$double.eps)
185 return pmax(exp(eta), tol);
186 }
187 vec variance( const vec &mu) const {
188 return mu;
189 }
190 vec dev_resids( const vec &y, const vec &mu, const vec &weights) const {
191 // r <- mu * wt
192 // p <- which(y > 0)
193 // r[p] <- (wt * (y * log(y/mu) - (y - mu)))[p]
194 // 2 * r
195
196 vec res = mu % weights;
197 uvec idx = find(y > 0.0);
198 vec tmp = (weights % (y % log(y/mu) - (y - mu)));
199 res.elem(idx) = tmp.elem(idx);
200
201 return 2.0 * res;
202 }
203 vec initialize( const vec &y, const vec &weights) const {
204 return y + 0.1;
205 }
206 bool estimateDispersion() const {return false;}
207 string family() const {return "PoissonLog";}
208 bool isCountModel() const { return true; }
209
210 private:
211 double tol = 2.220446e-16;
212};
213
214
216 public PoissonLog {
217 bool estimateDispersion() const {return true;}
218};
219
220class NB :
221 virtual public GLMFamily {
222
223 public:
224 NB() {
225 // if theta is estimated from data,
226 // set QL dispersion phi = 1
227 ql_dispersion = false;
228 }
229
230 NB(const double &theta) :
231 theta(theta) {
232 // if theta is fixed,
233 // estiamted QL dispersion phi
234 ql_dispersion = true;
235 }
236
237 ~NB() {}
238
239 vec link( const vec &mu) const {
240 return log(mu);
241 }
242 vec linkinv( const vec &eta) const {
243 // pmax(exp(eta), .Machine$double.eps)
244 return pmax(exp(eta), tol);
245 }
246 vec mu_eta( const vec &eta) const {
247 // pmax(exp(eta), .Machine$double.eps)
248 return pmax(exp(eta), tol);
249 }
250 vec variance( const vec &mu) const {
251 return mu + square(mu) / theta;
252 }
253 vec dev_resids( const vec &y, const vec &mu, const vec &weights) const {
254 // 2 * wt * (y * log(pmax(1, y)/mu) - (y + .Theta) * log((y + .Theta)/(mu + .Theta)))
255
256 return 2.0 * weights % (y % log(pmax(y, 1.0)/mu) - (y + theta) % log((y + theta)/(mu + theta)));
257 }
258 vec initialize( const vec &y, const vec &weights) const {
259 // y + (y == 0)/6
260 return y + accu(y == 0) / 6.0;
261 }
262 bool estimateDispersion() const {return ql_dispersion;}
263 string family() const {return "NB";}
264 bool isCountModel() const { return true; }
265
266 void setOverdispersion( const double &value){
267 theta = value;
268 }
269
270 double theta = std::numeric_limits<double>::quiet_NaN();
271 bool ql_dispersion = true;
272
273 private:
274 double tol = 2.220446e-16;
275};
276
277
278
282static shared_ptr<GLMFamily> getGLMFamily( const string &family){
283
284 shared_ptr<GLMFamily> fam;
285
286 if( family == "gaussian" || family == "gaussian/identity" ){
287 fam = shared_ptr<GLMFamily>(new GaussianIdentity());
288 }
289 else if( family == "binomial/logit" ){
290 fam = shared_ptr<GLMFamily>(new BinomialLogit());
291 }
292 else if( family == "binomial/probit" ){
293 fam = shared_ptr<GLMFamily>(new BinomialProbit());
294 }
295 else if( family == "poisson/log" ){
296 fam = shared_ptr<GLMFamily>(new PoissonLog());
297 }
298 else if( family == "quasibinomial/logit" ){
299 fam = shared_ptr<GLMFamily>(new QuasibinomialLogit());
300 }
301 else if( family == "quasipoisson/log" ){
302 fam = shared_ptr<GLMFamily>(new QuasipoissonLog());
303 }
304 else if( family == "nb" ){
305 fam = shared_ptr<GLMFamily>(new NB());
306 }
307 else if( regex_search( family, regex("^nb:")) ){
308 string theta_str = regex_replace( family, regex("^nb:"), "");
309 double theta = atof(theta_str.c_str());
310 fam = shared_ptr<GLMFamily>(new NB(theta));
311 }else{
312 throw invalid_argument( "Invalid GLM family: " + family );
313 }
314
315 return fam;
316}
317
320static bool isCountModel(const string &family){
321
322 shared_ptr<GLMFamily> fam = getGLMFamily( family );
323 string famStr = fam->family();
324
325 bool value = false;
326 if( famStr == "PoissonLog" ||
327 famStr == "QuasipoissonLog" ||
328 famStr == "NB"){
329 value = true;
330 }
331
332 return value;
333}
334
335}
336
337#endif
vec linkinv(const vec &eta) const
Definition glm_family.h:94
vec mu_eta(const vec &eta) const
Definition glm_family.h:97
vec variance(const vec &mu) const
Definition glm_family.h:101
vec link(const vec &mu) const
Definition glm_family.h:91
bool isCountModel() const
Definition glm_family.h:114
vec initialize(const vec &y, const vec &weights) const
Definition glm_family.h:109
vec dev_resids(const vec &y, const vec &mu, const vec &weights) const
Definition glm_family.h:104
string family() const
Definition glm_family.h:113
BinomialLogit()
Definition glm_family.h:87
bool estimateDispersion() const
Definition glm_family.h:112
~BinomialLogit()
Definition glm_family.h:89
Definition glm_family.h:126
vec mu_eta(const vec &eta) const
Definition glm_family.h:142
vec linkinv(const vec &eta) const
Definition glm_family.h:137
~BinomialProbit()
Definition glm_family.h:131
vec variance(const vec &mu) const
Definition glm_family.h:146
bool estimateDispersion() const
Definition glm_family.h:157
BinomialProbit()
Definition glm_family.h:129
vec dev_resids(const vec &y, const vec &mu, const vec &weights) const
Definition glm_family.h:149
bool isCountModel() const
Definition glm_family.h:159
vec link(const vec &mu) const
Definition glm_family.h:133
vec initialize(const vec &y, const vec &weights) const
Definition glm_family.h:154
string family() const
Definition glm_family.h:158
virtual vec mu_eta(const vec &eta) const
Definition glm_family.h:37
virtual bool estimateDispersion() const
Definition glm_family.h:45
virtual vec dev_resids(const vec &y, const vec &mu, const vec &weights) const
Definition glm_family.h:43
GLMFamily()
Definition glm_family.h:32
virtual vec initialize(const vec &y, const vec &weights) const
Definition glm_family.h:44
virtual bool isCountModel() const
Definition glm_family.h:47
virtual string family() const
Definition glm_family.h:46
virtual void setOverdispersion(const double &value)
Definition glm_family.h:48
virtual vec linkinv(const vec &eta) const
Definition glm_family.h:36
virtual vec link(const vec &mu) const
Definition glm_family.h:35
virtual vec variance(const vec &mu) const
Definition glm_family.h:38
virtual ~GLMFamily()
Definition glm_family.h:34
Definition glm_family.h:52
~GaussianIdentity()
Definition glm_family.h:57
GaussianIdentity()
Definition glm_family.h:55
vec dev_resids(const vec &y, const vec &mu, const vec &weights) const
Definition glm_family.h:71
bool isCountModel() const
Definition glm_family.h:80
bool estimateDispersion() const
Definition glm_family.h:78
vec mu_eta(const vec &eta) const
Definition glm_family.h:65
vec initialize(const vec &y, const vec &weights) const
Definition glm_family.h:75
vec variance(const vec &mu) const
Definition glm_family.h:68
vec link(const vec &mu) const
Definition glm_family.h:59
vec linkinv(const vec &eta) const
Definition glm_family.h:62
string family() const
Definition glm_family.h:79
Definition glm_family.h:221
NB()
Definition glm_family.h:224
vec initialize(const vec &y, const vec &weights) const
Definition glm_family.h:258
vec variance(const vec &mu) const
Definition glm_family.h:250
NB(const double &theta)
Definition glm_family.h:230
void setOverdispersion(const double &value)
Definition glm_family.h:266
bool ql_dispersion
Definition glm_family.h:271
vec link(const vec &mu) const
Definition glm_family.h:239
string family() const
Definition glm_family.h:263
bool estimateDispersion() const
Definition glm_family.h:262
~NB()
Definition glm_family.h:237
double theta
Definition glm_family.h:270
vec dev_resids(const vec &y, const vec &mu, const vec &weights) const
Definition glm_family.h:253
vec linkinv(const vec &eta) const
Definition glm_family.h:242
bool isCountModel() const
Definition glm_family.h:264
vec mu_eta(const vec &eta) const
Definition glm_family.h:246
Definition glm_family.h:169
bool estimateDispersion() const
Definition glm_family.h:206
vec initialize(const vec &y, const vec &weights) const
Definition glm_family.h:203
vec variance(const vec &mu) const
Definition glm_family.h:187
vec dev_resids(const vec &y, const vec &mu, const vec &weights) const
Definition glm_family.h:190
bool isCountModel() const
Definition glm_family.h:208
vec mu_eta(const vec &eta) const
Definition glm_family.h:183
vec link(const vec &mu) const
Definition glm_family.h:176
PoissonLog()
Definition glm_family.h:172
vec linkinv(const vec &eta) const
Definition glm_family.h:179
string family() const
Definition glm_family.h:207
~PoissonLog()
Definition glm_family.h:174
Definition glm_family.h:119
Definition glm_family.h:216
Definition CleanData.h:17