fastglmm
Massively scalable generalized linear mixed models
Loading...
Searching...
No Matches
nb_theta.h
Go to the documentation of this file.
1// if -D USE_R, use RcppArmadillo library
2#ifdef USE_R
3// [[Rcpp::depends(RcppParallel)]]
4#include <RcppArmadillo.h>
5#else
6#include <armadillo>
7#endif
8
9#include "local_min.h"
10
11#ifndef NB_THETA_ML_H_
12#define NB_THETA_ML_H_
13
14using namespace arma;
15
18typedef unordered_map<long, double> CountTable;
19
22static CountTable CreateLUT( const vec &y, const vec &weights = {}, const int &maxUniqueEntries = 1000){
23
24 vec w(weights);
25 if( w.is_empty() ){
26 w = vec(y.n_elem, fill::ones);
27 }
28
29 // lookup table for suming weights for each y value
30 unordered_map<long, double> lut;
31 lut.reserve(maxUniqueEntries);
32
33 // for each entry in y,
34 // increament count for that index
35 for(int i=0; i<y.n_elem; i++){
36 // increment count for y[i]
37 // ++lut[(long) y[i]];
38
39 // increment weights for y[i]
40 lut[(long) y[i]] += w[i];
41
42 // if number of entries exceeds maxUniqueEntries
43 // clear the table and break
44 // to return empty table
45 if( lut.size() > maxUniqueEntries){
46 lut.clear();
47 break;
48 }
49 }
50
51 return lut;
52}
53
54
55struct nbData {
56 vec y;
57 vec mu;
58 double n;
60 mat X;
63
64 // Constructor
66
67 // Constructor
68 nbData(const vec &y, const vec &mu, const double &n, const vec &weights, const mat &X, const bool &doCoxReid, const CountTable &ct = {} ):
69 y(y), mu(mu), n(n), weights(weights), X(X), doCoxReid(doCoxReid), ct(ct) {}
70};
71
72// log-likelihood for NB GLM
73static double nb_ll(
74 const vec &y,
75 const vec &mu,
76 const double &n,
77 const vec &weights,
78 const mat &X,
79 const bool &doCoxReid,
80 const CountTable &ct,
81 const double &theta){
82
83 vec y_theta = y + theta;
84
85 double res;
86
87 if( ct.size() == 0){
88 // vanilla
89 vec value = lgamma(y_theta) - lgamma(theta) + theta*log(theta) - y_theta%log(mu + theta);
90 if( weights.is_empty() ){
91 res = sum(value) / n;
92 }else{
93 res = dot(value, weights) / n;
94 }
95 }else{
96 // count table
97 double sum_lgamma_y_theta = 0;
98 for(auto &it: ct){
99 sum_lgamma_y_theta += it.second * lgamma(it.first + theta);
100 }
101
102 if( weights.is_empty() ){
103 // Original
104 res = sum_lgamma_y_theta / n -
105 n*lgamma(theta) / n +
106 n*theta*log(theta)/n -
107 dot(y_theta, log(mu + theta)) / n;
108 }else{
109 double sum_w = sum(weights);
110
111 // Original
112 res = sum_lgamma_y_theta / n -
113 sum_w*lgamma(theta) / n +
114 sum_w*theta*log(theta)/n;
115
116 res -= dot(weights,y_theta%log(mu + theta)) / n;
117 }
118 }
119
120 // Doesn't change with theta,
121 // so don't need to evaluate it
122 // value += - lgamma(y+1) + y%log(mu);
123
124 double cr = 0;
125 if( doCoxReid ){
126 // Compute logDet of information matrix
127 // use sample weights here
128 // use 1/theta here since notation is different
129 // than in glmGammaPoi
130 vec w;
131 if( weights.is_empty() ){
132 w = 1 / (1.0/mu + 1.0/theta);
133 }else{
134 w = weights / (1.0/mu + 1.0/theta);
135 }
136 double ld;
137 bool success = log_det_sympd(ld, X.t() * (X.each_col()%w));
138 cr = -0.5 * ld * 0.99;
139 if( ! success ) cr = 0;
140 }
141
142 res += cr / n;
143
144 return res;
145}
146
147// define functions to be minimized
148static inline double nb_ll( double theta_log, void *arg){
149
150 // cast void pointer to nbData
151 auto *data = (nbData *) arg;
152
153 // value function to be minimized
154 return -1.0*nb_ll(data->y, data->mu, data->n, data->weights, data->X, data->doCoxReid, data->ct, exp(theta_log));
155}
156
157
158
159
160/*
161MASS::theta.ml() and glmGamPois() maximize the likelihood function using Fishers method with the score and info functions. That has the advantage of 1) using a starting value from the last call, 2) computes a look up table to compute lgamma(y + theta) since y has a small number of unique values, and 3) gives an estimate of the standard error of theta
162*/
163
166static double nb_theta_ml(
167 const vec &y,
168 const vec &mu,
169 const double &n,
170 const vec &weights = {},
171 const mat &X = {},
172 const bool doCoxReid = true,
173 const CountTable &ct = {},
174 const double &left = -5,
175 const double &right = 20,
176 const double &tol = 0.0001220703){
177
178 double theta_log;
179 int iter = 0;
180
181 // Setup code to minimize function
182
183 // initialize function
184 funcStruct F;
185 F.function = nb_ll;
186
187 nbData *d;
188
189 if( weights.is_empty() || all(weights == 1.0) ){
190 d = new nbData(y, mu, n, {}, X, doCoxReid, ct);
191 }else{
192 d = new nbData(y, mu, n, weights, X, doCoxReid, ct);
193 }
194
195 F.params = d;
196
197 // maximize log-likelihood
198 // theta_log is returned by reference
199 local_min(left, right, tol, &F, theta_log, iter);
200
201 delete d;
202
203 return exp(theta_log);
204}
205
206#endif
double local_min(double a, double b, double t, funcStruct *f, double &x, int &calls)
Definition local_min.h:25
unordered_map< long, double > CountTable
Definition nb_theta.h:18
Definition local_min.h:18
double(* function)(double x, void *params)
Definition local_min.h:19
void * params
Definition local_min.h:20
Definition nb_theta.h:55
vec y
Definition nb_theta.h:56
vec mu
Definition nb_theta.h:57
double n
Definition nb_theta.h:58
mat X
Definition nb_theta.h:60
vec weights
Definition nb_theta.h:59
bool doCoxReid
Definition nb_theta.h:61
nbData()
Definition nb_theta.h:65
CountTable ct
Definition nb_theta.h:62
nbData(const vec &y, const vec &mu, const double &n, const vec &weights, const mat &X, const bool &doCoxReid, const CountTable &ct={})
Definition nb_theta.h:68