*! gmentropylogit 1.0.2 October 2,2016 * Paul Corral (World Bank Group - Poverty and Equity Global Practice) * Daniel Kuehn (Urban Institute) * Ermengarde Jabir (American University) program define gmentropylinear, eclass version 11.2 syntax varlist(min=2 numeric fv) [if] [in], SUPport(string) [SIGMAvalue(real 3.0) /// ENDpoint(integer 3) LAMbda(string) wmat(string) RESidual(string) NOSigma NOCons] //Mark the estimation sample marksample touse tokenize `varlist' // Local for dependent variable local depvars `1' // obtain the independent variables macro shift local indeps `*' local Ind `*' //Remove collinear exolanatory vars _rmcoll `indeps' if `touse', forcedrop local indeps `r(varlist)' if "`support'"=="" { display as error "A support matrix for the coefficients must be specified" exit } if (`sigmavalue'<0){ display as error "Sigma value must only be positive, I made it positive" } // Check if Esupport is an odd number tempname u1 u2 u3 if "`nosigma'"==""{ scalar `u3'=0 } else{ scalar `u3'=1 } if `endpoint'<3{ display as error "Minimum number of supports for error is 3" exit } //Get number of weights for errors scalar `u2'=mod(`endpoint',2) if `u2'!=1{ display "error: support space must be odd numbered to include 0, I added 1 to make it odd" scalar `u2'=`endpoint'+1 } else{ scalar `u2'=`endpoint' } local endpoint=`u2' //Get value of multiplier for sigma value scalar `u1' = `sigmavalue' //Weight matrix requested if "`wmat'"!=""{ cap confirm variable `wmat' if !_rc{ display as error "Please specify a variable name that does not exist for option wmat" exit } else{ forval x=1/`endpoint'{ qui:gen double `wmat'_`x' = . label var `wmat'_`x' "Weight for error support `x'" local wmats `wmats' `wmat'_`x' } } local wmat = 1 } else{ local wmat = 0 } //Lambdas requested if "`lambda'"!=""{ cap confirm variable `lambda' if !_rc{ display as error "Please specify a variable name that does not exist for option lambda" exit } else{ qui:gen double `lambda' = . label var `lambda' "Lambda for this constraint" local lambdas `lambda' } local lambda = 1 } else{ local lambda = 0 } //residual requested if "`residual'"!=""{ cap confirm variable `residual' if !_rc{ display as error "Please specify a variable name that does not exist for option residual" exit } else{ qui:gen double `residual' = . label var `residual' "predicted residual" local residuals `residual' } local residual = 1 } else{ local residual = 0 } // Number of indep vars local wc=wordcount("`Ind'") local wc=`wc'+1 local r=rowsof("`support'") if (`wc'!=`r' & "`nocons'"=="") { display as error "Number of rows in support matrix must match number of independent varaibles" } if ((`wc'-1)!=`r' & "`nocons'"=="nocons") { display as error "Number of rows in support matrix must match number of independent varaibles" exit } if "`nocons'"=="nocons"{ mata:gme_linear("`depvars'", "`indeps'", "`support'", "`u1'", "`u2'","`u3'","`touse'", 1, `wmat', `lambda', `residual') // Matrix for results tempname b V mat `b' = r(beta) mat coln `b' = `indeps' mat `V' = r(V) mat coln `V' = `indeps' mat rown `V' = `indeps' local N=r(N) } else{ mata:gme_linear("`depvars'", "`indeps'", "`support'", "`u1'", "`u2'","`u3'","`touse'",0,`wmat', `lambda', `residual') // Matrix for results tempname b V mat `b' = r(beta) mat coln `b' = `indeps' _cons mat `V' = r(V) mat coln `V' = `indeps' _cons mat rown `V' = `indeps' _cons local N=r(N) } ereturn post `b' `V', depname(`depvars') obs(`N') esample(`touse') // Statistics //Number of observations ereturn scalar N = r(N) //Degs of freedom ereturn scalar d_fm = (r(K)-1) //entropy ereturn scalar entropy = r(L) //Initial entropy ereturn scalar int_entropy =r(L0) //pseudo r2 ereturn scalar pseudoR2 = r(pseudoR2) //Normalized entropy ereturn scalar sign_entropy = r(norm_entro) //Noise entropy ereturn scalar noise_entropy = r(noiseent) //Ent. ratio stat. //ereturn scalar ERS = r(entratio) //Ereturn matrix probs if ("`nocons'"=="nocons") mat rown probs = `indeps' else mat rown probs = `indeps' _cons ereturn matrix betaprobs = probs //Ereturn matrix V ereturn matrix esupport = esupport display _newline in gr "Generalized Maximum Entropy (Linear)" _col(52) in gr "Number of obs" _col(71) in gr "=" _col(72) in ye %7.0f e(N) display _col(52) in gr "Degrees of freedom" _col(71) in gr "=" _col(72) in ye %7.0f e(d_fm) display _col(52) in gr "Model Entropy" _col(71) in gr "=" _col(72) in ye %7.1f e(entropy) display _col(52) in gr "Pseudo R2" _col(71) in gr "=" _col(72) in ye %7.4f e(pseudoR2) display _col(52) in gr "Signal entropy" _col(71) in gr "=" _col(72) in ye %7.4f e(sign_entropy) display _col(52) in gr "Noise entropy" _col(71) in gr "=" _col(72) in ye %7.4f e(noise_entropy) //display _col(52) in gr "Ent. ratio stat." _col(71) in gr "=" _col(72) in ye %7.1f e(ERS) ereturn display end mata:mata clear version 11.2 mata: mata set matastrict on mata: // Linear GME 1.0.0 Nov. 21, 2014 void gme_linear(string scalar yname, string scalar xname, string scalar betasupport, string scalar tope, string scalar soporte, string scalar sig, string scalar touse, real scalar nc, real scalar wmat, real scalar lout, real scalar epsilon) { real matrix Y, X, sigmaz, sigmav, lambda, Z, v, p, w, numerator, omegas, num_w, psi, beta, cons, B, psi_sum, omega_sum, grad, H, bold, sigma2, omega2, V, pseudoR2, p0, w0, H0, HB real scalar sdy, K, N, s, R, a, b, lnp, cha, iter, lhs, L, L0, norment, entratio, top, esupport,i, s1, noiseent // Use st_data to import variables from stata Y=st_data(., tokens(yname), touse) X=st_data(., tokens(xname), touse) if (nc==0){ cons = J(rows(X),1,1) X=X, cons } top=st_numscalar(tope) esupport=st_numscalar(soporte) s1=st_numscalar(sig) // Import matrix from stata, user must provide Z=st_matrix(betasupport) if (s1==0){ //endogenous error support sdy=sqrt(quadvariance(Y)) } else{ sdy=1 } v=J(1,esupport,0) v[1,1]=-top for(i=2; i<=esupport; i++){ v[1,i]=v[1,(i-1)]+(top/((esupport-1)/2)) } v=v*sdy // Observations N=rows(X) // Variables K=cols(X) // Optimization cha=1 iter=1 B=J(1,rows(Y), 0) //Beginning of NR -> use of optimize command is not giving valid results (results are compared to GAMS) while (abs(cha)>1e-24) { //while the change between the old beta(lambdas) and new one is greater than the value we continue looping lhs=quadcross(B',Y) numerator = exp(-((quadcross(B',X))':*Z)) omegas = quadrowsum(numerator) num_w = exp(-quadcross(B,v)) psi = quadrowsum(num_w) psi_sum = quadsum(ln(psi)) omega_sum = quadsum(ln(omegas)) L = lhs+omega_sum+psi_sum if (iter==1){ L0=L } p = numerator:/omegas w=num_w:/psi sigmaz = quadrowsum(p:*(Z:^2)) - quadrowsum(p:*Z):*quadrowsum(p:*Z) sigmav = diag(quadrowsum(w:*(v:^2)) - quadrowsum(w:*v):*quadrowsum(w:*v)) //Gradient grad = Y' - (quadcross(X',rowsum(p:*Z)))' - quadcross(v',w') //Hessian H=J(cols(B), cols(B),0) for (a=1;a<=cols(B);a++) { for (b=1;b<=a;b++) { H[a,b] = quadcross((X[a,.]:*X[b,.])',sigmaz) + sigmav[a,b] } } _makesymmetric(H) printf("{txt}Iteration %f", iter) printf("{txt}: Entropy = %f\n", L) iter = iter+1 //Count number of iterations bold=B //set b old equal to the current b in order to estimate the new b B = bold - quadcross(grad',luinv(H)) // This is the essence of the Newton Rhapson cha = quadcross((bold-B)',(bold-B)')/ quadcross(bold',bold') // Change between the old beta and new } //end of newton rhapson beta = quadrowsum(p:*Z) sigma2=quadcross(B',B')/cols(B) omega2=((quadcolsum((quadrowsum((v:^2):*w)-((quadrowsum(v:*w)):^2)):^(-1)))/cols(B))^2 V=(sigma2/omega2)*invsym(quadcross(X,X)) p0 = J(rows(p), cols(p),(1/cols(p))) w0 = J(rows(w),cols(w), (1/cols(w))) H0 = -(quadsum(p0:*(log(p0))))-(quadsum(w0:*(log(w0)))) HB = -quadsum(p:*(log(p)))-quadsum(w:*(log(w))) pseudoR2=1-((-quadsum(p:*(log(p))))/(-(quadsum(p0:*(log(p0)))))) norment =-quadsum(p:*(log(p)))/(cols(X)*log(cols(Z))) noiseent=-quadsum(w:*(log(w)))/-(quadsum(w0:*(log(w0)))) entratio=(2*omega2/sigma2)*abs(HB-H0) if (wmat==1) st_store(.,st_varindex(tokens(st_local("wmats"))),touse,w) if (lout==1) st_store(.,st_varindex(tokens(st_local("lambdas"))),touse,B') if (epsilon==1) st_store(.,st_varindex(tokens(st_local("residuals"))),touse,quadcross(v',w')') // Return results to stata st_matrix("r(beta)", beta') st_matrix("probs", p) st_matrix("esupport", v) st_numscalar("r(N)", N) st_numscalar("r(K)", K) st_matrix("r(sigma2)", sigma2) st_matrix("r(omega2)", omega2) st_matrix("r(V)", V) st_numscalar("r(L)", L) st_numscalar("r(L0)", L0) st_numscalar("r(pseudoR2)", pseudoR2) st_numscalar("r(norm_entro)", norment) st_numscalar("r(entratio)", entratio) st_numscalar("r(noiseent)", noiseent) } end