$title  demand_IV.GMS

* ------------------------------------------------------
* Per Capita Income and the Demand for Skills
* Journal of International Economics

* Justin Caron, Thibault Fally and James Markusen

* December 2019
* ------------------------------------------------------

* ESTIMATE DEMAND SYSTEM PARAMETERS, INSTRUMENTING PHI BY `INTERNATIONAL ONLY' PHIS\
* USING 2-STAGE LEAST-SQUARES


$if not set datadir $set datadir "data\"
$setglobal datadir %datadir%


* SPECIFICATION ?

$if not set spec $set spec tc
*$if not set spec $set spec theta4


* subset of regions to include
$if not set regsubset $set regsubset rall

* MINIMIZE ERRORS ON LOGS OR CONSUMPTION SHARES ?
* choices : log, consshare, logweighted
$ if not set objective $set objective logweighted
*$ if not set objective $set objective consshare


* load gravity data from stata or gams ?
* choices : stata, gams
$ if not set gravitydata $set gravitydata gams

* skip reporting ?
* choices : yes, no
$ if not set skipreporting $set skipreporting no


* BOOSTRAP ?
$if not set boot $set boot no
* set nb of boostrap iterations:
$if not set itr $set itr 4


* SELECT DATASET TO USE - DONT FORGET TO CHECK WHERE TO LOAD GRAVITY ESTIMATES FROM
*$if not set ds $set ds gtap7_all
*$if not set ds $set ds gtap8_all

* with gas and gdt aggregated
$if not set ds $set ds gtap5

*$include loaddata.gms
$include loaddata_gtap5.gms

* population missing?
parameter missingpop;
missingpop(r)$(not pop(r)) = yes;

display missingpop;

display expenditure, pcexp;

display f;


* ------------------------------------
* IMPORT  GRAVITY ESTIMATES

parameter       coeffs stores all sector-specific coefficients
                phiest
		phiestIV
                tcostest
                logphiest
                IM
                EX
                cst;


*$gdxin estimates\gravityestimates_%ds%_internationaltrade.gdx
$gdxin estimates\gravityestimates_%ds%_atc.gdx
$load  coeffs phiest im ex cst tcostest phiestIV

parameter esttheta;
esttheta(i) = 0;

display coeffs;


* --  DEFINE SECTORS WHICH ARE MOSTLY INTERMEDIATES

parameter sharefd % FD in vdm (dom output);
set intermediates(i);

sharefd(i) = (sum((r,g), vdfm(i,g,r) + vifm(i,g,r)) - sum((j,r), vdfm(i,j,r) + vifm(i,j,r)))/ sum((r,g), vdfm(i,g,r) + vifm(i,g,r));

* defined as "intermediate" if  less than 10 % of production goes to final demand
intermediates(i)$(sharefd(i)<0.1) = yes;
display sharefd, intermediates;

* -- DEFINE SERVICE AND TRADABLE SECTORS SECTORS

*set     serv(i) service sectors / CMN, DWE,ISR,OBS, OFI,OSG,ROS,WTR,TRD,CNS, OTP, ATP, WTP, GDT, ELY/
set     serv(i) service sectors /osg/
        tradables(i) the tradable sectors;

tradables(i) = yes;
tradables(serv) = no;
display tradables;


* -----------------------------------------------------------
* NLLS Demand estimations
* -----------------------------------------------------------
* the only exogenous parameters needed are: x(i,r), per capita expenditure, w(n) = wage = PCI


parameter       x(i,r)          per capita expenditure
                w(r)            wage = PCI
                indexp(i)       industry total expenditure,
                logphi,
                phi
                bhat;

set i_(i), r_(r),g_(g),
        rall(r) set of 94 regions;
i_(i) = no;


* -- SELECT WHICH PHI TO USE HERE
*$if "%gravitydata%" == "stata" logphi(i,r) = importdata(i,r,"log_Phi_v2");
*$if "%gravitydata%" == "gams"  phi(i,r) = phiest(i,r);


$if "%gravitydata%" == "gams"  phi(i,r) = phiest(i,r);

display phi;



* -- SELECT WHICH SECTORS TO USE HERE :

* using all sectors for which we have estimates of PHI :
i_(i)$sum(r,phi(i,r))= yes;

* selecting regions :
rall(r)$sum(i,phi(i,r)) = yes;


Sets
r_ country  /
col,aus,	
nzl,chn,hkg,jpn,kor,twn,idn,mys,phl,sgp,tha,vnm,bgd,ind,lka,can,usa,mex,
per,ven,arg,bra,chl,ury,aut,bel,dnk,fin,fra,deu,gbr,grc,irl,ita,lux,nld,prt,esp,swe,che,
alb,bgr,hrv,cze,hun,mlt,pol,rom,svk,svn,est,lva,ltu,rus,cyp,tur,mar,bwa,mwi,moz,tza,zmb,zwe,uga/;

display r_;

* dropping intermediates
i_(intermediates) = no;

* DROPPING DWELLINGS :
i_("dwe") = no;

display i,i_, intermediates;
alias(i_,j_);
alias(r_,s_);

* define new g set
g_(i_) = yes; g_("c") = yes; g_("g") = yes; g_("i") = yes;
	

parameter nbr, nbi;
nbr = card(r_);
nbi = card(i_);

display nbr, nbi;

display r_, i_;



* definine per-capita expenditure only on selected sectors:
w(r)$pop(r) = 10e8* sum(i_,fd(i_,r)) /pop(r);
x(i,r)$pop(r)  = 10e8 * fd(i,r) / pop(r);
indexp(i)  =  sum(r_,fd(i,r_));

* 2017 test, logging the weightes

* works better
indexp(i)  = log(sum(r_,fd(i,r_)));


logphi(i,r)$phi(i,r) =log(phi(i,r));

parameter logphi_inter;
logphi_inter(i,r)$phiestIV(i,r) = log(phiestIV(i,r));

parameter expshare, sectdrop;
expshare(i_,r_) = fd(i_,r_) / sum(i_.local,fd(i_,r_));

sectdrop(i_,r_) = 1;


* drop in reporting at least.. (matters for R2)
sectdrop(i_,r_) = 1;
sectdrop(i_,r_)$(NOT fd(i_,r_)) =0 ;

* GTAP 8 WAS CENSORED AT 1E-6: USING THIS HERE TO MAKE THINGS COMPARABLE..
sectdrop(i_,r_)$(expshare(i_,r_) < 1E-5) =0 ;

* compute some statistics
parameter       fittedPCexp     fitted per capita expenditure
                fittedexp       fitted expenditure
*                fittedexpshare  fitted expenditure share
                sstot total sum of squares
                rsquared
                nobs number of observations
                Fstat F-test statistic
                nbp number of parameters in model
                df degrees of freedom for Fstat
                sigma2hat estimated variance of regression
		modelselection;

* ----------------------------------------------
* for bootstrapping :

set boot /1*%itr%/;
option seed=081567;

parameter  wt(r)        weight in the objective
           bootcoef(boot,*,*)
           dim          number of ctries
           rdraw        random draw from the pool of ctries
           cardzz(r)    index on each observation
           wtchk        weight check
;


wt(r_)=1;


* which country-sector pairs are missing
set missingx(i,r);

missingx(i_,r_) = yes;
missingx(i_,r_)$x(i_,r_) = no;

display missingx;


* ---------------------------------------------------------
* -- DEFINE MODEL : 1-stage of 2SLS


equations obj_first;
variables gamma_, cst_, sigma_first_, sse_first;

obj_first.. sse_first =e=      sum((i_,r_)$logphi(i_,r_)
,  sqr(
logphi(i_,r_) - (
cst_(i_) - sigma_first_(i_)*log(w(r_)) + gamma_(i_)*logphi_inter(i_,r_)
)));


model firststage /obj_first/;


solve firststage using nlp minimizing sse_first;

* computed fitted values for second-stage:

parameter logphiIV;

logphiIV(i_,r_) = cst_.L(i_) +  gamma_.L(i_) * logphi_inter(i_,r_) + sigma_first_.L(i_)*log(w(r_));

execute_unload 'iv_check.gdx', logphiIV, logphi;

* for second stage, replace logphi with logphi IV

logphi(i_,r_) = logphiIV(i_,r_) ;



* ---------------------------------------------------------
* -- 2ND STAGE - DEFINE MODEL :


variables sse_;

positive variables      sigma_scale_,  sigma_(i), lambda_(r), theta_, delta_,
 fe_(i) the fixed effect
 eta_(i) coefficient on ICP price index;

positive variable  mu_(i);


equations  obj_log, obj_consshare, obj_logweighted, obj_logweighted_reducedform, budget, commontheta,  sigma_fact, commondelta, est_theta;

* minimizing LOG error terms
obj_log.. sse_ =e=      sum((i_,r_)$x(i_,r_), wt(r_) * sqr(log(x(i_,r_)) - (log(fe_(i_)) - sigma_(i_)*log(lambda_(r_))
                + mu_(i_)*logphi(i_,r_)  )));

* minimizing consumption share error terms
obj_consshare.. sse_ =e= sum((i_,r_)$x(i_,r_), wt(r_) * sqr(x(i_,r_)/w(r_) - fe_(i_) * (lambda_(r_)**(-sigma_(i_)))
                         * (phi(i_,r_)**mu_(i_))  /w(r_) ));

* minimizing LOG error terms weighted by total expenditure by sector
obj_logweighted.. sse_ =e=      sum((i_,r_)$x(i_,r_), indexp(i_) 
   * sectdrop(i_,r_)
		* wt(r_) * sqr(log(x(i_,r_)) - (log(fe_(i_)) - sigma_(i_)*log(lambda_(r_))
                + mu_(i_)*logphi(i_,r_)  )));

* reduced form equivalent:
obj_logweighted_reducedform.. sse_ =e=      sum((i_,r_)$x(i_,r_), indexp(i_) 
   * sectdrop(i_,r_)
*
		* wt(r_) * sqr(log(x(i_,r_)) - (log(fe_(i_)) + sigma_(i_)*sigma_scale_*log(w(r_))
                + mu_(i_)*logphi(i_,r_)  )));

budget(r_) ..   sum(i_, (lambda_(r_)**(-sigma_(i_))) * fe_(i_) *(phi(i_,r_)**mu_(i_))) =e= w(r_);

* for specification 3, impose a common theta across sectors :
commontheta(i_) ..      mu_(i_)  =e= ((sigma_(i_) * sigma_scale_) -1) / theta_ ;

* for specification 4, impose a common delta across sectors :
commondelta(i_) ..      mu_(i_) =e= ((sigma_(i_) * sigma_scale_) -1) * delta_ / bhat(i_) ;

* for specification 5, use estimated thetas :
est_theta(i_) ..      mu_(i_) =e= ((sigma_(i_) * sigma_scale_) -1) / esttheta(i_) ;


* initialize variables:
sse_.L = 0;
sigma_.L(i_) = 1;
lambda_.L(r_)$w(r_) = w("usa")/w(r_);
fe_.L(i_) = w("usa") * 1/card(i_);
mu_.L(i_) = 0;
eta_.L(i_) = 0;
theta_.L = 1;
sigma_scale_.L = 1;
mu_.UP(i_) =  1e10;
eta_.UP(i_) =  1e10;
sigma_scale_.LO = 1E-7;
sigma_.LO(i_) = 1E-7;
sigma_.UP(i_) = 40;


lambda_.LO(r_)= 1E-7;
fe_.LO(i_) = 1E-7;
theta_.LO = 1E-7;

* fix one sigma_ to one (textiles: one that is not a service industry) :
sigma_.FX("tex") =1;
lambda_.FX("USA") =1;

* should be secondary energy only
set ene(i) / col, gas, ely, p_c /;

* data to be exported to STATA for analysis
parameter forstata;

forstata("fd",i_,r_) = x(i_,r_);
forstata("expenditure share",i_,r_) = x(i_,r_) / sum(i_.local, x(i_,r_));
forstata("fd","all ene",r_) = sum(ene, x(ene,r_));
forstata("logpcfd",i_,r_)$x(i_,r_) = log(x(i_,r_));
forstata("logpcfd","all ene",r_) = log(forstata("fd","all ene",r_));
forstata("GAMS weights",i_,r_) = indexp(i_);
forstata("logPHI",i_,r_) = logphi(i_,r_);
forstata("logpci",i_,r_) = log(w(r_));
forstata("logpci","all ene",r_) = log(w(r_));
forstata("total exp",i_,r_) = expenditure(r_);
forstata("total exp","all ene",r_) = expenditure(r_);
forstata("population",i_,r_) =  pop(r_);


parameter  lambda, mu,  sse, fe, theta, sigma_scale, eta, avgmu;
theta=0;
*sigma_scale=0;
parameter specificationstats for reporting;

* -- define specification :

$if "%spec%"=="tc" eta_.FX(i_) = 0; model nlls /obj_%objective%, budget/; nbp("non-homoth")= card(r_) + 3*card(i_);

$if "%spec%"=="nobc" eta_.FX(i_) = 0; model nlls /obj_%objective%/; nbp("non-homoth")= card(r_) + 3*card(i_);

$if "%spec%"=="reducedform" eta_.FX(i_) = 0; model nlls /obj_%objective%_reducedform/; nbp("non-homoth")= card(r_) + 3*card(i_);

$if "%spec%"=="notc" mu_.FX(i_) = 0; eta_.FX(i_) = 0; model nlls /obj_%objective%, budget/; nbp("non-homoth")= card(r_) + 2*card(i_);

$if "%spec%"=="commontheta" eta_.FX(i_) = 0; model nlls /obj_%objective%, budget, commontheta/; nbp("non-homoth")= card(r_) + 2*card(i_) + 1;

$if "%spec%"=="theta4" eta_.FX(i_) = 0; model nlls /obj_%objective%, budget, commontheta/; theta_.FX = 4; nbp("non-homoth")= card(r_) + 2*card(i_) ;

$if "%spec%"=="theta5" eta_.FX(i_) = 0; model nlls /obj_%objective%, budget, commontheta/; theta_.FX = 5; nbp("non-homoth")= card(r_) + 2*card(i_) ;

$if "%spec%"=="theta6" eta_.FX(i_) = 0; model nlls /obj_%objective%, budget, commontheta/; theta_.FX = 6; nbp("non-homoth")= card(r_) + 2*card(i_) ;

$if "%spec%"=="homotheta4" eta_.FX(i_) = 0; sigma_.FX(i_) = 1; model nlls /obj_%objective%, budget, commontheta/; theta_.FX = 4; nbp("non-homoth")= card(r_) + 2*card(i_) ;

$if "%spec%"=="esttheta" eta_.FX(i_) = 0;  model nlls /obj_%objective%, budget, est_theta/; nbp("non-homoth")= card(r_) + 2*card(i_) ;


* -- Solve model :

* make a savepoint for boostraping
nlls.savepoint=1;
solve nlls using nlp minimizing sse_;
nlls.savepoint=0;

* calculate statistics
coeffs("sigma","coeff", i_) = sigma_.L(i_);
coeffs("eta","coeff", i_) = eta_.L(i_);
coeffs("mu","coeff", i_) = mu_.L(i_);
coeffs("GTAP ARMINGTON","coeff", i_) = esubd(i_);
coeffs("estimated theta","coeff", i_) = esttheta(i_);
coeffs("indexp","coeff", i_) = indexp(i_) ;

nobs = card(i_) * card(r_);

* defining income classifications
set lowincome(r), lowincome2;
set middleincome(r), middleincome2;
set highincome(r);

$ontext
* using the world bank guidelines for analytical classification in 2007 (GNI/capita):
 https://datahelpdesk.worldbank.org/knowledgebase/articles/378833-how-are-the-income-group-thresholds-determined


Low income
Lower middle income
Upper middle income
High income

<= 935
936-3,705
3,706-11,455
> 11,455
$offtext

* lowincome
lowincome(r)$(pcexp(r) < 935) = 1;

* lower middle income
lowincome2(r)$(pcexp(r) > 935 and pcexp(r) < 3705) = 1;
	     
* upper middle income
middleincome2(r)$(pcexp(r) > 3705 and pcexp(r) < 11455) = 1;


* aggregating middle income
middleincome(r)$(pcexp(r) > 935 and pcexp(r) < 11455) = 1;

highincome(r)$(pcexp(r) > 11455) = 1;

* count number of countries in each group:
parameter country_counter;

country_counter("low") = sum(lowincome(r_), 1);
country_counter("lowermid") = sum(lowincome2(r_), 1);
country_counter("uppermid") = sum(middleincome2(r_), 1);
country_counter("higher") = sum(highincome(r_), 1);

display country_counter;


* calculate the average elasticity of expenditures w.r.t phi :
* weighted by sectors size
avgmu("weighted") = sum(i_, indexp(i_) * mu_.L(i_)) / sum(i_, indexp(i_));
avgmu("unweighted") = sum(i_,   mu_.L(i_)) / card(i_);

* calculate weighted Rsquare for logweighted specification
parameter weightedsstot, weightedrsquared;

weightedrsquared = 0;
weightedsstot = 0;

* in log weighted,  we weight the square errors in SST
*$if "%objective%"=="logweighted"       weightedsstot = sum((i_,r_)$x(i_,r_), indexp(i_) * sqr(log(x(i_,r_)) - sum((i_.local,r_.local)$x(i_,r_), log(x(i_,r_)))/nobs  ));

*$if "%objective%"=="logweighted"     weightedrsquared = 1- sse_.L /weightedsstot;
sse("non-homoth") = sse_.L;
sstot = sum((i_,r_)$x(i_,r_), sectdrop(i_,r_) *  indexp(i_) *  sqr(log(x(i_,r_)) - sum((i_.local,r_.local)$x(i_,r_), log(x(i_,r_)))/nobs  ));


modelselection("aic weighted","non-homoth") =  log(sse_.L /nobs) + 2* nbp("non-homoth") / nobs ;
modelselection("bic weighted","non-homoth") =  log(sse_.L /nobs) + log(nobs)* nbp("non-homoth") /nobs ;
modelselection("bic weighted2","non-homoth")$weightedsstot = nobs * log(sse_.L /weightedsstot) + log(nobs)* nbp("non-homoth");

* compute backed out theta:
* the scale of sigma matters and is not identified.
* ASSUMPTION: the smallest sigma =1
*  except for GRO for which this doesn't work as sigma = 0
coeffs("backed out theta","coeff",i_)$coeffs("mu","coeff", i_) = 
(coeffs("sigma","coeff", i_)/ smin(i_.local$((coeffs("sigma","coeff", i_) >0.05) and coeffs("mu","coeff", i_)),coeffs("sigma","coeff", i_))-1)/ coeffs("mu","coeff", i_);


parameter withinrsquare;

withinrsquare("to avg","all") = 1 - 
((sum((i_,r_)$x(i_,r_), indexp(i_) 
		* wt(r_) * sqr(log(x(i_,r_)) - (log(fe_.L(i_)) - sigma_.L(i_)*log(lambda_.L(r_))
                + mu_.L(i_)*logphi(i_,r_)  ))))

/ sum((i_,r_)$x(i_,r_), indexp(i_) * sqr(log(x(i_,r_)) - sum((r_.local)$x(i_,r_), log(x(i_,r_)))/card(r_)  )));

sigma2hat = sse_.L /(nobs -nbp("non-homoth")) ;
fe("non-homoth",i_) = fe_.L(i_) ;
lambda("non-homoth", r_) = lambda_.L(r_) ;
sigma_scale("non-homoth") = sigma_scale_.L ;
theta = theta_.L ;

modelselection("aic weighted","non-homoth") =  log(sse_.L /nobs) + 2* nbp("non-homoth") / nobs ;
modelselection("bic weighted","non-homoth") =  log(sse_.L /nobs) + log(nobs)* nbp("non-homoth") / nobs;


SET  ii_	 Sectoral classifications /

MAN	Manufactured and Processed Goods
ENE
SRV
TRN
AGR
/;

* calculate fitted expenditure shares :
fittedPCexp("non-homoth",i_,r_) = fe_.L(i_)* lambda_.L(r_)**(- sigma_.L(i_)) * (phi(i_,r_)**(mu_.L(i_)));

parameter avg_sigma;

avg_sigma =    sum((r_,i_.local), sigma_.L(i_) * fd(i_,r_)) /
                                               (sum((i_.local,r_), fd(i_,r_) ));

display avg_sigma;

fittedPCexp("homoth partial",i_,r_) = fe_.L(i_)* lambda_.L(r_)**(- avg_sigma) * (phi(i_,r_)**(mu_.L(i_)));
fittedPCexp("non-homoth",i_,r_)$(not sectdrop(i_,r_)) = 0; 
fittedexp("non-homoth",i_,r_) = fittedPCexp("non-homoth",i_,r_) / 10e8* pop(r_)  ;
fittedexp("homoth partial",i_,r_) = fittedPCexp("homoth partial",i_,r_) / 10e8* pop(r_)  ;
fittedPCexp("Pci",i_,r_) = pcexp(r_);
fittedPCexp("expenditure",i_,r_) = expenditure(r_);
forstata("fitted nh",i_,r_)$x(i_,r_) = fittedPCexp("non-homoth",i_,r_);
forstata("fitted nh","all ene",r_) = sum(ene, fittedPCexp("non-homoth",ene,r_));

* computing expenditure shares
coeffs("expenditure share - lowincome","coeff", i_) = sum(lowincome, fd(i_,lowincome)) /sum((i_.local,lowincome), fd(i_,lowincome));
coeffs("expenditure share - middleincome","coeff", i_) =  sum(middleincome,fd(i_,middleincome)) /sum((i_.local,middleincome), fd(i_,middleincome));

coeffs("expenditure share - lowincome2","coeff", i_) = sum(lowincome2, fd(i_,lowincome2)) /sum((i_.local,lowincome2), fd(i_,lowincome2));
coeffs("expenditure share - middleincome2","coeff", i_) =  sum(middleincome2,fd(i_,middleincome2)) /sum((i_.local,middleincome2), fd(i_,middleincome2));

coeffs("expenditure share - highincome","coeff", i_) =  sum(highincome,fd(i_,highincome)) /sum((i_.local,highincome), fd(i_,highincome));
coeffs("expenditure share","coeff", i_) =  sum(r_,fd(i_,r_)) /sum((r_.local,i_.local), fd(i_,r_));

coeffs("expenditure share","coeff", "all ene") =  sum(r_,sum(ene,fd(ene,r_))) /sum((r_.local,i_.local), fd(i_,r_));

* calculate income elasticities
* using global consumption shares here (doesnt matter for correlations)
coeffs("incelast - mean shares","coeff", i_) = sigma_.L(i_) / (sum(i_.local,  sigma_.L(i_) * sum(r,fd(i_,r)) /sum((i_.local,r), fd(i_,r))));


parameter incelast, avgincelast;
IncElast("direct",i_,r_) = sigma_.L(i_)*(sum(i_.local,  fd(i_,r_))) /
                                               (sum(i_.local,  sigma_.L(i_) * fd(i_,r_) ));


IncElast("direct fitted dem",i_,r_) = sigma_.L(i_)*(sum(i_.local, forstata("fitted nh",i_,r_))) /
                                               (sum(i_.local,  sigma_.L(i_) * forstata("fitted nh",i_,r_)));



* splitting the countries into low- middle and high income

* low income = to Thailand = 3000
* middle income = to Czech Republic = 15000
coeffs("incelast - mean shares - lowincome","coeff", i_) = sigma_.L(i_) / (sum(i_.local,  sigma_.L(i_) * sum(lowincome,fd(i_,lowincome)) /sum((i_.local,lowincome), fd(i_,lowincome))));
coeffs("incelast - mean shares - middleincome","coeff", i_) = sigma_.L(i_) / (sum(i_.local,  sigma_.L(i_) * sum(middleincome,fd(i_,middleincome)) /sum((i_.local,middleincome), fd(i_,middleincome))));
coeffs("incelast - mean shares - highincome","coeff", i_) = sigma_.L(i_) / (sum(i_.local,  sigma_.L(i_) * sum(highincome,fd(i_,highincome)) /sum((i_.local,highincome), fd(i_,highincome))));

forstata("Incel",i_,r_) = IncElast("direct",i_,r_);


* using median country fitted consumption shares here
* median country = BGR  3496.850823
coeffs("incelast - median ctry","coeff", i_) = sigma_.L(i_)*(sum(i_.local,  fittedPCexp("non-homoth",i_,"BGR"))) /
                                               (sum(i_.local,  sigma_.L(i_) * fittedPCexp("non-homoth",i_,"BGR") ));

* weighted
rsquared("nh","all","total") = 1- sse_.L /sstot;


* ----------

* "PARTIAL HOMOTHETIC", USING MU FROM NH VERSION
* NOTE: not used in this paper

* unfix bounds:
fe_.LO(i_) = 1E-7;
fe_.up(i_) = 1E8;



* re-ininitialize variables
sse_.L = 0;
lambda_.L(r_)$w(r_) = w("usa")/w(r_);
fe_.L(i_) = w("usa") * 1/card(i_);
eta_.L(i_) = 0;


sigma_scale_.L = 10;

*re-solve with sigmas fixed at one
sigma_.FX(i_) = 1;
theta_.UP = 1E10;

$if "%spec%"=="notc"  model nllshomoth /obj_%objective%, budget, commontheta/; mu_.FX(i_) = 0; theta_.FX = 4;  nbp("homoth") = card(r_) +  card(i_) ;
$if "%spec%"=="theta4"  model nllshomoth /obj_%objective%, budget, commontheta/; theta_.FX = 4;  nbp("homoth") = card(r_) +  card(i_) ;
$if "%spec%"=="theta5"  model nllshomoth /obj_%objective%, budget, commontheta/; theta_.FX = 5; nbp("homoth") = card(r_) +  card(i_) ;
$if "%spec%"=="theta6"  model nllshomoth /obj_%objective%, budget, commontheta/; theta_.FX = 6; nbp("homoth") = card(r_) +  card(i_) ;
$if "%spec%"=="tc"  model nllshomoth /obj_%objective%, budget/;  nbp("homoth") = card(r_) +  card(i_) *2;
$if "%spec%"=="nobc"  model nllshomoth /obj_%objective%/;  nbp("homoth") = card(r_) +  card(i_) *2;
$if "%spec%"=="reducedform" eta_.FX(i_) = 0; model nllshomoth /obj_%objective%_reducedform/; nbp("non-homoth")= card(r_) + 3*card(i_);

solve nllshomoth using nlp minimizing sse_;
sigma_.UP(i_) = 10e7;
sigma_.LO(i_) = 1E-7;


coeffs("sigma H","coeff", i_) = sigma_.L(i_) * sigma_scale_.L  ;
coeffs("eta","coeff", i_) = eta_.L(i_);
coeffs("mu H","coeff", i_) = mu_.L(i_);

fe("homoth partial",i_) = fe_.L(i_) ;
lambda("homoth partial", r_) = lambda_.L(r_) ;
sigma_scale("homoth partial") = sigma_scale_.L ;

rsquared("h partial","all","total") = 1- sse_.L /sstot;
sse("homoth partial") = sse_.L;

* calculate fitted expenditure shares :
fittedPCexp("homoth partial",i_,r_) = fe_.L(i_)* lambda_.L(r_)**(- sigma_.L(i_)) * (phi(i_,r_)**(mu_.L(i_)));
fittedPCexp("homoth partial",i_,r_)$(not sectdrop(i_,r_)) = 0;
fittedexp("homoth partial",i_,r_) = fittedPCexp("homoth partial",i_,r_) / 10e8* pop(r_) ;


forstata("fitted h partial",i_,r_)$x(i_,r_) = fittedPCexp("homoth partial",i_,r_);


withinrsquare("to homoth partial","all") = 1 - (sse("non-homoth") /
sse("homoth partial")
);


* -- HOMOTHETIC VERSION

* unfix
mu_.UP(i_) =  1e10;
mu_.LO(i_) =  1e-10;


* re-ininitialize variables
sse_.L = 0;
lambda_.L(r_)$w(r_) = w("usa")/w(r_);
fe_.L(i_) = w("usa") * 1/card(i_);
mu_.L(i_) = 0;
eta_.L(i_) = 0;


*re-solve with sigmas fixed at one
sigma_.FX(i_) = 1;
theta_.UP = 1E10;

solve nllshomoth using nlp minimizing sse_;
* re-set to non-homoth for bootstrap
sigma_.UP(i_) = 10e7;
sigma_.LO(i_) = 1E-7;

fe("homoth",i_) = fe_.L(i_) ;
lambda("homoth", r_) = lambda_.L(r_) ;
sigma_scale("homoth") = sigma_scale_.L ;
rsquared("h","all","total") = 1- sse_.L /sstot;

sse("homoth") = sse_.L;

* calculate fitted expenditure shares :
fittedPCexp("homoth",i_,r_) = fe_.L(i_)* lambda_.L(r_)**(- sigma_.L(i_)) * (phi(i_,r_)**(mu_.L(i_)));
fittedPCexp("homoth",i_,r_)$(not sectdrop(i_,r_)) = 0;

fittedexp("homoth",i_,r_) = fittedPCexp("homoth",i_,r_) / 10e8* pop(r_) ;
fittedexp("true",i_, r_) = fd(i_, r_) ;
forstata("fitted h",i_,r_)$x(i_,r_) = fittedPCexp("homoth",i_,r_);
withinrsquare("to homoth","all") = 1 - (sse("non-homoth") /
sse("homoth")
);


* comparison to Homo no tc
* use this for consistency with QJE paper
* run homonotc and get the Rsquare

withinrsquare("to homoth notc","all") = 1 - (sse("non-homoth") /
964
);



* --  F-tests for parameter restrictions :

* 1 - test restriction of mu = 0
* import from notc case manually


$if "%objective%"=="log"        sse("no TC imp") = 14003.3885458859  ;
$if "%objective%"=="consshare"  sse("no TC imp") = 1.51084988080653   ;
$if "%objective%"=="logweighted"  sse("no TC imp") = 1794672.8200  ;


nbp("no TC imp") = card(r_) + 2 *card(i_) ;


fstat("mu=0")$(nbp("non-homoth") - nbp("no TC imp"))  = ((sse("no TC imp") - sse("non-homoth"))
         / (nbp("non-homoth") - nbp("no TC imp")))
         / (sse("non-homoth")
         / ( nobs - nbp("non-homoth"))) ;

* this F stat has an F distribution, with (p2 - p1, n - p2) degrees of freedom
* according to Greene : only approximate in non-linear case, but good enough
df("1") = (nbp("non-homoth") - nbp("no TC imp"));
df("2") = nobs - nbp("non-homoth");

* so we need to compare to F(56,5002) .. at 5% significance level, that is about 1.35
* (according to http://www.ma.utexas.edu/users/davis/375/popecol/tables/f005.html)
* we get a value of 8.79 > 1.35

* 2 - test restriction of sigma = 1

fstat("sigma=1") = ((sse("homoth") - sse("non-homoth")) / (nbp("non-homoth") - nbp("homoth"))) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;

* july '18:
* for energy goods only: compute model with sigma = 1 for energy, get SSE and paste it here:
* run tc_noenergy specification
fstat("sigma=1 energy") = ((655.466   - sse("non-homoth")) / (4)) / (sse("non-homoth") / ( nobs - nbp("non-homoth"))) ;
* so we need to compare to F(4,5002) .. at 5% significance level, that is about 1.9
* according to http://www.socscistatistics.com/pvalues/fdistribution.aspx  highly signif <0.00001



* 3 - test restriction of mu = (sigma-1) / theta
* if high, mean mu is significantly different from (sigma-1) / theta in data

* import SSE of case with no-restriction (tc)
$if "%objective%"=="logweighted"  sse("TC imp") = 1515414.70042461;
nbp("TC imp") = card(r_) + 3 *card(i_) ;

fstat("commontheta")$(nbp("TC imp") - nbp("non-homoth")) = ((sse("non-homoth") - sse("TC imp")) / (nbp("TC imp") - nbp("non-homoth"))) / (sse("TC imp") / ( nobs - nbp("TC imp"))) ;

specificationstats("avgmu - weighted") = avgmu("weighted");
specificationstats("F-stat sigma=1") = fstat("sigma=1") ;
specificationstats("F-stat flex nh") = 100000000000000000000;


specificationstats("R2") = rsquared("nh","all","total");
specificationstats("within R2") = withinrsquare("to homoth","all");
specificationstats("aic weighted") = modelselection("aic weighted","non-homoth");
specificationstats("bic weighted") = modelselection("bic weighted","non-homoth");

specificationstats("p") = nbp("non-homoth");
specificationstats("n") = nobs;

specificationstats("df1") = df("1");
specificationstats("df2") = df("2");
specificationstats("sse unweighted") = sse("non-homoth");
*specificationstats("sse homoth") = sse("homoth");
specificationstats("sstot") = sstot;
specificationstats("avgmu - unweighted") = avgmu("unweighted") ;

specificationstats("bic unweighted") = modelselection("bic unweighted","non-homoth");
specificationstats("within Rsquare") = withinrsquare("to homoth","all");
specificationstats("theta") = theta;
display specificationstats;


* -----------------------------------------------
* FOR REPORTING

set     sharetype/ "prod true (gross)","prod true (net)", "exp fitted homoth", "exp fitted non-homoth", "exp true"/
        dtype   demand type /"homoth", "non-homoth", "true"/
        asstype demand and supply assumption type / "true", "notc - nonhomoth", "notc - homoth", "tc - homoth", "tc - nonhomoth" /
        weighttype / noweight, expshare /
        ftype direct or total ? /direct, total/
        avgtype average over which set of countries ?  /all , eu, USA, Jpn, robustr/
        wtype / unweighted, weighted/;


parameter       tradecost GTAP trade cost (transport + tariffs),
                nettrade
                imp imports
                output
                netoutput net output net of intermediates
                sharegdp country share of GDP
                mickey
                co2int(i,ftype,avgtype,wtype),
                energyint
                dirco2int(i,r)
                relshares(*,i,r) country r's relative specialization in  good i
                shares(*,*,*)   sectoral shares of total
                netprodshares  sectoral shares of total net production,
                relnetprodshares sectoral shares of total net production relative to world total
                fct  factor content of trade
                importshare(*,r,s) country r's share of imports in s (Xni over Xn)
                TradeGDP ratio of trade to GDP
                tradepatterns trade patterns
                factorcontent average weighted values
                exporterFE, tcost
                trade, predphi
                vardata variance in data,
                fit rsquare "fit"
                nettradepattern
                sharenetoutput
                avgfactorcontent;

parameter yy, y, correlations, weight weight in correlation;
parameter XX, XXX , sharecorrelations;
parameter shrfact share of good i in total usage of factor f in region r;
parameter skillpremiumapprox;
parameter intermultiplier intermediate good multiplier;
parameter specificationstats for reporting;
parameter avgIAcoeffs average coefficients of the (I-A)-1 matrix;
parameter       fiindir
                factorint(f,i,*,r) factor intensity ratio by region
                avgfactorint(f,i,ftype,*,wtype) average factor intensity
                avgfactorratio(f,i,ftype,*,wtype) average factor ratio to total labor
                totalfacdem
                IAinverted(i,j,r);

* initialize reporting parameters:

avgfactorint(f,i,ftype,R,wtype) = 0;
avgfactorratio(f,i,ftype,r,wtype) = 0;
avgIAcoeffs(i,j,"unweighted") = 0;
specificationstats("theta") = 0;
intermultiplier(i_) = 0;
skillpremiumapprox("logpci",r_)  = 0;
shrfact("direct",f,i_, r_) =0;
avgfactorcontent("exp fitted nonhomoth",f,r_) =0;
sharenetoutput(r_) = 0;
nettradepattern("true",i_,r_) = 0;
tradepatterns("trade/gdp",asstype,r_) = 0;
importshare("true", r_, s_) = 0;
tradepatterns("avgpcitradingpartners",asstype,r_) = 0;
shares("pci",i_,r_) = 0;
relshares("pci",i_,r_) =0;
trade("true", i_,r_,s_) = 0;
fit("Tradegdp sse", asstype, "all") = 0;
fct("pci", f,r_) = 0;
correlations(f,"coeff", "noweight", ftype,avgtype,wtype)   = 0;
sharecorrelations("direct","coeff",sharetype,"df") = 0;
factorcontent(f,r_, "exp true") = 0;
nettrade("net",i_,r_) = 0;

netoutput(i_,r_) = 0;

parameter factrewardshare;
parameter estdata;
parameter avgfi;
parameter sharenetoutput;

parameter       impshares import shares as ratio of absorbtion
                shareimp_adjrow share of non-Row over total
                mrio B matrix
                output_alt output net of excluded intermediates and ROW exports
                output_diff
                output_adjrow
                output_corrected
                bilatfd_adjrow
                trade_adjrow
                bilatfd
                bilatfd_corrected New version: taking total imported final goods as originally
                exprow exports to RoW
                totexprow total exports to RoW ;

parameter impfd_alt alternative variable for imported final demand
          impfd_adjrow adjusting for rest of the world (RoW)
          impfd_diff cheking the difference
          imports
          btrade_corrected
          bilatfd_corrected
          trade_corrected;


factrewardshare(f,r_)=0;
estdata("log factorrewardshare",f,i_,r_) =0;
avgfi =0;
sharenetoutput(r_) =0;

impshares(i_, s_,r_) = 0;
shareimp_adjrow(i_,r_) =0;
exprow(i_,r_) =0;
totexprow(i_) =0;
totexprow(i_) =0;

impfd_alt(i_,r_) = 0;
impfd_diff(i_,r_) = 0 ;
impfd_adjrow(i_,r_) = 0;

bilatfd_corrected(i_,s_,r_) = 0;
bilatfd_corrected(i_,r_,r_) = 0;
btrade_corrected(i_,r_,r_) = 0 ;
btrade_corrected(i_,s_,r_) = 0;

output_alt(i_,r_) = 0;
output_adjrow(i_,r_) =  0;

output_corrected(i_,r_) = 0;

output_diff("alt/output",i_,r_) = 0;
trade_corrected("homoth", i_,s_,r_) = 0;

trade_adjrow("homoth",i_,s_,r_) = 0;

parameter expcheck, impcheck, expcheck_adjrow, impcheck_adjrow, expcheck_corrected, impcheck_corrected;
parameter bilatfdcompare;
expcheck(i_,r_) = 0;
impcheck(i_,s_) = 0 ;
expcheck_adjrow(i_,r_) = 0;
impcheck_adjrow(i_,s_) = 0;

expcheck_corrected(i_,r_) = 0;
impcheck_corrected(i_,s_) =  0;
bilatfdcompare("bilatfd","all","all","all") = 0;

parameter       dev             Maximum deviation
                coeffanew       New estimate of embodied factors
                dev_adjrow          Maximum deviation
                coeffanew_adjrow    New estimate of embodied factors
                dev_corrected          Maximum deviation
                coeffanew_corrected    New estimate of embodied factors;

parameter shareforeign;
parameter avgcoeffa;

imports("no row",i_,r_) = 0;
output(i_,r_) = 0;
shareforeign("FD",i_,r_) = 0;
avgcoeffa(f,i_) = 0 ;

$if "%skipreporting%" == "yes" $goto skip_reporting

* -----------------------------------------------------------------------------
* FACTOR INTENSITIES

* set of regions with robust skill intensity data
set europe(r) / DEU, FRA, ITA, GBR, AUT, BEL, DNK, FIN, GRC, IRL, LUX, NLD, PRT, ESP, SWE, CZE, HUN, MLT, POL, ROM, SVK, SVN, EST, LVA, LTU, BGR, CYP /;
set robustr(r) /  USA, AUS, JPN, TWN, KOR /;
robustr(europe) = yes;

* ------- "REGIONAL" INDIRECT FACTOR USAGE CALCULATIONS :
* indirect calculation based on:
* xr = Ar xr + yr + er minus mr

parameter
		Amatrix	A matrix expressed as proportion of production,
		avgAmatrix
		IA I-A matrix,
		IAinverse(i,j) temporary inverted I-A matrix,
		IAinverted(i,j,r)  inverted matrix,
		Ex exports
		Imp imports,
		output	sectoral output (X);
 

* create A(to,from) matrix containing share of j going to i 
amatrix(i,j,r)$vom(j,r) = vdfm(i,j,r)/vom(j,r);


* create (I-A)^(-1) matrix
loop(r,
* create (I-A) matrix which will be inverted
IA(i,i)$vom(i,r) = (1-Amatrix(i,i,r)); 
IA(i,j)$(vom(i,r) and not sameas(i,j))= -Amatrix(i,j,r);
* invert this matrix:
execute_unload 'gdxforinverse.gdx' i,IA;
execute 'invert gdxforinverse.gdx i IA gdxfrominverse.gdx IAinverse';
execute_load 'gdxfrominverse.gdx' , IAinverse;
IAinverted(i,j,r) = IAinverse(i,j);
);

* import and export vectors:

ex(i,r) = sum(s, vxmd(i,r,s));

* total imports imp(i,r) = impfd(i,r) + impinter(i,r)
parameter	impinter(i,r) import for intermediate production;
*		impcheck(i,r);

imp(i,r) = sum(g, vifm(i,g,r));
alias(i,ii);
impinter(i,r)= imp(i,r)-impfd(i,r);

*check trade balance:
parameter tradebal;
tradebal(i)=sum(r, ex(i,r))-sum(r, imp(i,r));
display tradebal;

output(i,r) = sum(j,IAinverted(i,j,r)*(fd(j,r)+ex(j,r)+vst(j,r)));

* this holds:
parameter totalcheck;
*totalcheck(i,r) = sum(j,amatrix(i,j,r)*output(j,r)) + fd(i,r)+ex(i,r) + vst(i,r) - output(i,r) ;
*display totalcheck;

* -------- FACTOR INTENSITIES:
* gtap only includes factor payments as values (no physical qties)

* make some summary stats:
parameter factorstats;
factorstats("dir - capital",i) = sum(r, vfm("capital",i,r));
factorstats("dir - skilled lab",i) = sum(r, vfm("sklab",i,r));
factorstats("dir - unskilled lab",i) = sum(r, vfm("unsklab",i,r));
factorstats("dir - resources",i) = sum(r, vfm("natlres",i,r));
factorstats("dir - land",i) = sum(r, vfm("land",i,r));


* calculate indirect factor usage:
parameter	 fidir		direct factor intensites in production,
		 fiindir	indirect factor intensities in production
		 fiindirMRIO	indirect factor intensities in production;


fidir(f,i,r)$vom(i,r) = vfm(f,i,r)/vom(i,r);
fiindir(f,j,r) = sum(i,fidir(f,i,r)*iainverted(i,j,r));



* OR, COULD RE-LOAD HERE: 

* load total factor intensity
*$gdxin '..\data\indirectfactorintensity.gdx'
*$load fiindir IAinverted

output(i_,r_) = sum(f,vfm(f,i_,r_) ) + sum(j_, vdfm(j_,i_,r_) + vifm(j_,i_,r_) );
factorint(f,i_,"direct",r_)$output(i_,r_)=  vfm(f,i_,r_) / output(i_,r_);
factorint(f,i_,"total",r_)$output(i_,r_) =  fiindir(f,i_,r_) / output(i_,r_);

* compute total factor demand :
totalfacdem(f,i,r) = fiindir(f,i,r) * vom(i,r);

* -----
* COMPUTE INTERMEDIATE VALUES


sharegdp(r_) = gdp(r_) / sum(r_.local,gdp(r_));
imp(i_,r_) = sum(g_, vifm(i_,g_,r_));

* trade costs : tariffs and transport costs
* includes value of imports from dropped regions
tradecost(i_,r_) =  imp(i_,r_) - sum(s_,  vxmd(i_,s_,r_));

netoutput(i_,r_) = vom(i_,r_) - vst(i_,r_) - sum(j_, vdfm(i_,j_,r_)) - sum(j_, vifm(i_,j_,r_)) + tradecost(i_,r_) ;

* nettrade
* gross : includes trade in intermediates as well
nettrade("gross",i_,r_) = sum(s_,  vxmd(i_,r_,s_)) - sum(s_,  vxmd(i_,s_,r_));

* net :
nettrade("net",i_,r_) = netoutput(i_,r_) - fd(i_, r_);



* -------
* COMPUTE FITTED TRADE

* includes domestic absorbtion
trade("true", i_,r_,s_) = btrade(i_,r_,s_);
trade("true", i_,r_,s_)$sameas(r_,s_) = sum(g,vdfm(i_,g,r_));


* fitted trade flows
trade(dtype, i_,s_,r_) =  ((exp(ex(i_,s_)+cst(i_)) * tcostest(i_,s_,r_)) /
                                 phiest(i_,r_))  *
                                 (fittedexp(dtype,i_,r_));



trade("sum share test", i_,s_,r_) =  sum(s_.local, ((exp(ex(i_,s_)+cst(i_))) * tcostest(i_,s_,r_)) /
                                 phiest(i_,r_))                  ;


* ----------------------------------------------------
* PART 1, FOR CORRELATIONS
* compute average factor intensities

* UNWEIGHTED factor intensities
avgfactorint(f,i,ftype,"all","unweighted") = sum(r_, factorint(f,i,ftype,r_)) / card(r_);
avgfactorint(f,i,ftype,"robust","unweighted") =  sum(robustr, factorint(f,i,ftype,robustr)) / card(robustr);

* WEIGHTED factor intensities
avgfactorint(f,i,ftype,"all","weighted")$sum((f.local,r_.local), factorint(f,i,ftype,r_)) =
 sum(r_, factorint(f,i,ftype,r_)) / sum((f.local,r_.local), factorint(f,i,ftype,r_)) ;

avgfactorint(f,i,ftype,"robust","weighted")$sum((f.local,robustr.local), factorint(f,i,ftype,robustr))
 =  sum(robustr, factorint(f,i,ftype,robustr))
        / sum((f.local,robustr.local), factorint(f,i,ftype,robustr)) ;


* as a ratio of total labor - ALL REGIONS:
avgfactorratio(f,i,"direct","all","unweighted") = sum(r_$(vfm("SkLab",i,r_) + vfm("UnskLab",i,r_)), vfm(f,i,r_) /
(vfm("SkLab",i,r_) + vfm("UnskLab",i,r_))) /card(r_);

avgfactorratio(f,i,"direct","all","weighted")  = sum(r_, vfm(f,i,r_)) /
sum(r_,(vfm("SkLab",i,r_) + vfm("UnskLab",i,r_)));

avgfactorratio(f,i,"total","all","unweighted")  = sum(r_$(totalfacdem("SkLab",i,r_) + totalfacdem("UnskLab",i,r_)), totalfacdem(f,i,r_) /
(totalfacdem("SkLab",i,r_) + totalfacdem("UnskLab",i,r_))) /card(r_);

avgfactorratio(f,i,"total","all","weighted")  = sum(r_, totalfacdem(f,i,r_)) /
sum(r_,(totalfacdem("SkLab",i,r_) + totalfacdem("UnskLab",i,r_)));

* as a ratio of total labor - ROBUSTR REGIONS:
avgfactorratio(f,i,"direct","robustr","unweighted") = sum(robustr$(vfm("SkLab",i,robustr) + vfm("UnskLab",i,robustr)), vfm(f,i,robustr) /
(vfm("SkLab",i,robustr) + vfm("UnskLab",i,robustr))) /card(robustr);

avgfactorratio(f,i,"direct","robustr","weighted")  = sum(robustr, vfm(f,i,robustr)) /
sum(robustr,(vfm("SkLab",i,robustr) + vfm("UnskLab",i,robustr)));

avgfactorratio(f,i,"total","robustr","unweighted")  = sum(robustr$(totalfacdem("SkLab",i,robustr) + totalfacdem("UnskLab",i,robustr)), totalfacdem(f,i,robustr) /
(totalfacdem("SkLab",i,robustr) + totalfacdem("UnskLab",i,robustr))) /card(robustr);

avgfactorratio(f,i,"total","robustr","weighted")  = sum(robustr, totalfacdem(f,i,robustr)) /
sum(robustr,(totalfacdem("SkLab",i,robustr) + totalfacdem("UnskLab",i,robustr)));


* as a ratio of total labor - EUROPE :
avgfactorratio(f,i,"direct","eu","unweighted") = sum(europe$(vfm("SkLab",i,europe) + vfm("UnskLab",i,europe)), vfm(f,i,europe) /
(vfm("SkLab",i,europe) + vfm("UnskLab",i,europe))) /card(europe);

avgfactorratio(f,i,"direct","eu","weighted")  = sum(europe, vfm(f,i,europe)) /
sum(europe,(vfm("SkLab",i,europe) + vfm("UnskLab",i,europe)));

avgfactorratio(f,i,"total","eu","unweighted")  = sum(europe$(totalfacdem("SkLab",i,europe) + totalfacdem("UnskLab",i,europe)), totalfacdem(f,i,europe) /
(totalfacdem("SkLab",i,europe) + totalfacdem("UnskLab",i,europe))) /card(europe);

avgfactorratio(f,i,"total","eu","weighted")  = sum(europe, totalfacdem(f,i,europe)) /
sum(europe,(totalfacdem("SkLab",i,europe) + totalfacdem("UnskLab",i,europe)));

set usajap(r) /usa, jpn/;

* FAVOURITE SPECIFICATION - TOTAL, WEIGHTED, all regions
coeffs("skill int total (weighted)","coeff", i_) = avgfactorratio("sklab",i_,"total","all","weighted");
coeffs("Capital int total (weighted)","coeff", i_) = avgfactorratio("Capital",i_,"total","all","weighted");
coeffs("Nat res int total (weighted)","coeff", i_) = avgfactorratio("Natlres",i_,"total","all","weighted") +
avgfactorratio("Land",i_,"total","all","weighted");

coeffs("skill int direct (weighted)","coeff", i_) = avgfactorratio("sklab",i_,"direct","all","weighted");
coeffs("share of output","coeff", i) = sum((r_),vom(i,r_)) / sum((i.local,r_), vom(i,r_));
coeffs("share of fd","coeff", i) = sum((r_),vdfm(i,"c",r_) + vifm(i,"c",r_) + vdfm(i,"g",r_) + vifm(i,"g",r_)) /
         sum((i.local,r_),vdfm(i,"c",r_) + vifm(i,"c",r_) + vdfm(i,"g",r_) + vifm(i,"g",r_));

coeffs("share of trade","coeff", i) = sum((r_,s_), vxmd(i,r_,s_)) /
         sum((i.local,r_,s_), vxmd(i,r_,s_));


execute_unload 'estimates\estimates_%ds%_%objective%_%spec%_%regsubset%_PHI_IV.gdx', coeffs, lambda,
specificationstats, theta,  fe , sigma_scale, im, ex, cst, forstata, w,
shares, fittedexp, fittedPCexp, nbi, nbr, correlations, sharecorrelations, factorint, nettrade, netoutput, vom
relshares, avgfactorratio, importshare,  tradepatterns, fct, factorcontent, avgfactorcontent, nettrade,
avgfactorint, avgfactorratio, nettradepattern, rsquared
skillpremiumapprox, intermultiplier, avgIAcoeffs, co2int, fit,  estdata, impfd_alt, impfd, imports,
impcheck, expcheck, expcheck_adjrow, impcheck_adjrow, impcheck_corrected, expcheck_corrected, output, output_adjrow, output_alt, output_diff, shareforeign,
avgcoeffa, modelselection, nobs, nbp, sse, withinrsquare, phiest, tcostest, sectdrop, IncElast
;

* exit here if no bootstrapping
$if %boot%==no $exit


$label bootstrap
* ---------------------------------------------------------------------------
* BOOTSTRAPPING
* ---------------------------------------------------------------------------

parameter nbbootiterations /%itr%/;
set params / sigma, mu, eta, theta "incelast - mean shares", "incelast - median ctry" /;
parameter bootcorr, bootsharecorr;

option solprint=off;
option limrow=0;
option limcol=0;

bootcoef(boot,"sigma",i_) = 0; bootcoef(boot,"eta",i_) = 0;bootcoef(boot,"mu",i_) = 0;

* -- RE-ESTIMATE LAMBDA FOR MEDIAN COUNTRY
* define model to re-estimate the lambda for BGR (median country) for income elasticity calculation
parameter fittedPCexpBGR(i), saveparams just to save the parameter estimates;
saveparams("sigma",i_) = 0;
positive variables lambdaBGR_; variable sseBGR_; equations obj_fitlambdaBGR;
* minimizing error in budget constraint for BGR


obj_fitlambdaBGR.. sseBGR_ =e= sum(i_, sqr(sum(i_.local, (lambdaBGR_**(-saveparams("sigma",i_))) * saveparams("fe",i_) *(phi(i_,"bgr")**saveparams("mu",i_))
        ) - w("bgr")));

model fitBGR /obj_fitlambdaBGR/;

parameter reportlambda;

loop(boot,
wt(r_)=0;
        loop(k,
        rdraw=round(uniform(0.5,dim+0.5))
                loop((z)$(cardZZ(z) eq rdraw),
                 wt(z)=wt(z)+1;
*  note : countries can be reselected
                );
        );

display wt;

wtchk=round(sum((k),wt(k))-dim,5);
display wtchk;
abort$(wtchk ne 0) "Inconsistent Weighting";

*execute_loadpoint "nlls_p.gdx";
solve nlls using nlp minimizing sse_;


bootcoef(boot,"sigma",i_)=sigma_.l(i_)$(nlls.solvestat eq 1);
bootcoef(boot,"mu",i_)=mu_.l(i_)$(nlls.solvestat eq 1);
bootcoef(boot,"eta",i_)=eta_.l(i_)$(nlls.solvestat eq 1);
bootcoef(boot,"fe",i_)=fe_.l(i_)$(nlls.solvestat eq 1);
bootcoef(boot,"theta",i_)=theta_.l$(nlls.solvestat eq 1);


bootcoef(boot,"incelast - mean shares",i_) =
        sigma_.L(i_) / (sum(i_.local,  sigma_.L(i_) * sum(r,fd(i_,r)) /sum((i_.local,r), fd(i_,r))));


* when using medium country, need to re-estimate its lambda in case it is out of the bootstrap sample
saveparams("eta",i_) =eta_.L(i_); saveparams("mu",i_) =mu_.L(i_); saveparams("fe",i_) = fe_.L(i_); saveparams("sigma",i_) =sigma_.L(i_);
lambdaBGR_.L = lambda_.L("bgr");
solve fitBGR using nlp minimizing sseBGR_;
fittedPCexpBGR(i_) = fe_.L(i_)* lambdaBGR_.L**(- sigma_.L(i_)) * (phi(i_,"BGR")**(mu_.L(i_)))* (ICPprice(i_,"BGR")**eta_.L(i_));
bootcoef(boot, "incelast - median ctry", i_) = sigma_.L(i_)*(sum(i_.local,  fittedPCexpBGR(i_))) / (sum(i_.local,  sigma_.L(i_) * fittedPCexpBGR(i_) ));
reportlambda(boot) = lambdaBGR_.L;



bootcoef(boot,"incelast - median ctry", i_) = sigma_.L(i_)*(sum(i_.local,  fd(i_,"BGR"))) / (sum(i_.local,  sigma_.L(i_) * fd(i_,"BGR") ));


bootcoef(boot,"factorcoeff","all")=factorcoeff_.l$(nlls.solvestat eq 1);

* do factor correlations here :
y(i_) = sigma_.L(i_);
*YY(f,i_) = avgfactorratio(f,i_,"direct","all");

bootcorr(boot,f,"noweight", ftype,avgtype)  =    (card(i_)*sum((i_), yy(f,i_,ftype,avgtype)*y(i_)) -sum((i_), yy(f,i_,ftype,avgtype))*sum((i_), y(i_))) /
        (
        (card(i_)*sum((i_), yy(f,i_,ftype,avgtype)**2) - (sum((i_), yy(f,i_,ftype,avgtype))**2))**(0.5)*
        (card(i_)*sum((i_), y(i_)**2) - (sum((i_), y(i_))**2))**(0.5));

bootcorr(boot,f,"expshare", ftype,avgtype)  = (sum((i_), weight(i_)* yy(f,i_,ftype,avgtype)*y(i_))
  -sum((i_), weight(i_)* yy(f,i_,ftype,avgtype))*sum((i_), weight(i_)* y(i_))) /
        (
        (sum((i_), weight(i_)* yy(f,i_,ftype,avgtype)**2) - (sum((i_), weight(i_)* yy(f,i_,ftype,avgtype))**2))**(0.5)*
        (sum((i_), weight(i_)* y(i_)**2) - (sum((i_), weight(i_)* y(i_))**2))**(0.5));



* do share correlations here :
* re calculate fitted expenditure shares :
fittedPCexp("non-homoth",i_,r_) = fe_.L(i_)* lambda_.L(r_)**(- sigma_.L(i_)) * phi(i_,r_)**(mu_.L(i_));
shares("exp fitted",i_,r_) = fittedPCexp("non-homoth",i_,r_) / sum((i_.local), fittedPCexp("non-homoth",i_,r_));
shares("exp fitted",i_,"world") = sum(r_, fittedPCexp("non-homoth",i_,r_)) / sum((i_.local,r_), fittedPCexp("non-homoth",i_,r_));
relshares("exp fitted",i_,r_) = shares("exp fitted",i_,r_) / shares("exp fitted",i_,"world");


loop(sharetype, loop(sharetype2,

XX(i_,r_) = relshares(sharetype,i_,r_);
XXX(i_,r_) = relshares(sharetype2,i_,r_) ;


bootsharecorr(boot,sharetype,sharetype2)$(sum((i_,r_),xx(i_,r_)) and sum((i_,r_),xxx(i_,r_))) = (card(i_)*card(r_)*sum((i_,r_), XX(i_,r_)*XXX(i_,r_)) -sum((i_,r_), XX(i_,r_))*sum((i_,r_), XXX(i_,r_))) /
        (
        sqrt(abs(card(i_)*card(r_)*sum((i_,r_), XX(i_,r_)*XX(i_,r_)) - (sum((i_,r_), XX(i_,r_))*sum((i_,r_), XX(i_,r_)))))*
        sqrt(abs(card(i_)*card(r_)*sum((i_,r_), XXX(i_,r_)*XXX(i_,r_)) - (sum((i_,r_), XXX(i_,r_))*sum((i_,r_), XXX(i_,r_))))));

););
* end share correlation
*- ---------------------------------
);

display bootcoef, bootsharecorr;

*       Rank the estimates
alias(boot,boot2);
parameter rank(boot,*,*)        rank order of estimate,
        rankcorr, ranksharecorr;

rank(boot,params,i_)=1+sum(boot2$(bootcoef(boot2,params,i_)
                                 gt bootcoef(boot,params,i_)),1);


rank(boot,"factorcoeff","all")=1+sum(boot2$(bootcoef(boot2,"factorcoeff","all")
                                 gt bootcoef(boot,"factorcoeff","all")),1);

rankcorr(boot,f,weighttype,ftype,avgtype)=1+sum(boot2$(bootcorr(boot2,f,weighttype,ftype,avgtype)
                                 gt bootcorr(boot,f,weighttype,ftype,avgtype)),1);

ranksharecorr(boot,sharetype,sharetype2)=1+sum(boot2$(bootsharecorr(boot2,sharetype,sharetype2)
                                 gt bootsharecorr(boot,sharetype,sharetype2)),1);

display rank, ranksharecorr;


* correction:
parameter nbsame, nbsamecorr, nbsamesharecorr nb of elements with same rank;
nbsame(params,boot,i_) =0;
nbsamecorr(boot,f,weighttype,ftype,avgtype) =0;
nbsamesharecorr(boot,sharetype,sharetype2) =0;

loop(i_,
loop(params,
loop(boot,
        loop(boot2,
*nbsame(boot,i_)$(rank(boot,"sigma",i_) eq rank(boot2,"sigma",i_)) = nbsame(boot,i_) +1;
*rank(boot,"sigma",i_)$(nbsame(boot,i_) gt 1) = rank(boot,"sigma",i_) + 1;

nbsame(params,boot,i_)$(rank(boot,params,i_) eq rank(boot2,params,i_)) = nbsame(params,boot,i_) +1;
rank(boot,params,i_)$(nbsame(params,boot,i_) gt 1) = rank(boot,params,i_) + 1;

*        loop(boot2,
*nbsame(boot,i_)$(rank(boot,"factorcoeff","all") eq rank(boot2,"factorcoeff","all")) = nbsame(boot,i_) +1;
*rank(boot,"sigma",i_)$(nbsame(boot,i_) gt 1) = rank(boot,"sigma",i_) + 1;
* NOT done yet for


););););

loop(f,
loop(ftype,
loop(avgtype, loop(weighttype,
loop(boot,
        loop(boot2,
nbsamecorr(boot,f,weighttype,ftype,avgtype)$(rankcorr(boot,f,weighttype,ftype,avgtype) eq rankcorr(boot2,f,weighttype,ftype,avgtype)) = nbsamecorr(boot,f,weighttype,ftype,avgtype) +1;
rankcorr(boot,f,weighttype,ftype,avgtype)$(nbsamecorr(boot,f,weighttype,ftype,avgtype) gt 1) = rankcorr(boot,f,weighttype,ftype,avgtype) + 1;

););););););

loop(sharetype,
loop(sharetype2,
loop(boot,
        loop(boot2,
nbsamesharecorr(boot,sharetype,sharetype2)$(ranksharecorr(boot,sharetype,sharetype2) eq ranksharecorr(boot2,sharetype,sharetype2)) = nbsamesharecorr(boot,sharetype,sharetype2) +1;
ranksharecorr(boot,sharetype,sharetype2)$(nbsamesharecorr(boot,sharetype,sharetype2) gt 1) = ranksharecorr(boot,sharetype,sharetype2) + 1;

););););

display nbsamesharecorr, ranksharecorr;

execute_loadpoint "nlls_p.gdx";
Parameter
        median(*,*)     "median bootstrap value"
        mediancorr
        mediansharecorr
        bias(*,*)       "bias in bootstrap median minus first moment"

        st_err(*,*)     "bootstrap standard error (average upper and lower)"
        st_errcorr
        st_errsharecorr
        crit95up(*,*)   "upper critical value for the 95 conf. interval"
        crit95lo(*,*)   "lower critical value for the 95 conf. interval"
;


median(params,i_)=
sum(boot$(rank(boot,params,i_) eq
                              round(0.5*(card(boot2)))),
                                bootcoef(boot,params,i_))


;

mediancorr(f,weighttype,ftype,avgtype)=
sum(boot$(rankcorr(boot,f,weighttype,ftype,avgtype) eq
                              round(0.5*(card(boot2)))),
                                bootcorr(boot,f,weighttype,ftype,avgtype));

mediansharecorr(sharetype,sharetype2)=
sum(boot$(ranksharecorr(boot,sharetype,sharetype2) eq
                              round(0.5*(card(boot2)))),
                                bootsharecorr(boot,sharetype,sharetype2));


display mediansharecorr;

bias("sigma",i_)=median("sigma",i_)-sigma_.l(i_);
bias("eta",i_)=median("eta",i_)-eta_.l(i_);
bias("mu",i_)=median("mu",i_)-mu_.l(i_);


st_err(params,i_)=ABS(0.5*     (
                                sum(boot$(rank(boot,params,i_) eq
                                        round(0.8413*card(boot2))),
                                bootcoef(boot,params,i_))-
* seems to me like this could be taking too many values if many have same rank ?
                                sum(boot$(rank(boot,params,i_) eq
                                        round(0.1587*card(boot2))),
                                bootcoef(boot,params,i_))
                                ));



st_err("factorcoeff","all")=ABS(0.5*     (
                                sum(boot$(rank(boot,"factorcoeff","all") eq
                                        round(0.8413*card(boot2))),
                                bootcoef(boot,"factorcoeff","all"))-
* seems to me like this could be taking too many values if many have same rank ?
                                sum(boot$(rank(boot,"factorcoeff","all") eq
                                        round(0.1587*card(boot2))),
                                bootcoef(boot,"factorcoeff","all"))
                                ));

st_errcorr(f,weighttype,ftype,avgtype)=ABS(0.5*     (
                                sum(boot$(rankcorr(boot,f,weighttype,ftype,avgtype) eq
                                        round(0.8413*card(boot2))),
                                bootcorr(boot,f,weighttype,ftype,avgtype))-
* seems to me like this could be taking too many values if many have same rank ?
                                sum(boot$(rankcorr(boot,f,weighttype,ftype,avgtype) eq
                                         round(0.1587*card(boot2))),
                                bootcorr(boot,f,weighttype,ftype,avgtype))
                                ));

st_errsharecorr(sharetype,sharetype2)=ABS(0.5*     (
                                sum(boot$(ranksharecorr(boot,sharetype,sharetype2) eq
                                        round(0.8413*card(boot2))),
                                bootsharecorr(boot,sharetype,sharetype2))-
* seems to me like this could be taking too many values if many have same rank ?
                                sum(boot$(ranksharecorr(boot,sharetype,sharetype2) eq
                                        round(0.1587*card(boot2))),
                                bootsharecorr(boot,sharetype,sharetype2))
                                ));



crit95lo(params,i_)=sum(boot$(rank(boot,params,i_) eq
                                        round(0.975*(card(boot2)))),
                                bootcoef(boot,params,i_));

crit95up(params,i_)=sum(boot$(rank(boot,params,i_) eq
                                        round(0.025*(card(boot2)))),
                                bootcoef(boot,params,i_));

display median,  st_err,crit95lo, crit95up, bias;

coeffs(params, "st_err", i_) = st_err(params,i_);
coeffs(params,"crit95lo", i_) = crit95lo(params,i_);
coeffs(params,"crit95up", i_) = crit95up(params,i_);
coeffs(params, "bias", i_) = bias(params,i_);
coeffs(params, "median coeff", i_) = median(params,i_);


* significance :
* what does it mean for sigma ? (what is H0 ?)
coeffs("sigma","t-stat", i_)$coeffs("sigma", "st_err", i_) = (coeffs("sigma", "coeff", i_) - 1) / coeffs("sigma", "st_err", i_) ;
coeffs("eta","t-stat", i_)$coeffs("eta", "st_err", i_) = (coeffs("eta", "coeff", i_)-0) / coeffs("eta", "st_err", i_) ;
coeffs("mu","t-stat", i_)$coeffs("mu", "st_err", i_) = (coeffs("mu", "coeff", i_)-0) / coeffs("mu", "st_err", i_) ;

coeffs("sigma","sig", i_)$(abs(coeffs("sigma","t-stat", i_)) >1.96) = 1;
coeffs("eta","sig", i_)$(abs(coeffs("eta","t-stat", i_)) >1.96) = 1;
coeffs("mu","sig", i_)$(abs(coeffs("mu","t-stat", i_)) >1.96) = 1;



correlations(f,"std err",weighttype,ftype,avgtype) = st_errcorr(f,weighttype,ftype,avgtype);
correlations(f,"median coeff",weighttype,ftype,avgtype) = mediancorr(f,weighttype,ftype,avgtype);

sharecorrelations("direct", "std err",sharetype,sharetype2) = st_errsharecorr(sharetype,sharetype2);
sharecorrelations("direct", "median",sharetype,sharetype2) = mediansharecorr(sharetype,sharetype2);


display reportlambda;

execute_unload 'results\CRIE_%objective%_%spec%.gdx', coeffs, lambda, specificationstats
theta,  fe , sigma_scale, st_err, nbbootiterations, shares,relshares, sharecorrelations, correlations,
nbr, nbi, bootcorr, bootcoef,fittedPCexp, relshares, avgfactorratio, importshare,  tradepatterns,
fct, factorcontent, avgfactorcontent, nettrade, trade,estdata, indexp
;

