$title  demand_CLM.GMS

* ------------------------------------------------------
* Per Capita Income and the Demand for Skills
* Journal of International Economics

* Justin Caron, Thibault Fally and James Markusen

* December 2019
* ------------------------------------------------------

* ESTIMATE DEMAND SYSTEM PARAMETERS - COMIN ET AL. 2016 (NH CES) PREFERENCES

* Input data directory. The default is applied only when `datadir` has not
* been set already; the value is then re-exported with $setglobal.
$if not set datadir $set datadir "data\"
$setglobal datadir %datadir%

* ---------------------------------------------------------------------------------------------
* OPTIONS :
* Each option below is a compile-time variable; the default on the line is
* used only if the variable has not been set before this point.

* SPECIFICATION ?
* choices : tc, theta4
* (selects which model statement is compiled near the end of this file)

$if not set spec $set spec theta4

* subset of regions to include
* NOTE(review): %regsubset% is not referenced again in this file; presumably
* it is read by an included or downstream file - confirm.
$if not set regsubset $set regsubset rall

* MINIMIZE ERRORS ON LOGS OR CONSUMPTION SHARES ?
* choices : log, consshare, logweighted
* The value is spliced into the objective equation name (obj_%objective%)
* when the model is declared. Only obj_logweighted is given a definition in
* this file.
$ if not set objective $set objective logweighted

* load gravity data from stata or gams ?
* choices : stata, gams
* Only the "gams" branch is active in this file; the "stata" line is commented out.
$ if not set gravitydata $set gravitydata gams

* skip reporting ?
* choices : yes, no
* NOTE(review): %skipreporting% is not referenced again in this file.
$ if not set skipreporting $set skipreporting yes

* SELECT DATASET TO USE
* %ds% is used in the name of the gravity-estimates GDX file. The data loader
* included below and the output GDX written at the end of this file are
* hard-coded to gtap5.
$if not set ds $set ds gtap5

$include loaddata_gtap5.gms

* BOOTSTRAP ?  (note: not tested with CLM preferences)
* NOTE(review): %boot% is not referenced again in this file (the set named
* `boot` declared later is a different object).
$if not set boot $set boot no
* set number of bootstrap iterations (defines the size of the set `boot`):
$if not set itr $set itr 4


* ------------------------------------
* IMPORT GRAVITY ESTIMATES
*
* Declares the containers for the gravity-stage estimates and fills them at
* compile time from estimates\gravityestimates_%ds%_atc.gdx.
* `logphiest` is declared here but is not loaded by this file.

parameter       coeffs stores all sector-specific coefficients
                phiest
                tcostest
                logphiest
                IM
                EX
                cst;


$gdxin estimates\gravityestimates_%ds%_atc.gdx
$load  coeffs phiest im ex cst tcostest
* close the GDX input file again once everything has been loaded
$gdxin

* esttheta is set to zero for every sector; it is not read again in this file
parameter esttheta;
esttheta(i) = 0;

display coeffs;

* -- DEFINE SECTORS WHICH ARE MOSTLY INTERMEDIATES
*
* sharefd(i) = (total use of i over all g) minus (use of i by the j entries),
*              divided by total use of i over all g.

parameter sharefd % FD in vdm (dom output);
set intermediates(i);

sharefd(i) =
    (   sum((r,g), vdfm(i,g,r) + vifm(i,g,r))
      - sum((j,r), vdfm(i,j,r) + vifm(i,j,r)) )
    / sum((r,g), vdfm(i,g,r) + vifm(i,g,r));

* a sector is flagged as "intermediate" when its share is below 10 %
loop(i$(sharefd(i) < 0.1),
    intermediates(i) = yes;
);
display sharefd, intermediates;

* -- DEFINE SERVICE AND TRADABLE SECTORS

*set     serv(i) service sectors / CMN, DWE,ISR,OBS, OFI,OSG,ROS,WTR,TRD,CNS, OTP, ATP, WTP, GDT, ELY/
set     serv(i) service sectors /osg/
        tradables(i) the tradable sectors;

* every sector is tradable unless it is listed in serv
tradables(i) = yes$(not serv(i));
display tradables;


* -----------------------------------------------------------
* NLLS Demand estimations
* -----------------------------------------------------------
* the only exogenous parameters needed are: x(i,r), per capita expenditure, w(r) = wage = PCI
*
* This section declares the estimation data, picks the phi series and builds
* the estimation sample: sectors i_, regions r_ and the matching g_ subset.


parameter       x(i,r)          per capita expenditure
                w(r)            wage = PCI
                indexp(i)       industry total expenditure,
                logphi,
                phi
                bhat;

set i_(i), r_(r),g_(g),
        rall(r) set of 94 regions
        rICP(r) set of regions with ICP prices;
i_(i) = no;



* -- SELECT WHICH PHI TO USE HERE
* Only the "gams" source is implemented below (the stata import is commented
* out). The comparison is case-insensitive ($ifi), and any other value stops
* compilation instead of silently leaving phi without the gravity estimates.
*$if "%gravitydata%" == "stata" logphi(i,r) = importdata(i,r,"log_Phi_v2");
$ifi "%gravitydata%" == "gams"  phi(i,r) = phiest(i,r);
$ifi not "%gravitydata%" == "gams" $abort Unsupported gravitydata=%gravitydata% : only gams is available in this file


*****************************************************************
* necessary to get the same results as in the paper..
* note: not making this change yields very similar results (within 1%)
phi("tex",r) = 66;
phi("otn",r) = 66;

* -- SELECT WHICH SECTORS TO USE HERE :

* using all sectors for which we have estimates of PHI :
i_(i)$sum(r,phi(i,r))= yes;

* selecting regions :
rall(r)$sum(i,phi(i,r)) = yes;


* regions used in the estimation (explicit list)
Sets
r_ country  /
col,aus,
nzl,chn,hkg,jpn,kor,twn,idn,mys,phl,sgp,tha,vnm,bgd,ind,lka,can,usa,mex,
per,ven,arg,bra,chl,ury,aut,bel,dnk,fin,fra,deu,gbr,grc,irl,ita,lux,nld,prt,esp,swe,che,
alb,bgr,hrv,cze,hun,mlt,pol,rom,svk,svn,est,lva,ltu,rus,cyp,tur,mar,bwa,mwi,moz,tza,zmb,zwe,uga/;


display r_;

* dropping intermediates
i_(intermediates) = no;

* the "dwe" sector is dropped as well
i_("dwe") = no;

display i,i_, intermediates;
alias(i_,j_);
alias(r_,s_);

* define new g set: the selected sectors plus the labels "c", "g" and "i"
g_(i_) = yes; g_("c") = yes; g_("g") = yes; g_("i") = yes;

* sample sizes: number of regions and number of sectors in the estimation
parameter nbr, nbi;
nbr = card(r_);
nbi = card(i_);

display nbr, nbi;

display r_, i_;


* Per-capita expenditure (skipped where pop(r) is zero):
*   w(r)   sums final demand over the selected sectors i_ only,
*   x(i,r) is computed for every sector i.
* The scaling literal 10e8 equals 1e9.
w(r)$pop(r)   = 10e8 * sum(i_, fd(i_,r)) / pop(r);
x(i,r)$pop(r) = 10e8 * fd(i,r) / pop(r);

* first weighting candidate: sector i's share in the final demand of the r_ regions
indexp(i) = sum(r_, fd(i,r_)) / sum((r_,i.local), fd(i,r_));

display w;

* ALTERNATIVE WEIGHTING SCHEMES, CHOOSE HERE:
* (the last active assignment to indexp is the one that is used)

*	 maximum expenditure share across the r_ regions
indexp(i) = smax(r_, x(i,r_) / w(r_));

* or no weighting:
*indexp(i)  = 0.01;

* log of phi wherever phi is nonzero
logphi(i,r)$phi(i,r) = log(phi(i,r));

* expenditure shares within the selected sectors, and a unit flag per observation
parameter expshare;
parameter sectdrop;
expshare(i_,r_) = fd(i_,r_) / sum(i_.local, fd(i_,r_));
sectdrop(i_,r_) = 1;

* containers for summary statistics (declared here, not filled in this section)
parameter
    fittedPCexp     fitted per capita expenditure
    fittedexp       fitted expenditure
    sstot total sum of squares
    rsquared
    nobs number of observations
    Fstat F-test statistic
    nbp number of parameters in model
    df degrees of freedom for Fstat
    sigma2hat estimated variance of regression
    modelselection;


* ----------------------------------------------
* Bootstrap scaffolding, objective weights, fixed-effect scaling and the
* low-income flag.

* one element per bootstrap iteration
set boot /1*%itr%/;
set lowincome(r);

parameter  wt(r)        weight in the objective
           bootcoef(boot,*,*)
           dim          number of ctries
           rdraw        random draw from the pool of ctries
           cardzz(r)    index on each observation
           wtchk        weight check
;
parameter fe_norm(i);

* additional names for the estimation regions
alias (r_,k,kk,z,zz);

option seed=081567;

* every estimation region gets unit weight
wt(r_) = 1;

* position of each region within r_, and the number of regions
*cardzz(k,kk)=(ord(k)-1)*card(k) + ord(kk);
* (ord cannot be used on an "unstable" set, hence the one-dimensional index)
cardzz(r_) = ord(r_);
dim = card(r_);
display dim, cardzz;

* scaling applied to the fixed effect, initialised to the sector weight
fe_norm(i) = indexp(i);

* regions whose log per-capita income is below 8.65
lowincome(r_)$(log(w(r_)) < 8.65) = yes;


* ---------------------------------------------------------
* -- DEFINE MODEL :
*
* Variables (the trailing underscore marks model variables):
*   sse_      objective value (weighted sum of squared errors)
*   U_(r)     region-specific term entering through log(U_)
*   sigma_    coefficient on log(w)
*   mu_(i)    coefficient on logphi
*   fe_(i)    sector fixed effect, multiplied by fe_norm(i) in the equations
*   rho_, nu_ coefficients on log(U_) and on log(U_)^2 (indexed by sector when used)
*   theta_    links mu_ and sigma_ in equation commontheta
* delta_, b_ and commonrho_ are declared but do not appear in any equation
* defined in this file.

variables sse_;

Positive variables       U_(r), theta_, delta_, b_  ;

variables rho_, nu_;

VARIABLES mu_(i), sigma_,  fe_(i) the LOGGED fixed effect, commonrho_  ;

* Of the equations declared here, only obj_logweighted, commontheta,
* commonrho and budget are given a definition in this file.
equations  obj_log, obj_consshare, obj_logweighted, budget, commontheta,  epsilon_fact, commondelta, est_theta, sigmadiff, positive_g_cstr, u_cstr, 
commonrho;

* -----------------------
* CLM ESTIMATION

* this is the CLM objective function:
* weighted sum, over observations with nonzero x, of the squared difference
* between log(x) and the fitted value
*   sigma*log(w) + mu*logphi + fe*fe_norm + rho*log(U) - nu*log(U)^2
* with weight sectdrop * indexp * wt.
obj_logweighted.. sse_ =e=      sum((i_,r_)$x(i_,r_),
 sectdrop(i_,r_) *  indexp(i_) *  wt(r_) * sqr(log(x(i_,r_)) - (
 sigma_ * log(w(r_)) +  mu_(i_)*logphi(i_,r_)  +  (  fe_(i_)*fe_norm(i_) + rho_(i_)   * log(U_(r_)) - nu_(i_)  * log(U_(r_))*log(U_(r_)) ))));

* Note: here we redefined g to include ** (1/1- sigma)  -- but, this means that the constraint on g (below), must include sigma.
* also, means we should have no non-negativity constraints on rho and nu
* here rho = epsilon - sigma

* constraint on mu: mu(i)*theta + 1 = sigma for every selected sector,
* i.e. all mu(i) share the value (sigma - 1)/theta
commontheta(i_) ..      (mu_(i_)* theta_ +1 )=e= ( sigma_ )  ;

* for homothetic version: forces every rho(i) to equal rho("obs")
* (this equation is not part of either model statement in this file)
commonrho(i_) ..      rho_(i_)  =e= rho_("obs") ;

* budget constraint: for each region, the exponentiated fitted log
* expenditures (same expression as in the objective) must sum to w(r)
budget(r_) ..   sum(i_, exp(  sigma_ * log(w(r_)) +  mu_(i_)*logphi(i_,r_) +
*  redefine g to include sigma
  fe_(i_)*fe_norm(i_) + rho_(i_)  * log(U_(r_)) - nu_(i_)  * log(U_(r_))*log(U_(r_))) )  =e= w(r_);

* INITIALIZING
*
* Starting values and bounds for the estimation. Statement order matters in
* this section: several values set early are replaced further down, and only
* the last assignment is in effect when the model is solved.

* NOTE: fe_norm is reassigned for every sector near the end of this section,
* so these four assignments (including the x10 scaling for pdr, wol, gdt)
* are replaced before the solve.
fe_norm(i) = indexp(i);
fe_norm("pdr") = indexp("pdr") * 10;
fe_norm("wol") = indexp("wol") * 10;
fe_norm("gdt") = indexp("gdt") * 10;

fe_.L(i_) = 1;
* !! results could be sensitive to these bounds if they are binding
fe_.LO(i_) = -1E4;
fe_.UP(i_) = 1E4;

* initialize variables:
sse_.L = 0;

* CLM: can only normalize one of lambda and sigma
U_.L(r_)= 1;
* note, if sigma > 1 then the lower bound becomes important..
U_.LO(r_)= 1E-3;
U_.UP(r_)= 1E3;
* U_ for the USA is fixed here at 30 and fixed again at 20 further down
U_.FX("USA") =30;

* FOR TC
*nu_.FX(i_) = 0;

* the lower bound on rho_ is tightened to -2 further down
rho_.UP(i_) =1E2;
rho_.LO(i_) =-1E2;
rho_.L(i_) = 0;



* for general FLEXIBLE FORM
* note: not used in final version of paper

*nu_.FX = 1;
nu_.L(i_) = 0;
nu_.LO(i_) =  -1e3;
*nu_.LO(i_) =  0;
nu_.UP(i_) = 1e3;
* TURN OFF QUAD TERM: fixing nu_ at 0 replaces the level and bounds set just above
nu_.FX(i_) = 0;

*this still matters:
* NOTE(review): b_ does not appear in any equation defined in this file - confirm
* whether these settings are still needed. (10e7 equals 1e8.)
b_.L =1/5;
b_.UP =10e7;
*b_.FX(i_)$(not sameas(i_, "ely")) =0;
*b_.FX(i_)$(sameas(i_, "ely")) =5;

* starting value for mu_; no bounds are active
mu_.L(i_) =  0.2 / 4;
*mu_.UP(i_) =  1e2;
*mu_.LO(i_) =  1e-3;
*mu_.LO(i_) =  -1e3;

* sigma_ starts at its upper bound
sigma_.L = 0.9;

sigma_.LO = 0.2;
sigma_.UP = 0.9;
* bound on rho if g_constraint turned off in nmodel : lower bound if sigma<1, upper bound otherwise
rho_.LO(i_) = -2;

* final normalisation: U_("USA") is fixed at 20
U_.FX("USA") = 20;
* final fe_norm, for every sector: -indexp * log(w("usa")) * starting value of sigma_ (0.9)
fe_norm(i) = - indexp(i) * log(w("usa"))*(sigma_.L);

*sigma_.FX = 0.8;

* starting value and upper bound for theta_ (the theta4 specification later fixes theta_ at 4)
theta_.L = 1;
theta_.UP = 30;

* Estimation sample collected in one parameter; it is written to the output
* GDX at the end of the file (presumably for use in Stata, going by the name).
parameter forstata;

forstata("fd",i_,r_) = x(i_,r_);
forstata("logpcfd",i_,r_)$x(i_,r_) = log(x(i_,r_));
forstata("logPHI",i_,r_) = logphi(i_,r_);
forstata("logpci",i_,r_) = log(w(r_));
forstata("total exp",i_,r_) = expenditure(r_);


* containers for reporting; only theta and epsilon_scale are assigned here
parameter  lambda, mu,  sse, fe, theta, epsilon_scale, eta, avgmu;
theta=0;
epsilon_scale=0;
parameter specificationstats for reporting;

* -- define specification :
* for CLM, use these:
*
* %spec% selects the model statement and the parameter count nbp:
*   tc     : objective, budget, positive_g_cstr       nbp = card(r_) + 3*card(i_) + 1
*   theta4 : theta_ fixed at 4; objective, commontheta, budget
*                                                     nbp = card(r_) + 2*card(i_) + 1
* The comparison is case-insensitive ($ifi), so THETA4 and theta4 both work.
* NOTE(review): positive_g_cstr has no definition in this file, and of the
* obj_* equations only obj_logweighted is defined - confirm before using
* spec=tc or another objective.

$ifi "%spec%"=="tc"  model nlls /obj_%objective%, budget, positive_g_cstr /;
$ifi "%spec%"=="tc" nbp("non-homoth")= card(r_) + 3*card(i_) + 1;
$ifi "%spec%"=="theta4" theta_.FX = 4;  model nlls /obj_%objective%, commontheta , budget/;
$ifi "%spec%"=="theta4" nbp("non-homoth")= card(r_) + 2*card(i_) + 1 ;

* stop with a clear message if no branch above declared the model
$if not declared nlls $abort Unknown spec=%spec% : expected tc or theta4

* -----------------------

* resource (time) limit for the solve
nlls.reslim = 100000;

* -- Solve model :

* code to change solver tolerance:
* optfile = 1 tells the solver to read its option file. The file is written
* here on every run; its name is spelled in lower case so that it is also
* found on case-sensitive file systems.
* NOTE(review): the option names below are CONOPT options and no NLP solver
* is selected explicitly in this file - confirm CONOPT is the active solver.
nlls.optfile = 1 ;
FILE  OPT   conopt option file  / conopt.opt /;
PUT OPT;
PUT 'rtredg  3.0e-13' /
    'rtobjr 3.0e-14'/
    'rtpiva 1.0e-14'/
    'rtpivt 1.0e-12';
PUTCLOSE OPT;

* minimise the weighted sum of squared errors
solve nlls using nlp minimizing sse_;


* for simulations: keep the point estimates as plain parameters
*   sigma_p          estimated sigma
*   epsilon_diff(i_) estimated rho (the comments above define rho = epsilon - sigma)
parameter epsilon_diff, sigma_p;
sigma_p          = sigma_.L;
epsilon_diff(i_) = rho_.L(i_);

parameter epsi epsilon parameter in CLM;
epsi(i) = sigma_p + epsilon_diff(i);

* compute average income elasticity for stata to compare to CRIE
parameter incelast, incelast_avg, epsibar_diff, forstata;

* final-demand-weighted mean of epsilon_diff in each region
* (both sums run over all j, not only the selected sectors)
epsibar_diff(r) = sum(j, epsilon_diff(j) * fd(j,r)) / sum(j, fd(j,r));

* income elasticity of sector i in region r relative to that regional mean
incelast(i,r) = 1 + (1 - sigma_p) * (epsilon_diff(i) - epsibar_diff(r)) / epsibar_diff(r);

* same elasticity on the estimation sample, including the quadratic term in
* log(U), with expenditure weights taken over the selected sectors only
forstata("incel",i_,r_)$x(i_,r_) =
    sigma_p
  + (1 - sigma_p)
    * (epsilon_diff(i_) - 2 * nu_.L(i_) * log(U_.L(r_)))
    * sum(i_.local, x(i_,r_))
    / sum(i_.local, (epsilon_diff(i_) - 2 * nu_.L(i_) * log(U_.L(r_))) * x(i_,r_));

* sample-wide version without the quadratic term, pooled over all r_ regions
incelast_avg(i_) =
    sigma_p
  + (1 - sigma_p)
    * (epsilon_diff(i_))
    * sum((r_,i_.local), x(i_,r_))
    / sum((r_,i_.local), (epsilon_diff(i_)) * x(i_,r_));

* write the estimates and elasticities to the output GDX
execute_unload 'estimates\estimates_gtap5_CLM.gdx'  epsilon_diff, sigma_p, incelast, epsibar_diff, forstata, incelast_avg;






