$title  DATAPREPARATION.GMS

* ------------------------------------------------------
* Per Capita Income and the Demand for Skills
* Journal of International Economics

* Justin Caron, Thibault Fally and James Markusen

* December 2019
* ------------------------------------------------------


* PREPARES DEMAND, IO COEFFICIENTS, TRADE SHARES, AND OTHER PARAMETERS TO BE PASSED ON TO THE SIMULATION FILES


* load gravity data from stata or gams ?
* choices : stata, gams
* A value that is already set is kept; otherwise the default "gams" is used.
* (gravitydata is not referenced again in the visible part of this file.)
$ if not set gravitydata $set gravitydata gams

* Data directory: defaults to "data\" unless already set, then re-exported
* with $setglobal so the value is available as a global compile-time variable.
$if not set datadir $set datadir "data\"
$setglobal datadir %datadir%

* --------------------------------------------
* DEFINE CALIBRATION TYPE
* Each flag below keeps a value that is already set and otherwise falls back
* to the default given on its $set.

* -- set of sectors - options : all, fdonly
$if not set sectorset $set sectorset all

* -- type of factor intensity - options : avg, cs - if using average, should change definition of ENDOW
$if not set beta $set beta avg

* -- rescale ? - options : yes, no
* (rescale is not referenced again in the visible part of this file)
$if not set rescale $set rescale no

* declare set of demand estimates to use : theta4, notc, tc
$if not set demandest $set demandest tc


* -- declare dataset

$if not set ds $set ds gtap5

* NOTE(review): the include below is hard-coded to gtap5 and does not use %ds%,
* whereas the GDX file names further down do - confirm this is intended.

$include loaddata_gtap5.gms


* ------------------------------------
* IMPORT GRAVITY AND DEMAND ESTIMATES
*
* Loads previously estimated gravity and demand parameters at compile time
* from estimates\estimates_%ds%_logweighted_%demandest%_rall.gdx and derives
* the sector-level theta(i) and sigma(i) from them.
* phiest, technology and tcost are only declared here; nothing is loaded
* into them by the $load below.


* gravity
parameter	coeffs
		ex,im,
		cst
		phiest;

* demand
parameter  lambdaest_;

parameter
fe           previously estimated demand fixed effects
sigma(i)        previously estimated sigmas
scale           scaling parameter for sigma
technology(i,r) export fixed effects from GE
tcost(i,r,s)    fitted transport costs from GE
theta(i)           calibrated using estimates from the literature?
fittedexp	fitted expenditure
;

* lambdaest_ is filled from GDX symbol "lambda", scale from "sigma_scale"
$gdxin estimates\estimates_%ds%_logweighted_%demandest%_rall.gdx
$load  coeffs, ex, im, cst, fe, lambdaest_=lambda, scale=sigma_scale, fittedexp
* close the GDX input file again once everything has been loaded
$gdxin

* theta: use the backed-out estimate where one exists, otherwise fall back to 4
theta(i) = coeffs("backed out theta","coeff",i);
theta(i)$(not theta(i)) = 4;

* with demandest == theta4, theta is set to 4 for every sector
$if "%demandest%"=="theta4" theta(i) =4;

* sigma: estimated sigma coefficient scaled by the "non-homoth" scaling factor
sigma(i) = scale("non-homoth") * coeffs("sigma","coeff",i);

* ---------------------------------------------------------------
* -- DEFINE SET OF SECTORS AND REGIONS TO INCLUDE IN MODEL
*
* i_ / r_ / g_ are the model subsets of i / r / g. Which sectors enter i_
* is controlled by %sectorset%; all regions are always included.

sets
        i_(i)
        r_(r)
        g_(g);

* start from an empty sector subset, then fill it according to %sectorset%
i_(i) = no;

* "all"    : every sector
$if "%sectorset%"=="all"  i_(i) = yes;

*i_("dwe") = no;

* "fdonly" : only sectors with a nonzero sigma
$if "%sectorset%"=="fdonly"  i_(i)$sigma(i) = yes;

* all regions are part of the model
r_(r) = yes;

* g_ holds the included sectors plus the elements c, g and i
g_(i_)  = yes;
g_("c") = yes;
g_("g") = yes;
g_("i") = yes;

* c collects the elements c, g and i of g
set c(g);
c("c") = yes;
c("g") = yes;
c("i") = yes;


* sectors with a nonzero sigma
set fds(i) "set of sectors with final demand calc";

fds(i)$sigma(i) = yes;
*fds("dwe") = no;


set     serv(i) service sectors / CMN, ISR,OBS, OFI,OSG,ROS,WTR,TRD,CNS, OTP, ATP, WTP,  ELY/
        tradables(i) the tradable sectors;

* tradables = every sector that is not listed in serv
tradables(i)$(not serv(i)) = yes;
display tradables;

* flag the included sectors that are not tradable
coeffs("manufsector","coeff", i_)$(not tradables(i_)) = 1;


alias(r_,s_);
alias(i_,j_);
alias(i_,k_);



* -----------------------------------------------------------------------
* RECONSTRUCT ESTIMATES FROM GRAVITY - FROM GRAVITYINGAMS

* EXPORTER FIXED EFFECTS
* exponent of (exporter effect + sector constant), taken at the power
* 1/theta so that technology reads as TFP
technology(i_,r_) = exp( (ex(i_,r_) + cst(i_)) / theta(i_) );

* TRANSPORT COSTS
* "fitted" transport costs from the gravity equations: the gravity index
* (distance entering with a minus sign, the dummies with a plus sign) is
* taken at the power -1/theta so that tcost reads as a cost
tcost(i_,r_,s_) =
    exp( -(
            - importdist(r_,s_,"ldist")       * coeffs("ldist","coeff",i_)
            + importdist(r_,s_,"contig")      * coeffs("contig","coeff",i_)
            + importdist(r_,s_,"comlang_off") * coeffs("comlang_off","coeff",i_)
            + importdist(r_,s_,"colony")      * coeffs("colony","coeff",i_)
            + importdist(r_,s_,"homebias")    * coeffs("homebias","coeff",i_)
          )
          / theta(i_) );


* -------------------------------------------------------------------
* COMPUTE INITIAL VALUES
*
* Fills the "data" scenario of the reporting parameters from the loaded
* benchmark arrays (vfm, vdfm, vifm, btrade, fd, pcexp and the tax rates).

set scn scenarios / initial, bmk, data, cf, "%chg"/;

* reporting parameters for all variables of interest
parameters
        FinalD          final demand
        X               absorbtion
        Trade           includes domestic absorbtion
        fprice
        factord         factor demand
        interd          intermediate demand
        macro           macro stats
        ByReg(*,scn,*)
        ByRegSect
        Bilat
        ByRegIO;


* production valued at input cost: tax-inclusive factor payments plus
* tax-inclusive domestic and imported intermediates
byregsect("outputVal","data",i,r) =
        sum(f,  vfm(f,i,r)*(1+rtf(f,i,r)))
      + sum(j_, (vdfm(j_,i,r)*(1+rtfd0(j_,i,r)) + vifm(j_,i,r)*(1+rtfi0(j_,i,r))));

* (sector, region) pairs of the model whose output value is zero
set nooutput(i,r);

nooutput(i_,r_)$(not byregsect("outputVal","data",i_,r_)) = yes;

display nooutput;

* final demand and per-capita income
byregsect("fdVal","data",i_,r_) = fd(i_,r_);
ByReg("PCIncomeNom","data",r_)  = pcexp(r_);

* bilateral trade; the r_ -> r_ entry is then overwritten with
* tax-inclusive domestic purchases
trade("data",i_,r_,s_) = btrade(i_,r_,s_);
trade("data",i_,r_,r_) = sum(g_,vdfm(i_,g_,r_) *(1+rtfd0(i_,g_,r_)));

* domestic plus imported purchases (no tax factor applied here)
byregsect("X","data",i_,r_) = sum(g_, vdfm(i_,g_,r_)+ vifm(i_,g_,r_));

* tax-inclusive factor and intermediate demand by sector
factord("data",f,i_,r_) = vfm(f,i_,r_)*(1+rtf(f,i_,r_));
interd("value","data",j_,i_,r_) =
        vdfm(j_,i_,r_)*(1+rtfd0(j_,i_,r_)) + vifm(j_,i_,r_)*(1+rtfi0(j_,i_,r_));

* --------------------------------------------------
* CALCULATE INPUT SHARES - BASED ON SET OF INCLUDED GOODS ONLY
*
* beta(f,i,r)    : share of factor f in the output value of sector i
* intersh(i,j,r) : share of intermediate input i in the output value of sector j
* %beta% == avg  : numerator and denominator are pooled over all regions,
*                  so every region r receives the same value
* %beta% == cs   : region-specific shares
* Every assignment is skipped where its denominator is zero.

parameter      intersh		intermediate input coefficient (from i to j)
	       beta		country specific factor input coefficient ;

* AVERAGES 
* factor shares :  
$if "%beta%"=="avg"  beta(f,i_,r)$sum(r.local, byregsect("outputVal","data",i_,r)) = sum(r.local,vfm(f,i_,r)*(1+rtf(f,i_,r)))/ sum(r.local, byregsect("outputVal","data",i_,r));
$if "%beta%"=="cs"   beta(f,i_,r)$byregsect("outputVal","data",i_,r) = vfm(f,i_,r)*(1+rtf(f,i_,r))/ byregsect("outputVal","data",i_,r) ;

* intermediate input shares :
* (the avg variant carries the same zero-output guard as the other three
*  assignments, so a sector with no output anywhere cannot divide by zero)
$if "%beta%"=="avg"	intersh(i_,j,r)$sum(r.local,byregsect("outputVal","data",j,r))   = sum(r.local, vdfm(i_,j,r)*(1+rtfd0(i_,j,r)) + vifm(i_,j,r)*(1+rtfi0(i_,j,r))) / sum(r.local,byregsect("outputVal","data",j,r));
$if "%beta%"=="cs"	intersh(i_,j,r)$byregsect("outputVal","data",j,r) = (vdfm(i_,j,r)*(1+rtfd0(i_,j,r)) + vifm(i_,j,r)*(1+rtfi0(i_,j,r))) / byregsect("outputVal","data",j,r);


* check zero profit condition:
* factor shares plus intermediate shares minus one, rounded to 10 decimals
parameter chkzp	 zero-profit check ;

chkzp(i_,r_) = round((sum(f, beta(f,i_,r_)) + sum(j_, intersh(j_,i_,r_)) - 1), 10);

display chkzp;

* final demand plus intermediate use implied by the input coefficients
* applied to the sales of each sector j_ from region r_
X("data - with average coeff",i_,r_) = fd(i_,r_) + sum((j_,s_), intersh(i_,j_,r_) * trade("data",j_,r_,s_));



* MRIO coefficient containers and trade shares.
*
* The beta_bilat_* parameters and the set iter are only declared here; no
* values are assigned to them in the visible part of this file.
* idea: need to do it just once, and be able to use them for any other embodied carbon / energy computation later on

parameter   trade_sh(i,r,s)  share of country r in country s's absorption (Import shares)
		beta_bilat_dir direct bilateral IO coefficients
	    beta_bilat_total total bilateral IO coefficients
	    beta_bilat_total_new total bilateral IO coefficients
	    beta_bilat_total_abs
	    beta_dev;


set	iter /0*10/;

* trade shares defined as: from r to s
* (flow r -> s over the sum of all flows into s; skipped when s absorbs nothing)
trade_sh(i,r,s)$sum(r.local , trade("data",i,r,s))  = trade("data",i,r,s) / sum(r.local , trade("data",i,r,s));

* one minus the share of within-region flows in all flows of sector i;
* skipped for sectors without any trade data, which would otherwise
* trigger a division by zero
coeffs("avg trade share","coeff", i)$sum((s.local,r.local) , trade("data",i,r,s)) =  1-  sum(r,trade("data",i,r,r)) / sum((s.local,r.local) , trade("data",i,r,s)) ;

* QUICKLY COMPUTE SKILL PREMIUM APPROXIMATIONS
*
* Builds the ingredients of a first-order approximation: production and
* absorption by sector, Gosh coefficients, factor-demand shares, income
* elasticities and the implied change in final demand for a given
* income shock.

parameters production, absorption;

* production = total sales of (i,r) over all destinations
production(i,r) = sum(s, trade("data",i,r,s));
* INSTEAD OF:
*production(i,r) = vom(i,r);

* absorption of good j = final demand plus intermediate use by all sectors
absorption(j,r) = fd(j,r) + sum(i,intersh(j,i,r) * production(i,r));

* multiplicative income shock (1.01 = +1 percent)
parameter income_shock /1.01/;

* dem_sh, prod_sh and endow_sh are declared but not assigned in the
* visible part of this file. trade_sh is declared with the MRIO
* parameters earlier in the file and is therefore not re-declared here.
parameter
dem_sh(i,r)  share of sector i in total final demand
prod_sh(i,r,s)  share of country s in country r's sales
fact_sh(f,i,r)   share of sector i in demand for factor f
endow_sh(f,r) share of factor f in total endowment (income)
gosh_m(j,i,r) gosh matrix coefficient (how much j is used in industry i)
gosh_f(j,r) gosh matrix coefficient (how much j is used by final consumers);


* share of the absorption of j that goes to industry i
gosh_m(j,i,r)$absorption(j,r) = intersh(j,i,r) * production(i,r) / absorption(j,r);

* remainder of the absorption of j (equals 1 where absorption is zero)
gosh_f(j,r) = 1 - sum(i,gosh_m(j,i,r));

* share of sector i in the demand for factor f; skipped where the factor
* has no demand at all in region r, which would otherwise divide by zero
fact_sh(f,i,r)$sum(i.local,  beta(f,i,r) * vom(i,r)) =  beta(f,i, r) * vom(i,r) / sum(i.local,  beta(f,i,r) * vom(i,r))  ;

parameters incelast, chg_fd_approx, chg_fprice_justfd, chg_absorption_approx,  chg_fprice_approx, chg_skillprem_approx, chg_skillprem_justfd;

* sigma(i) relative to the final-demand-weighted average sigma of region r;
* skipped where that weighted sum is zero, which would otherwise divide by zero
incelast(i,r)$sum(j, sigma(j) * fd(j,r)) = sigma(i) * sum(j, fd(j,r)) / sum(j, sigma(j) * fd(j,r));

* first-order change in final demand for the given income shock
chg_fd_approx(i,r) = 1 + (incelast(i,r) - 1) * (income_shock - 1);

* 0) Inverting Gosh matrix:
* The change in absorption is obtained by solving, as an MCP, the linear system
*   chg_absorption(j,r) = gosh_f(j,r)*chg_fd_approx(j,r)
*                         + sum(i, gosh_m(j,i,r)*chg_absorption(i,r))
* The system only links sectors within the same region r.

variables chg_absorption;

equations eq_absorption_auatarky;
* absorption = production:
* (the sum is restricted to nonzero gosh_m entries)
eq_absorption_auatarky(j,r).. chg_absorption(j,r) =e= gosh_f(j,r) *chg_fd_approx(j,r) + sum(i$gosh_m(j,i,r),gosh_m(j,i,r) * chg_absorption(i,r));

* sectors whose whole absorption is own intermediate use (gosh_m(j,j,r) = 1)
* are fixed at 1 (no change)
* NOTE(review): presumably this avoids a degenerate equation for those sectors - confirm
chg_absorption.fx(j,r)$(gosh_m(j,j,r) =1)  =1;

* each equation is paired with the variable of the same (j,r) index
model Invert_gosh / eq_absorption_auatarky.chg_absorption /;
Invert_gosh.iterlim = 1000000;

* NOTE(review): the model/solve status is not checked after the solve; the
* levels are used below regardless of whether the solver succeeded
solve Invert_gosh using MCP;


* Post-solve approximations of factor-price and skill-premium changes.

* solved change in absorption from the Gosh inversion
chg_absorption_approx(i,r) = chg_absorption.L(i,r);

* factor-share-weighted change in demand: final demand only vs. total absorption
chg_fprice_justfd(f,r) = sum(i,fact_sh(f,i,r) * chg_fd_approx(i,r));
chg_fprice_approx(f,r) = sum(i,fact_sh(f,i,r) * chg_absorption_approx(i,r));

* log per-capita expenditure, only where pcexp is nonzero
chg_skillprem_approx("logpci",r)$pcexp(r) = log(pcexp(r));

* skill premium = skilled over unskilled weighted demand change.
* Both ratios are skipped where the unskilled denominator is zero, which
* would otherwise trigger a division by zero.
chg_skillprem_approx("total",r)$sum(i,fact_sh("UnskLab",i,r) * chg_absorption_approx(i,r)) = sum(i,fact_sh("SkLab",i,r) * chg_absorption_approx(i,r)) / sum(i,fact_sh("UnskLab",i,r) * chg_absorption_approx(i,r));
chg_skillprem_approx("final only",r)$sum(i,fact_sh("UnskLab",i,r) * chg_fd_approx(i,r)) = sum(i,fact_sh("SkLab",i,r) * chg_fd_approx(i,r)) / sum(i,fact_sh("UnskLab",i,r) * chg_fd_approx(i,r));

* sector-level decomposition: skilled-minus-unskilled factor share, the
* change in final demand, and their product
parameter chg_skillprem_decomp;

chg_skillprem_decomp("final only",i,r) = (fact_sh("SkLab",i,r) - fact_sh("UnskLab",i,r)) * chg_fd_approx(i,r);
chg_skillprem_decomp("fact_sh",i,r) = (fact_sh("SkLab",i,r) - fact_sh("UnskLab",i,r)) ;
chg_skillprem_decomp("chg_fd",i,r) = chg_fd_approx(i,r);



* Write the prepared parameters, benchmark data, sets and skill-premium
* approximations to estimates\DATA_%ds%_%demandest%_%beta%.gdx
execute_unload 'estimates\DATA_%ds%_%demandest%_%beta%.gdx',
        sigma, theta, beta, intersh, trade,
        endow, vom, vdfm, vifm, fd, vfm, pcexp, pop,
        r, i, f,
        fittedexp, coeffs,
        chg_skillprem_approx, chg_skillprem_decomp;





