*==============================================
* This file prepares the matched and unmatched moments from all datasets. 
* It exports moment vectors and covariance matrices later used as inputs to the matlab program
*==============================================

* Work from the output directory and fix the random-number seed
* (the bootstrap resampling further down draws from this stream).
cd "$output"
set seed 100

clear all

* Open one results file per moment set. Each row is (name, value);
* a stale handle with the same name is closed first if one is open.
foreach handle in moments moments_nk moments_nontarget {
	capture postclose `handle'
	postfile `handle' str32 name value using `handle', replace
}

*** Wife child-care time (cc_w): mean over observations with age 30-35
use data_ATUS_for_descstats, clear
summarize cc_w if inrange(age, 30, 35), detail
post moments ("T2") (r(mean))
* The other two moment sets get a zero in this row
post moments_nk ("T2") (0)
post moments_nontarget ("T2") (0)

*** Husband child-care time (cc): mean over observations with sp_age 30-35
use husband_avg_childcare_for_descstat, clear
summarize cc if inrange(sp_age, 30, 35), detail
post moments ("T1") (r(mean))
* The other two moment sets get a zero in this row
post moments_nk ("T1") (0)
post moments_nontarget ("T1") (0)

*** PSID-based moments: load the panel and build the indicators used below
use data_PSID_for_moments, clear
tsset person year

* dummy = 1 when the youngest child is aged 10 or less
* (a missing age_youngest compares as larger than 10, so it yields 0)
generate dummy = age_youngest<=10

* Non-zero hours with no observed log wage are set to missing; zero hours are kept
replace hours = . if hours!=0 & logwg_h==.
replace hourw = . if hourw!=0 & logwg_w==.

* Employment indicators: non-missing hours of at least 80
generate emp  = hours!=. & hours>=80
generate empw = hourw!=. & hourw>=80

* Each moment below is computed on three subsamples, selected on wife's age (agew)
* and the young-child indicator (dummy):
*   moments           : agew 30-35, dummy==1
*   moments_nk        : agew 30-35, dummy==0
*   moments_nontarget : agew 50-55, dummy==0

*** Mean of wife hours (hourw)
summarize hourw if inrange(agew, 30, 35) & dummy==1, detail
post moments ("H2") (r(mean))
summarize hourw if inrange(agew, 30, 35) & dummy==0, detail
post moments_nk ("H2") (r(mean))
summarize hourw if inrange(agew, 50, 55) & dummy==0, detail
post moments_nontarget ("H2") (r(mean))

*** Mean of husband hours (hours)
summarize hours if inrange(agew, 30, 35) & dummy==1, detail
post moments ("H1") (r(mean))
summarize hours if inrange(agew, 30, 35) & dummy==0, detail
post moments_nk ("H1") (r(mean))
summarize hours if inrange(agew, 50, 55) & dummy==0, detail
post moments_nontarget ("H1") (r(mean))

*** Wife employment rate (empw)
* NOTE(review): the first sample requires hourw!=. while the other two require
* hours!=. (husband hours). This may be a typo for hourw - confirm before changing,
* and keep the bootstrap loop in sync if it is changed.
summarize empw if inrange(agew, 30, 35) & dummy==1 & hourw!=.
post moments ("E2") (r(mean))
summarize empw if inrange(agew, 30, 35) & dummy==0 & hours!=.
post moments_nk ("E2") (r(mean))
summarize empw if inrange(agew, 50, 55) & dummy==0 & hours!=.
post moments_nontarget ("E2") (r(mean))

*** Distribution of wife hours (hourw): gap between the 75th percentile and the median
* The difference is posted straight from the r() results of -summarize, detail-,
* so no intermediate local macro (and no macro quoting) is needed.
su hourw if agew>=30&agew<=35 & dummy==1, d
post moments ("H2_75_50") (r(p75)-r(p50))

su hourw if agew>=30&agew<=35 & dummy==0, d
post moments_nk ("H2_75_50") (r(p75)-r(p50))

su hourw if agew>=50&agew<=55 & dummy==0, d
post moments_nontarget ("H2_75_50") (r(p75)-r(p50))

* Consumption and child care
* Cap total consumption at its 99th percentile
summarize totcons, detail
replace totcons = r(p99) if totcons!=. & totcons>r(p99)

* Change relative to the second lag (L2.) in log(totcons/p_totcons)
* and in the young-child indicator
generate dlc     = log(totcons/p_totcons) - log(L2.totcons/L2.p_totcons)
generate d_young = dummy - L2.dummy

* Slope of the consumption change on the change in the indicator, agew 25-35
regress dlc d_young if inrange(agew, 25, 35)
post moments ("dlc_dyoung") (_b[d_young])
post moments_nk ("dlc_dyoung") (0)
post moments_nontarget ("dlc_dyoung") (0)

* Flush the three results files to disk
capture postclose moments
capture postclose moments_nk
capture postclose moments_nontarget

*** Export the moments
* Rows are stored in posting order: T2, T1, H2, H1, E2, H2_75_50, dlc_dyoung.
* Rows 1-2 and 3-4 are swapped, so each CSV is written in the order
*   H2, H1, T2, T1, E2, H2_75_50, dlc_dyoung
* and contains only the value column (no header).
* NOTE(review): the bootstrap postfile (moments_bsreps) lists p75_p50 before E2,
* whereas E2 precedes H2_75_50 here - confirm the MATLAB side pairs the moment
* vector with the SIGMA rows/columns accordingly.
foreach mfile in moments moments_nk moments_nontarget {
	u `mfile', clear
	gen ind = _n
	replace ind=1 in 3
	replace ind=2 in 4
	replace ind=3 in 1
	replace ind=4 in 2
	sort ind
	keep value
	outsheet using "$MATLAB/`mfile'.csv", replace comma noname
}


*==========================
* Calculate SIGMA by bootstrapping (note that independent datasets imply some off diagonal zeros)
*==========================
clear all
capture postclose moments_bsreps
capture postclose moments_nontarget_bsreps

* One row per bootstrap replication, one column per moment
postfile moments_bsreps ///
	H2 H1 H2_nk H1_nk T2 T1 p75_p50 p75_p50_nk E2 E2_nk dc ///
	using moments_bsreps, replace
postfile moments_nontarget_bsreps ///
	H2_nt H1_nt p75_p50_nt E2_nt ///
	using moments_nontarget_bsreps, replace

* Number of bootstrap replications
global nrep 500

* Bootstrap replications. Each pass resamples the three datasets independently,
* recomputes every moment on the resampled data and posts one row to
* moments_bsreps (targeted and _nk moments) and one row to
* moments_nontarget_bsreps (_nt moments).
* The order of the three -bsample- calls is kept fixed so that the random draws,
* and hence the results for a given seed, do not change.
forvalues i=1/$nrep {

	*** Wife CC: observations resampled with replacement
	u data_ATUS_for_descstats, clear
	bsample
	su cc_w if age>=30&age<=35
	local T2 = r(mean)

	*** Husband CC: observations resampled with replacement
	u husband_avg_childcare_for_descstat, clear
	bsample
	su cc if sp_age>=30&sp_age<=35
	local T1 = r(mean)

	*** PSID: resample whole persons (clusters). The original id is renamed to
	*** idcluster and -bsample- creates a new id named person that is unique per
	*** drawn cluster, so the data can be tsset again.
	u data_PSID_for_moments, clear
	ren person idcluster
	bsample, cluster(idcluster) idcluster(person)
	tsset person year
	gen dummy = age_youngest<=10
	replace hours=. if logwg_h==. & hours!=0
	replace hourw=. if logwg_w==. & hourw!=0
	gen emp  = hours>=80 & hours!=.
	gen empw = hourw>=80 & hourw!=.

	*** Wife hours: mean and p75-p50, both from a single detailed summarize per sample
	su hourw if agew>=30&agew<=35 & dummy==1, d
	local H2      = r(mean)
	local p75_p50 = r(p75)-r(p50)
	su hourw if agew>=30&agew<=35 & dummy==0, d
	local H2_nk      = r(mean)
	local p75_p50_nk = r(p75)-r(p50)
	su hourw if agew>=50&agew<=55 & dummy==0, d
	local H2_nt      = r(mean)
	local p75_p50_nt = r(p75)-r(p50)

	*** Mean of husband hours
	su hours if agew>=30&agew<=35 & dummy==1
	local H1 = r(mean)
	su hours if agew>=30&agew<=35 & dummy==0
	local H1_nk = r(mean)
	su hours if agew>=50&agew<=55 & dummy==0
	local H1_nt = r(mean)

	*** Employment wife
	* NOTE(review): the first sample requires hourw!=. while the other two require
	* hours!=. (husband hours). Kept as is so the replicates match the point
	* estimates computed earlier in this file - confirm whether hourw was intended.
	su empw if agew>=30&agew<=35 & dummy==1 & hourw!=.
	local E2 = r(mean)
	su empw if agew>=30&agew<=35 & dummy==0 & hours!=.
	local E2_nk = r(mean)
	su empw if agew>=50&agew<=55 & dummy==0 & hours!=.
	local E2_nt = r(mean)

	* Consumption and child care
	su totcons, de
	replace totcons=r(p99) if totcons>r(p99) & totcons!=.
	gen dlc = log(totcons/p_totcons) - log(l2.totcons/l2.p_totcons)
	gen d_young = dummy - l2.dummy

	reg dlc d_young if agew>=25&agew<=35	/* Larger span to get most changes for first born */
	local dc = _b[d_young]

	post moments_bsreps (`H2') (`H1') (`H2_nk') (`H1_nk') (`T2') (`T1') (`p75_p50') (`p75_p50_nk') (`E2') (`E2_nk') (`dc')
	* The non-targeted file has columns H2_nt H1_nt p75_p50_nt E2_nt, so it must
	* receive the _nt statistics (agew 50-55), not the _nk ones.
	post moments_nontarget_bsreps (`H2_nt') (`H1_nt') (`p75_p50_nt') (`E2_nt')

}

postclose moments_bsreps
postclose moments_nontarget_bsreps

* Covariance matrix of the 11 bootstrapped moments
* Column order: 1 H2, 2 H1, 3 H2_nk, 4 H1_nk, 5 T2, 6 T1,
*               7 p75_p50, 8 p75_p50_nk, 9 E2, 10 E2_nk, 11 dc
use moments_bsreps, clear
correlate, covariance
matrix SIGMA_temp = r(C)
matrix SIGMA = J(11,11,0)

* Impose 0 covariances where independent data is used: only entries whose row
* and column belong to the same group are copied; everything else stays 0.
local grp_a 1 2 7 9 11
local grp_b 3 4 8 10
local grp_c 5
local grp_d 6
foreach grp in grp_a grp_b grp_c grp_d {
	foreach r of local `grp' {
		foreach c of local `grp' {
			matrix SIGMA[`r',`c'] = SIGMA_temp[`r',`c']
		}
	}
}

* Write the matrix as a headerless CSV at full numeric precision
clear
svmat SIGMA
format * %24.0g
outsheet using "$MATLAB/SIGMA.csv", replace comma noname


*=============================
* Parametrization using PSID 
*=============================
* Reload the PSID panel and rebuild the indicators needed for the parametrization
u data_PSID_for_moments, clear
tsset person year
gen dummy = age_youngest<=10
replace hourw=. if logwg_w==. & hourw!=0
gen empw = hourw>=80 & hourw!=.

* Indicator for non-positive assets. Left missing when assets are missing:
* without the qualifier a missing tot_assets3 would be coded 0 and counted
* as "positive assets" in the summary statistics.
gen zero_assets = tot_assets3<=0 if tot_assets3<.

* Top-code assets at 1,000,000, then take logs after rescaling by the 2010 mean of p_totcons
replace tot_assets3=1000000 if tot_assets3>1000000 & tot_assets3!=.
su p_totcons if year==2010
gen log_tot_assets3 = log(tot_assets3*r(mean)/p_totcons) /* In 2010 prices */

**** Child care
su childcare if childcare>0, de /* Note that winsorize on the non-zeros, to focus on the relevant distribution */
replace childcare=r(p99) if childcare>r(p99) & childcare!=.
* Child-care spending rescaled by the 2010 mean of p_totcons
su p_totcons if year==2010
gen rcc = (r(mean)/p_totcons)*childcare
* Log the distribution of rcc for wives with empw==1, dummy==1 and agew 30-35
cap log close
log using childcare_employed, replace t
su rcc if dummy==1 & empw==1 & agew>=30&agew<=35 , de
log close

*** life-cycle wages
* Rescale both log wages by (2010 mean of p_totcons)/100
su p_totcons if year==2010
replace logwg_w = log(exp(logwg_w)*r(mean)/100)
replace logwg_h = log(exp(logwg_h)*r(mean)/100)

* Birth-cohort groups for wives (from ybw). The bands do not overlap
* (1945 belongs to group 2) and a missing birth year stays missing: in Stata
* a missing value compares as larger than any number, so an unguarded
* ybw>=1975 would place it in group 8.
cap drop WCOH*
g WCOH=.
replace WCOH=1 if ybw<1945
replace WCOH=2 if inrange(ybw,1945,1949)
replace WCOH=3 if inrange(ybw,1950,1954)
replace WCOH=4 if inrange(ybw,1955,1959)
replace WCOH=5 if inrange(ybw,1960,1964)
replace WCOH=6 if inrange(ybw,1965,1969)
replace WCOH=7 if inrange(ybw,1970,1974)
replace WCOH=8 if ybw>=1975 & ybw<.

* Same grouping for husbands (from yb)
cap drop COH*
g COH=.
replace COH=1 if yb<1945
replace COH=2 if inrange(yb,1945,1949)
replace COH=3 if inrange(yb,1950,1954)
replace COH=4 if inrange(yb,1955,1959)
replace COH=5 if inrange(yb,1960,1964)
replace COH=6 if inrange(yb,1965,1969)
replace COH=7 if inrange(yb,1970,1974)
replace COH=8 if yb>=1975 & yb<.

* 999 in age is recoded to missing; quadratic age terms for the wage regressions
replace age=. if age==999
gen agew2 = agew^2
gen age2  = age^2

*** Preliminary variance is the variance at 25-30 after removing cohort, age and education effects
* Wife: regress log wage on a quadratic in age and education dummies, ages 25-65,
* absorbing the cohort groups (WCOH). Output goes to the theta_wages log.
cap log close
log using theta_wages, replace t
areg logwg_w agew agew2 i.weduc if agew>=25 & agew<=65, a(WCOH)
* y_cons = constant plus the education coefficient of each observation,
* averaged over the estimation sample and stored in the local y_cons
cap drop y_cons
gen y_cons = _b[_cons] + _b[2.weduc]*(weduc==2) + _b[3.weduc]*(weduc==3) if e(sample)
su y_cons	 
local y_cons = r(mean)
log close

* Fitted age profile (still uses the coefficients of the areg above) and
* regression residuals for the estimation sample
cap drop logwg_w_hat
gen logwg_w_hat = `y_cons' + agew*_b[agew] + agew2*_b[agew2]
cap drop logwg_w_r
predict logwg_w_r if e(sample), r


* Husband: same steps with age, educ and COH; y_cons is overwritten
log using theta_wages, append t
areg logwg_h age  age2 i.educ if age>=25 & age<=65, a(COH)
cap drop y_cons
gen y_cons = _b[_cons] + _b[2.educ]*(educ==2) + _b[3.educ]*(educ==3) if e(sample)
su y_cons	
local y_cons = r(mean)
log close

cap drop logwg_h_hat
gen logwg_h_hat = `y_cons' + age*_b[age] + age2*_b[age2]
cap drop logwg_h_r
predict logwg_h_r if e(sample), r

* Append the distribution of both residuals at ages 25-30 to the log
log using theta_wages, append t
su logwg_w_r if agew>=25 & agew<=30, de
su logwg_h_r if age>=25 & age<=30, de
log close

*** Net assets distribution, sample with agew 25-30
* Detailed summaries of log assets and of the zero-assets indicator go to the assets0 log
log using assets0, replace text
summarize log_tot_assets3 if inrange(agew, 25, 30), detail
summarize zero_assets if inrange(agew, 25, 30), detail
log close

* Move to the directory held in the global MD
cd "$MD"
