/* This file runs the regressions that generate the covariance matrix for the level of unemployed homeowners,
	which is used in the Delta method. The covariance matrix is computed for three cases:
	1) Baseline
	2) Homeowner correction - sample restricted to ages 16 to 65
	3) Baseline augmented with the March mobility measures (M and MJ) - used in Footnote 10 in the web appendix
	*/ 

   
* Start from an empty session
clear all
* Memory request; NOTE(review): only matters on older Stata releases - confirm target version
set memory 2g
* Do not pause output with the more prompt, so the script runs unattended
set more off
* All datasets below are read from and saved to the directory held in the global macro output
cd "$output"



*=======================
* Monthly big regression
* Baseline - Labor Force
*=======================
* Reads cps_panel_2007_2012_C and saves U_cov_baseline: the two-way clustered
* covariance matrix of the monthly unemp1 means and the last-month unemp2 mean,
* rescaled from shares to levels with the monthly sums of weights.

u cps_panel_2007_2012_C, clear

drop if chip==1 	/* The March (ASEC) sample has additional interviews over and above the basic CPS. These are dropped. */

*** Generate the LHS variables and the clustering variables
egen uuid = group(uid yyyymm)
* Two copies of the weight so that reshape long can stack the two outcomes
gen weight1 = weight
gen weight2 = weight

* unemp1: own==1 and empst equal to 3 or 4, defined wherever the weight is nonmissing
gen byte unemp1=own==1 & (empst==3|empst==4) if weight1!=.
* unemp2: empst above -1 and below 5, defined only for the last month in the data
* (r(max) is left behind by summarize); presumably the labor force indicator - confirm coding of empst
su yyyymm
gen byte unemp2=empst<5&empst>-1 if yyyymm==r(max) & weight2!=.

* Geographic id: gtcbsa, replaced by gestcen where gtcbsa is 0.
* No chip==1 substitution is needed here because those rows were dropped above.
gen msa = gtcbsa
replace msa = gestcen if msa==0

*** Prepare the population matrix for the scaling of unemployment of homeowners
* One row per month (sorted by yyyymm) holding the sum of weights in that month.
* The matrix sweight created by mkmat survives the restore.
preserve
egen double sweight = sum(weight1), missing by(yyyymm)
egen tag=tag(yyyymm)
keep if tag==1
sort yyyymm
mkmat sweight
restore


*** Keep only the relevant variables and reshape for long vector
keep uuid uid huid msa yyyymm weight1 weight2 unemp1 unemp2

* After the reshape ho_vs_all is 1 for the unemp1 rows and 2 for the unemp2 rows
reshape long unemp weight, i(uuid) j(ho_vs_all) 
drop if unemp==.

#delimit;
label define sample
1 "Home owners"
2 "Labor Force"
3 "March Mobility", replace;
#delimit cr
label values ho_vs_all sample

* One dummy per (sample, month) cell. The key is stored as long so the
* seven-digit code is exact regardless of the default storage type.
gen long yyyymmho = 1000000*ho_vs_all+yyyymm
levelsof yyyymmho, local(yyyymmho)
foreach val in `yyyymmho' {
	gen byte ym`val'=yyyymmho==`val'
}

egen geocluster=group(msa yyyymm)
egen clusterint=group(huid geocluster)


*** Run the twoway cluster regressions and store the estimates 
* With no constant and a full set of cell dummies, each coefficient is the
* weighted mean of unemp in its cell. The two-way clustered covariance is
* V(huid) + V(geocluster) - V(intersection of the two).
reg unemp ym* [pw=weight], noc cluster(huid)
matrix c1=e(V)
reg unemp ym* [pw=weight], noc cluster(geocluster)
matrix c2=e(V)
reg unemp ym* [pw=weight], noc cluster(clusterint)
matrix c12=e(V)

matrix V=c1+c2-c12

*** Transform the matrix to a matrix in levels
* Scaling vector: the T monthly weight sums for the sample 1 coefficients,
* followed by the last month's weight sum for the single sample 2 coefficient.
local T = rowsof(sweight)
matrix A = sweight\sweight[`T',1]
matrix AA = diag(A)

/*
* verification: Compare to M_OWN_J/monthly_avg_OWN in the accounting data files
matlist AA*e(b)'
*/

matrix big_cov = AA*V*AA
clear
svmat double big_cov

save U_cov_baseline, replace




*=========================
* Monthly big regression
* HO Correction - 16 to 65
*=========================
* Same construction as the baseline case, with both outcome indicators
* additionally restricted to ages 16 through 65. Saves U_cov_HOCorr.

u cps_panel_2007_2012_C, clear

drop if chip==1 	/* The March (ASEC) sample has additional interviews over and above the basic CPS. These are dropped. */

*** Generate the LHS variables and the clustering variables
egen uuid = group(uid yyyymm)
* Two copies of the weight so that reshape long can stack the two outcomes
gen weight1 = weight
gen weight2 = weight

* unemp1: own==1, empst equal to 3 or 4, and age between 16 and 65
gen byte unemp1=own==1 & (empst==3|empst==4) & (age>15 & age<66) if weight1!=.
* unemp2: empst above -1 and below 5 and age between 16 and 65, defined only
* for the last month in the data (r(max) is left behind by summarize)
su yyyymm
gen byte unemp2=(empst<5&empst>-1)& (age>15 & age<66) if yyyymm==r(max) & weight2!=.

* Geographic id: gtcbsa, replaced by gestcen where gtcbsa is 0.
* No chip==1 substitution is needed here because those rows were dropped above.
gen msa = gtcbsa
replace msa = gestcen if msa==0

*** Prepare the population matrix for the scaling of unemployment of homeowners
* One row per month (sorted by yyyymm) holding the sum of weights over all
* remaining observations in that month (not restricted by age).
* The matrix sweight created by mkmat survives the restore.
preserve
egen double sweight = sum(weight1), missing by(yyyymm)
egen tag=tag(yyyymm)
keep if tag==1
sort yyyymm
mkmat sweight
restore


*** Keep only the relevant variables and reshape for long vector
keep uuid uid huid msa yyyymm weight1 weight2 unemp1 unemp2

* After the reshape ho_vs_all is 1 for the unemp1 rows and 2 for the unemp2 rows
reshape long unemp weight, i(uuid) j(ho_vs_all) 
drop if unemp==.

#delimit;
label define sample
1 "Home owners"
2 "Labor Force"
3 "March Mobility", replace;
#delimit cr
label values ho_vs_all sample

* One dummy per (sample, month) cell. The key is stored as long so the
* seven-digit code is exact regardless of the default storage type.
gen long yyyymmho = 1000000*ho_vs_all+yyyymm
levelsof yyyymmho, local(yyyymmho)
foreach val in `yyyymmho' {
	gen byte ym`val'=yyyymmho==`val'
}

egen geocluster=group(msa yyyymm)
egen clusterint=group(huid geocluster)


*** Run the twoway cluster regressions and store the estimates 
* With no constant and a full set of cell dummies, each coefficient is the
* weighted mean of unemp in its cell. The two-way clustered covariance is
* V(huid) + V(geocluster) - V(intersection of the two).
reg unemp ym* [pw=weight], noc cluster(huid)
matrix c1=e(V)
reg unemp ym* [pw=weight], noc cluster(geocluster)
matrix c2=e(V)
reg unemp ym* [pw=weight], noc cluster(clusterint)
matrix c12=e(V)

matrix V=c1+c2-c12

*** Transform the matrix to a matrix in levels
* Scaling vector: the T monthly weight sums for the sample 1 coefficients,
* followed by the last month's weight sum for the single sample 2 coefficient.
local T = rowsof(sweight)
matrix A = sweight\sweight[`T',1]
matrix AA = diag(A)

/*
* verification: Compare to M_OWN_J/monthly_avg_OWN in the accounting data files
matlist AA*e(b)'
*/

matrix big_cov = AA*V*AA
clear
svmat double big_cov

save U_cov_HOCorr, replace



*=============================
* Monthly big regression
* M and MJ in the regressions 
*=============================
/* This is used in Footnote 10 in the web appendix */ 
* Extends the baseline system with three March blocks (mobility, counterfactual
* mobility built from March 2007, and mobility for job) and saves U_cov_footnote.
* Side files written along the way: sweight, sweight_m, sweight_cm and temp.

u cps_panel_2007_2012_C, clear

*** Generate the LHS variables and the clustering variables
egen uuid = group(uid yyyymm)
* One weight per outcome so that reshape long can stack the four outcomes.
* weight3 comes from the input data; presumably the March supplement weight - confirm.
gen weight1 = weight
gen weight2 = weight
gen weight4 = weight3

* unemp1: own==1 and empst equal to 3 or 4
gen byte unemp1=own==1 & (empst==3|empst==4) if weight1!=.
* unemp2: empst above -1 and below 5, last month in the data only
su yyyymm
gen byte unemp2=empst<5&empst>-1 if yyyymm==r(max) & weight2!=.
* unemp3: nonmissing migrate1 above 1 with own3==10 and empstat3 below 30
* (presumably movers among owners in the labor force - confirm coding)
gen byte unemp3 = migrate1>1 & own3==10 & empstat3<30 & migrate1!=. if weight3!=.
* unemp4: whymove in 4, 5, 6 or 8, defined only among rows with unemp3==1
gen byte unemp4 = (whymove==4|whymove==5|whymove==6|whymove==8) if unemp3==1

* Geographic id: gtcbsa (gtcbsa3 for chip==1 rows), replaced by the state code
* (gestcen, or gestcen3 for chip==1 rows) where it is 0
gen msa = gtcbsa
replace msa = gtcbsa3 if chip==1
gen state = gestcen 
replace state = gestcen3 if chip==1
replace msa = state if msa==0


*** Prepare the population matrix for the scaling of unemployment of homeowners
* One row per month (sorted by yyyymm) with the sum of weight1 in that month
preserve
egen double sweight = sum(weight1), missing by(yyyymm)
egen tag=tag(yyyymm)
keep if tag==1
sort yyyymm
save sweight, replace
mkmat sweight
restore

*** Prepare the population matrix for the scaling of mobility rates - this is required for mobility because of the scaling by the 12 month average. It is not required for the mobility for job
* Step 1: March 2008-2012 values of monthly_avg_OWN from the accounting data
preserve
use data_for_accounting_monthly, clear
keep if month==3 & year>=2008 & year<=2012
gen yyyymm=100*year+month
keep yyyymm monthly_avg_OWN
sort yyyymm 
save temp, replace
restore

* Step 2: one row per March 2008-2012 with the sum of weight3, merged with
* step 1 and converted to the scaling factor sweight_m
preserve
keep if month==3 & year>=2008 & year<=2012
egen double sweight = sum(weight3), missing by(yyyymm)
egen tag=tag(yyyymm) 
keep if tag==1
sort yyyymm
merge yyyymm using temp
drop _m
gen unemp_boost=2
gen sweight_m = (1/12)*unemp_boost*sweight/monthly_avg_OWN
keep if sweight_m!=.
sort yyyymm
save sweight_m, replace
mkmat sweight_m
restore

*** Similar for counterfactual mobility
* Step 1: March 2007 value of monthly_avg_OWN (overwrites temp)
preserve
use data_for_accounting_monthly, clear
keep if month==3 & year==2007
gen yyyymm=100*year+month
keep yyyymm monthly_avg_OWN
sort yyyymm 
save temp, replace
restore

* Step 2: the single March 2007 scaling factor, repeated 5 times (expand 5)
* so that it lines up with the five counterfactual coefficients
preserve
keep if month==3 & year==2007
egen double sweight = sum(weight3), missing by(yyyymm)
egen tag=tag(yyyymm) 
keep if tag==1
sort yyyymm
merge yyyymm using temp
drop _m
gen unemp_boost=2
expand 5
gen sweight_cm = (1/12)*unemp_boost*sweight/monthly_avg_OWN
keep if sweight_cm!=.
sort yyyymm
save sweight_cm, replace
mkmat sweight_cm
restore


*** Keep only the relevant variables and reshape for long vector
keep uuid uid huid msa yyyymm weight1 weight2 weight3 weight4 unemp1 unemp2 unemp3 unemp4

reshape long unemp weight, i(uuid) j(ho_vs_all) 
drop if unemp==.

*** Generate duplicates counterfactual mobility
* The cluster ids are built BEFORE yyyymm is overwritten below, so every copy
* of a March 2007 observation keeps the cluster ids of its original row.
egen geocluster=group(msa yyyymm)
egen clusterint=group(huid geocluster)

* March 2007 rows of outcome 4 are dropped; March 2007 rows of outcome 3 are
* recoded as group 5 and relabelled as March 2008
drop if yyyymm==200703 & ho_vs_all==4
replace ho_vs_all=5 if ho_vs_all==3 & yyyymm==200703 
replace yyyymm=200803 if ho_vs_all==5

* Chain the copies forward one year at a time: 200803 to 200903, and so on up
* to 201203, giving group 5 one full copy of the March 2007 rows per year
forvalues y=2008/2011 {
	local next=`y'+1
	expand 2 if yyyymm==`y'03 & ho_vs_all==5, gen(rep_temp)
	replace yyyymm=`next'03 if rep_temp==1
	drop rep_temp
}

*** Change the ordering
* Swap codes 4 and 5 so the counterfactual block precedes mobility for job
ren ho_vs_all ho_vs_all_o
gen ho_vs_all = ho_vs_all_o if ho_vs_all_o<=3
replace ho_vs_all = 4 if ho_vs_all_o==5
replace ho_vs_all = 5 if ho_vs_all_o==4

#delimit;
label define sample
1 "Home owners"
2 "Labor Force"
3 "March Mobility"
4 "Counterfactual mobility"
5 "March Mobility for job", replace;
#delimit cr
label values ho_vs_all sample

* One dummy per (sample, month) cell. The key is stored as long so the
* seven-digit code is exact regardless of the default storage type.
gen long yyyymmho = 1000000*ho_vs_all+yyyymm
levelsof yyyymmho, local(yyyymmho)
foreach val in `yyyymmho' {
	gen byte ym`val'=yyyymmho==`val'
}

*** Run the twoway cluster regressions and store the estimates 
* With no constant and a full set of cell dummies, each coefficient is the
* weighted mean of unemp in its cell. The two-way clustered covariance is
* V(huid) + V(geocluster) - V(intersection of the two).
reg unemp ym* [pw=weight], noc cluster(huid)
matrix c1=e(V)
reg unemp ym* [pw=weight], noc cluster(geocluster)
matrix c2=e(V)
reg unemp ym* [pw=weight], noc cluster(clusterint)
matrix c12=e(V)

matrix V=c1+c2-c12

*** Transform the matrix to a matrix in levels
* Scaling vector, stacked in the same order as the coefficients:
*   T monthly weight sums (sample 1), the last month's weight sum (sample 2),
*   sweight_m (sample 3), sweight_cm (sample 4), and ones (sample 5, left unscaled).
* The 5 rows of ones assume five sample 5 coefficients (March 2008-2012).
local T = rowsof(sweight)
matrix eye=J(5,1,1)
matrix A = sweight\sweight[`T',1]\sweight_m\sweight_cm\eye
matrix AA = diag(A)

/*
* verification: Compare to M_OWN_J/monthly_avg_OWN in the accounting data files
matlist AA*e(b)'
*/

matrix big_cov = AA*V*AA
clear
svmat double big_cov

save U_cov_footnote, replace

* Switch the working directory to the path held in the global macro do
* (presumably the do-file directory - confirm against the calling script)
cd "$do"
