/* This file generates the datasets data4figure1_noimpute.dta and data4trend_HOcorr.dta.
   These datasets are used by the do file "figures_and_misc.do".
	*/ 

* Start from a clean session.
clear all
set mem 2000m	/* Needed by Stata 11 and earlier; later versions manage memory automatically */

* Every file read or written below lives in the folder named by the global
* macro "output", which must be defined by the caller before this file runs.
* Stop with a clear message if it is not, rather than failing later at -use-.
if `"$output"' == "" {
	display as error "global macro output is not defined; set it to the data folder before running this file"
	exit 198
}
cd "$output"



*****************************
* Prepare data for figure 1 *
*****************************

* Load the CPS extract and keep respondents in the labor force.
* A forward slash is used in the path because it works on every platform.
use year serial weight state migrate1 own empstat qmigrat1 qmigst1a qmigst1b wks* weeks* whymove ///
	using "$output/cps_data", clear
keep if empstat < 30 		/* We focus on those in the labor force */


* Drop observations with imputed migration data (flags are used from 1996 on).
* imputed2 marks every record sharing a serial-year with a flagged record;
* it is then reset for records whose own two flags are both zero, so only
* records with a non-zero flag of their own are dropped.
* NOTE(review): qmigst1b>=3 is also true when qmigst1b is missing - confirm
* that this is intended.
gen imputed = (qmigrat1==3 | qmigst1b>=3) & year>=1996
egen imputed2 = max(imputed), by(serial year)
replace imputed2 = 0 if (qmigrat1==0 & qmigst1b==0)
drop if imputed2==1


* Conditions shared by the totals below.  Missing values compare greater than
* any number in Stata, so every ">" test is paired with an explicit
* non-missing check; otherwise missing tenure would be counted as renting.
local owner        "own==10"
local renter       "own>10 & !missing(own)"
local moved        "migrate1>1 & !missing(migrate1)"
local moved_county "migrate1>3 & !missing(migrate1)"
local job_move     "(whymove==4 | whymove==5 | whymove==6 | whymove==8)"

* Weighted yearly totals.  Each total is constant within a year on the rows
* that satisfy its condition and missing elsewhere, so the mean taken by
* -collapse- below returns the total itself.  The totals are stored as double
* because weighted population sums exceed the precision of a float.
egen double POP = sum(weight), by(year)
label var POP "Labor force level"

* Owners
egen double OWN = sum(weight) if `owner', by(year)
label var OWN "Home owners level"
egen double M_OWN = sum(weight) if `moved' & `owner', by(year)
label var M_OWN "Home owners who moved level"
egen double M_OWN_COUNTY = sum(weight) if `moved_county' & `owner', by(year)
label var M_OWN_COUNTY  "Home owners who moved out of county level"
egen double M_OWN_J = sum(weight) if `moved' & `owner' & `job_move', by(year)
label var M_OWN_J "Home owners who moved for job level"
egen double M_OWN_LOOK_WORK = sum(weight) if `owner' & whymove==5, by(year)
label var M_OWN_LOOK_WORK "Home owners whose reason for move was 'to look for work or lost job'"
egen double OWN_U = sum(weight) if `owner' & empstat==20, by(year)
label var OWN_U "Unemployed owners"

* Renters
egen double RENT = sum(weight) if `renter', by(year)
label var RENT "Renters level"
egen double M_RENT = sum(weight) if `moved' & `renter', by(year)
label var M_RENT "Renters who moved level"
egen double M_RENT_COUNTY = sum(weight) if `moved_county' & `renter', by(year)
label var M_RENT_COUNTY "Renters who moved out of county level"
egen double M_RENT_J = sum(weight) if `moved' & `renter' & `job_move', by(year)
label var M_RENT_J "Renters who moved for job level"
egen double M_RENT_LOOK_WORK = sum(weight) if `renter' & whymove==5, by(year)
label var M_RENT_LOOK_WORK "Renters whose reason for move was 'to look for work or lost job'"
egen double RENT_U = sum(weight) if `renter' & empstat==20, by(year)
label var RENT_U "Unemployed renters"

* One row per year.
collapse (mean) POP OWN M_OWN M_OWN_COUNTY M_OWN_J RENT M_RENT M_RENT_COUNTY M_RENT_J ///
	M_OWN_LOOK_WORK M_RENT_LOOK_WORK OWN_U RENT_U, by(year)


save data4figure1_noimpute.dta, replace



******************************************************
* Prepare data for trend analysis with HO correction *
******************************************************

* Load the CPS extract (relative to the current directory) and keep ages 16-65.
use year serial age weight migrate1 own empstat qmigrat1 whymove using cps_data, clear
keep if age>15 & age<66

* Conditions shared by the totals below.  Missing values compare greater than
* any number in Stata, so every ">" test is paired with an explicit
* non-missing check; otherwise missing tenure would be counted as renting.
local owner  "own==10"
local renter "own>10 & !missing(own)"
local moved  "migrate1>1 & !missing(migrate1)"

* Weighted yearly totals.  Each total is constant within a year on the rows
* that satisfy its condition and missing elsewhere, so the mean taken by
* -collapse- below returns the total itself.
egen double POP = sum(weight), by(year)
label var POP "Age 16-65 level"

* NOTE(review): LF and U are not listed in the -collapse- below, so they never
* reach the saved dataset.  Confirm whether they should be added there.
egen double LF = sum(weight) if empstat<30, by(year)
label var LF "Labor force 16-65 level"
egen double U = sum(weight) if empstat==20, by(year)
label var U "Unemployment level"

egen double OWN = sum(weight) if `owner', by(year)
label var OWN "Home owners 16-65 level"
egen double OWN_16 = sum(weight) if `owner' & age==16, by(year)
label var OWN_16 "Home owners age 16 level"
egen double OWN_65 = sum(weight) if `owner' & age==65, by(year)
label var OWN_65 "Home owners age 65 level"

egen double RENT = sum(weight) if `renter', by(year)
label var RENT "Home renters 16-65 level"
egen double RENT_16 = sum(weight) if `renter' & age==16, by(year)
label var RENT_16 "Renters age 16 level"
egen double RENT_65 = sum(weight) if `renter' & age==65, by(year)
label var RENT_65 "Renters 65 level"

egen double M_OWN = sum(weight) if `moved' & `owner', by(year)
label var M_OWN "Home owners who moved 16-65 level"
egen double M_RENT = sum(weight) if `moved' & `renter', by(year)
label var M_RENT "Home renters who moved 16-65 level"

* One row per year.
collapse (mean) POP M_OWN M_RENT OWN* RENT*, by(year)

* Replace the age-65 totals by their value in the previous year.  They are
* generated as double so the collapsed values are not truncated to float.
tsset year
local tenure_OWN  "Home owners"
local tenure_RENT "Renters"
foreach name in OWN RENT {
	rename `name'_65 `name'_65_old
	gen double `name'_65 = L1.`name'_65_old
	label var `name'_65 "`tenure_`name'' age 65 at t-1, level"
}
drop *_old

* Age-16 total minus last year's age-65 total, and the year-on-year change in
* each tenure total.
gen double NH = OWN_16-OWN_65
gen double NR = RENT_16-RENT_65
gen double dOWN = OWN-L1.OWN
gen double dRENT = RENT-L1.RENT

* Corrected mover totals.
* NOTE(review): "OWM" looks like a typo for "OWN"; the names are kept as they
* are because other do files may refer to them.
gen double M_OWM_corr = M_OWN+dRENT-NR	/* This one is larger, so it is taken as the upper bound */
gen double M_OWM_corr2 = M_OWN-dOWN+NH
gen double M_RENT_corr = M_RENT-dRENT+NR
gen double M_RENT_corr2 = M_RENT+dOWN-NH

* Prefix every variable except year with the age range it refers to.
foreach var of varlist POP-M_RENT_corr2 {
	rename `var' age1665_`var'
}
sort year
save data4trend_HOcorr.dta, replace
