* Start from an empty session, request 2g of memory, and work inside the
* directory held in the global macro "output".
clear all
set memory 2g
cd "$output"

/* This file generates the employment status, homeownership, and moving data that are used as input to the accounting exercise.
	It does this for our baseline sample (labor force) and for the sample used in the homeownership correction.
	
	The final dataset, data_for_accounting.dta, has just one observation for each year from 2006 to 2012.
	*/

*********************************
*** Data Preparation Baseline ***
*********************************
*** Sample selection
* Load the CPS extract and restrict it to the baseline sample (2006 onward, empstat<30).
use cps_data, clear
keep if year>=2006
keep if empstat<30 & migrate1>0 & migrate1!=. 		/* 	We only work with the individuals in the labor force. 
													migrate1>0 is not binding. migrate1 is missing for some years
													(the last of them is 1995) */ 
keep year weight state migrate1 own empstat qmigrat1 wks* weeks* whymove

*** Build the weighted yearly totals from the CPS micro data, then collapse to one row per year.
* Each egen below stores the weighted total for a subgroup, repeated on every
* observation of that subgroup within the year and missing elsewhere.
* Full variable names (empstat, migrate1) are used so the file also runs with
* "set varabbrev off".
* NOTE(review): in Stata "own>10" is also true when own is missing; confirm that
* own has no missing values in this sample, otherwise they are counted as renters.
local owner       "own==10"
local renter      "own>10"
local unemployed  "empstat==20"
local moved       "migrate1>1"
local left_county "(migrate1>3 & migrate1!=.)"
local job_reason  "inlist(whymove,4,5,6,8)"

egen double POP = sum(weight), by(year)
label var POP "Labor force level"
egen double U = sum(weight) if `unemployed', by(year)
label var U "Unemployment level"
egen double OWN = sum(weight) if `owner', by(year)
label var OWN "Home owners level"
egen double RENT = sum(weight) if `renter', by(year)
label var RENT "Home renters level"

* Movers by tenure
egen double M_OWN = sum(weight) if `moved' & `owner', by(year)
label var M_OWN "Home owners who moved level"
egen double M_RENT = sum(weight) if `moved' & `renter', by(year)
label var M_RENT "Home renters who moved level"

* Unemployed by tenure
egen double U_OWN = sum(weight) if `owner' & `unemployed', by(year)
label var U_OWN "Home owners who are unemployed level"
egen double U_RENT = sum(weight) if `renter' & `unemployed', by(year)
label var U_RENT "Home renters who are unemployed level"

* Movers whose stated reason (whymove) is one of the job-related codes 4, 5, 6, 8
egen double M_OWN_J = sum(weight) if `moved' & `owner' & `job_reason', by(year)
label var M_OWN_J "Home owners who moved for job"
egen double M_OWN_NEWJ = sum(weight) if `moved' & `owner' & (whymove==4), by(year)
label var M_OWN_NEWJ "Home owners who moved for new job or job transfer"
egen double M_OWN_NEWJ_O = sum(weight) if `moved' & `owner' & (whymove==4|whymove==8), by(year)
label var M_OWN_NEWJ_O "Home owners who moved for new job or job transfer, and other job related reasons"
egen double M_RENT_J = sum(weight) if `moved' & `renter' & `job_reason', by(year)
label var M_RENT_J "Home renters who moved for job"

* Reason code 5 only; as in the original, no condition on migrate1 is imposed here
egen double M_OWN_LOOK_WORK = sum(weight) if `owner' & whymove==5, by(year)
label var M_OWN_LOOK_WORK "Home owners whose reason for move was 'to look for work or lost job'"
egen double M_RENT_LOOK_WORK = sum(weight) if `renter' & whymove==5, by(year)
label var M_RENT_LOOK_WORK "Home renters whose reason for move was 'to look for work or lost job'"

* Movers with migrate1>3 (labelled as moves out of county)
egen double M_OWN_OUT_COUNTY = sum(weight) if `left_county' & `owner', by(year)
label var M_OWN_OUT_COUNTY "Home owners who moved out of county level"
egen double M_RENT_OUT_COUNTY = sum(weight) if `left_county' & `renter', by(year)
label var M_RENT_OUT_COUNTY "Renters who moved out of county  level"
egen double M_OWN_J_OUT_COUNTY = sum(weight) if `left_county' & `owner' & `job_reason', by(year)
label var M_OWN_J_OUT_COUNTY "Home owners who moved for job out of county level"
egen double M_RENT_J_OUT_COUNTY = sum(weight) if `left_county' & `renter' & `job_reason', by(year)
label var M_RENT_J_OUT_COUNTY "Renters who moved for job out of county level"

* collapse takes the (default) mean, which skips the missing entries and so
* returns each subgroup total, one row per year.
collapse POP U OWN RENT M_OWN M_RENT U_OWN U_RENT M_*_J M_OWN_LOOK_WORK M_RENT_LOOK_WORK M_OWN_OUT_COUNTY M_RENT_OUT_COUNTY M_OWN_NEWJ M_OWN_NEWJ_O M_OWN_J_OUT_COUNTY M_RENT_J_OUT_COUNTY, by(year)
sort year
save baseline, replace


*************************************************
*** Data Preparation Homeownership Correction ***
*************************************************
/* Note that the sample is different: we look at the entire population aged 16-65, not just the labor force */ 
*** Sample selection
* Load the CPS extract and keep everyone aged 16-65, from 2005 onward.
use cps_data, clear
keep if year>=2005			/* 2005 is kept because the age composition at t-1 is needed */ 
keep if age>15 & age<66 & migrate1>0 & migrate1!=. /* the conditions on migrate1 are not binding on this age group */ 
keep year age weight state migrate1 own empstat qmigrat1 wks* weeks* whymove

* Each egen below stores the weighted total for a subgroup, repeated on every
* observation of that subgroup within the year and missing elsewhere.
* Full variable names (empstat, migrate1) are used so the file also runs with
* "set varabbrev off".
* NOTE(review): in Stata "own>10" is also true when own is missing; confirm that
* own has no missing values in this sample, otherwise they are counted as renters.
local owner      "own==10"
local renter     "own>10"
local unemployed "empstat==20"
local moved      "migrate1>1"
local job_reason "inlist(whymove,4,5,6,8)"

egen double POP = sum(weight), by(year)
label var POP "Age 16-65 level"
egen double LF = sum(weight) if empstat<30, by(year)
label var LF "Labor force 16-65 level"

egen double U = sum(weight) if `unemployed', by(year)
label var U "Unemployment level"
egen double OWN = sum(weight) if `owner', by(year)
label var OWN "Home owners 16-65 level"
egen double OWN_16 = sum(weight) if `owner' & age==16, by(year)
label var OWN_16 "Home owners age 16 level"
egen double OWN_65 = sum(weight) if `owner' & age==65, by(year)
label var OWN_65 "Home owners age 65 level"

egen double RENT = sum(weight) if `renter', by(year)
label var RENT "Home renters 16-65 level"
egen double RENT_16 = sum(weight) if `renter' & age==16, by(year)
label var RENT_16 "Renters age 16 level"
egen double RENT_65 = sum(weight) if `renter' & age==65, by(year)
label var RENT_65 "Renters 65 level"

egen double M_OWN = sum(weight) if `moved' & `owner', by(year)
label var M_OWN "Home owners who moved 16-65 level"
egen double M_RENT = sum(weight) if `moved' & `renter', by(year)
label var M_RENT "Home renters who moved 16-65 level"
egen double U_OWN = sum(weight) if `owner' & `unemployed', by(year)
label var U_OWN "Home owners who are unemployed level"
egen double U_RENT = sum(weight) if `renter' & `unemployed', by(year)
label var U_RENT "Home renters who are unemployed level"
egen double M_OWN_J = sum(weight) if `moved' & `owner' & `job_reason', by(year)
label var M_OWN_J "Home owners who moved for job 16-65"
egen double M_RENT_J = sum(weight) if `moved' & `renter' & `job_reason', by(year)
label var M_RENT_J "Home renters who moved for job 16-65"

* collapse takes the (default) mean, which skips the missing entries and so
* returns each subgroup total, one row per year.
collapse POP* LF U OWN* RENT* M_OWN M_RENT U_OWN U_RENT M_*_J, by(year)

* Replace the age-65 levels by their one-year lag (value at t-1).
* "gen double" keeps the lagged levels at the same precision as their source;
* a plain "gen" would store them as float.
tsset year
foreach name in OWN RENT {
	local who "Home owners"
	if "`name'"=="RENT" local who "Renters"
	ren `name'_65 `name'_65_old 
	gen double `name'_65=l1.`name'_65_old 
	label var `name'_65 "`who' age 65 at t-1, level"
}
drop *_old

* NH / NR: age-16 level at t minus age-65 level at t-1, for owners / renters.
* dOWN / dRENT: change in the owner / renter level between t-1 and t.
gen double NH= OWN_16-OWN_65
gen double NR= RENT_16-RENT_65
gen double dOWN=OWN-l1.OWN
gen double dRENT=RENT-l1.RENT

* The "M_OWM" spelling is kept as is because these variable names are part of
* the saved dataset.
gen double M_OWM_corr=M_OWN+dRENT-NR	/* This is larger therefore for upper bound we take that */ 
gen double M_OWM_corr2=M_OWN-dOWN+NH
gen double M_RENT_corr=M_RENT-dRENT+NR
gen double M_RENT_corr2=M_RENT+dOWN-NH

* 2005 was only needed for the lags. Prefix every variable except year so the
* names do not clash with the baseline variables when the files are merged.
drop if year==2005
foreach var of varlist POP-M_RENT_corr2 {
	ren `var' age1665_`var'
} 
sort year
save ho_corr, replace


****************************
*** Merge to one dataset ***
****************************
* Combine the two yearly files on year. The old-style merge syntax is kept; it
* needs both files sorted by year, which they are when saved above.
use baseline, clear
merge year using ho_corr
tab _merge 
drop _merge
sort year

* Save the final dataset before deleting the intermediate files, so that a
* failed save does not leave us without them. A forward slash is used as the
* path separator instead of a backslash.
save "$output/data_for_accounting", replace

erase baseline.dta
erase ho_corr.dta

cd "$do"
