/* This file prepares the data needed for the case where we use mobility rates from only the 5 states
	which got hit the worst by the housing bust to conduct our counterfactual exercise.
	
	The final dataset just has the homeowner and renter mobility rates for the years 2007 to 2012 for these 5 states (pooled)*/ 


*====================================
* State of origin - with imputations
*====================================
clear all
* NOTE(review): "set mem" is only needed by older Stata releases; recent ones accept and ignore it.
set mem 2g
cd "$output"


*** Variables by state
* Builds yearly weighted population totals (renters, owners, and the unemployed
* among each) for people currently living in the five selected states and saves
* them, sorted by year, to the scratch file temp.
* NOTE(review): the state codes look like FIPS codes (AZ, CA, FL, MI, NV) - confirm.
use cps_data, clear

keep if inlist(state, 4, 6, 12, 26, 32)
keep if year >= 2006
/* 	We only work with the individuals in the labor force.
	migrate1>0 is not binding. migrate1 is missing for some years
	(the last of them is 1995) */
keep if empstat < 30 & migrate1 > 0 & migrate1 != .
keep year weight state migrate1 migsta1 ownershp empstat qmigrat1 qmigst1b wks* weeks* whymove

* Note that state!=migsta1 only for the case of inter-state move or move from abroad

*** Aggregate level sums by year
* Each egen writes the yearly total only on the rows that satisfy its condition
* (missing elsewhere), so the mean taken by collapse below returns that total.
* Variables are spelled out in full so the code does not depend on variable
* abbreviation being switched on.
* Renters are ownershp>10. Stata orders missing values above every number, so the
* explicit non-missing check keeps rows with unknown tenure out of the renter totals.
egen double RENT = sum(weight) if ownershp > 10 & !missing(ownershp), by(year)
label var RENT "Home renters level"
egen double U_RENT = sum(weight) if ownershp > 10 & !missing(ownershp) & empstat == 20, by(year)
label var U_RENT "Renters who are unemployed level"

egen double OWN = sum(weight) if ownershp == 10, by(year)
label var OWN "Homeowners level"
egen double U_OWN = sum(weight) if ownershp == 10 & empstat == 20, by(year)
label var U_OWN "Homeowners who are unemployed level"

* One row per year; the file must stay sorted by year for the later merge.
collapse  	OWN RENT U*, by(year)
sort year
save temp, replace


*** Variables by state of origin
* Builds yearly weighted totals of movers whose state of origin (migsta1) is one
* of the five selected states, attaches the population totals stored in the
* scratch file temp, and overwrites temp with the combined yearly file.
* The combined data stay in memory for the section that follows.
use cps_data, clear

keep if inlist(migsta1, 4, 6, 12, 26, 32)
keep if year >= 2006
/* 	We only work with the individuals in the labor force.
	migrate1>0 is not binding. migrate1 is missing for some years
	(the last of them is 1995) */
keep if empstat < 30 & migrate1 > 0 & migrate1 != .
keep year weight state migrate1 migsta1 ownershp empstat qmigrat1 qmigst1b wks* weeks* whymove

* Note that state!=migsta1 only for the case of inter-state move or move from abroad

*** Aggregate level sums by year
* Shared pieces of the selection conditions. Variables are spelled out in full so
* the code does not depend on variable abbreviation being switched on.
* Stata orders missing values above every number, so the renter condition needs
* the explicit non-missing check to keep unknown tenure out of the renter totals.
local renter  "(ownershp > 10 & !missing(ownershp))"
local owner   "(ownershp == 10)"
local for_job "inlist(whymove, 4, 5, 6, 8)"

* Thresholds on migrate1 used below: >1 any move, >3 out of county, ==5 out of state.
* Each egen writes the yearly total only on qualifying rows; collapse then averages
* over those rows, which returns the total.
egen double M_RENT = sum(weight) if migrate1 > 1 & `renter', by(year)
label var M_RENT "Home renters who moved level"
egen double M_RENT_J = sum(weight) if migrate1 > 1 & `renter' & `for_job', by(year)
label var M_RENT_J "Home renters who moved for job"
egen double M_RENT_OUT_COUNTY = sum(weight) if migrate1 > 3 & migrate1 != . & `renter', by(year)
label var M_RENT_OUT_COUNTY "Renters who moved out of county  level"
egen double M_RENT_J_OUT_COUNTY = sum(weight) if migrate1 > 3 & migrate1 != . & `renter' & `for_job', by(year)
label var M_RENT_J_OUT_COUNTY "Renters who moved out of county for job level"
egen double M_RENT_OUT_STATE = sum(weight) if migrate1 == 5 & `renter', by(year)
label var M_RENT_OUT_STATE "Renters who moved out of state level"


egen double M_OWN = sum(weight) if migrate1 > 1 & `owner', by(year)
label var M_OWN "Homeowners who moved level"
egen double M_OWN_J = sum(weight) if migrate1 > 1 & `owner' & `for_job', by(year)
label var M_OWN_J "Homeowners who moved for job"
egen double M_OWN_OUT_COUNTY = sum(weight) if migrate1 > 3 & migrate1 != . & `owner', by(year)
label var M_OWN_OUT_COUNTY "Homeowners who moved out of county  level"
egen double M_OWN_J_OUT_COUNTY = sum(weight) if migrate1 > 3 & migrate1 != . & `owner' & `for_job', by(year)
label var M_OWN_J_OUT_COUNTY "Homeowners who moved out of county for job level"
egen double M_OWN_OUT_STATE = sum(weight) if migrate1 == 5 & `owner', by(year)
label var M_OWN_OUT_STATE "Homeowners who moved out of state level"


collapse  	M* , by(year)
sort year
* Old-style match merge on year: both this data and temp must be sorted by year.
merge year using temp
drop _merge

save temp, replace

*** Prepare baseline
* Works on the yearly file currently in memory (mover totals merged with
* population totals). Each rate is this year's movers divided by the previous
* year's population of the same tenure group, for all moves and for job moves.
tsset year

foreach grp in OWN RENT {
	generate fstates_P_M_`grp'   = M_`grp' / L.`grp'
	generate fstates_P_M_`grp'_J = M_`grp'_J / L.`grp'
}

* Only the year and the four rates go into the output file.
keep year fstates*

save 5states_baseline, replace


*** Prepare inter-county
* Same rates as the baseline, but the numerators count only moves out of the
* county. The homeowner population total is kept as well (as fstates_OWN).
use temp, clear

tsset year

foreach grp in OWN RENT {
	generate fstates_P_M_`grp'   = M_`grp'_OUT_COUNTY / L.`grp'
	generate fstates_P_M_`grp'_J = M_`grp'_J_OUT_COUNTY / L.`grp'
}

* Renamed only after the lagged population has been used in the rates above.
rename OWN fstates_OWN

keep year fstates*

save 5states_county, replace


****************
*** REWEIGHT ***
****************

use cps_data if year >= 2006, clear


** The control variables are generated before any sample restriction because lfp needs
** information on the other household members, who must not be dropped yet.

*** Number of persons in the household (serial within year) with empstat below 30, capped at 3
* NOTE(review): any empstat code below 30 is counted, including a not-in-universe
* code if the data has one - confirm this is intended.
generate lf_dum = (empstat < 30)
egen lfp = sum(lf_dum), by(serial year)
replace lfp = 3 if lfp > 3

**** Generate demographics
* Ownership: 1 for ownershp==10, 0 for everything else (including missing)
generate own = (ownershp == 10)

* Categories of income, age, and schooling
* Income: ten groups of ftotval (xtile here is an egen function from a user-written package)
egen p_inc = xtile(ftotval), n(10)

* Age: category 1 up to age 25, then one step up for each cutoff 25, 30, ..., 60 exceeded.
* Observations with age equal to system missing stay in category 1.
generate age_cat = 1
local band = 1
foreach cutoff of numlist 25(5)60 {
	local ++band
	replace age_cat = `band' if age > `cutoff' & age != .
}

generate young = (age < 20)

* Schooling: 1 below educ 73, 2 from 73, 3 from 111 (missing educ ends up in 3)
generate school = 1 + (educ >= 73) + (educ >= 111)

* family size, top-coded at 5
replace famsize=5 if famsize>5

/* Creates the indicator variables (prefix _I) used as probit regressors below.
   When reweighting for the whole sample (not just 5 states), we included the
   interaction between age_cat and school. Doing the same here leads to the
   probit being non-concave for the move-for-job case, so the interaction is
   not included. */
xi i.age_cat i.school i.famsize i.lfp i.marst i.p_inc


** Reducing the sample to labor force and the 5 states
/* 	We only work with the individuals in the labor force.
	migrate1>0 is not binding. migrate1 is missing for some years
	(the last of them is 1995) */
keep if empstat < 30 & migrate1 > 0 & migrate1 != .

* Keep only people who lived in one of the 5 states last year: either they still
* live there and have not moved (migrate1==1), or they moved (migrate1>1) and
* their state of origin is one of the 5 states.
generate cur_res_non_mov = (inlist(state, 4, 6, 12, 26, 32) & migrate1 == 1)
generate mov_old_res     = (inlist(migsta1, 4, 6, 12, 26, 32) & migrate1 > 1)
keep if cur_res_non_mov == 1 | mov_old_res == 1


** Generating the move variables for INTER-COUNTY only
* moved: move out of the county; moved_job: stated reason is one of the job codes;
* movedjob: both at once.
generate moved     = (migrate1 > 3)
generate moved_job = inlist(whymove, 4, 5, 6, 8)
generate movedjob  = moved * moved_job

*** Run probit
* For each year and each outcome (any inter-county move, inter-county move for
* job) a weighted probit is fitted on non-owners only (own==0). predict is then
* run without a sample restriction, so owners also receive a fitted probability
* from the non-owner model; the values are stored for that year's rows.

* Start from empty holders for the fitted probabilities
foreach outcome in moved movedjob {
	capture drop `outcome'_hat
	generate `outcome'_hat = .
}


forvalues yr = 2006/2012 {
	foreach outcome in moved movedjob {
		probit `outcome' young _I* [pweight=weight] if own==0 & year==`yr'

		* Scratch prediction is recreated for every fit
		capture drop `outcome'_hat_temp
		predict `outcome'_hat_temp
		replace `outcome'_hat = `outcome'_hat_temp if year==`yr'
	}
}


*** Calculate the total moves
* The fitted probabilities are summed over homeowners using their survey weights,
* so the results are levels and should later be divided by the homeowner population.
tempvar w_move w_movejob

generate `w_move' = weight*moved_hat
egen double fstates_M_OWN_reweight = sum(`w_move') if owner==10, by(year)
label var fstates_M_OWN_reweight "Renters' mobility reweighted with homeowners' weights"

generate `w_movejob' = weight*movedjob_hat
egen double fstates_M_OWN_J_reweight = sum(`w_movejob') if owner==10, by(year)
label var fstates_M_OWN_J_reweight "Renters' mobility for job reweighted with homeowners' weights"

* The totals are filled only on homeowner rows, so the yearly mean equals the total.
collapse  fstates_M_OWN_reweight fstates_M_OWN_J_reweight, by(year)
sort year

* Add the reweighted totals to the inter-county file (old-style match merge on
* year, which needs both files sorted by year) and save it back.
merge year using 5states_county
drop _merge
sort year

save 5states_county, replace

cd "$do"
