/* This file generates a matched panel from the CPS.
   This panel will be used to calculate the covariance matrix 
   of the mobility rates and unemployment rates, which is then 
   used as the input to the Delta method for calculating
   standard errors for the counterfactual exercise */ 
   
* Start from an empty session, request memory for the CPS extracts, and
* work from the folder named by the global macro output (defined elsewhere)
clear all
set memory 3g
cd "${output}"

*==================================
* Match March files to basic files 
*==================================
* Scratch file for the ASEC extract. A Stata tempfile is used instead of a
* fixed temp.dta in the output folder, so an unrelated file with that name
* can no longer be overwritten and then erased by this script. Stata deletes
* the tempfile automatically when the do-file finishes.
tempfile asec

* --- ASEC (March) side: 2007 onward, only the variables used below ---
use cps_data_nber if year>=2007, clear
keep h_idnum1 h_idnum2 a_lineno h_mis h_month year migrate1 whymove own empstat weight gestcen gtcbsa sex age prdtrace
rename prdtrace race

* Suffix the ASEC copies with 3 so they stay distinct from the same-named
* basic-monthly variables that are kept alongside them after the merge
foreach var in weight own empstat gestcen gtcbsa sex age race {
	rename `var' `var'3
	label variable `var'3 "ASEC `var'"
}

* Give the ASEC identifiers the names used as merge keys
rename h_idnum1 hrhhid
rename h_idnum2 hrhhid2
rename a_lineno pulineno
rename h_mis hrmis
rename h_month month
* Convert the household ids to numeric (no-op if they are already numeric)
destring hrhhid hrhhid2, replace

* The old-style merge used below requires both files sorted by the keys
sort hrhhid hrhhid2 pulineno year month
save "`asec'"

* --- Basic monthly side: 2007 onward, dropping January-February 2007 ---
use cpsm_2006_2012 if year>=2007, clear
drop if year==2007 & month<3
sort hrhhid hrhhid2 pulineno year month
merge hrhhid hrhhid2 pulineno year month using "`asec'"

tabulate month _merge if year>=2007	/* Verification: should be only 1 for all months other than March, and only 
                               2 or 3 for the March months (since ASEC includes all basic + the CHIP) */ 

* chip = 1 for records that appear only in the ASEC file (_merge==2)
generate chip = (_merge==2)
drop _merge

* NOTE(review): empst is left exactly as written. If the basic file names this
* variable empstat, the abbreviation would be ambiguous with empstat3 - confirm.
keep hrhhid hrhhid2 pulineno year month hrmis migrate1 whymove own3 empstat3 weight3 gestcen3 gtcbsa3 sex3 race3 age3 own empst weight gestcen gtcbsa chip sex race age
 
save basic_march4match_C, replace

*=========================================
* Generate unique person and household ID  
*=========================================
use basic_march4match_C, clear

* Assign the month and year of the first interview, derived from the survey
* month and the month-in-sample (hrmis).
gen intmonth=.
gen intyear=.

* Month-in-sample 1-4: step back hrmis-1 months within the survey year
replace intmonth = month-hrmis+1 if hrmis<=4
replace intyear  = year          if hrmis<=4

* Month-in-sample 5 and above: same step back, counted from hrmis 5, and dated
* one year earlier. Missing hrmis is excluded explicitly: Stata treats missing
* as larger than any number, so a bare hrmis>=5 would also match it and assign
* an interview year to rows whose month-in-sample is unknown.
replace intmonth = month-hrmis+5 if hrmis>=5 & hrmis<.
replace intyear  = year-1        if hrmis>=5 & hrmis<.

* Wrap non-positive months into the previous calendar year. The two hrmis
* groups are disjoint, so one pass is equivalent to wrapping each separately.
replace intyear  = intyear-1   if intmonth<=0
replace intmonth = intmonth+12 if intmonth<=0

*duplicates report hrhhid hrhhid2 pulineno intmonth intyear chip
/* Note: we do not match the chip to months other than March */ 

* Person id: household ids + line number + first-interview date + chip flag.
* Household id: the same without the line number.
* Rows with a missing value in any of these variables get a missing id.
egen double uid  = group(hrhhid hrhhid2 pulineno intmonth intyear chip)
egen double huid = group(hrhhid hrhhid2 intmonth intyear chip)

*========================
* Apply S|A|R correction 
*========================
* A person id whose sex or race changes between consecutive records, or whose
* age moves implausibly, is split: from the first such record onward the
* whole household is given new person and household ids. The loop repeats
* until no record sits at a break.
gen yyyymm 	 = 100*year+month

* Fill sex, race and age from the ASEC copies where the basic value is missing
replace sex  = sex3  if sex==.
replace race = race3 if race==.
replace age  = age3  if age==.

* Keep the ids as originally assigned. Stored as double because uid and huid
* are doubles; the default float type is exact only up to 2^24 and would
* round larger ids.
gen double ouid  = uid
gen double ohuid = huid

local wi = 1

while `wi'>0 {
	sort uid yyyymm

	* Change relative to the previous record of the same person. The uid<.
	* guard stops rows with a missing uid from being compared with each other
	* (missing equals missing in Stata); they could otherwise raise breaks
	* that the id shift below can never clear.
	foreach v in sex race age {
		gen d`v' = `v'-`v'[_n-1] if uid==uid[_n-1] & uid<.
	}
	gen dmonth = month-month[_n-1] + 12*(year-year[_n-1]) if uid==uid[_n-1] & uid<.

	* First record of a person has no predecessor: treat as no change
	foreach var in dsex drace dage dmonth {
		replace `var'=0 if `var'==.
	}

	* Break: sex or race changed, age fell, or age rose by more than
	* 1 year within 12 months or more than 2 years over a longer gap
	gen break=dsex!=0 | drace!=0
	replace break=1 if dage<0
	replace break=1 if dage>1 & dmonth<=12 
	replace break=1 if dage>2 & dmonth>12 

	* Earliest break month within each household (missing if none)
	gen byear  = yyyymm if break==1
	egen minby = min(byear), by(huid)

	* Offsets equal to the current maximum ids keep the new ids unique
	sum uid
	local max_uid	=	r(max)
	sum huid
	local max_huid	=	r(max)

	* Number of records at their household's first break; 0 ends the loop
	count if yyyymm==minby   
	local wi = r(N)
	di `wi'

	* Re-id every household record from the first break month onward
	replace uid = uid + `max_uid' if yyyymm>=minby
	replace huid = huid + `max_huid' if yyyymm>=minby
	drop  dsex drace dage dmonth break byear minby
	
}

* Write the corrected panel to disk, then switch to the folder named by the
* global macro do (defined elsewhere)
save "cps_panel_2007_2012_C", replace

cd "${do}"
