
*=========================================
* Prepare spouse's information (CPS based) - this is for the spouse who did not fill the diary
*=========================================
cd "$output"

* Spouse-id lookup used for matching.
* Only the rows flagged as the ATUS respondent (tratusr==1) are read from the
* ATUS-CPS file; the lookup keeps the case id and the respondent's pespouse value.
use if tratusr==1 using "$ATUS\atuscps_0315\atuscps_0315", clear
keep tucaseid pespouse

* Stored sorted by case id (the later match-merges on tucaseid use this file).
sort tucaseid
save atus_spouse_id, replace

* Prepare spouse's data
* Every record of the ATUS-CPS file is kept and keyed by (tucaseid, own line
* number). The line number pulineno is stored under the name pespouse so that a
* later 1:1 merge on (tucaseid pespouse) attaches, to each respondent, the record
* of the household member the respondent's pespouse value points to.
u "$ATUS\atuscps_0315\atuscps_0315",clear

drop pespouse
ren  pulineno pespouse

* 1 if pemlr is 1 or 2, 0 otherwise (including missing pemlr)
g work_status=inlist(pemlr,1,2)

keep tucaseid pespouse prtage ptdtrace peeduca prernwa pehruslt work_status

ren prtage      sp_age
ren ptdtrace    sp_race
ren work_status sp_work_status

* Education group: 1 = codes up to 38, 2 = code 39, 3 = codes 40 and above.
* Left missing when peeduca is missing: Stata treats a missing value as larger
* than any number, so without the guard a missing code would be put in group 3
* (sp_yearschl below already leaves such cases missing).
g sp_ed=1*(peeduca<=38)+2*(peeduca==39)+3*(peeduca>=40) if !missing(peeduca)

* Years of schooling assigned to each education code
* (2.5 is the value for every code not listed explicitly below)
gen     sp_yearschl = 2.5 if !missing(peeduca)
replace sp_yearschl = 5.5 if peeduca==33
replace sp_yearschl = 7.5 if peeduca==34
replace sp_yearschl = 9   if peeduca==35
replace sp_yearschl = 10  if peeduca==36
replace sp_yearschl = 11  if inlist(peeduca,37,38)
replace sp_yearschl = 12  if peeduca==39
replace sp_yearschl = 14  if inrange(peeduca,40,42)
replace sp_yearschl = 16  if peeduca==43
replace sp_yearschl = 17  if peeduca>=44 & !missing(peeduca)

ren peeduca sp_sc

* Earnings and hours are kept only when strictly positive; otherwise missing.
* NOTE(review): the division by 100 presumably undoes two implied decimals in prernwa - confirm.
gen sp_wk_earn  = prernwa/100 if prernwa>0
gen sp_wk_hours = pehruslt if pehruslt>0

* Only the sp_* variables and the merge keys are stored
keep sp* tucaseid pespouse
save atus_spouse_info, replace

*===============================
* Prepare the file for husbands childcare time
*===============================
/* Note: sample constructed to be similar to the sample of wives */ 
* Output 1: husband_avg_childcare_for_descstat (micro data, male respondents)
* Output 2: husband_avg_childcare (mean log childcare and cell count by
*           wife's cohort x year x wife's education group)
use "$ATUS\atussum_0315\atussum_0315.dta", clear

* negative values become missing, so these cases fail the <=10 filter below
replace tryhhchild=. if tryhhchild<0
ren tuyear year
ren teage age
gen work_status=inlist(telfs,1,2)

* merge spouse information
* Step 1: attach the respondent's spouse pointer (pespouse); unmatched lookup rows are discarded.
merge 1:1 tucaseid using atus_spouse_id, keep(master match) nogenerate

* Step 2: attach the spouse's CPS record; only respondents with a matching spouse record remain.
merge 1:1 tucaseid pespouse using atus_spouse_info, keep(match) nogenerate

* do not use wages that are below 0.5 of min wage
g 		MW=5.15 if year<=2006
replace MW=5.85 if year==2007
replace MW=6.55 if year==2008
replace MW=7.25 if year>=2009

* Naming note: the _w/_h suffixes follow the wives' file. In this section the
* respondent is the husband, so wg_w is built from the respondent's own
* earnings and hours and wg_h from the spouse's. Both are used only for the
* "both have a valid wage" filter further down.
g wg_w=		(trernwa/100)/tehruslt
replace wg_w=. if wg_w<0.5*MW
replace wg_w=. if wg_w>100
g logwg_w	=log(wg_w)

g wg_h			=sp_wk_earn/sp_wk_hours
replace wg_h=. if wg_h<0.5*MW
replace wg_h=. if wg_h>100
gen logwg_h		=log(wg_h)

* Sample
keep if inrange(sp_age,25,65)							/* wives 25 to 65 */
drop if trsppres==3										/* Select couples*/
keep if tesex==1										/* men respondents*/
keep if tryhhchild<=10									/* only families with kids 10 or younger */ 
keep if inrange(tudiaryday,2,6) & trholiday==0			/* only weekdays and no holiday diaries */ 

* childcare time
* Row sum of the listed activity variables (missing counted as zero).
* NOTE(review): the factor 52*5/60 presumably converts diary-day minutes into
* annual weekday hours - confirm the units of the t* variables.
egen 	cc		=rowtotal(t030101-t030399 t180381)
replace cc      = (cc)*(52*5/60)

* prepare characteristics for matching
* weduc and wyb/wcoh describe the wife (the spouse record in this section)
ren sp_ed weduc
g wyb=year-sp_age 

* Birth-year cohort of the wife; left missing if the birth year is missing
g wcoh=.
replace wcoh=1 if wyb<=1959
replace wcoh=2 if inrange(wyb,1960,1964)
replace wcoh=3 if inrange(wyb,1965,1969)
replace wcoh=4 if inrange(wyb,1970,1974)
replace wcoh=5 if inrange(wyb,1975,1979)
replace wcoh=6 if wyb>=1980 & !missing(wyb)

* log of zero childcare time is missing, so those diaries enter neither the
* cell mean nor the cell count computed below
gen log_cc=log(cc)
save husband_avg_childcare_for_descstat,replace

keep if logwg_h!=. & logwg_w!=.							/* Only participating husbands and wives */ 
drop logwg_h logwg_w wg_h wg_w MW

* Cell mean of log childcare and number of non-missing observations per cell
collapse (mean) log_cc (count) n_cc=log_cc,by(wcoh year weduc)
save husband_avg_childcare,replace

*===============================
* Prepare the wives' file for estimation (with matched micro and cohort husband data)
*===============================
use "$ATUS\atussum_0315\atussum_0315.dta", clear

* negative values become missing
replace tryhhchild=. if tryhhchild<0
ren tuyear year
ren teage age
gen work_status=inlist(telfs,1,2)

* Respondent's education group (1: codes up to 38, 2: code 39, 3: codes 40+).
* Left missing when the education code is missing: a missing value compares
* greater than any number in Stata and would otherwise be put in group 3.
g weduc=1*(peeduc<=38)+2*(peeduc==39)+3*(peeduc>=40) if !missing(peeduc)

* Respondent's birth year and cohort; cohort left missing if birth year is missing
gen wyb = year-age
g wcoh=.
replace wcoh=1 if wyb<=1959
replace wcoh=2 if inrange(wyb,1960,1964)
replace wcoh=3 if inrange(wyb,1965,1969)
replace wcoh=4 if inrange(wyb,1970,1974)
replace wcoh=5 if inrange(wyb,1975,1979)
replace wcoh=6 if wyb>=1980 & !missing(wyb)

* merge spouse information
* Step 1: attach the respondent's spouse pointer (pespouse); unmatched lookup rows are discarded.
merge 1:1 tucaseid using atus_spouse_id, keep(master match) nogenerate

* Step 2: attach the spouse's CPS record; only respondents with a matching spouse record remain.
merge 1:1 tucaseid pespouse using atus_spouse_info, keep(match) nogenerate

* Cell-level husband childcare (mean log childcare log_cc and cell count n_cc)
* by education group x cohort x year; respondents without a cell are kept.
merge m:1 weduc wcoh year using husband_avg_childcare, keep(master match) nogenerate

ren log_cc log_cc_h

* merge CEX consumption
* c_cex_coh_educ is not created in this section; it must already exist in the
* current ($output) directory.
cap drop _merge
merge m:1 weduc wcoh year using c_cex_coh_educ, keep(master match) nogenerate


* Prepare wife's variables
* Negative hours/earnings are set to zero; a zero in either one leaves the
* wage computed below missing (division by zero or below the wage floor).
foreach v of varlist tehruslt trernwa {
	replace `v' = 0 if `v' < 0
}

* Respondent's childcare time: row sum of the listed activity variables
* (missing counted as zero), rescaled by 52*5/60.
egen    cc_w = rowtotal(t030101-t030399 t180381)
replace cc_w = cc_w*(52*5/60)

* Log childcare of husband (cell mean) and wife; both blanked when trch==0
ren log_cc_h logcc_h
gen logcc_w = log(cc_w)
foreach v of varlist logcc_h logcc_w {
	replace `v' = . if trch==0
}

* Top-code the wife's log childcare at log(16*5*52); missing stays missing
replace logcc_w = min(logcc_w, log(16*5*52)) if logcc_w!=.

* do not use wages that are below 0.5 of min wage
g MW = cond(year<=2006, 5.15, cond(year==2007, 5.85, cond(year==2008, 6.55, cond(year>=2009, 7.25, .))))

* Hourly wages: w = respondent (own earnings/hours), h = spouse record.
* Wages below half of MW or above 100 are discarded, then deflated by
* p_totcons (scaled to 100) before taking logs.
local rate_w (trernwa/100)/tehruslt
local rate_h sp_wk_earn/sp_wk_hours
foreach s in w h {
	g wg_`s' = `rate_`s''
	replace wg_`s' = . if wg_`s' < 0.5*MW | wg_`s' > 100
	gen logwg_`s' = log((100/p_totcons)*wg_`s')
}

ren sp_ed educ

* Prepare the ratio from equation 10 in the text
* (built from the log wages and log childcare times as they stand at this point)
cap drop ratioT
gen ratioT = (exp(logwg_w)*exp(logcc_w))/(exp(logwg_h)*exp(logcc_h))

* For all the non-aggregated variables, winsorize at the 1 percent top and bottom
* Percentiles are taken over all non-missing observations currently in memory.
* The cut-offs are copied into temporary scalars so that no fixed-name scratch
* variable is needed (a variable called temp in the data would otherwise abort
* the loop) and the replace does not depend on r() still being intact.
* Missing values are left missing.
tempname p_lo p_hi
foreach var of varlist logwg_w logwg_h logcc_w ratioT {
	su `var', de 
	scalar `p_lo' = r(p1)
	scalar `p_hi' = r(p99)
	* values at or below p1 become p1, values at or above p99 become p99
	replace `var' = cond(`var'<=`p_lo', `p_lo', cond(`var'>=`p_hi', `p_hi', `var')) if !missing(`var')
}

* prepare education
* Respondent's years of schooling by education code: 2.5 for any code not
* listed below, then one value per code (or code range).
gen wyearschl = 2.5 if peeduc!=.

local single_codes 33 34 35 36
local single_years 5.5 7.5 9 10
forvalues i = 1/4 {
	local code  : word `i' of `single_codes'
	local years : word `i' of `single_years'
	replace wyearschl = `years' if peeduc==`code'
}
replace wyearschl = 11 if inlist(peeduc,37,38)
replace wyearschl = 12 if peeduc==39
replace wyearschl = 14 if inrange(peeduc,40,42)
replace wyearschl = 16 if peeduc==43
replace wyearschl = 17 if peeduc>=44 & peeduc!=.

* Indicator variables for the respondent's (wedd*) and the spouse's (edd*)
* education group; any earlier copies are removed first
cap drop wedd*
tabulate weduc, generate(wedd)
cap drop edd*
tabulate educ, generate(edd)

ren sp_yearschl yearschl 

* Sample: wives aged 25-65, couples (trsppres not 3), women respondents,
* families with kids 10 or younger, weekday (2-6) non-holiday diaries
keep if inrange(age,25,65) & trsppres!=3 & tesex==2 & tryhhchild<=10 & inrange(tudiaryday,2,6) & trholiday==0

save data_ATUS_for_descstats, replace

* Prepare mills ratio of participation correction
* both_work = 1 when both log wages are non-missing. A probit of both_work on
* the education indicators and years of schooling gives the linear index xb,
* and mills = normalden(xb)/normal(xb).
cap drop both_work 
gen both_work = (logwg_h!=.) & (logwg_w!=.)
probit both_work edd* wedd* yearschl wyearschl

cap drop xb
predict xb, xb

cap drop mills
g mills = normalden(xb)/normal(xb)

* Only participating husbands and wives
keep if both_work==1

* Variables stored in the estimation file
keep ncex n_cc cc_w logcc_w mills logwg_w logwg_h log_c logcc_h ratioT edd* wedd* wyearschl

save data_ATUS, replace

* Remove intermediate files. Note that c_cex_coh_educ.dta is not created in
* this section, so it has to be rebuilt before this section can be run again.
foreach f in atus_spouse_id atus_spouse_info husband_avg_childcare c_cex_coh_educ {
	erase `f'.dta
}

cd "$MD"
