/* The file takes the March CPS dataset (already extracted from IPUMS into stata format) and recodes some variables.

	The output file generated is cps_data.dta 
	*/

* Start from an empty session and load the raw IPUMS March CPS extract.
* Relies on the globals $output and $march_cps_data being defined by the caller.
clear all
set mem 2000m
cd "$output"

* Forward slashes are accepted by Stata on every platform (a backslash only
* works on Windows), and the braces make the end of the macro name explicit.
use "${march_cps_data}/cps_variables", clear


/* Recode categorical variables */ 

                                                                                                                                   
*** weight - wtsupp is the weight used throughout (the mobility analysis works at the household level)
rename wtsupp weight

*** relate - Relationship to household head
* Codes above 9000 are set to missing; several detailed codes are then
* merged into a single broader code each.
replace relate = .    if relate > 9000 & !missing(relate)
replace relate = 301  if relate == 303
replace relate = 1001 if inlist(relate, 501, 701, 901)
replace relate = 1260 if inlist(relate, 1113, 1114, 1115, 1241, 1242)
 
*** age - flag top-coded ages
* An age is flagged when it equals 99 (any year), 90 (year >= 1988) or
* 80 (year >= 2002).
* NOTE(review): IPUMS may also use 85 as a top code from 2004 on - confirm.
gen age_tc = (age == 99) | (age == 90 & year >= 1988) | (age == 80 & year >= 2002)
label var age_tc "Age was top coded"

*** race and hispan - collapse into white / black / hispanic / other
* Every race code above 200 (i.e. neither white nor black) becomes "other".
replace race = 700 if race > 200 & !missing(race)

* Hispanic origin overrides race. Only substantive origin codes count:
* IPUMS codes 901 ("do not know") and 902 ("N/A") are not evidence of
* Hispanic origin, so those records keep their race code.
replace race = 300 if inrange(hispan, 1, 900)

capture label drop racelb_new
label define racelb_new ///
	100 "white"    ///
	200 "black"    ///
	300 "hispanic" ///
	700 "other"

label values race racelb_new
drop hispan

*** marst - Marital status
* Code 9 is treated as missing.
recode marst (9 = .)

*** bpl - Birth place
* immigrant = 1 when the birthplace code differs from 9900, 0 when it equals
* 9900. Records with a missing birthplace are left missing: "bpl != 9900" is
* true for missing values and would otherwise flag them all as immigrants.
gen immigrant = (bpl != 9900) if !missing(bpl)
label var immigrant "Birthplace code differs from 9900"
drop bpl

*** educ - Educational attainment
* Code 999 is treated as missing.
recode educ (999 = .)

*** empstat - Employment status
* Armed forces (code 13) are folded into code 0 BEFORE the detailed copy is
* taken, so they show up as 0 in empstat_det and as missing in empstat.
replace empstat = 0 if empstat == 13
gen empstat_det = empstat
label var empstat_det "Original CPS empstat (detailed)"
replace empstat = . if empstat == 0		/* 0 is NIU (not in universe) */

* Collapse the detailed codes into employed (10) / unemployed (20) / not in
* labor force (30). Code 13 cannot occur any more at this point, and the
* NILF sub-code 36 is collapsed together with 31-35.
recode empstat (12 = 10) (21 22 = 20) (31 32 33 34 35 36 = 30)

* Relabel code 10 in whichever value label is actually attached to empstat,
* instead of assuming its name; fall back to "empstatlbl" if none is attached.
local empstat_vl : value label empstat
if "`empstat_vl'" == "" local empstat_vl empstatlbl
label define `empstat_vl' 10 "Employed", modify
label values empstat `empstat_vl'

*** occ1990 - Meyer-Osborne coding; the detailed codes are kept as they are
* and a major occupation group (occ1990mjr) is derived from them.
* The code ranges below do not overlap, so they are listed in code order.
gen     occ1990mjr = 1  if occ1990 <= 37
replace occ1990mjr = 2  if inrange(occ1990, 43, 200)
replace occ1990mjr = 3  if inrange(occ1990, 203, 235)
replace occ1990mjr = 4  if inrange(occ1990, 243, 290)
replace occ1990mjr = 5  if inrange(occ1990, 303, 391)
replace occ1990mjr = 10 if inrange(occ1990, 405, 408)
replace occ1990mjr = 8  if inrange(occ1990, 415, 427)
replace occ1990mjr = 9  if inrange(occ1990, 434, 444)
replace occ1990mjr = 10 if inrange(occ1990, 445, 447)
replace occ1990mjr = 9  if inrange(occ1990, 448, 455)
replace occ1990mjr = 10 if inrange(occ1990, 456, 469)
replace occ1990mjr = 11 if inrange(occ1990, 473, 498)
replace occ1990mjr = 6  if inrange(occ1990, 503, 699)
replace occ1990mjr = 7  if inrange(occ1990, 703, 890)
replace occ1990mjr = 12 if occ1990 == 905

* Codes 991 and above are set to missing (after the grouping, which never
* uses them).
replace occ1990 = . if occ1990 >= 991 & !missing(occ1990)

label define occ1990mjr ///
	1  "Managerial"             ///
	2  "Professional"           ///
	3  "Technicians"            ///
	4  "Sales"                  ///
	5  "Office and clerical"    ///
	6  "Production"             ///
	7  "Operators/Laborers"     ///
	8  "Protective Services"    ///
	9  "Food/Cleaning services" ///
	10 "Personal Care"          ///
	11 "Agricultural"           ///
	12 "Armed Forces"
label values occ1990mjr occ1990mjr

*** occ1950 - IPUMS 1950-basis occupation codes, kept as they are
* Codes 997 and above are set to missing.
replace occ1950 = . if occ1950 >= 997 & occ1950 < .

*** ind 
/* Not recoded here; it must be recoded before it can be used */

*** ind1950 - IPUMS 1950-basis industry codes, plus a major industry group
* Codes 997 and above are set to missing.
replace ind1950 = . if ind1950 >= 997 & !missing(ind1950)

* Agriculture covers the positive codes up to 126. Code 0 is the IPUMS
* NIU / not-applicable code and must not be classified as an industry, so
* the range starts at 1 (a bare "ind1950 <= 126" would include it).
gen     ind1950mjr = 11 if inrange(ind1950, 1, 126)
replace ind1950mjr = 1  if inrange(ind1950, 206, 236)
replace ind1950mjr = 2  if ind1950 == 246
replace ind1950mjr = 3  if inrange(ind1950, 306, 598)
replace ind1950mjr = 4  if inrange(ind1950, 606, 627)
replace ind1950mjr = 5  if inrange(ind1950, 636, 699)
replace ind1950mjr = 6  if inrange(ind1950, 716, 746)
replace ind1950mjr = 7  if inrange(ind1950, 806, 817)
replace ind1950mjr = 8  if inrange(ind1950, 826, 859)
replace ind1950mjr = 9  if inrange(ind1950, 868, 899)
replace ind1950mjr = 10 if inrange(ind1950, 906, 936)

label define ind1950mjr ///
	1  "extractive industries"                        ///
	2  "construction"                                 ///
	3  "manufacturing, transportation and utilities"  ///
	4  "wholesale trade"                              ///
	5  "retail trade"                                 ///
	6  "Finance, insurance, and real estate"          ///
	7  "business services"                            ///
	8  "personal services and entertainment"          ///
	9  "professional services"                        ///
	10 "public administration"                        ///
	11 "Agriculture"
label values ind1950mjr ind1950mjr

*** occ50ly / ind50ly - 1950-basis occupation and industry, last year
* No recoding; codes 997 and above are set to missing in both variables.
foreach v of varlist occ50ly ind50ly {
	replace `v' = . if `v' >= 997 & !missing(`v')
}

*** wkswork1 - Weeks worked last year, continuous (only available from 1976)
rename wkswork1 weeksly_cont

*** wkswork2 - Weeks worked last year, in intervals
* Code 9 is treated as missing.
rename wkswork2 weeksly
recode weeksly (9 = .)

*** hrswork - Hours worked last week
* Flag top-coded hours: 99 in any year, or 98 when year >= 1963.
gen hrswork_tc = (hrswork == 99) | (hrswork == 98 & year >= 1963)
label var hrswork_tc "hours top coded"

*** uhrswork - Usual hours worked per week (last year); renamed only
rename uhrswork wuhoursly

*** fullpart - Worked full or part time last year
* Codes 0 and 9 are treated as missing.
replace fullpart = . if inlist(fullpart, 0, 9)

*** incwage - Wage and salary income, reported for last year
* 999997 is flagged as the top code (this top-coding should still be
* verified); values of 999998 and above are set to missing.
gen incwage_tc = (incwage == 999997)
replace incwage = . if incwage >= 999998 & !missing(incwage)

*** migrate1 - migration between last March and this March
* Codes 2 and 9 are set to missing (not binding in the sample post 1976).
replace migrate1 = . if inlist(migrate1, 2, 9)

*** migsta1 - state of residence before the migration, plus a string version
*** Note that this variable is missing for some years
* Where migrate1 == 1 the current state is used as the previous state
* (presumably these are non-movers - confirm against the codebook).
replace migsta1 = state if migrate1 == 1

* migsta1_st holds the upper-cased text that value label migsta1_lbl gives
* for each code; it stays empty where migsta1 is missing.
gen str30 migsta1_st = ""
levelsof migsta1, local(origin_codes)
foreach code of local origin_codes {
	local origin_name : label migsta1_lbl `code'
	replace migsta1_st = upper("`origin_name'") if migsta1 == `code'
}

*** whymove - reason for moving
* Code 0 is treated as missing.
recode whymove (0 = .)

* Write the recoded data set to the output folder, then move to the do-file
* folder. Forward slashes work on every platform Stata runs on, whereas a
* backslash separator only works on Windows.
save "${output}/cps_data", replace

cd "$do"




