|
|
|
||||
|
|
|||||
Below is the Stata code that can be used to merge the NSAF data files.
set mem 200m
set more off
* NOTE: Commands such as renames, renvars and tablist may have to be
* downloaded. Use findit <command> to find the command on the internet
* and follow the instructions given to download it.
* NOTE: Three ways are shown for renaming variables. You can use any of these
* methods. They are integrated into the code so that it is clear how each method
* would be used.
use "D:\focalchd.dta", clear
* keep only the variables that you want; otherwise, the files will get to be
* too big for Stata and you will have to move to Stata S/E
keep hhid persid respid ufamid ucpsid fhosp fdent femer fment fdoct site
order hhid persid respid ufamid ucpsid fhosp fdent femer fment fdoct
* rename all vars to have _1 at end to avoid overwriting variables with the same name during merge
foreach var of varlist * {
rename `var' `var'_1
}
* need to destring the merge variables to avoid problems during the merge
* need to keep all of these variables for later merges with data files
* that have only one of these variables available to merge on
destring hhid_1 persid_1 respid_1 ufamid_1 ucpsid_1, gen(hhidn persidn respidn ufamidn ucpsidn)
compress
gen _file1 = 1
count
* 35938
isid hhidn persidn
sort hhidn persidn
save "D:\focalchds.dta", replace
* you can also use the renames command to add the suffix
use "D:\adult_pr.dta", clear
keep hhid persid lwhunp lfdstmp lgenass site
renames *, suffix(_2)
destring hhid_2 persid_2, gen(hhidn persidn)
compress
gen _file2 = 1
count
* 74719
sort hhidn persidn
save "D:\adult_prs.dta", replace
* You can also use the renvars command to add the suffix
use "D:\adult_rn.dta", clear
keep hhid persid site sex typeint fhosp
renvars *, postfix(_3)
destring hhid_3 persid_3, gen(hhidn persidn)
compress
gen _file3 = 1
count
* 52587
isid hhidn persidn
sort hhidn persidn
save "D:\adult_rns.dta", replace
use "D:\adult_rb.dta", clear
keep hhid persid site age sex state uregion
renvars *, postfix(_4)
destring hhid_4 persid_4, gen(hhidn persidn)
compress
gen _file4 = 1
count
* 16765
isid hhidn persidn
sort hhidn persidn
save "D:\adult_rbs.dta", replace
use "D:\famresp.dta", clear
keep hhid respid site kafdctim lcredit kaskcare
renvars *, postfix(_5)
destring hhid_5 respid_5, gen(hhidn respidn)
* compress
gen _file5 = 1
count
* 46705
isid hhidn respidn
sort hhidn respidn
save "D:\famresps.dta", replace
use "D:\person.dta", clear
keep hhid persid sex site uexprrp upfrel xsupnum
renvars *, postfix(_6)
destring hhid_6 persid_6, gen(hhidn persidn)
compress
gen _file6 = 1
count
* 144766
isid hhidn persidn
sort hhidn persidn
save "D:\persons.dta", replace
use "D:\socfam.dta", clear
keep hhid ufamid site jeaf jab2pov jsupkid jchildf
foreach var of varlist * {
rename `var' `var'_7
}
destring hhid_7 ufamid_7, gen(hhidn ufamidn)
compress
gen _file7 = 1
count
* 44303
isid hhidn ufamidn
sort hhidn ufamidn
save "D:\socfams.dta", replace
use "D:\cpsfam.dta", clear
keep hhid ucpsid site ublinc ublga ubloth uincrpov uwrk
foreach var of varlist * {
rename `var' `var'_8
}
destring hhid_8 ucpsid_8, gen(hhidn ucpsidn)
compress
gen _file8 = 1
count
* 44303
isid hhidn ucpsidn
sort hhidn ucpsidn
save "D:\cpsfams.dta", replace
use "D:\househld.dta", clear
keep hhid varstrat varunit state ufips uhhcount site
foreach var of varlist * {
rename `var' `var'_9
}
destring hhid_9, gen(hhidn)
compress
gen _file9 = 1
count
* 42360
isid hhidn
sort hhidn
save "D:\households.dta", replace
/*
This code counts the number of persons in each size of household
use "D:\persons.dta", clear
gen x = 1
egen s = sum(x), by(hhidn)
tab s
s | Freq. Percent Cum.
------------+-----------------------------------
1 | 4,259 2.94 2.94
2 | 15,942 11.01 13.95
3 | 29,592 20.44 34.40
4 | 46,100 31.84 66.24
5 | 27,905 19.28 85.52
6 | 12,030 8.31 93.83
7 | 4,914 3.39 97.22
8 | 1,936 1.34 98.56
9 | 972 0.67 99.23
10 | 550 0.38 99.61
11 | 297 0.21 99.81
12 | 156 0.11 99.92
13 | 52 0.04 99.96
14 | 28 0.02 99.98
15 | 15 0.01 99.99
18 | 18 0.01 100.00
------------+-----------------------------------
Total | 144,766 100.00
*/
*********************************************************************
* NOTE: The order in which you merge the data files MATTERS!!!
* You need to merge the file with the least restrictive merge last!
* In this case, all of the files have two variables on which to merge,
* except the household data file, which has only one merge variable.
* Because it only has one merge variable, the household data set has the
* least restrictive merge, and needs to be merged last.
use "D:\focalchds.dta"
* sort hhidn persidn
merge hhidn persidn using "D:\adult_prs.dta", _merge(merge1)
save "D:\merge1.dta", replace
tab merge1
* use "D:\merge1.dta"
sort hhidn persidn
merge hhidn persidn using "D:\adult_rns.dta", _merge(merge2)
save "D:\merge2.dta", replace
tab merge2
* use "D:\merge2.dta"
sort hhidn persidn
merge hhidn persidn using "D:\adult_rbs.dta", _merge(merge3)
save "D:\merge3.dta", replace
tab merge3
* use "D:\merge3.dta", clear
sort hhidn respidn
merge hhidn respidn using "D:\famresps.dta", _merge(merge4)
save "D:\merge4.dta", replace
tab merge4
* use "D:\merge4.dta", clear
sort hhidn persidn
merge hhidn persidn using "D:\persons.dta", _merge(merge5)
save "D:\merge5.dta", replace
tab merge5
* use "D:\merge5.dta", clear
sort hhidn ufamidn
merge hhidn ufamidn using "D:\socfams.dta", _merge(merge6)
save "D:\merge6.dta", replace
tab merge6
* use "D:\merge6.dta", clear
sort hhidn ucpsidn
merge hhidn ucpsidn using "D:\cpsfams.dta", _merge(merge7)
save "D:\merge7.dta", replace
tab merge7
* use "D:\merge7.dta", clear
sort hhidn
merge hhidn using "D:\households.dta", _merge(merge8)
save "D:\merge8.dta", replace
tab merge8
compress
format hhidn persidn %20.0f
save "D:\nsaf99.dta", replace
* The commands below are used for verifying that the merge worked as expected.
describe
tab1 merge*
tablist _file*
summ
***************
/*
There are 193,805 observations in the final data set.
The table below is the output from the tablist _file* command indicates which files
* contributed observations. For example, the first line of the table indicates
* that 35,889 observations were contributed from files 1, 5, 6, 7, 8 and 9.
tablist _file*
+----------------------------------------------------------------------------------------+
| _file1 _file2 _file3 _file4 _file5 _file6 _file7 _file8 _file9 Freq |
|----------------------------------------------------------------------------------------|
| 1 . . . 1 1 1 1 1 35889 |
| . 1 1 . . 1 . . 1 35798 |
| . . . . . 1 . . 1 34158 |
| . 1 . . . 1 . . 1 22107 |
| . . . . . . . 1 1 17573 |
|----------------------------------------------------------------------------------------|
| . . . . 1 . . . 1 16765 |
| . 1 1 1 . 1 . . 1 16765 |
| . . . . . . 1 . 1 14701 |
| 1 1 . . 1 1 1 1 1 25 |
| 1 1 1 . 1 1 1 1 1 24 |
+----------------------------------------------------------------------------------------+
The output below is from the tab1 merge* command above.
tab1 merge*
-> tabulation of merge1
merge1 | Freq. Percent Cum.
------------+-----------------------------------
1 | 35,889 32.45 32.45
2 | 74,670 67.51 99.96
3 | 49 0.04 100.00
------------+-----------------------------------
Total | 110,608 100.00
-> tabulation of merge2
merge2 | Freq. Percent Cum.
------------+-----------------------------------
1 | 58,021 52.46 52.46
3 | 52,587 47.54 100.00
------------+-----------------------------------
Total | 110,608 100.00
-> tabulation of merge3
merge3 | Freq. Percent Cum.
------------+-----------------------------------
1 | 93,843 84.84 84.84
3 | 16,765 15.16 100.00
------------+-----------------------------------
Total | 110,608 100.00
-> tabulation of merge4
merge4 | Freq. Percent Cum.
------------+-----------------------------------
1 | 74,670 58.62 58.62
2 | 16,765 13.16 71.79
3 | 35,938 28.21 100.00
------------+-----------------------------------
Total | 127,373 100.00
-> tabulation of merge5
merge5 | Freq. Percent Cum.
------------+-----------------------------------
1 | 16,765 10.38 10.38
2 | 34,158 21.15 31.53
3 | 110,608 68.47 100.00
------------+-----------------------------------
Total | 161,531 100.00
-> tabulation of merge6
merge6 | Freq. Percent Cum.
------------+-----------------------------------
1 | 125,593 71.27 71.27
2 | 14,701 8.34 79.61
3 | 35,938 20.39 100.00
------------+-----------------------------------
Total | 176,232 100.00
-> tabulation of merge7
merge7 | Freq. Percent Cum.
------------+-----------------------------------
1 | 140,294 72.39 72.39
2 | 17,573 9.07 81.46
3 | 35,938 18.54 100.00
------------+-----------------------------------
Total | 193,805 100.00
-> tabulation of merge8
merge8 | Freq. Percent Cum.
------------+-----------------------------------
3 | 193,805 100.00 100.00
------------+-----------------------------------
Total | 193,805 100.00
*/
UCLA Researchers are invited to our Statistical Consulting Services
We recommend others to our list of Other Resources for Statistical Computing Help
These pages are Copyrighted (c) by UCLA Academic Technology Services