# managing data
#
# Tutorial script: loads the hs1 data set and demonstrates row filtering,
# column selection/dropping, appending (rbind), 1-1 merging, and sorting.
#
# Columns are always referenced explicitly as hs1$<name> instead of through
# attach(): attached columns live on the search path, where `write` shadows
# base::write and any same-named global variable would silently win.
# The workspace is deliberately NOT cleared with rm(list = ls()); a script
# should not destroy objects that belong to the caller's session.

options(digits = 7)

# NOTE(review): this host may no longer serve the file -- confirm the URL
# (or point it at a local copy of hs1.csv) before running.
hs1 <- read.table(
  "http://www.ats.ucla.edu/stat/R/notes/hs1.csv",
  header = TRUE,
  sep = ","
)

# keep observations if condition is met ----
hs1.read.well <- hs1[hs1$read >= 60, ]
dim(hs1.read.well)
mean(hs1.read.well$read)
mean(hs1$read)

# another example: females who read well
hs1.female.readwell <- hs1[hs1$female == 1 & hs1$read >= 60, ]
dim(hs1.female.readwell)

# one more example: females who either read well or write well
hs1.female.read.or.write <- hs1[
  hs1$female == 1 & (hs1$read >= 60 | hs1$write >= 60),
]
dim(hs1.female.read.or.write)

# keeping variables, read and write ----
hs2 <- hs1[, c("read", "write")]
names(hs2)

# another way of doing the same thing, selecting by column position
# (assumes read and write are columns 7 and 8 of hs1 -- TODO confirm)
hs2 <- hs1[, c(7, 8)]
names(hs2)

# dropping variables, read and write (negative positions exclude columns)
hs2.drop <- hs1[, -c(7, 8)]
names(hs2.drop)

# append ----
# creating a subset containing only females
hsfemale <- hs1[hs1$female == 1, ]
# creating a subset containing only males
hsmale <- hs1[hs1$female == 0, ]
dim(hsfemale)
dim(hsmale)
# stack the two subsets back into a single data frame
hs.all <- rbind(hsfemale, hsmale)
dim(hs.all)

# 1-1 merge ----
hs.demo <- hs1[, c("id", "ses", "female", "race")]
hs.scores <- hs1[, c("id", "read", "write", "math", "science")]
dim(hs.demo)
dim(hs.scores)
# all = TRUE keeps ids that appear in only one of the two inputs
hs.merge <- merge(hs.demo, hs.scores, by = "id", all = TRUE)
head(hs.merge)
dim(hs.merge)

# if the linking variable has different name in the two data sets,
# name it separately for each side with by.x / by.y
hs.merge1 <- merge(hs.demo, hs.scores, by.x = "id", by.y = "id", all = TRUE)

# sort using the order function ----
# sort() returns the sorted values of a single vector
sort(hs1$write)
# order() returns the row permutation, so whole rows can be reordered
hs.ordered <- hs1[order(hs1$female), ]
head(hs.ordered, n = 20)
# order by female first, then by ses within each value of female
hs.ordered <- hs1[order(hs1$female, hs1$ses), ]
head(hs.ordered, n = 20)