rm(list=ls()) p<-read.table("d:/work/data/stata/poisson_sim.csv", sep=",", header = TRUE) summary(p) p$pcat <-factor(p$prog, labels=c("General", "Academic", "Vocational")) table(p$pcat) tapply(p$num_awards, p$pcat, mean) tapply(p$num_awards, p$pcat, sd) barplot(table(num_awards), space=0, ylim=c(0, 160), xlab="number of awards") # running the model attach(p) b.<-relevel(pcat, "General") m1<-glm(num_awards ~ b. + math, family="poisson", p) summary(m1) # robust standard error library(sandwich) cov.m1<-vcovHC (m1, type="HC0") std.err<-sqrt(diag(cov.m1)) r.est<-cbind(estimate=m1$coefficients, std.err, pvalue=round(2*(1-pnorm(abs(m1$coefficients/std.err))),5), lower=m1$coefficients-1.96*std.err, upper=m1$coefficients+1.96*std.err) r.est # perform a goodness of test using the residual deviance gof<-cbind(res.diviance=m1$deviance, df=m1$df.residual, p=1-pchisq(m1$deviance, m1$df.residual)) gof # over test of the effect of pcat m1<-glm(num_awards ~ b. + math, family="poisson", p) m2<-glm(num_awards ~ math, family="poisson", p) dev.dff<-anova(m2, m1) 1-pchisq(dev.dff[2,4], dev.dff[2,3]) # getting incident rate ratios and their standard errors library(msm) estmean<-coef(m1) var<-cov.m1 s2<-deltamethod (~ exp(x2), estmean, var) s3<-deltamethod (~ exp(x3), estmean, var) s4<-deltamethod (~ exp(x4), estmean, var) irr<-cbind(irr=exp(estmean)[2:4], r.serr=rbind(s2, s3, s4), lower=exp(m1$coefficients-1.96*std.err)[2:4], upper=exp(m1$coefficients+1.96*std.err)[2:4]) irr # marginal effect s1<-as.data.frame(rep(mean(math), 3)) colnames(s1)<-c("math") mypcat <-factor(c(1, 2, 3), labels=c("General", "Academic", "Vocational")) s1$b.<-relevel(mypcat, "General") s1 # standard error is not robust phat1<-predict(m1, s1, type="response", se.fit=TRUE) print(phat1) #graphing the predicted values p$phat<-predict(m1, type="response") pall<-p[, c("pcat", "phat", "math")][order(p$math),] pg<-pall[pall$pcat=="General", ] pa<-pall[pall$pcat=="Academic", ] pv<-pall[pall$pcat=="Vocational", ] plot(pg$math, pg$phat, xlim=c(30, 80), ylim=c(0, 3), type="l", lty=3, xlab="Math Score", ylab="Expected number of awards") lines(pa$math, pa$phat, type="l", lty=1) lines(pv$math, pv$phat, type="l", lty=2) legend(33.5, 2.7,c("General","Academic", "Vocation"), lty=c(3, 1, 2))