################################################### ### chunk number 1: read_boot ################################################### #line 54 "Lab3_2011_answers.Rnw" library(foreign) boot.data <- read.dta("http://www.quantoid.net/boot_data.dta") rownames(boot.data) <- boot.data$country ################################################### ### chunk number 2: mods ################################################### #line 63 "Lab3_2011_answers.Rnw" library(mgcv) library(boot) mod.lm <- gam(rep1 ~ gdppc + logpop + iwar + cwar + voice*veto, data=boot.data) mod.gam <- gam(rep1 ~ gdppc + logpop + iwar + cwar + s(voice,veto), data=boot.data) anova(mod.lm, mod.gam, test='Chisq') ################################################### ### chunk number 3: bsfun ################################################### #line 73 "Lab3_2011_answers.Rnw" resids <- mod.lm$residuals yhat <- mod.lm$fitted test.stat <- deviance(mod.lm) - deviance(mod.gam) boot.gam <- function(dat, inds){ assign(".inds", inds, envir=.GlobalEnv) boot.e <- resids[.inds] boot.y <<- yhat + boot.e g1 <- update(mod.lm, boot.y ~ .) g2 <- update(mod.gam, boot.y ~ .) remove(".inds", envir=.GlobalEnv) deviance(g1)-deviance(g2) } boot.g <- boot(boot.data, boot.gam, R=1000) (sum(boot.g$t>=test.stat)+1)/(1000+1) ################################################### ### chunk number 4: bsint ################################################### #line 96 "Lab3_2011_answers.Rnw" voice.seq <- seq(min(boot.data$voice), max(boot.data$voice), length=25) veto.seq <- seq(min(boot.data$veto), max(boot.data$veto), length=25) pred.dat <- expand.grid(voice = voice.seq, veto = veto.seq, gdppc = mean(boot.data$gdppc, na.rm=T), logpop = mean(boot.data$logpop, na.rm=T), iwar = 0, cwar = 0) resids <- mod.gam$residuals yhat <- mod.gam$fitted boot.preds <- function(dat, inds){ assign(".inds", inds, envir=.GlobalEnv) boot.e <- resids[.inds] boot.y <<- yhat + boot.e g2 <- update(mod.gam, boot.y ~ .) remove(".inds", envir=.GlobalEnv) predict(g2, newdata=pred.dat) } boot.p <- boot(boot.data, boot.preds, R=100) boot.cis <- t(sapply(1:625, function(x)boot.ci( boot.p, index = x, type="perc")$perc[4:5])) fit <- predict(mod.gam, newdata=pred.dat) plot.data <- data.frame( fit = c(fit, boot.cis[,1], boot.cis[,2]), type = rep(c("fit", "lower", "upper"), each=625), voice = rep(pred.dat$voice, 3), veto = rep(pred.dat$veto, 3)) library(lattice) cols <- c("black", "#FF000033", "#FF000033") wireframe(fit ~ voice + veto, groups=type, data=plot.data, screen = list(x=-90, z=0, y=-50), col=cols, col.groups = cols) # library(Rcmdr) # with(plot.data, scatter3d(voice, fit, veto, # groups=type, fit="smooth", df.smooth=100, # parallel=F)) ################################################### ### chunk number 5: cvknots ################################################### #line 143 "Lab3_2011_answers.Rnw" library(splines) res <- NULL kts <- expand.grid(1:10,1:10) for(i in 1:nrow(kts)){ {if(kts[i,1] < 4){ tmp.voice <- with(boot.data, poly(voice, kts[i,1])) } else{ tmp.voice <- with(boot.data, bs(voice, df = kts[i,1])) } } {if(kts[i,2] < 4){ tmp.veto <- with(boot.data, poly(veto, kts[i,2])) } else{ tmp.veto <- with(boot.data, bs(veto, df = kts[i,2])) } } colnames(tmp.veto) <- paste("ve", 1:ncol(tmp.veto), sep="") colnames(tmp.voice) <- paste("vo", 1:ncol(tmp.voice), sep="") tmp.data <- cbind(boot.data, tmp.veto, tmp.voice) form <- paste("rep1 ~ gdppc + logpop + iwar + cwar +", paste(c(colnames(tmp.voice), colnames(tmp.veto)), collapse = " + "), sep="") tmp.mod <-mod1 <- gam(as.formula(form), data=tmp.data) res <- rbind(res, cv.glm(tmp.data, tmp.mod, K=5)$delta) } kts[which.min(res[,2]), ] ################################################### ### chunk number 6: cvmods ################################################### #line 187 "Lab3_2011_answers.Rnw" cvmod <- lm(rep1 ~ gdppc + logpop + iwar + cwar + bs(voice, df=5) + poly(veto, 2), data=boot.data) summary(cvmod) ################################################### ### chunk number 7: cvlin ################################################### #line 195 "Lab3_2011_answers.Rnw" cv.glm(boot.data, mod.lm, K=5)$delta res[which.min(res[,2]), ] ################################################### ### chunk number 8: outliers ################################################### #line 206 "Lab3_2011_answers.Rnw" outmod <- lm(rep1 ~ gdppc + logpop + iwar + cwar + bs(voice, df=5) + poly(veto, 2), data=boot.data) library(car) outlierTest(outmod) ################################################### ### chunk number 9: outsum ################################################### #line 215 "Lab3_2011_answers.Rnw" hats <- hatvalues(outmod) hats[which(hats > 2*(outmod$rank)/length(outmod$residuals))] rstud <- rstudent(outmod) rstud[which(abs(rstud) > 2)] d <- cooks.distance(outmod) d[which(d > 4/outmod$df.residual)] dfb <- dfbeta(outmod) apout <- apply(dfb, 2, function(x)x[which(abs(x) > 2/sqrt(length(outmod$residuals)))]) apout tab <- table(unlist(lapply(apout, names))) tab <- tab[order(tab)] ################################################### ### chunk number 10: bigdfb ################################################### #line 234 "Lab3_2011_answers.Rnw" tab <- table(unlist(lapply(apout, names))) tab <- tab[order(tab)] tab[which(tab > 2)] ################################################### ### chunk number 11: inflplot ################################################### #line 242 "Lab3_2011_answers.Rnw" pdf("labinfl1.pdf", height=6, width=6) influencePlot(outmod, id.n=5) invisible(dev.off()) ################################################### ### chunk number 12: robmod ################################################### #line 256 "Lab3_2011_answers.Rnw" library(MASS) robmod <- rlm(rep1 ~ gdppc + logpop + iwar + cwar + bs(voice, df=5) + poly(veto, 2), data=boot.data, method= "MM") pdf("weightplot.pdf", height=6, width=6) plot(robmod$w, ylab = "Observation Weight from MM-Estimation") ind <- which(robmod$w < .8) text((1:length(robmod$w))[ind], robmod$w[ind], names(robmod$fitted)[ind], pos=1) invisible(dev.off()) ################################################### ### chunk number 13: boot_lm ################################################### #line 277 "Lab3_2011_answers.Rnw" ols.e1 <- residuals(outmod) ols.fit1 <- fitted(outmod) rlm.e1 <- residuals(robmod) rlm.fit1 <- fitted(robmod) pred.dat <- data.frame( voice = c(voice.seq, rep(mean(boot.data$voice), 25)), veto = c(rep(mean(boot.data$veto), 25), veto.seq), gdppc = mean(boot.data$gdppc), logpop = mean(boot.data$logpop), cwar = 0, iwar = 0) fix.boot.ols <- function(data, inds){ assign(".inds", inds, envir=.GlobalEnv) boot.e <- ols.e1[.inds] boot.y <<- ols.fit1 + boot.e tmp.mod <- update(outmod, boot.y ~ .) remove(".inds", envir=.GlobalEnv) predict(tmp.mod, newdata=pred.dat) } fixx.ols1 <- boot(boot.data, fix.boot.ols, R=1000) fix.boot.rlm <- function(data, inds){ assign(".inds", inds, envir=.GlobalEnv) boot.e <- rlm.e1[.inds] boot.y <<- rlm.fit1 + boot.e tmp.mod <- update(robmod, boot.y ~ ., maxit=250) remove(".inds", envir=.GlobalEnv) predict(tmp.mod, newdata=pred.dat) } fixx.rlm1 <- boot(boot.data, fix.boot.rlm, R=1000) ols.ci <- t(sapply(1:50, function(x)boot.ci(fixx.ols1, index=x, type="perc")$perc[4:5])) rlm.ci <- t(sapply(1:50, function(x)boot.ci(fixx.rlm1, index=x, type="perc")$perc[4:5])) ols.fit <- predict(outmod, newdata=pred.dat) rlm.fit <- predict(robmod, newdata=pred.dat) plot.dat <- data.frame( fit = c(ols.fit, rlm.fit), lower = c(ols.ci[,1], rlm.ci[,1]), upper = c(ols.ci[,2], rlm.ci[,2]), vals = rep(c(voice.seq, veto.seq) , 2), var = rep(rep(c("voice", "veto"), each=25), 2), mod = rep(c("OLS", "MM"), each=50)) library(lattice) library(latticeExtra) pdf("robplot.pdf", height=6, width=6) trellis.par.set(strip.background=list(col="White")) print( useOuterStrips(xyplot(fit ~ vals | mod*var, data=plot.dat, scales=list(x="free"), ylim=range(c(plot.dat[,c("lower", "upper")]))*1.1, panel = function(x,y,subscripts){ panel.lines(x,y,col="black") panel.lines(x, plot.dat$lower[subscripts], col="black", lty=2) panel.lines(x, plot.dat$upper[subscripts], col="black", lty=2) } ))) invisible(dev.off()) ################################################### ### chunk number 14: sourcemixtools ################################################### #line 353 "Lab3_2011_answers.Rnw" source("http://www.quantoid.net/mixtureTools.R") ################################################### ### chunk number 15: mix1 ################################################### #line 358 "Lab3_2011_answers.Rnw" library(flexmix) boot.data$gdppc <- boot.data$gdppc/10000 model <- FLXMRglmfix(family = "gaussian", nested=list(k=c(1,1), formula = c(~ voice, ~ veto)), fixed = ~ gdppc + logpop + iwar + cwar) out <- stepFlexmix(rep1 ~ 1, k=2, model=model, data=boot.data, nrep=20) summary(out) ################################################### ### chunk number 16: mix2 ################################################### #line 373 "Lab3_2011_answers.Rnw" out.refit <- refit(out) ################################################### ### chunk number 17: idlist ################################################### #line 379 "Lab3_2011_answers.Rnw" idl <- IdentifyList(out@posterior$scaled, boot.data, case=boot.data$ccode, cluster=F, alpha=.05)