###################################################
### chunk number 1: read_boot
###################################################
#line 54 "Lab3_2011_answers.Rnw"
library(foreign)
 boot.data <- read.dta("http://www.quantoid.net/boot_data.dta")
rownames(boot.data) <- boot.data$country
 
 
###################################################
### chunk number 2: mods
###################################################
#line 63 "Lab3_2011_answers.Rnw"
library(mgcv)
library(boot)
mod.lm <- gam(rep1 ~ gdppc + logpop + iwar + cwar + voice*veto, data=boot.data)
mod.gam <- gam(rep1 ~ gdppc + logpop + iwar + cwar + s(voice,veto), data=boot.data)
anova(mod.lm, mod.gam, test='Chisq')
 
 
###################################################
### chunk number 3: bsfun
###################################################
#line 73 "Lab3_2011_answers.Rnw"
resids <- mod.lm$residuals
yhat <- mod.lm$fitted
test.stat <- deviance(mod.lm) - deviance(mod.gam)
boot.gam <- function(dat, inds){
	assign(".inds", inds, envir=.GlobalEnv)
	boot.e <- resids[.inds]
	boot.y <<- yhat + boot.e
	g1 <- update(mod.lm, boot.y ~ .)
	g2 <- update(mod.gam, boot.y ~ .)
	remove(".inds", envir=.GlobalEnv)
	deviance(g1)-deviance(g2)
}
 
boot.g <- boot(boot.data, boot.gam, R=1000)
(sum(boot.g$t>=test.stat)+1)/(1000+1)
 
 
###################################################
### chunk number 4: bsint
###################################################
#line 96 "Lab3_2011_answers.Rnw"
voice.seq <- seq(min(boot.data$voice),
 	max(boot.data$voice), length=25)
veto.seq <- seq(min(boot.data$veto),
 	max(boot.data$veto), length=25)
pred.dat <- expand.grid(voice = voice.seq, veto = veto.seq, 			
	gdppc = mean(boot.data$gdppc, na.rm=T), 
	logpop = mean(boot.data$logpop, na.rm=T),
	iwar = 0,
	cwar = 0)
resids <- mod.gam$residuals
yhat <- mod.gam$fitted
boot.preds <- function(dat, inds){
	assign(".inds", inds, envir=.GlobalEnv)
	boot.e <- resids[.inds]
	boot.y <<- yhat + boot.e
	g2 <- update(mod.gam, boot.y ~ .)
	remove(".inds", envir=.GlobalEnv)
	predict(g2, newdata=pred.dat)
}
boot.p <- boot(boot.data, boot.preds, R=100)
boot.cis <- t(sapply(1:625, function(x)boot.ci(
	boot.p, index = x,  
	type="perc")$perc[4:5]))
 
fit <- predict(mod.gam, newdata=pred.dat)
 
plot.data <- data.frame(
	fit = c(fit, boot.cis[,1], boot.cis[,2]), 
	type = rep(c("fit", "lower", "upper"), each=625), 
	voice = rep(pred.dat$voice, 3), 
	veto = rep(pred.dat$veto, 3))
library(lattice)
cols <- c("black", "#FF000033", "#FF000033")
wireframe(fit ~ voice + veto, 
 	groups=type, data=plot.data, 
 	screen = list(x=-90, z=0, y=-50), col=cols, 
	col.groups = cols)
 
# library(Rcmdr)
# with(plot.data, scatter3d(voice, fit, veto, 
# 	groups=type, fit="smooth", df.smooth=100, 
# 	parallel=F))
 
 
###################################################
### chunk number 5: cvknots
###################################################
#line 143 "Lab3_2011_answers.Rnw"
library(splines)
res <- NULL
kts <- expand.grid(1:10,1:10)
for(i in 1:nrow(kts)){
{if(kts[i,1] < 4){
	tmp.voice <- with(boot.data, 
		poly(voice, kts[i,1]))
}
else{
	tmp.voice <- with(boot.data, 
		bs(voice, df = kts[i,1]))
}
}
 
{if(kts[i,2] < 4){
	tmp.veto <- with(boot.data, 
		poly(veto, kts[i,2]))
}
else{
	tmp.veto <- with(boot.data, 
		bs(veto, df = kts[i,2]))
}
}
 
colnames(tmp.veto) <- paste("ve", 1:ncol(tmp.veto), 
	sep="")
colnames(tmp.voice) <- paste("vo", 1:ncol(tmp.voice), 
	sep="")
 
tmp.data <- cbind(boot.data, tmp.veto, tmp.voice)
 
form <- paste("rep1 ~ gdppc + logpop + 
	iwar + cwar +", paste(c(colnames(tmp.voice), colnames(tmp.veto)), collapse = " + "), sep="")
tmp.mod <-mod1 <- gam(as.formula(form), 
	data=tmp.data)
res <- rbind(res, cv.glm(tmp.data, tmp.mod, 
	K=5)$delta)
}
kts[which.min(res[,2]), ]
 
 
###################################################
### chunk number 6: cvmods
###################################################
#line 187 "Lab3_2011_answers.Rnw"
cvmod <- lm(rep1 ~ gdppc + logpop + iwar + cwar + 
	bs(voice, df=5) + poly(veto, 2), data=boot.data)
summary(cvmod)
 
 
###################################################
### chunk number 7: cvlin
###################################################
#line 195 "Lab3_2011_answers.Rnw"
cv.glm(boot.data, mod.lm, K=5)$delta
res[which.min(res[,2]), ]
 
 
###################################################
### chunk number 8: outliers
###################################################
#line 206 "Lab3_2011_answers.Rnw"
outmod <- lm(rep1 ~ gdppc + logpop + iwar + cwar + 
	bs(voice, df=5) + poly(veto, 2), data=boot.data)
library(car)
outlierTest(outmod)
 
 
###################################################
### chunk number 9: outsum
###################################################
#line 215 "Lab3_2011_answers.Rnw"
hats <- hatvalues(outmod)
hats[which(hats > 2*(outmod$rank)/length(outmod$residuals))]
 
rstud <- rstudent(outmod)
rstud[which(abs(rstud) > 2)]
 
d <- cooks.distance(outmod)
d[which(d > 4/outmod$df.residual)]
 
dfb <- dfbeta(outmod)
apout <- apply(dfb, 2, function(x)x[which(abs(x) > 2/sqrt(length(outmod$residuals)))])
apout
tab <- table(unlist(lapply(apout, names)))
tab <- tab[order(tab)]
 
 
###################################################
### chunk number 10: bigdfb
###################################################
#line 234 "Lab3_2011_answers.Rnw"
tab <- table(unlist(lapply(apout, names)))
tab <- tab[order(tab)]
tab[which(tab > 2)]
 
 
###################################################
### chunk number 11: inflplot
###################################################
#line 242 "Lab3_2011_answers.Rnw"
pdf("labinfl1.pdf", height=6, width=6)
influencePlot(outmod, id.n=5)
invisible(dev.off())
 
 
###################################################
### chunk number 12: robmod
###################################################
#line 256 "Lab3_2011_answers.Rnw"
library(MASS)
robmod <- rlm(rep1 ~ gdppc + logpop + iwar + cwar + 
	bs(voice, df=5) + poly(veto, 2), data=boot.data, 
	method= "MM")
pdf("weightplot.pdf", height=6, width=6)
plot(robmod$w, ylab = 
	"Observation Weight from MM-Estimation")
ind <- which(robmod$w < .8)
text((1:length(robmod$w))[ind], robmod$w[ind],
 	names(robmod$fitted)[ind], pos=1)
invisible(dev.off())
 
 
###################################################
### chunk number 13: boot_lm
###################################################
#line 277 "Lab3_2011_answers.Rnw"
ols.e1 <- residuals(outmod)
ols.fit1 <- fitted(outmod)
rlm.e1 <- residuals(robmod)
rlm.fit1 <- fitted(robmod)
 
 
pred.dat <- data.frame(
	voice = c(voice.seq, 
		rep(mean(boot.data$voice), 25)), 
	veto = c(rep(mean(boot.data$veto), 25), veto.seq), 
	gdppc = mean(boot.data$gdppc), 
	logpop = mean(boot.data$logpop), 
	cwar = 0, iwar = 0)
 
fix.boot.ols <- function(data, inds){
	assign(".inds", inds, envir=.GlobalEnv)
	boot.e <- ols.e1[.inds]
	boot.y <<- ols.fit1 + boot.e
	tmp.mod <- update(outmod, boot.y ~ .)
	remove(".inds", envir=.GlobalEnv)
	predict(tmp.mod, newdata=pred.dat)
}
fixx.ols1 <- boot(boot.data, fix.boot.ols, R=1000)
 
fix.boot.rlm <- function(data, inds){
	assign(".inds", inds, envir=.GlobalEnv)
	boot.e <- rlm.e1[.inds]
	boot.y <<- rlm.fit1 + boot.e
	tmp.mod <- update(robmod, boot.y ~ ., maxit=250)
	remove(".inds", envir=.GlobalEnv)
	predict(tmp.mod, newdata=pred.dat)
}
fixx.rlm1 <- boot(boot.data, fix.boot.rlm, R=1000)
 
ols.ci <- t(sapply(1:50, function(x)boot.ci(fixx.ols1, index=x, type="perc")$perc[4:5]))
 
rlm.ci <- t(sapply(1:50, function(x)boot.ci(fixx.rlm1, index=x, type="perc")$perc[4:5]))
 
ols.fit <- predict(outmod, newdata=pred.dat)
rlm.fit <- predict(robmod, newdata=pred.dat)
 
plot.dat <- data.frame(
	fit = c(ols.fit, rlm.fit), 
	lower = c(ols.ci[,1], rlm.ci[,1]), 
	upper = c(ols.ci[,2], rlm.ci[,2]),
	vals  = rep(c(voice.seq, veto.seq) , 2), 
	var = rep(rep(c("voice", "veto"), each=25), 2), 
	mod = rep(c("OLS", "MM"), each=50))
 
library(lattice)
library(latticeExtra)
pdf("robplot.pdf", height=6, width=6)
trellis.par.set(strip.background=list(col="White"))
print(
useOuterStrips(xyplot(fit ~ vals | mod*var, data=plot.dat, 
	scales=list(x="free"),
	ylim=range(c(plot.dat[,c("lower", "upper")]))*1.1,
	panel = function(x,y,subscripts){
		panel.lines(x,y,col="black")
		panel.lines(x, plot.dat$lower[subscripts], 	
			col="black", lty=2)
		panel.lines(x, plot.dat$upper[subscripts], 
			col="black", lty=2)
	}
	)))
invisible(dev.off())
 
 
###################################################
### chunk number 14: sourcemixtools
###################################################
#line 353 "Lab3_2011_answers.Rnw"
source("http://www.quantoid.net/mixtureTools.R")
 
 
###################################################
### chunk number 15: mix1
###################################################
#line 358 "Lab3_2011_answers.Rnw"
library(flexmix)
boot.data$gdppc <- boot.data$gdppc/10000
model <- FLXMRglmfix(family = "gaussian", 
	nested=list(k=c(1,1), 
	formula = c(~ voice, ~ veto)), 
	fixed = ~ gdppc + logpop + iwar + cwar)
out <- stepFlexmix(rep1 ~ 1, k=2, model=model, 
	data=boot.data, nrep=20)
 
summary(out)
 
 
###################################################
### chunk number 16: mix2
###################################################
#line 373 "Lab3_2011_answers.Rnw"
out.refit <- refit(out)
 
 
###################################################
### chunk number 17: idlist
###################################################
#line 379 "Lab3_2011_answers.Rnw"
idl <- IdentifyList(out@posterior$scaled, boot.data, case=boot.data$ccode, cluster=F, alpha=.05)

Created by Pretty R at inside-R.org