################################################### ### chunk number 1: bsmed1 ################################################### #line 450 "Bootstrapping_2011.Rnw" options(useFancyQuotes=F) set.seed(123) n <- 25 x <- rchisq(n,1,1) meds <- rep(NA, 1000) for(i in 1:1000){ meds[i] <- median(x[sample(1:n, n, replace=T)]) } median(x) summary(meds) ################################################### ### chunk number 2: normci ################################################### #line 472 "Bootstrapping_2011.Rnw" se <- sd(meds) bias <- mean(meds) - median(x) norm.ci <- median(x)- bias + qnorm((1+.95)/2)*se*c(-1,1) ################################################### ### chunk number 3: pctci ################################################### #line 480 "Bootstrapping_2011.Rnw" med.ord <- meds[order(meds)] pct.ci <- med.ord[c(25,975)] ################################################### ### chunk number 4: bcaci ################################################### #line 487 "Bootstrapping_2011.Rnw" zhat <- qnorm(mean(meds < median(x))) medi <- sapply(1:n, function(i)median(x[-i])) Tdot <- mean(medi) ahat <- sum((Tdot-medi)^3)/(6*sum((Tdot-medi)^2)^(3/2)) zalpha <- 1.96 z1alpha <- -1.96 a1 <- pnorm(zhat + ((zhat + zalpha)/(1-ahat*(zhat + zalpha)))) a2 <- pnorm(zhat + ((zhat + z1alpha)/(1-ahat*(zhat + z1alpha)))) a1 <- floor(a1*1000) a2 <- ceiling(a2*1000) bca.ci <- med.ord[c(a2, a1)] ################################################### ### chunk number 5: makemat ################################################### #line 508 "Bootstrapping_2011.Rnw" mat <- rbind(norm.ci, pct.ci, bca.ci) rownames(mat) <- c("norm", "pct", "bca") colnames(mat) <- c("lower", "upper") round(mat, 3) ################################################### ### chunk number 6: withboot ################################################### #line 520 "Bootstrapping_2011.Rnw" library(boot) set.seed(123) med.fun <- function(dat, inds){ assign(".inds", inds, envir=.GlobalEnv) med <- median(x[.inds]) remove(".inds", envir=.GlobalEnv) med } boot.med <- boot(x, med.fun, R=1000) boot.ci(boot.med) ################################################### ### chunk number 7: bscor ################################################### #line 541 "Bootstrapping_2011.Rnw" set.seed(123) library(car) data(Mroz) Mroz <- Mroz[order(Mroz$hc), ] cor.fun <- function(dat, inds, strata=Mroz$hc){ assign(".inds", inds, envir=.GlobalEnv) tmp <- dat[.inds, ] cor1 <- cor(tmp$age[1:458], tmp$lwg[1:458], use="complete") cor2 <- cor(tmp$age[459:753], tmp$lwg[459:753], use="complete") remove(".inds", envir=.GlobalEnv) cor1-cor2 } boot.cor <- boot(Mroz, cor.fun, R=2000) ################################################### ### chunk number 8: printcor ################################################### #line 568 "Bootstrapping_2011.Rnw" library(boot) boot.ci(boot.cor) ################################################### ### chunk number 9: weak ################################################### #line 607 "Bootstrapping_2011.Rnw" dat <- read.table("~/Desktop/ICPSR_Slides/Lecture 1/Weakliem.txt", header=T) dat <- dat[-c(25,49), ] mod <- lm(secpay ~ gini*democrat,data=dat) summary(mod) ################################################### ### chunk number 10: fixx ################################################### #line 624 "Bootstrapping_2011.Rnw" set.seed(123) resids <- mod$residuals yhat <- mod$fitted reg.fun <- function(dat, inds){ assign(".inds", inds, envir=.GlobalEnv) boot.e <- resids[.inds] boot.y <- yhat + boot.e mod <- lm(boot.y ~ gini*democrat, data=dat) remove(".inds", envir=.GlobalEnv) mod$coef } boot.reg1 <- boot(dat, reg.fun, R=2000) ################################################### ### chunk number 11: fixxci ################################################### #line 643 "Bootstrapping_2011.Rnw" sapply(1:4, function(x)boot.ci(boot.reg1, type=c("perc", "bca"), index = x)$percent)[4:5,] sapply(1:4, function(x)boot.ci(boot.reg1, type=c("perc", "bca"), index = x)$bca)[4:5,] ################################################### ### chunk number 12: ranx ################################################### #line 664 "Bootstrapping_2011.Rnw" set.seed(123) reg.fun2 <- function(dat, inds){ assign(".inds", inds, envir=.GlobalEnv) tmp <- dat[.inds, ] mod <- lm(secpay ~ gini*democrat, data=tmp) remove(".inds", envir=.GlobalEnv) mod$coef } boot.reg2 <- boot(dat, reg.fun2, R=2000) ################################################### ### chunk number 13: ranxci ################################################### #line 679 "Bootstrapping_2011.Rnw" sapply(1:4, function(x)boot.ci(boot.reg2, type=c("perc", "bca"), index = x)$percent)[4:5,] sapply(1:4, function(x)boot.ci(boot.reg2, type=c("perc", "bca"), index = x)$bca)[4:5,] ################################################### ### chunk number 14: refit ################################################### #line 692 "Bootstrapping_2011.Rnw" dat <- read.table("~/Desktop/ICPSR_Slides/Lecture 1/Weakliem.txt", header=T) mod <- lm(secpay ~ gini*democrat,data=dat) set.seed(123) resids <- mod$residuals yhat <- mod$fitted reg.fun <- function(dat, inds){ assign(".inds", inds, envir=.GlobalEnv) boot.e <- resids[.inds] boot.y <- yhat + boot.e mod <- lm(boot.y ~ gini*democrat, data=dat) remove(".inds", envir=.GlobalEnv) mod$coef } boot.reg1 <- boot(dat, reg.fun, R=2000) set.seed(123) reg.fun2 <- function(dat, inds){ assign(".inds", inds, envir=.GlobalEnv) tmp <- dat[.inds, ] mod <- lm(secpay ~ gini*democrat, data=tmp) remove(".inds", envir=.GlobalEnv) mod$coef } boot.reg2 <- boot(dat, reg.fun2, R=2000) ################################################### ### chunk number 15: morefixxci ################################################### #line 718 "Bootstrapping_2011.Rnw" sapply(1:4, function(x)boot.ci(boot.reg1, type=c("perc", "bca"), index = x)$percent)[4:5,] sapply(1:4, function(x)boot.ci(boot.reg1, type=c("perc", "bca"), index = x)$bca)[4:5,] sapply(1:4, function(x)boot.ci(boot.reg2, type=c("perc", "bca"), index = x)$percent)[4:5,] sapply(1:4, function(x)boot.ci(boot.reg2, type=c("perc", "bca"), index = x)$bca)[4:5,] ################################################### ### chunk number 16: bsdens ################################################### #line 740 "Bootstrapping_2011.Rnw" pdf("boot_dist.pdf", height=6, width=6) par(mfrow=c(2,2)) for(j in 1:4){ plot(density(boot.reg1$t[,j]), xlab="T*", ylab="Density") lines(density(boot.reg2$t[,j]), lty=2) } invisible(dev.off()) ################################################### ### chunk number 17: jab1 ################################################### #line 792 "Bootstrapping_2011.Rnw" pdf("jab1.pdf", height=6, width=6) par(mfrow=c(2,2)) jack.after.boot(boot.reg1, index=1) jack.after.boot(boot.reg1, index=2) jack.after.boot(boot.reg1, index=3) jack.after.boot(boot.reg1, index=4) invisible(dev.off()) pdf("jab2.pdf", height=6, width=6) par(mfrow=c(2,2)) jack.after.boot(boot.reg2, index=1) jack.after.boot(boot.reg2, index=2) jack.after.boot(boot.reg2, index=3) jack.after.boot(boot.reg2, index=4) invisible(dev.off()) ################################################### ### chunk number 18: fixx_loess ################################################### #line 842 "Bootstrapping_2011.Rnw" library(foreign) dat <- read.dta("~/Desktop/ICPSR_Slides/Lecture 6/jacob.dta") dat$perot <- (dat$perotvote - min(dat$perotvote))/ diff(range(dat$perotvote)) dat$chal <- (dat$chal_vote - min(dat$chal_vote))/ diff(range(dat$chal_vote)) seq.range <- function(x)seq(min(x), max(x), length=250) s <- seq.range(dat$perot) df <- data.frame(perot = s) lo.mod <- loess(chal ~ perot, data=dat, span=.75) resids <- lo.mod$residuals yhat <- lo.mod$fitted lo.fun <- function(dat, inds){ assign(".inds", inds, envir=.GlobalEnv) boot.e <- resids[.inds] boot.y <- yhat + boot.e tmp.lo <- loess(boot.y ~ perot, data=dat, span=.75) preds <- predict(tmp.lo, newdata=df) remove(".inds", envir=.GlobalEnv) preds } boot.perot1 <- boot(dat, lo.fun, R=1000) ################################################### ### chunk number 19: ranx_loess ################################################### #line 879 "Bootstrapping_2011.Rnw" lo.fun2 <- function(dat, inds){ assign(".inds", inds, envir=.GlobalEnv) tmp.lo <- loess(boot.y ~ perot, data=dat[.inds,], span=.75) preds <- predict(tmp.lo, newdata=df) remove(".inds", envir=.GlobalEnv) preds } boot.perot2 <- boot(dat, lo.fun, R=1000) ################################################### ### chunk number 20: loci ################################################### #line 898 "Bootstrapping_2011.Rnw" out1 <- sapply(1:250, function(i)boot.ci(boot.perot1, index=i, type="perc")$perc) out2 <- sapply(1:250, function(i)boot.ci(boot.perot2, index=i, type="perc")$perc) preds <- predict(lo.mod, newdata=df,se=T) lower <- preds$fit - 1.96*preds$se upper <- preds$fit + 1.96*preds$se ci1 <- t(out1[4:5, ]) ci2 <- t(out2[4:5, ]) ci3 <- cbind(lower, upper) pdf("lo_ci.pdf", height=6, width=6) plot(s, preds$fit, type = "l", ylim=range(c(ci1, ci2, ci3))) poly.x <- c(s, rev(s)) polygon(poly.x, y=c(ci1[,1], rev(ci1[,2])), col=rgb(1,0,0,.25), border=NA) polygon(poly.x, y=c(ci2[,1], rev(ci2[,2])), col=rgb(0,1,0,.25), border=NA) polygon(poly.x, y=c(ci3[,1], rev(ci3[,2])), col=rgb(0,0,1,.25), border=NA) legend("topleft", c("Random-X", "Fixed-X", "Analytical"), fill = rgb(c(1,0,0), c(0,1,0), c(0,0,1), c(.25,.25,.25)), inset=.01) invisible(dev.off()) ################################################### ### chunk number 21: nltest ################################################### #line 995 "Bootstrapping_2011.Rnw" library(mgcv) library(boot) library(car) mod1 <- gam(prestige ~ log(income) + poly(women, 2) + poly(education, 2), data=Prestige) mod2 <- gam(prestige ~ s(income, bs="cr") + poly(women, 2) + poly(education, 2), data=Prestige) orig.t <- deviance(mod1) - deviance(mod2) resids <- mod1$residuals yhat <- mod1$fitted test.fun <- function(dat, inds){ assign(".inds", inds, envir=.GlobalEnv) boot.e <- resids[.inds] boot.y <- yhat + boot.e tmp.mod1 <- gam(boot.y ~ log(income) + poly(women, 2) + poly(education, 2), data=dat) tmp.mod2 <- gam(boot.y ~ s(income, bs="cr") + poly(women, 2) + poly(education, 2), data=dat) remove(".inds", envir=.GlobalEnv) deviance(tmp.mod1) - deviance(tmp.mod2) } ################################################### ### chunk number 22: nltest.res ################################################### #line 1027 "Bootstrapping_2011.Rnw" boot.t <- boot(Prestige, test.fun, R=1000) (sum(boot.t$t >= orig.t)+1)/(1000+1) boot.ci(boot.t, type=c("perc", "bca")) ################################################### ### chunk number 23: cv_secpay ################################################### #line 1119 "Bootstrapping_2011.Rnw" dat <- read.table("~/Desktop/ICPSR_Slides/Lecture 1/Weakliem.txt", header=T) dat <- dat[-c(25,49), ] mod1 <- glm(secpay ~ gini*democrat, data=dat) mod2 <- glm(secpay ~ gini+democrat, data=dat) cv.glm(dat, mod1, K=5)$delta cv.glm(dat, mod2, K=5)$delta ################################################### ### chunk number 24: cv_prest ################################################### #line 1133 "Bootstrapping_2011.Rnw" library(car) data(Prestige) mod1 <- glm(prestige ~ log(income) + women + education, data=Prestige) mod2 <- glm(prestige ~ log(income) + poly(women,2) + education, data=Prestige) mod3 <- glm(prestige ~ log(income) + poly(women,2) + poly(education, 3), data=Prestige) cv.glm(Prestige, mod1, K=10)$delta cv.glm(Prestige, mod2, K=10)$delta cv.glm(Prestige, mod3, K=10)$delta ################################################### ### chunk number 25: cv_spline ################################################### #line 1154 "Bootstrapping_2011.Rnw" dat <- read.dta("~/Desktop/ICPSR_Slides/Lecture 6/jacob.dta") dat$perot <- (dat$perotvote - min(dat$perotvote))/ diff(range(dat$perotvote)) dat$chal <- (dat$chal_vote - min(dat$chal_vote))/ diff(range(dat$chal_vote)) library(splines) mod1 <- glm(chal ~ perot, data=dat) mod2 <- glm(chal ~ poly(perot, 2), data=dat) mod3 <- glm(chal ~ poly(perot, 3), data=dat) mod4 <- glm(chal ~ bs(perot, df=4, Boundary.knots=c(0,1)), data=dat) mod5 <- glm(chal ~ bs(perot, df=5, Boundary.knots=c(0,1)), data=dat) mod6 <- glm(chal ~ bs(perot, df=6, Boundary.knots=c(0,1)), data=dat) cv.glm(dat, mod1, K=10)$delta cv.glm(dat, mod2, K=10)$delta cv.glm(dat, mod3, K=10)$delta cv.glm(dat, mod4, K=10)$delta cv.glm(dat, mod5, K=10)$delta cv.glm(dat, mod6, K=10)$delta ################################################### ### chunk number 26: read ################################################### #line 1197 "Bootstrapping_2011.Rnw" library(foreign) dat <- read.dta("~/Desktop/ICPSR_slides/Lecture 6/jacob.dta") dat$perot <- (dat$perotvote - min(dat$perotvote))/ diff(range(dat$perotvote)) dat$chal <- (dat$chal_vote - min(dat$chal_vote))/ diff(range(dat$chal_vote)) lo.mod <- loess(chal ~ perot, data=dat, span=.75) ################################################### ### chunk number 27: cvlo ################################################### #line 1216 "Bootstrapping_2011.Rnw" source("cv.lo.R") out.cv <- matrix(ncol=2, nrow=10) out.cv[1,] <- cv.lo(dat, "chal", update(lo.mod, span=.1), numiter=25, K=10) out.cv[2,] <- cv.lo(dat, "chal", update(lo.mod, span=.2), numiter=25, K=10) out.cv[3,] <- cv.lo(dat, "chal", update(lo.mod, span=.3), numiter=25, K=10) out.cv[4,] <- cv.lo(dat, "chal", update(lo.mod, span=.4), numiter=25, K=10) out.cv[5,] <- cv.lo(dat, "chal", update(lo.mod, span=.5), numiter=25, K=10) out.cv[6,] <- cv.lo(dat, "chal", update(lo.mod, span=.6), numiter=25, K=10) out.cv[7,] <- cv.lo(dat, "chal", update(lo.mod, span=.7), numiter=25, K=10) out.cv[8,] <- cv.lo(dat, "chal", update(lo.mod, span=.8), numiter=25, K=10) out.cv[9,] <- cv.lo(dat, "chal", update(lo.mod, span=.9), numiter=25, K=10) out.cv[10,] <- cv.lo(dat, "chal", update(lo.mod, span=1), numiter=25, K=10) ################################################### ### chunk number 28: cvres ################################################### #line 1247 "Bootstrapping_2011.Rnw" s <- seq(.1,1, by=.1) pdf("cvres.pdf", height=4, width=6) plot(s, out.cv[,1], xlab = "Span", ylab="CV Error", ylim=range(c(out.cv)), type="b", pch=16) lines(s, out.cv[,2], type="b", col="red", pch=15) legend("topright", c("Raw", "Corrected"), pch=c(16,15), col=c("black", "red"), inset=.01) invisible(dev.off())