Bootstrap本意是拎着靴带让自己跳起来,在统计学中是一种重采样的方法,通常在样本量小的时候,可以从中不断再次抽样。

1. Bootstrap 简单应用

Bootstrap bias偏差 &variance 方差

library(bootstrap)
bslogc<-function(x,B){
  n<-length(x)
  thetastar<-replicate(B,{xstar<-sample(x,n,replace = T)
                       median(xstar)} )
  thetastar
}
 
x <- diabetes$logCpeptide   
hist(x, freq = FALSE, main = "Histogram of diabetes data")
median(x)
bscpep<-bslogc(x,1000)
hist(bscpep,freq = FALSE, breaks = 7,main = "Histogram of bootstrap diabetes medians")
var(bscpep)
bias<-mean(bscpep)-median(x)

Bootstrap correlation

samplecorr <- function(data, B) {
  n <- nrow(data)
  res <- numeric(B)
  for(i in 1:B) {
    ind <- sample(n, n, replace = TRUE)
    bs_data <-  data[ind, ]
    res[i] <- cor(bs_data[ , 1], bs_data[ , 2])
  }
  res 
}
cor(law)
cor(law[ , 1], law[ , 2])
bs_law <- samplecorr(law, 10000)
hist(bs_law,  freq = FALSE,
     main = "Histogram of law data bootstrap corellation coefficients")
bias.law <- mean(bs_law) - cor.law[1, 2]
bias.law
var(bs_law)

cor(law82)
bs_law82 <- samplecorr(law82[ , 2:3], 10000)
hist(bs_law82,  freq = FALSE,
     main = "Histogram of law82 data bootstrap corellation coefficients")
bias.law82 <- mean(bs_law82) - cor.law82[1, 2]
bias.law82
var(bs_law82)

Bootstrap方法比较实验组与对照组

mean(mouse.t)-mean(mouse.c)
bsmice<-function(x,y,B){
  n1<-length(x)
  n2<-length(y)
  thet1<-replicate(B,{xstar<-sample(x,n1,replace = T)
                       mean(xstar)})
  thet2<-replicate(B,{ystar<-sample(y,n2,replace = T)
                       mean(ystar)})
  d=thet1-thet2
  
}

df1=bsmice(mouse.t,mouse.c,1000)
head(df1)
pihat <- sum(df1 > 10) / length(df1) #probability that survival time difference > 10
pihat

2.Bootstrap residuals

ebs_eps<-function(y,x,B){
  fit <- lm(y ~ x)
  e <- residuals(fit)
  y_fitted <- fitted(fit) # or : beta[1] + beta[2] * 
  x <- wood$density # for convenience
  bs_resid <- matrix(0, nrow = B, ncol = 2)
  n<-length(y)
  for(i in 1:B){
    e_star <- sample(e, n, replace = TRUE)
    y_star <- y_fitted + e_star
    fit_star <- lm(y_star ~ x)
    bs_resid[i, ] <- coef(fit_star)
  }
  bs_resid
}

wood <- read.table("wood.txt", header = TRUE)
bs_beta <-ebs_eps(y = wood$hardness, x = wood$density, 1000)

fit_wood <- lm(hardness ~ density, data = wood)
beta <- coef(fit_wood)

plot(wood$density, wood$hardness, main = "",  xlab = "density", ylab = "hardness")
yreg <- beta[1] + beta[2] * wood$density
lines(wood$density, yreg, col = "red")
for(i in 1:10) {
    yest <- bs_beta[i, 1] + bs_beta[i, 2] * wood$density
    lines(wood$density, yest)
}

hist(bs_beta[, 1], xlab = "Estimate of alpha", freq = FALSE)
hist(bs_beta[ , 2], xlab = "Estimate of beta", freq = FALSE)

3.Bootstrap 置信区间

s1=rgamma(50,shape = 4,rate = 1)
hist(s1)
n <- length(s1)
B=10000
zstar<-{}
for (i in 1:B){
  xstar<- sample(s1,n,replace = T)
  ths<-mean(xstar)
  se<-sd(xstar)/sqrt(n)
  zstar[i]<- (ths-theta_hat)/se
}

tq<-quantile(zstar,c(0.05,1-0.05))
theta_hat-c(tq[2],tq[1])*se_hat
library(bootstrap)
help(bootstrap)
boott(s1,theta = function(x) mean(x),
      perc=c(.025,.05,.10,.50,.90,.95,.975))

更多推荐

Bootstrap方法在R语言中的运用