13. Modern estimation methods

Shrinkage calculations: RStudio site

A nice example of the R code plus results with a GUSTO-I sample is presented here.

Shrinkage calculations: R code with glmnet package

The R code to calculate shrunk regression coefficients for the GUSTO example is as follows:

# Small program, illustrating shrinkage techniques in logistic models for gusto subsample
# Oct 2018: use glmnet

library(foreign)
library(rms)

# Import sample5
sample5 <- as.data.frame(read.spss('/Users/ewsteyerberg/Documents/Externe harde schijf/Book/SPSS/sample5.sav'))
sample4 <- as.data.frame(read.spss('/Users/ewsteyerberg/Documents/Externe harde schijf/Book/SPSS/sample4.sav'))

# gustos  <- sample5
gustos  <- sample4  # choose one of the samples for illustration
describe(gustos)

# Fit full models in gustos: 17 predictors (full) and 8 predictors (full8)
full <- lrm(DAY30~SHO+A65+ANT+DIA+HYP+HRT+TTR+SEX+PMI+HEI+WEI+HTN+SMK+LIP+PAN+FAM+ST4,
			data=gustos,x=T,y=T,linear.predictors=F)
full8 <- lrm(DAY30~SHO+A65+HIG+DIA+HYP+HRT+TTR+SEX, data=gustos,x=T,y=T,linear.predictors=T)
val.full <- validate(full, B=500)     # 500 replications for bootstrap
val.full8 <- validate(full8, B=500)   # 500 replications for bootstrap
full
full8

# Make shrunk models
full8.shrunk <- full8
# Bootstrap-based shrinkage: val.full8[4,5] is the bias-corrected calibration slope
full8.shrunk$coef <- val.full8[4,5] * full8.shrunk$coef
# Heuristic shrinkage: (LR - df) / LR = (62.6 - 8) / 62.6 = 0.87
shrinkage8 <- (full8$stats[3] - full8$stats[4]) / full8$stats[3]
full8.shrunk$coef <- shrinkage8 * full8$coef  # overwrites the bootstrap-based coefficients

# Estimate new intercept, with shrunk lp as offset variable, i.e. coef fixed at unity
full8.shrunk$coef[1] <- lrm.fit(y=full8$y, offset= full8$x %*% full8.shrunk$coef[2:9])$coef[1]
full8.shrunk$coef / full8$coef  # ratios show the uniform shrinkage factor
signif(full8.shrunk$coef,2)
full8.shrunk
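
# Check (a sketch, not part of the original code): the re-estimated intercept can be
# verified with an intercept-only glm that fixes the shrunk lp as an offset;
# 'lp.shrunk.noint' is a hypothetical helper name
lp.shrunk.noint <- as.numeric(full8$x %*% full8.shrunk$coef[2:9])
glm(full8$y ~ offset(lp.shrunk.noint), family=binomial)$coef  # should equal full8.shrunk$coef[1]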

# Make a penalized model
p8	<- pentrace(full8, c(0,1,2,3,4,5,6,7,8,10,12,14,20, 24, 32,40))
p	<- pentrace(full, c(0,2,4,6,8,10,12,14,16,18,20,22, 24, 28, 32,40))
## Fig 13.1 ##
plot(x=p8$results.all[,1],y=p8$results.all[,5],type="l",xlim=c(0,40), ylim=c(40,48), 
  xlab="Penalty", ylab="AIC", cex.lab=1.2, col = Cs(darkgreen) )
lines(x=p$results.all[,1],y=p$results.all[,5], lty=2, col = Cs(darkred))
text(x=20,y=47,"8 predictors", adj=0, col = Cs(darkgreen))
text(x=15,y=44,"17 predictors", adj=0, col = Cs(darkred))

###############################################################################
### Now use glmnet for all forms of penalization: L1, L2, L1+L2 (elastic net) #
library(glmnet)

# ridge regression: L2
# Find the best lambda using cross-validation
set.seed(123) 
cv <- cv.glmnet(x=full8$x, y=full8$y, alpha = 0, family=c("binomial"))
# Display the best lambda value
cv$lambda.min # 0.01
# Fit the model on the training data
model8.L2 <- glmnet(x=full8$x, y=full8$y, alpha = 0, lambda = cv$lambda.min, family=c("binomial"))
# Display regression coefficients
coef(model8.L2) # this is identical to what rms produces with pentrace
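# Sketch: per-coefficient ratios versus the ML fit show the effective ridge
# shrinkage (the columns of full8$x match coef(full8)[-1] by construction)
as.numeric(coef(model8.L2))[-1] / coef(full8)[-1]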
# end ridge

# lasso regression: L1
# Find the best lambda using cross-validation
set.seed(123) 
cv <- cv.glmnet(x=full8$x, y=full8$y, alpha = 1, family=c("binomial"))
# Display the best lambda value
cv$lambda.min # 0.0026
# Fit the model on the training data
model8.L1 <- glmnet(x=full8$x, y=full8$y, alpha = 1, lambda = cv$lambda.min, family=c("binomial"))
# plot(x=glmnet(x=full8$x, y=full8$y, alpha = 1, family=c("binomial")), xvar = c("la"))

# Display regression coefficients
coef(model8.L1) # this is very similar to what glmpath produced
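# Sketch: cv$lambda.1se applies the 1-SE rule for a sparser lasso model;
# coefficients shrink more and some may drop to exactly zero
coef(glmnet(x=full8$x, y=full8$y, alpha = 1, lambda = cv$lambda.1se, family=c("binomial")))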
# end lasso
####

# elastic net regression
## Use alpha / lambda optimization
library("glmnetUtils")
set.seed(10)
cvL1L2 <- cva.glmnet(x=full8$x, y=full8$y, alpha = seq(0, 1, len = 11)^3, family="binomial", nfolds = 10)
minlossplot(cvL1L2, cv.type='min')  # best alpha = 0, so L2 penalty
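# Sketch (assuming the documented cva.glmnet structure: $alpha plus one
# cv.glmnet fit per alpha in $modlist): extract the best alpha/lambda pair
cvm.min <- sapply(cvL1L2$modlist, function(m) min(m$cvm))
cvL1L2$alpha[which.min(cvm.min)]                 # best alpha; 0 means pure L2
cvL1L2$modlist[[which.min(cvm.min)]]$lambda.min  # lambda at that alpha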
## end Elastic Net  ##

## Firth regression ##
library(brglm)
# Default: bias-reducing modified scores
model8.Firth <- brglm(DAY30~SHO+A65+HIG+DIA+HYP+HRT+TTR+SEX, data=gustos, family=binomial, pl=F)
# Set pl=T for Jeffreys invariant prior
model8.Firth <- brglm(DAY30~SHO+A65+HIG+DIA+HYP+HRT+TTR+SEX, data=gustos, family=binomial, pl=T)
## Identical results

# Inspect coefs
coef(model8.Firth)
coef(model8.Firth) / coef(full8) # effective shrinkage quite limited
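
# Alternative sketch (an assumption, not run for the book): the logistf package
# fits the same Firth-penalized likelihood and adds profile-likelihood CIs
# library(logistf)
# coef(logistf(DAY30~SHO+A65+HIG+DIA+HYP+HRT+TTR+SEX, data=gustos))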

###################################
## Plot densities of predictions ##
###################################
# ML fit
plot(density(full8$linear.predictors, bw=.4), main="", ylim=c(0,.4), xlim=c(-6.1,2), lwd=2, 
     xlab="Linear predictor", cex.lab=1.2)

lpshrunk  <- full8$x  %*% full8.shrunk$coef[2:9] + full8.shrunk$coef[1]
mean(plogis(lpshrunk)) # shrinkage
lines(density(lpshrunk, bw=.4), lty=2, lwd=2)

lpL2  <- as.numeric(full8$x  %*% model8.L2$beta + model8.L2$a0)
mean(plogis(lpL2)) # ridge
lines(density(lpL2, bw=.4), lty=3, lwd=2)

lpFirth  <- predict(model8.Firth)
lines(density(lpFirth, bw=.4), lty=4, lwd=2)

lpL1  <- as.numeric(full8$x  %*% model8.L1$beta + model8.L1$a0)
mean(plogis(lpL1)) # Lasso
lines(density(lpL1, bw=.4), lty=5, lwd=2)

legend(-2.4, .4, c("Standard ML", "Uniform shrinkage", "Ridge","Firth", "Lasso"),
       bty="n", lty = c(1:5), lwd=2)
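
# Sketch: put the mean predicted risks (computed above) next to the observed
# event rate as a check on calibration-in-the-large (assumes DAY30 coded 0/1)
c(observed = mean(full8$y), ML = mean(plogis(full8$linear.predictors)),
  shrunk = mean(plogis(lpshrunk)), ridge = mean(plogis(lpL2)),
  Firth = mean(plogis(lpFirth)), lasso = mean(plogis(lpL1)))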

## End plot densities of predictions ##
#######################################

## Effective shrinkage by each method ##
Cal.slope <- c(coef(lrm(full8$y~full8$linear.predictors))[2], 
               coef(lrm(full8$y~lpshrunk))[2],
               coef(lrm(full8$y~lpL2))[2],
               coef(lrm(full8$y~lpFirth))[2],
               coef(lrm(full8$y~lpL1))[2]) # 1.00, 1.14, 1.14, 1.02, 1.10
round(1/Cal.slope,2)
# full8 lpshrunk[1]       lpL2     lpFirth        lpL1 
# 1.00        0.87        0.87        0.98        0.91 

#####################################
##### May repeat for 17 var model ###
#####################################

Fig 13.1

# Fig 13.1 is produced by the pentrace code above (marked ## Fig 13.1 ##);
# print the pentrace objects to inspect the full penalty traces
p8
p

Fig 13.2

##################################
### Check penalty in bootstrap ###
##################################

## Bootstrap procedure to find optimal penalty ##
Pen.boot <- function(fit, dataset, pen=c(0,2,4,6,8,12,16,20,24,30,36,44), B=40) {
  nrowB   <- nrow(fit$x)
  nrowmat <- B*length(pen)
  # Matrix for results
  matB <- matrix(NA, nrow=nrowmat, ncol=4)
  dimnames(matB) <- list(1:nrowmat, Cs(Penalty, R2test, Ctest, Slopetest))

  # Start loop over bootstrap samples
  for (i in 1:B) {
    cat("Start Bootstrap sample nr", i, "\n")
    Brows    <- sample(nrowB, replace=T)
    dataBoot <- dataset[Brows,]
    j <- (i-1)*length(pen)
    for (p in pen) {
      j <- j+1
      fit.penB  <- update(fit, data=dataBoot, penalty=p, maxit=99)  # fit in Boot sample
      matB[j,1] <- p
      # lp in the original sample: test performance; the intercept is omitted,
      # which leaves the calibration slope and C statistic unaffected
      lp <- fit$x %*% coef(fit.penB)[-1]
      matB[j,2:4] <- val.prob(y=fit$y, logit=lp, pl=F, smooth=F, logistic.cal=F)[c(3,2,13)]
    } # end loop over penalties
  } # end loop over B
  cat("\n\nEnd of bootstraps\n\n")
  return(matB)
} # end function Pen.boot

## Search penalty in B
set.seed(1)
sample4.pen17 <- Pen.boot(fit=full, dataset=gustos, B=200, pen=seq(0,60, by=6))
# note: mean() with trim >= 0.5 returns the median, a robust summary per penalty
Pen17 <- aggregate(sample4.pen17[,2:4], list(Pen = sample4.pen17[,1]), function(x) mean(x, trim=0.95))
Pen17

set.seed(1)
sample4.pen8 <- Pen.boot(fit=full8, dataset=gustos, B=500, pen=c(0,4,6,8,10,12,16,20,36))
Pen8 <- aggregate(sample4.pen8[,2:4], list(Pen = sample4.pen8[,1]), function(x) mean(x, trim=0.95))  # again the median via trim=0.95
Pen8
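
# Sketch: a simple numerical summary of Fig 13.2, the penalty whose
# bootstrap-test calibration slope comes closest to 1
Pen8[which.min(abs(Pen8$Slopetest - 1)), "Pen"]
Pen17[which.min(abs(Pen17$Slopetest - 1)), "Pen"]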

### Plot of bootstrapped penalties vs slope, C stat, R2
plot(x=Pen17[,1], y=Pen17[,4], type="l", xlim=c(0,37),ylim=c(.7,1.42), 
  xlab="Penalty", ylab="Slope of linear predictor", cex.lab=1.2, col = Cs(darkred)) 
lines(x=Pen8[,1], y=Pen8[,4], type="l", lty=2, col = Cs(darkgreen))
abline(a=1, b=0)
text(x=c(36,36), y=c(Pen17[Pen17[,1]==36,4],Pen8[nrow(Pen8),4])-.02, label=c("17", "8"), col = Cs(darkred, darkgreen), adj=0)

# C stat
plot(x=Pen17[,1], y=Pen17[,3], type="l", xlim=c(0,37),ylim=c(.76,.8), 
  xlab="Penalty", ylab="C statistic", cex.lab=1.2, col = Cs(darkred)) 
lines(x=Pen8[,1], y=Pen8[,3], type="l", lty=2, col = Cs(darkgreen))
text(x=c(36,36), y=c(Pen17[nrow(Pen17),3],Pen8[nrow(Pen8),3])-.002, label=c("17", "8"), col = Cs(darkred, darkgreen), adj=0)

## End ##
