Chapter 7: Missing values

R script for simulations for MCAR, MAR and MNAR


###################################
# Ewout Steyerberg, March 8, 2015 #
# Missing values: illustrate MCAR, MAR, MNAR mechanism
# Use simple linear models
###################################

library(rms)  # Harrell's library with many useful functions
# library(mice)
# source('C:/Program Files/R/R-2.3.1/library/Hmisc/R/new.s')

#########################

set.seed(1) # For identical results at repetition
n <- 1000               # arbitrary sample size
x2  <- rnorm(n=n, mean=0, sd=1)  # x2 standard normal
x1	<- rnorm(n=n, mean=0, sd=1)  # Uncorrelated x1
# x1   <- sqrt(.5) * x2 + rnorm(n=n, mean=0, sd=sqrt(1-.5))  # x2 correlated with x1

y1   <- 1 * x1 + 1 * x2 + rnorm(n=n, mean=0, sd=sqrt(1-0)) # generate y
# var of y1 larger with correlated x1 - x2

plot(x=x1,y=x2,pch='.',xlim=c(-4,4),ylim=c(-4,4),ps=.1)
abline(ols(x2~x1))               

# Make approx half missing
# MCAR, MAR and MNAR mechanisms
x1MCAR   <- ifelse(runif(n) < .5, x1, NA)          # MCAR mechanism for 50% of x1
x1MARx   <- ifelse(rnorm(n=n,sd=.8) < x2, x1, NA)  # MAR on x2, R2 50%, 50% missing (since mean x2==0)
x1MARy   <- ifelse(rnorm(n=n,sd=(sqrt(3)*.8)) >y1, x1, NA) # MAR on y, R2 50%, 50% missing (since mean y1==0)
x1MNAR   <- ifelse(rnorm(n=n,sd=.8) < x1, x1, NA)  # MNAR on x1, R2 50%, 50% missing (since mean x1==0)

# MCAR
plot(x=x1,y=x2,pch='.',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # Orig
abline(ols(x2~x1), lty=2)                   
points(x=x1MCAR,y=x2,pch='o',ps=.1) # MCAR
abline(ols(x2~x1MCAR), lty=1, lwd=2)     
title('MCAR')

# x1MARx
plot(x=x1,y=x2,pch='.',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # Orig
abline(ols(x2~x1), lty=2)     
points(x=x1MARx,y=x2,pch='o',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # MCAR
abline(ols(x2~x1MARx), lty=1, lwd=2)     
title('x1 MAR on x2')

# x1MARy
plot(x=x1,y=x2,pch='.',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # Orig
abline(ols(x2~x1), lty=2)     
points(x=x1MARy,y=x2,pch='o',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # MCAR
abline(ols(x2~x1MARy), lty=1, lwd=2)     
title('x1 MAR on y')

# x1MNAR
plot(x=x1,y=x2,pch='.',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # Orig
abline(ols(x2~x1), lty=2)     
points(x=x1MNAR,y=x2,pch='o',xlim=c(-4,4),ylim=c(-4,4),ps=.1) # MCAR
abline(ols(x2~x1MNAR), lty=1, lwd=2)     
title('x1 MNAR')

#################
## CC analysis ##
# MCAR
plot(x=x1,y=y1,pch='.',xlim=c(-4,4),ylim=c(-6,6),ps=.1) # Orig
abline(ols(y1~x1), lty=2)     
points(x=x1MCAR,y=y1,pch='o',ps=.1) # MCAR
abline(ols(y1~x1MCAR), lty=1, lwd=2)    
title('MCAR')

# x1MARx
plot(x=x1,y=y1,pch='.',xlim=c(-4,4),ylim=c(-6,6),ps=.1) # Orig
abline(ols(y1~x1), lty=2)     
points(x=x1MARx,y=y1,pch='o',ps=.1) # MCAR
abline(ols(y1~x1MARx), lty=1, lwd=2)    
title('x1 MAR on x2')

# x1MARy
plot(x=x1,y=y1,pch='.',xlim=c(-4,4),ylim=c(-6,6),ps=.1) # Orig
abline(ols(y1~x1), lty=2)     
points(x=x1MARy,y=y1,pch='o',ps=.1) # MCAR
abline(ols(y1~x1MARy), lty=1, lwd=2)    
title('x1 MAR on y')

# x1MNAR
plot(x=x1,y=y1,pch='.',xlim=c(-4,4),ylim=c(-6,6),ps=.1) # Orig
abline(ols(y1~x1), lty=2)     
points(x=x1MNAR,y=y1,pch='o',ps=.1) # MCAR
abline(ols(y1~x1MNAR), lty=1, lwd=2)    
title('x1 MNAR')

#############
# Compare fits in the various selections
fx1.CC  <- ols(y1~x1+x2)
fx1MARx.CC  <- ols(y1~x1MARx+x2)
fx1MARy.CC  <- ols(y1~x1MARy+x2)
fx1MNAR.CC  <- ols(y1~x1MNAR+x2)

# Regression coefficients
print(rbind(coef(fx1.CC), coef(fx1MARx.CC), 
            coef(fx1MARy.CC), coef(fx1MNAR.CC)), digits=2)

# Imputation; make data sets with different types of missings
d <- as.data.frame(cbind(y1,x1,x2,x1MCAR, x1MARx,x1MARy,x1MNAR))
dMCAR <- d[,c(1,3,4)] # data set with x1 missings according to MCAR
dMARx <- d[,c(1,3,5)] # x1 MAR on x2
dMARy <- d[,c(1,3,6)] # x1 MAR on y
dMNAR <- d[,c(1,3,7)] # x1 MNAR at x1

## MI using the aregImpute function from rms
## help(areImpute) for more info
g <- aregImpute(~ y1+ x1MCAR + x2, n.impute=5, data=d, pr=F, type='pmm')
## fit models per imputed set and combine results using Rubin's rules
## Use fit.mult.impute() function from rms
fx1MCAR.MI <- fit.mult.impute(y1 ~ x1MCAR + x2, ols, xtrans=g, data=d, pr=F)

g <- aregImpute(~ y1+ x1MARx + x2, n.impute=5, data=d, pr=F, type='pmm')
fx1MARx.MI <- fit.mult.impute(y1 ~ x1MARx + x2, ols, xtrans=g, data=d, pr=F) ## areg

g <- aregImpute(~ y1+ x1MARy + x2, n.impute=5, data=d, pr=F, type='pmm')
fx1MARy.MI <- fit.mult.impute(y1 ~ x1MARy + x2, ols, xtrans=g, data=d, pr=F) ## areg

g <- aregImpute(~ y1+ x1MNAR + x2, n.impute=5, data=d, pr=F, type='pmm')
fx1MNAR.MI <- fit.mult.impute(y1 ~ x1MNAR + x2, ols, xtrans=g, data=d, pr=F) ## areg

# Regression coefficients after MI with aregImpute
print(rbind(coef(fx1MCAR.MI), coef(fx1MARx.MI), 
            coef(fx1MARy.MI), coef(fx1MNAR.MI)), digits=3)

#### End illustration missing values and imputation ####


rcode_and_data/chapter07.txt · Last modified: 2015/03/09 09:55 by ewsteyerberg
chimeric.de = chi`s home Creative Commons License Valid CSS Driven by DokuWiki do yourself a favour and use a real browser - get firefox!! Recent changes RSS feed Valid XHTML 1.0