8. Case study on missing values

Part of the IMPACT data set is made available for didactic purposes (TBI.sav as .zip file) The R codefor imputations with the mice library in R is shown below:

IMPACT imputation 20July09.r

# IMPACT illustrations with missing values

library(Design)
library(mice)
library(foreign)

TBI <- read.spss('f:/Book/SPSS/TBI.sav',use.value.labels=F,to.data.frame=T)

describe(TBI)

TBI$pupil	<- ifelse(TBI$d.pupil==9,NA,TBI$d.pupil)

# Motor in 5 categories: 1/2; 3; 4; 5/6; 9/NA
TBI$motor	<- ifelse(is.na(TBI$d.motor),9,TBI$d.motor)
TBI$motor1	<- ifelse(TBI$motor==1,1,0)
TBI$motor2	<- ifelse(TBI$motor==2,1,0)
TBI$motor3      <- ifelse(TBI$motor==3,1,0)
TBI$motor4  	<- ifelse(TBI$motor==4,1,0)
TBI$motor56	<- ifelse(TBI$motor==5 | TBI$motor==6,1,0)
TBI$motor9	<- ifelse(TBI$motor==9,1,0)
TBI$motorG	<- ifelse(TBI$motor1==1,5,
                      ifelse(TBI$motor2==1,4,
			ifelse(TBI$motor3==1,3,
			   ifelse(TBI$motor4==1,2,
				ifelse(TBI$motor56==1,1,
				   ifelse(TBI$motor9==1,9,NA))))))
TBI$motorG	<- as.factor(TBI$motorG)
# levels(TBI$motorG)	<- c("5/6", "4", "3", "2", "1", "9")

# mort and unfav outcome
TBI$mort    <- ifelse(TBI$d.gos==1,1,ifelse(TBI$d.gos==6,NA,0))
TBI$deadveg <- ifelse(TBI$d.gos<3,1,ifelse(TBI$d.gos==6,NA,0))
TBI$unfav   <- ifelse(TBI$d.gos<4,1,ifelse(TBI$d.gos==6,NA,0))
TBI$good    <- ifelse(TBI$d.gos<5,1,ifelse(TBI$d.gos==6,NA,0))

TBI$ctclass2  <- ifelse(TBI$ctclass==2,1,0)
TBI$ctclass1  <- ifelse(TBI$ctclass==1,1,0)
TBI$ctclass34 <- ifelse(TBI$ctclass==3 | TBI$ctclass==4,1,0)
TBI$ctclass56 <- ifelse(TBI$ctclass==5 | TBI$ctclass==6,1,0)
TBI$ctclassr4 <- ifelse(TBI$ctclass1==1,2,
		ifelse(TBI$ctclass2==1,1,
		ifelse(TBI$ctclass34==1,3,
		ifelse(TBI$ctclass56==1,4,NA))))
TBI$ctclassr4	<- as.factor(TBI$ctclassr4)
TBI$ctclassr3   <- ifelse(TBI$ctclass1==1 | TBI$ctclass2==1,1,
                     ifelse(TBI$ctclass34==1,2,
			ifelse(TBI$ctclass56==1,3,NA)))
TBI$ctclassr4	<- as.factor(TBI$ctclassr4)
TBI$ctclassr3	<- as.factor(TBI$ctclassr3)

## define smaller set TBI1 for imputation use later to define R2 R3 and R4 
TBI1	<- TBI[,Cs(trial, age, hypoxia, hypotens, cisterns, shift,
                   tsah, edh, pupil, motorG, ctclassr3,
       		   d.sysbpt,hbt,glucoset, unfav, mort	)]
TBI1$trial<-as.numeric(TBI1$trial)
TBI1$trial<-as.factor(TBI1$trial)
TBI1$pupil<-as.factor(TBI1$pupil)

names(TBI1) <- Cs(trial, age, hypoxia, hypotens, cisterns, shift,
                 tsah, edh, pupil, motor, ctclass,
                 d.sysbp,hb,glucose, unfav, mort	)
label(TBI1$unfav) <- "GOS6 unfav"
TBI1$Age30  <- ifelse(TBI1$age<30,1,0)
TBI1$cisterns <- ifelse(TBI1$cisterns>1,1,0)
TBI1$pupil<-as.factor(TBI1$pupil)

dd <- datadist(TBI1)
options(datadist="dd")

describe(TBI1)
########################## 
## Missing value analysis
par(mfrow=c(1,1))

na.patterns <- naclus(TBI1)
plot(na.patterns, ylab="Fraction of NAs in common")

par(mfrow=c(2,2))
naplot(na.patterns)
na.pattern(TBI1)

#### imputation
### define the matrix pmat for mice   predictorMatrix  A value of '1' means that 
### the column variable is used as a predictor for the target variable (in the 
### rows). The diagonal of 'predictorMatrix' must be zero. In our matrix we dont 
### want 'trial', 'age', 'motor', 'unfav' and 'mort' to be imputed.

dim(TBI1)
TBI1  <- TBI1[, -17]
names(TBI1)

p<-16
pmat  <- matrix(rep(1,p*p),nrow=p,ncol=p)                   			
diag(pmat)  <- rep(0,p)
pmat[,c(1:2, 10, 15:16)]  <- 0
pmat[ c(1:2, 10, 15:16),] <- 0
 
## defines data to be used and the imputation method for each column, seed =1 
gm <- mice(TBI1, m=10, 
    imputationMethod =c("logreg","pmm","logreg","logreg", "logreg","logreg",
    "logreg","logreg","polyreg","polyreg","polyreg","pmm",
    "pmm", "pmm","logreg","logreg"),
     predictorMatrix = pmat, seed=1)
gm
                    

## Adjusted analyses
## MI, n  = 2159
fit.mult.impute(unfav ~ as.factor(trial) + age + as.factor(motor) +as.factor(pupil) + 
     hypoxia + hypotens + as.factor(ctclass) + tsah, lrm, xtrans = gm, data = TBI1)

## pupils original, imputation for other covars, n=2036
TBI2  <- TBI1[!is.na(TBI1$pupil),]
dim(TBI2) # n=2036
fit.mult.impute(unfav ~ as.factor(trial) + age + as.factor(motor) +as.factor(TBI1$pupil) + 
     hypoxia + hypotens + as.factor(ctclass) + tsah, lrm, xtrans = gm, data = TBI2)

# MICE SI, n  = 2159
lrm(unfav ~  age + as.factor(motor) +as.factor(pupil) + hypoxia + hypotens +
	as.factor(ctclass) + tsah, data = complete(gm,1))

rcode_and_data/chapter08.txt · Last modified: 2009/07/20 13:35 by ewsteyerberg
chimeric.de = chi`s home Creative Commons License Valid CSS Driven by DokuWiki do yourself a favour and use a real browser - get firefox!! Recent changes RSS feed Valid XHTML 1.0