Datasets from Graham's Missing Data Book
Datasets from Graham's missing data book (2012).
# Load each of the example datasets (one statement per line so the
# usage block parses as valid R).
data(data.graham.ex3)
data(data.graham.ex6)
data(data.graham.ex8a)
data(data.graham.ex8b)
data(data.graham.ex8c)
Dataset data.graham.ex3:
'data.frame': 2756 obs. of 20 variables:
$ school : int 1 1 1 1 1 1 1 1 1 1 ...
$ alc7 : int 1 1 1 7 3 6 1 5 4 3 ...
$ rskreb71: int 1 3 1 2 1 NA 1 2 1 2 ...
$ rskreb72: int NA NA NA NA NA NA NA 3 2 3 ...
$ rskreb73: int NA NA NA NA NA NA NA 2 1 2 ...
$ rskreb74: int NA NA NA NA NA NA NA 3 2 4 ...
$ likepa71: int 4 2 3 3 2 NA 1 4 3 3 ...
$ likepa72: int 5 2 4 2 2 NA 5 3 3 2 ...
$ likepa73: int 4 1 3 3 2 NA 1 3 2 3 ...
$ likepa74: int 5 3 1 5 4 4 3 4 3 2 ...
$ likepa75: int 4 4 4 4 3 3 4 4 3 3 ...
$ posatt71: int 1 1 1 1 1 2 1 NA NA NA ...
$ posatt72: int 1 2 1 1 1 2 4 NA NA NA ...
$ posatt73: int 1 1 1 1 1 2 1 NA NA NA ...
$ alc8 : int 1 8 4 8 5 7 1 3 5 3 ...
$ rskreb81: int 1 4 1 2 2 3 2 3 1 4 ...
$ rskreb82: int NA NA NA NA NA NA NA 3 1 4 ...
$ rskreb83: int NA NA NA NA NA NA NA 2 1 2 ...
$ rskreb84: int NA NA NA NA NA NA NA 3 2 4 ...
$ alc9 : int 3 NA 7 NA 5 7 NA 6 6 7 ...
Dataset data.graham.ex6:
'data.frame': 2756 obs. of 9 variables:
$ school : int 1 1 1 1 1 1 1 1 1 1 ...
$ program : int 0 0 0 0 0 0 0 0 0 0 ...
$ alc7 : int 1 1 1 7 3 6 1 5 4 3 ...
$ riskreb7: int 1 3 1 2 1 NA 1 2 1 2 ...
$ likepar7: int 4 2 3 3 2 NA 1 4 3 3 ...
$ posatt7 : int 1 1 1 1 1 2 1 NA NA NA ...
$ alc8 : int 1 8 4 8 5 7 1 3 5 3 ...
$ riskreb8: int 1 4 1 2 2 3 2 3 1 4 ...
$ alc9 : int 3 NA 7 NA 5 7 NA 6 6 7 ...
Dataset data.graham.ex8a:
'data.frame': 1023 obs. of 20 variables:
$ skill1 : int 28 29 27 29 29 NA NA NA 29 NA ...
$ skill2 : int NA NA 29 29 NA NA NA NA NA 21 ...
$ skill3 : int NA NA 29 29 29 NA 28 10 29 25 ...
$ skill4 : int NA 29 25 29 29 28 29 NA NA NA ...
$ skill5 : int 29 29 28 28 29 NA 29 10 NA 25 ...
$ iplanV1: int 14 18 15 17 16 NA NA NA 18 NA ...
$ iplanV2: int NA NA 17 16 NA NA NA NA NA 16 ...
$ iplanV3: int NA NA 16 18 18 NA 17 1 18 16 ...
$ iplanV4: int NA 18 14 18 14 6 18 NA NA NA ...
$ iplanV5: int 13 18 12 18 18 NA 18 3 NA 5 ...
$ planA1 : int 1 0 2 8 3 NA NA NA 7 NA ...
$ planA2 : int NA NA 0 4 NA NA NA NA NA 6 ...
$ planA3 : int NA NA 1 4 7 NA 2 0 1 7 ...
$ planA4 : int NA 8 0 4 6 0 0 NA NA NA ...
$ planA5 : int 0 7 1 5 7 NA 2 0 NA 6 ...
$ planV1 : int NA NA NA NA NA NA NA NA NA NA ...
$ planV2 : int NA NA NA NA NA NA NA NA NA 1 ...
$ planV3 : int NA NA 1 NA NA NA NA 0 NA 1 ...
$ planV4 : int NA NA NA NA 2 NA NA NA NA NA ...
$ planV5 : int 2 NA 2 NA NA NA NA 0 NA NA ...
Dataset data.graham.ex8b:
'data.frame': 2570 obs. of 6 variables:
$ rskreb71: int 1 3 1 2 1 NA 1 2 1 2 ...
$ rskreb72: int NA NA NA NA NA NA NA 3 2 3 ...
$ posatt71: int 1 1 1 1 1 2 1 NA NA NA ...
$ posatt72: int 1 2 1 1 1 2 4 NA NA NA ...
$ posatt73: int 1 1 1 1 1 2 1 NA NA NA ...
$ posatt : int 3 4 3 3 3 6 6 NA NA NA ...
Dataset data.graham.ex8c:
'data.frame': 2756 obs. of 16 variables:
$ s1 : int 1 1 1 1 1 1 1 1 1 1 ...
$ s2 : int 0 0 0 0 0 0 0 0 0 0 ...
$ s3 : int 0 0 0 0 0 0 0 0 0 0 ...
$ s4 : int 0 0 0 0 0 0 0 0 0 0 ...
$ s5 : int 0 0 0 0 0 0 0 0 0 0 ...
$ s6 : int 0 0 0 0 0 0 0 0 0 0 ...
$ s7 : int 0 0 0 0 0 0 0 0 0 0 ...
$ s8 : int 0 0 0 0 0 0 0 0 0 0 ...
$ s9 : int 0 0 0 0 0 0 0 0 0 0 ...
$ s10 : int 0 0 0 0 0 0 0 0 0 0 ...
$ s11 : int 0 0 0 0 0 0 0 0 0 0 ...
$ xalc7 : int 1 1 1 7 3 6 1 5 4 3 ...
$ rskreb72: int NA NA NA NA NA NA NA 3 2 3 ...
$ likepa71: int 4 2 3 3 2 NA 1 4 3 3 ...
$ posatt71: int 1 1 1 1 1 2 1 NA NA NA ...
$ alc8 : int 1 8 4 8 5 7 1 3 5 3 ...
The datasets were downloaded from http://methodology.psu.edu/pubs/books/missing.
Graham, J. W. (2012). Missing data. New York: Springer. doi: 10.1007/978-1-4614-4018-5
## Not run:
# Worked examples for the Graham (2012) datasets: multiple imputation with
# mice, Amelia and jomo, followed by a comparison of the imputed frequency
# distributions. The examples are not run automatically because they need
# several imputation packages and take a while to execute.
library(mitools)
library(mice)
library(Amelia)
library(jomo)

#############################################################################
# EXAMPLE 1: data.graham.ex8a | Imputation under multivariate normal model
#############################################################################

data(data.graham.ex8a)
dat <- data.graham.ex8a
# keep only the first ten columns of the dataset
dat <- dat[, 1:10]
vars <- colnames(dat)
V <- length(vars)

# remove persons with completely missing data
dat <- dat[rowMeans(is.na(dat)) < 1, ]

summary(dat)
# some descriptive statistics
psych::describe(dat)

#**************
# imputation under a multivariate normal model
M <- 7  # number of imputations

#--------- mice package
# define imputation method
impM <- rep("norm", V)
names(impM) <- vars
# mice imputation
imp1a <- mice::mice(dat, method = impM, m = M, maxit = 4)
summary(imp1a)
# convert into a list of datasets
datlist1a <- miceadds::mids2datlist(imp1a)

#--------- Amelia package
imp1b <- Amelia::amelia(dat, m = M)
summary(imp1b)
datlist1b <- imp1b$imputations

#--------- jomo package
imp1c <- jomo::jomo1con(Y = dat, nburn = 100, nbetween = 10, nimp = M)
str(imp1c)
# convert into a list of datasets
datlist1c <- miceadds::jomo2datlist(imp1c)

# alternatively one can use the jomo wrapper function
imp1c1 <- jomo::jomo(Y = dat, nburn = 100, nbetween = 10, nimp = M)

#############################################################################
# EXAMPLE 2: data.graham.ex8b | Imputation with categorical variables
#############################################################################

data(data.graham.ex8b)
dat <- data.graham.ex8b
vars <- colnames(dat)
V <- length(vars)

# descriptive statistics
psych::describe(dat)

#*******************************
# imputation in mice using predictive mean matching
imp1a <- mice::mice(dat, m = 5, maxit = 10)
datlist1a <- mitools::imputationList(miceadds::mids2datlist(imp1a))
print(datlist1a)

#*******************************
# imputation in jomo treating all variables as categorical

# Note that variables must have values from 1 to N
# use categorize function from sirt package here
dat.categ <- sirt::categorize(dat, categorical = colnames(dat), lowest = 1)
dat0 <- dat.categ$data

# imputation in jomo treating all variables as categorical
# (the column-wise maximum of the recoded data is passed as Y.numcat)
Y_numcat <- apply(dat0, 2, max, na.rm = TRUE)
imp1b <- jomo::jomo1cat(
  Y.cat = dat0, Y.numcat = Y_numcat,
  nburn = 100, nbetween = 10, nimp = 5
)

# recode original categories
datlist1b <- sirt::decategorize(imp1b, categ_design = dat.categ$categ_design)
# convert into a list of datasets
datlist1b <- miceadds::jomo2datlist(datlist1b)
datlist1b <- mitools::imputationList(datlist1b)

# Alternatively, jomo can be used but categorical variables must be
# declared as factors
dat <- dat0
# define two variables as factors
vars <- miceadds::scan.vec(" rskreb71 rskreb72")
for (vv in vars) {
  dat[, vv] <- as.factor(dat[, vv])
}
# use jomo
imp1b1 <- jomo::jomo(Y = dat, nburn = 30, nbetween = 10, nimp = 5)

#****************************
# compare frequency tables for both imputation packages

# Relative frequencies of the observed values of `variable`:
# the table of counts divided by the total count.
fun_prop <- function(variable) {
  t1 <- table(variable)
  t1 / sum(t1)
}

# variable rskreb71
res1a <- with(datlist1a, fun_prop(rskreb71))
res1b <- with(datlist1b, fun_prop(rskreb71))
summary(miceadds::NMIcombine(qhat = res1a, NMI = FALSE))
summary(miceadds::NMIcombine(qhat = res1b, NMI = FALSE))

# variable posatt
res2a <- with(datlist1a, fun_prop(posatt))
res2b <- with(datlist1b, fun_prop(posatt))
summary(miceadds::NMIcombine(qhat = res2a, NMI = FALSE))
summary(miceadds::NMIcombine(qhat = res2b, NMI = FALSE))

## End(Not run)
Please choose more modern alternatives, such as Google Chrome or Mozilla Firefox.