Dataset Reading
This dataset contains N=328 students and I=12 items measuring reading competence. All 12 items are arranged into 3 testlets (items with common text stimulus) labeled as A, B and C. The allocation of items to testlets is indicated by their variable names.
data(data.read)
A data frame with 328 persons on the following 12 variables.
Rows correspond to persons and columns to items. The following items are
included in data.read
:
Testlet A: A1
, A2
, A3
, A4
Testlet B: B1
, B2
, B3
, B4
Testlet C: C1
, C2
, C3
, C4
## Not run: data(data.read) dat <- data.read I <- ncol(dat) # list of needed packages for the following examples packages <- scan(what="character") eRm ltm TAM mRm CDM mirt psychotools IsingFit igraph qgraph pcalg poLCA randomLCA psychomix MplusAutomation lavaan # load packages. make an installation if necessary miceadds::library_install(packages) #***************************************************** # Model 1: Rasch model #***************************************************** #---- M1a: rasch.mml2 (in sirt) mod1a <- sirt::rasch.mml2(dat) summary(mod1a) #---- M1b: smirt (in sirt) Qmatrix <- matrix(1,nrow=I, ncol=1) mod1b <- sirt::smirt(dat,Qmatrix=Qmatrix) summary(mod1b) #---- M1c: gdm (in CDM) theta.k <- seq(-6,6,len=21) mod1c <- CDM::gdm(dat,theta.k=theta.k,irtmodel="1PL", skillspace="normal") summary(mod1c) #---- M1d: tam.mml (in TAM) mod1d <- TAM::tam.mml( resp=dat ) summary(mod1d) #---- M1e: RM (in eRm) mod1e <- eRm::RM( dat ) # eRm uses Conditional Maximum Likelihood (CML) as the estimation method. summary(mod1e) eRm::plotPImap(mod1e) #---- M1f: mrm (in mRm) mod1f <- mRm::mrm( dat, cl=1) # CML estimation mod1f$beta # item parameters #---- M1g: mirt (in mirt) mod1g <- mirt::mirt( dat, model=1, itemtype="Rasch", verbose=TRUE ) print(mod1g) summary(mod1g) coef(mod1g) # arrange coefficients in nicer layout sirt::mirt.wrapper.coef(mod1g)$coef #---- M1h: rasch (in ltm) mod1h <- ltm::rasch( dat, control=list(verbose=TRUE ) ) summary(mod1h) coef(mod1h) #---- M1i: RaschModel.fit (in psychotools) mod1i <- psychotools::RaschModel.fit(dat) # CML estimation summary(mod1i) plot(mod1i) #---- M1j: noharm.sirt (in sirt) Fpatt <- matrix( 0, I, 1 ) Fval <- 1 + 0*Fpatt Ppatt <- Pval <- matrix(1,1,1) mod1j <- sirt::noharm.sirt( dat=dat, Ppatt=Ppatt, Fpatt=Fpatt, Fval=Fval, Pval=Pval) summary(mod1j) # Normal-ogive model, multiply item discriminations with constant D=1.7. # The same holds for other examples with noharm.sirt and R2noharm. plot(mod1j) #---- M1k: rasch.pml3 (in sirt) mod1k <- sirt::rasch.pml3( dat=dat) # pairwise marginal maximum likelihood estimation summary(mod1k) #---- M1l: running Mplus (using MplusAutomation package) mplus_path <- "c:/Mplus7/Mplus.exe" # locate Mplus executable #**************** # specify Mplus object mplusmod <- MplusAutomation::mplusObject( TITLE="1PL in Mplus ;", VARIABLE=paste0( "CATEGORICAL ARE ", paste0(colnames(dat),collapse=" ") ), MODEL=" ! fix all item loadings to 1 F1 BY A1@1 A2@1 A3@1 A4@1 ; F1 BY B1@1 B2@1 B3@1 B4@1 ; F1 BY C1@1 C2@1 C3@1 C4@1 ; ! estimate variance F1 ; ", ANALYSIS="ESTIMATOR=MLR;", OUTPUT="stand;", usevariables=colnames(dat), rdata=dat ) #**************** # write Mplus syntax filename <- "mod1u" # specify file name # create Mplus syntaxes res2 <- MplusAutomation::mplusModeler(object=mplusmod, dataout=paste0(filename,".dat"), modelout=paste0(filename,".inp"), run=0 ) # run Mplus model MplusAutomation::runModels( filefilter=paste0(filename,".inp"), Mplus_command=mplus_path) # alternatively, the system() command can also be used # get results mod1l <- MplusAutomation::readModels(target=getwd(), filefilter=filename ) mod1l$summaries # summaries mod1l$parameters$unstandardized # parameter estimates #***************************************************** # Model 2: 2PL model #***************************************************** #---- M2a: rasch.mml2 (in sirt) mod2a <- sirt::rasch.mml2(dat, est.a=1:I) summary(mod2a) #---- M2b: smirt (in sirt) mod2b <- sirt::smirt(dat,Qmatrix=Qmatrix,est.a="2PL") summary(mod2b) #---- M2c: gdm (in CDM) mod2c <- CDM::gdm(dat,theta.k=theta.k,irtmodel="2PL", skillspace="normal") summary(mod2c) #---- M2d: tam.mml (in TAM) mod2d <- TAM::tam.mml.2pl( resp=dat ) summary(mod2d) #---- M2e: mirt (in mirt) mod2e <- mirt::mirt( dat, model=1, itemtype="2PL" ) print(mod2e) summary(mod2e) sirt::mirt.wrapper.coef(mod1g)$coef #---- M2f: ltm (in ltm) mod2f <- ltm::ltm( dat ~ z1, control=list(verbose=TRUE ) ) summary(mod2f) coef(mod2f) plot(mod2f) #---- M2g: R2noharm (in NOHARM, running from within R using sirt package) # define noharm.path where 'NoharmCL.exe' is located noharm.path <- "c:/NOHARM" # covariance matrix P.pattern <- matrix( 1, ncol=1, nrow=1 ) P.init <- P.pattern P.init[1,1] <- 1 # loading matrix F.pattern <- matrix(1,I,1) F.init <- F.pattern # estimate model mod2g <- sirt::R2noharm( dat=dat, model.type="CFA", F.pattern=F.pattern, F.init=F.init, P.pattern=P.pattern, P.init=P.init, writename="ex2g", noharm.path=noharm.path, dec="," ) summary(mod2g) #---- M2h: noharm.sirt (in sirt) mod2h <- sirt::noharm.sirt( dat=dat, Ppatt=P.pattern,Fpatt=F.pattern, Fval=F.init, Pval=P.init ) summary(mod2h) plot(mod2h) #---- M2i: rasch.pml2 (in sirt) mod2i <- sirt::rasch.pml2(dat, est.a=1:I) summary(mod2i) #---- M2j: WLSMV estimation with cfa (in lavaan) lavmodel <- "F=~ A1+A2+A3+A4+B1+B2+B3+B4+ C1+C2+C3+C4" mod2j <- lavaan::cfa( data=dat, model=lavmodel, std.lv=TRUE, ordered=colnames(dat)) summary(mod2j, standardized=TRUE, fit.measures=TRUE, rsquare=TRUE) #***************************************************** # Model 3: 3PL model (note that results can be quite unstable!) #***************************************************** #---- M3a: rasch.mml2 (in sirt) mod3a <- sirt::rasch.mml2(dat, est.a=1:I, est.c=1:I) summary(mod3a) #---- M3b: smirt (in sirt) mod3b <- sirt::smirt(dat,Qmatrix=Qmatrix,est.a="2PL", est.c=1:I) summary(mod3b) #---- M3c: mirt (in mirt) mod3c <- mirt::mirt( dat, model=1, itemtype="3PL", verbose=TRUE) summary(mod3c) coef(mod3c) # stabilize parameter estimating using informative priors for guessing parameters mirtmodel <- mirt::mirt.model(" F=1-12 PRIOR=(1-12, g, norm, -1.38, 0.25) ") # a prior N(-1.38,.25) is specified for transformed guessing parameters: qlogis(g) # simulate values from this prior for illustration N <- 100000 logit.g <- stats::rnorm(N, mean=-1.38, sd=sqrt(.5) ) graphics::plot( stats::density(logit.g) ) # transformed qlogis(g) graphics::plot( stats::density( stats::plogis(logit.g)) ) # g parameters # estimate 3PL with priors mod3c1 <- mirt::mirt(dat, mirtmodel, itemtype="3PL",verbose=TRUE) coef(mod3c1) # In addition, set upper bounds for g parameters of .35 mirt.pars <- mirt::mirt( dat, mirtmodel, itemtype="3PL", pars="values") ind <- which( mirt.pars$name=="g" ) mirt.pars[ ind, "value" ] <- stats::plogis(-1.38) mirt.pars[ ind, "ubound" ] <- .35 # prior distribution for slopes ind <- which( mirt.pars$name=="a1" ) mirt.pars[ ind, "prior_1" ] <- 1.3 mirt.pars[ ind, "prior_2" ] <- 2 mod3c2 <- mirt::mirt(dat, mirtmodel, itemtype="3PL", pars=mirt.pars,verbose=TRUE, technical=list(NCYCLES=100) ) coef(mod3c2) sirt::mirt.wrapper.coef(mod3c2) #---- M3d: ltm (in ltm) mod3d <- ltm::tpm( dat, control=list(verbose=TRUE), max.guessing=.3) summary(mod3d) coef(mod3d) #=> numerical instabilities #***************************************************** # Model 4: 3-dimensional Rasch model #***************************************************** # define Q-matrix Q <- matrix( 0, nrow=12, ncol=3 ) Q[ cbind(1:12, rep(1:3,each=4) ) ] <- 1 rownames(Q) <- colnames(dat) colnames(Q) <- c("A","B","C") # define nodes theta.k <- seq(-6,6,len=13) #---- M4a: smirt (in sirt) mod4a <- sirt::smirt(dat,Qmatrix=Q,irtmodel="comp", theta.k=theta.k, maxiter=30) summary(mod4a) #---- M4b: rasch.mml2 (in sirt) mod4b <- sirt::rasch.mml2(dat,Q=Q,theta.k=theta.k, mmliter=30) summary(mod4b) #---- M4c: gdm (in CDM) mod4c <- CDM::gdm( dat, irtmodel="1PL", theta.k=theta.k, skillspace="normal", Qmatrix=Q, maxiter=30, centered.latent=TRUE ) summary(mod4c) #---- M4d: tam.mml (in TAM) mod4d <- TAM::tam.mml( resp=dat, Q=Q, control=list(nodes=theta.k, maxiter=30) ) summary(mod4d) #---- M4e: R2noharm (in NOHARM, running from within R using sirt package) noharm.path <- "c:/NOHARM" # covariance matrix P.pattern <- matrix( 1, ncol=3, nrow=3 ) P.init <- 0.8+0*P.pattern diag(P.init) <- 1 # loading matrix F.pattern <- 0*Q F.init <- Q # estimate model mod4e <- sirt::R2noharm( dat=dat, model.type="CFA", F.pattern=F.pattern, F.init=F.init, P.pattern=P.pattern, P.init=P.init, writename="ex4e", noharm.path=noharm.path, dec="," ) summary(mod4e) #---- M4f: mirt (in mirt) cmodel <- mirt::mirt.model(" F1=1-4 F2=5-8 F3=9-12 # equal item slopes correspond to the Rasch model CONSTRAIN=(1-4, a1), (5-8, a2), (9-12,a3) COV=F1*F2, F1*F3, F2*F3 " ) mod4f <- mirt::mirt(dat, cmodel, verbose=TRUE) summary(mod4f) #***************************************************** # Model 5: 3-dimensional 2PL model #***************************************************** #---- M5a: smirt (in sirt) mod5a <- sirt::smirt(dat,Qmatrix=Q,irtmodel="comp", est.a="2PL", theta.k=theta.k, maxiter=30) summary(mod5a) #---- M5b: rasch.mml2 (in sirt) mod5b <- sirt::rasch.mml2(dat,Q=Q,theta.k=theta.k,est.a=1:12, mmliter=30) summary(mod5b) #---- M5c: gdm (in CDM) mod5c <- CDM::gdm( dat, irtmodel="2PL", theta.k=theta.k, skillspace="loglinear", Qmatrix=Q, maxiter=30, centered.latent=TRUE, standardized.latent=TRUE) summary(mod5c) #---- M5d: tam.mml (in TAM) mod5d <- TAM::tam.mml.2pl( resp=dat, Q=Q, control=list(nodes=theta.k, maxiter=30) ) summary(mod5d) #---- M5e: R2noharm (in NOHARM, running from within R using sirt package) noharm.path <- "c:/NOHARM" # covariance matrix P.pattern <- matrix( 1, ncol=3, nrow=3 ) diag(P.pattern) <- 0 P.init <- 0.8+0*P.pattern diag(P.init) <- 1 # loading matrix F.pattern <- Q F.init <- Q # estimate model mod5e <- sirt::R2noharm( dat=dat, model.type="CFA", F.pattern=F.pattern, F.init=F.init, P.pattern=P.pattern, P.init=P.init, writename="ex5e", noharm.path=noharm.path, dec="," ) summary(mod5e) #---- M5f: mirt (in mirt) cmodel <- mirt::mirt.model(" F1=1-4 F2=5-8 F3=9-12 COV=F1*F2, F1*F3, F2*F3 " ) mod5f <- mirt::mirt(dat, cmodel, verbose=TRUE) summary(mod5f) #***************************************************** # Model 6: Network models (Graphical models) #***************************************************** #---- M6a: Ising model using the IsingFit package (undirected graph) # - fit Ising model using the "OR rule" (AND=FALSE) mod6a <- IsingFit::IsingFit(x=dat, family="binomial", AND=FALSE) summary(mod6a) ## Network Density: 0.29 ## Gamma: 0.25 ## Rule used: Or-rule # plot results qgraph::qgraph(mod6a$weiadj,fade=FALSE) #**-- graph estimation using pcalg package # some packages from Bioconductor must be downloaded at first (if not yet done) if (FALSE){ # set 'if (TRUE)' if packages should be downloaded source("http://bioconductor.org/biocLite.R") biocLite("RBGL") biocLite("Rgraphviz") } #---- M6b: graph estimation based on Pearson correlations V <- colnames(dat) n <- nrow(dat) mod6b <- pcalg::pc(suffStat=list(C=stats::cor(dat), n=n ), indepTest=gaussCItest, ## indep.test: partial correlations alpha=0.05, labels=V, verbose=TRUE) plot(mod6b) # plot in qgraph package qgraph::qgraph(mod6b, label.color=rep( c( "red", "blue","darkgreen" ), each=4 ), edge.color="black") summary(mod6b) #---- M6c: graph estimation based on tetrachoric correlations mod6c <- pcalg::pc(suffStat=list(C=sirt::tetrachoric2(dat)$rho, n=n ), indepTest=gaussCItest, alpha=0.05, labels=V, verbose=TRUE) plot(mod6c) summary(mod6c) #---- M6d: Statistical implicative analysis (in sirt) mod6d <- sirt::sia.sirt(dat, significance=.85 ) # plot results with igraph and qgraph package plot( mod6d$igraph.obj, vertex.shape="rectangle", vertex.size=30 ) qgraph::qgraph( mod6d$adj.matrix ) #***************************************************** # Model 7: Latent class analysis with 3 classes #***************************************************** #---- M7a: randomLCA (in randomLCA) # - use two trials of starting values mod7a <- randomLCA::randomLCA(dat,nclass=3, notrials=2, verbose=TRUE) summary(mod7a) plot(mod7a,type="l", xlab="Item") #---- M7b: rasch.mirtlc (in sirt) mod7b <- sirt::rasch.mirtlc( dat, Nclasses=3,seed=-30, nstarts=2 ) summary(mod7b) matplot( t(mod7b$pjk), type="l", xlab="Item" ) #---- M7c: poLCA (in poLCA) # define formula for outcomes f7c <- paste0( "cbind(", paste0(colnames(dat),collapse=","), ") ~ 1 " ) dat1 <- as.data.frame( dat + 1 ) # poLCA needs integer values from 1,2,.. mod7c <- poLCA::poLCA( stats::as.formula(f7c),dat1,nclass=3, verbose=TRUE) plot(mod7c) #---- M7d: gom.em (in sirt) # - the latent class model is a special grade of membership model mod7d <- sirt::gom.em( dat, K=3, problevels=c(0,1), model="GOM" ) summary(mod7d) #---- - M7e: mirt (in mirt) # define three latent classes Theta <- diag(3) # define mirt model I <- ncol(dat) # I=12 mirtmodel <- mirt::mirt.model(" C1=1-12 C2=1-12 C3=1-12 ") # get initial parameter values mod.pars <- mirt::mirt(dat, model=mirtmodel, pars="values") # modify parameters: only slopes refer to item-class probabilities set.seed(9976) # set starting values for class specific item probabilities mod.pars[ mod.pars$name=="d","value" ] <- 0 mod.pars[ mod.pars$name=="d","est" ] <- FALSE b1 <- stats::qnorm( colMeans( dat ) ) mod.pars[ mod.pars$name=="a1","value" ] <- b1 # random starting values for other classes mod.pars[ mod.pars$name %in% c("a2","a3"),"value" ] <- b1 + stats::runif(12*2,-1,1) mod.pars #** define prior for latent class analysis lca_prior <- function(Theta,Etable){ # number of latent Theta classes TP <- nrow(Theta) # prior in initial iteration if ( is.null(Etable) ){ prior <- rep( 1/TP, TP ) } # process Etable (this is correct for datasets without missing data) if ( ! is.null(Etable) ){ # sum over correct and incorrect expected responses prior <- ( rowSums(Etable[, seq(1,2*I,2)]) + rowSums(Etable[,seq(2,2*I,2)]) )/I } prior <- prior / sum(prior) return(prior) } #** estimate model mod7e <- mirt::mirt(dat, mirtmodel, pars=mod.pars, verbose=TRUE, technical=list( customTheta=Theta, customPriorFun=lca_prior) ) # compare estimated results print(mod7e) summary(mod7b) # The number of estimated parameters is incorrect because mirt does not correctly count # estimated parameters from the user customized prior distribution. mod7e@nest <- as.integer(sum(mod.pars$est) + 2) # two additional class probabilities # extract log-likelihood mod7e@logLik # compute AIC and BIC ( AIC <- -2*mod7e@logLik+2*mod7e@nest ) ( BIC <- -2*mod7e@logLik+log(mod7e@Data$N)*mod7e@nest ) # RMSEA and SRMSR fit statistic mirt::M2(mod7e) # TLI and CFI does not make sense in this example #** extract item parameters sirt::mirt.wrapper.coef(mod7e) #** extract class-specific item-probabilities probs <- apply( coef1[, c("a1","a2","a3") ], 2, stats::plogis ) matplot( probs, type="l", xlab="Item", main="mirt::mirt") #** inspect estimated distribution mod7e@Theta mod7e@Prior[[1]] #***************************************************** # Model 8: Mixed Rasch model with two classes #***************************************************** #---- M8a: raschmix (in psychomix) mod8a <- psychomix::raschmix(data=as.matrix(dat), k=2, scores="saturated") summary(mod8a) #---- M8b: mrm (in mRm) mod8b <- mRm::mrm(data.matrix=dat, cl=2) mod8b$conv.to.bound plot(mod8b) print(mod8b) #---- M8c: mirt (in mirt) #* define theta grid theta.k <- seq( -5, 5, len=9 ) TP <- length(theta.k) Theta <- matrix( 0, nrow=2*TP, ncol=4) Theta[1:TP,1:2] <- cbind(theta.k, 1 ) Theta[1:TP + TP,3:4] <- cbind(theta.k, 1 ) Theta # define model I <- ncol(dat) # I=12 mirtmodel <- mirt::mirt.model(" F1a=1-12 # slope Class 1 F1b=1-12 # difficulty Class 1 F2a=1-12 # slope Class 2 F2b=1-12 # difficulty Class 2 CONSTRAIN=(1-12,a1),(1-12,a3) ") # get initial parameter values mod.pars <- mirt::mirt(dat, model=mirtmodel, pars="values") # set starting values for class specific item probabilities mod.pars[ mod.pars$name=="d","value" ] <- 0 mod.pars[ mod.pars$name=="d","est" ] <- FALSE mod.pars[ mod.pars$name=="a1","value" ] <- 1 mod.pars[ mod.pars$name=="a3","value" ] <- 1 # initial values difficulties b1 <- stats::qlogis( colMeans(dat) ) mod.pars[ mod.pars$name=="a2","value" ] <- b1 mod.pars[ mod.pars$name=="a4","value" ] <- b1 + stats::runif(I, -1, 1) #* define prior for mixed Rasch analysis mixed_prior <- function(Theta,Etable){ NC <- 2 # number of theta classes TP <- nrow(Theta) / NC prior1 <- stats::dnorm( Theta[1:TP,1] ) prior1 <- prior1 / sum(prior1) if ( is.null(Etable) ){ prior <- c( prior1, prior1 ) } if ( ! is.null(Etable) ){ prior <- ( rowSums( Etable[, seq(1,2*I,2)] ) + rowSums( Etable[,seq(2,2*I,2)]) )/I a1 <- stats::aggregate( prior, list( rep(1:NC, each=TP) ), sum ) a1[,2] <- a1[,2] / sum( a1[,2]) # print some information during estimation cat( paste0( " Class proportions: ", paste0( round(a1[,2], 3 ), collapse=" " ) ), "\n") a1 <- rep( a1[,2], each=TP ) # specify mixture of two normal distributions prior <- a1*c(prior1,prior1) } prior <- prior / sum(prior) return(prior) } #* estimate model mod8c <- mirt::mirt(dat, mirtmodel, pars=mod.pars, verbose=TRUE, technical=list( customTheta=Theta, customPriorFun=mixed_prior ) ) # Like in Model 7e, the number of estimated parameters must be included. mod8c@nest <- as.integer(sum(mod.pars$est) + 1) # two class proportions and therefore one probability is freely estimated. #* extract item parameters sirt::mirt.wrapper.coef(mod8c) #* estimated distribution mod8c@Theta mod8c@Prior #---- M8d: tamaan (in TAM) tammodel <- " ANALYSIS: TYPE=MIXTURE ; NCLASSES(2); NSTARTS(7,20); LAVAAN MODEL: F=~ A1__C4 F ~~ F ITEM TYPE: ALL(Rasch); " mod8d <- TAM::tamaan( tammodel, resp=dat ) summary(mod8d) # plot item parameters I <- 12 ipars <- mod8d$itempartable_MIXTURE[ 1:I, ] plot( 1:I, ipars[,3], type="o", ylim=range( ipars[,3:4] ), pch=16, xlab="Item", ylab="Item difficulty") lines( 1:I, ipars[,4], type="l", col=2, lty=2) points( 1:I, ipars[,4], col=2, pch=2) #***************************************************** # Model 9: Mixed 2PL model with two classes #***************************************************** #---- M9a: tamaan (in TAM) tammodel <- " ANALYSIS: TYPE=MIXTURE ; NCLASSES(2); NSTARTS(10,30); LAVAAN MODEL: F=~ A1__C4 F ~~ F ITEM TYPE: ALL(2PL); " mod9a <- TAM::tamaan( tammodel, resp=dat ) summary(mod9a) #***************************************************** # Model 10: Rasch testlet model #***************************************************** #---- M10a: tam.fa (in TAM) dims <- substring( colnames(dat),1,1 ) # define dimensions mod10a <- TAM::tam.fa( resp=dat, irtmodel="bifactor1", dims=dims, control=list(maxiter=60) ) summary(mod10a) #---- M10b: mirt (in mirt) cmodel <- mirt::mirt.model(" G=1-12 A=1-4 B=5-8 C=9-12 CONSTRAIN=(1-12,a1), (1-4, a2), (5-8, a3), (9-12,a4) ") mod10b <- mirt::mirt(dat, model=cmodel, verbose=TRUE) summary(mod10b) coef(mod10b) mod10b@logLik # equivalent is slot( mod10b, "logLik") #alternatively, using a dimensional reduction approach (faster and better accuracy) cmodel <- mirt::mirt.model(" G=1-12 CONSTRAIN=(1-12,a1), (1-4, a2), (5-8, a3), (9-12,a4) ") item_bundles <- rep(c(1,2,3), each=4) mod10b1 <- mirt::bfactor(dat, model=item_bundles, model2=cmodel, verbose=TRUE) coef(mod10b1) #---- M10c: smirt (in sirt) # define Q-matrix Qmatrix <- matrix(0,12,4) Qmatrix[,1] <- 1 Qmatrix[ cbind( 1:12, match( dims, unique(dims)) +1 ) ] <- 1 # uncorrelated factors variance.fixed <- cbind( c(1,1,1,2,2,3), c(2,3,4,3,4,4), 0 ) # estimate model mod10c <- sirt::smirt( dat, Qmatrix=Qmatrix, irtmodel="comp", variance.fixed=variance.fixed, qmcnodes=1000, maxiter=60) summary(mod10c) #***************************************************** # Model 11: Bifactor model #***************************************************** #---- M11a: tam.fa (in TAM) dims <- substring( colnames(dat),1,1 ) # define dimensions mod11a <- TAM::tam.fa( resp=dat, irtmodel="bifactor2", dims=dims, control=list(maxiter=60) ) summary(mod11a) #---- M11b: bfactor (in mirt) dims1 <- match( dims, unique(dims) ) mod11b <- mirt::bfactor(dat, model=dims1, verbose=TRUE) summary(mod11b) coef(mod11b) mod11b@logLik #---- M11c: smirt (in sirt) # define Q-matrix Qmatrix <- matrix(0,12,4) Qmatrix[,1] <- 1 Qmatrix[ cbind( 1:12, match( dims, unique(dims)) +1 ) ] <- 1 # uncorrelated factors variance.fixed <- cbind( c(1,1,1,2,2,3), c(2,3,4,3,4,4), 0 ) # estimate model mod11c <- sirt::smirt( dat, Qmatrix=Qmatrix, irtmodel="comp", est.a="2PL", variance.fixed=variance.fixed, qmcnodes=1000, maxiter=60) summary(mod11c) #***************************************************** # Model 12: Located latent class model: Rasch model with three theta classes #***************************************************** # use 10th item as the reference item ref.item <- 10 # ability grid theta.k <- seq(-4,4,len=9) #---- M12a: rasch.mirtlc (in sirt) mod12a <- sirt::rasch.mirtlc(dat, Nclasses=3, modeltype="MLC1", ref.item=ref.item) summary(mod12a) #---- M12b: gdm (in CDM) theta.k <- seq(-1, 1, len=3) # initial matrix b.constraint <- matrix( c(10,1,0), nrow=1,ncol=3) # estimate model mod12b <- CDM::gdm( dat, theta.k=theta.k, skillspace="est", irtmodel="1PL", b.constraint=b.constraint, maxiter=200) summary(mod12b) #---- M12c: mirt (in mirt) items <- colnames(dat) # define three latent classes Theta <- diag(3) # define mirt model I <- ncol(dat) # I=12 mirtmodel <- mirt::mirt.model(" C1=1-12 C2=1-12 C3=1-12 CONSTRAIN=(1-12,a1),(1-12,a2),(1-12,a3) ") # get parameters mod.pars <- mirt(dat, model=mirtmodel, pars="values") # set starting values for class specific item probabilities mod.pars[ mod.pars$name=="d","value" ] <- stats::qlogis( colMeans(dat,na.rm=TRUE) ) # set item difficulty of reference item to zero ind <- which( ( paste(mod.pars$item)==items[ref.item] ) & ( ( paste(mod.pars$name)=="d" ) ) ) mod.pars[ ind,"value" ] <- 0 mod.pars[ ind,"est" ] <- FALSE # initial values for a1, a2 and a3 mod.pars[ mod.pars$name %in% c("a1","a2","a3"),"value" ] <- c(-1,0,1) mod.pars #* define prior for latent class analysis lca_prior <- function(Theta,Etable){ # number of latent Theta classes TP <- nrow(Theta) # prior in initial iteration if ( is.null(Etable) ){ prior <- rep( 1/TP, TP ) } # process Etable (this is correct for datasets without missing data) if ( ! is.null(Etable) ){ # sum over correct and incorrect expected responses prior <- ( rowSums( Etable[, seq(1,2*I,2)] ) + rowSums( Etable[, seq(2,2*I,2)] ) )/I } prior <- prior / sum(prior) return(prior) } #* estimate model mod12c <- mirt(dat, mirtmodel, technical=list( customTheta=Theta, customPriorFun=lca_prior), pars=mod.pars, verbose=TRUE ) # estimated parameters from the user customized prior distribution. mod12c@nest <- as.integer(sum(mod.pars$est) + 2) #* extract item parameters coef1 <- sirt::mirt.wrapper.coef(mod12c) #* inspect estimated distribution mod12c@Theta coef1$coef[1,c("a1","a2","a3")] mod12c@Prior[[1]] #***************************************************** # Model 13: Multidimensional model with discrete traits #***************************************************** # define Q-Matrix Q <- matrix( 0, nrow=12,ncol=3) Q[1:4,1] <- 1 Q[5:8,2] <- 1 Q[9:12,3] <- 1 # define discrete theta distribution with 3 dimensions Theta <- scan(what="character",nlines=1) 000 100 010 001 110 101 011 111 Theta <- as.numeric( unlist( lapply( Theta, strsplit, split="") ) ) Theta <- matrix(Theta, 8, 3, byrow=TRUE ) Theta #---- Model 13a: din (in CDM) mod13a <- CDM::din( dat, q.matrix=Q, rule="DINA") summary(mod13a) # compare used Theta distributions cbind( Theta, mod13a$attribute.patt.splitted) #---- Model 13b: gdm (in CDM) mod13b <- CDM::gdm( dat, Qmatrix=Q, theta.k=Theta, skillspace="full") summary(mod13b) #---- Model 13c: mirt (in mirt) # define mirt model I <- ncol(dat) # I=12 mirtmodel <- mirt::mirt.model(" F1=1-4 F2=5-8 F3=9-12 ") # get parameters mod.pars <- mirt(dat, model=mirtmodel, pars="values") # starting values d parameters (transformed guessing parameters) ind <- which( mod.pars$name=="d" ) mod.pars[ind,"value"] <- stats::qlogis(.2) # starting values transformed slipping parameters ind <- which( ( mod.pars$name %in% paste0("a",1:3) ) & ( mod.pars$est ) ) mod.pars[ind,"value"] <- stats::qlogis(.8) - stats::qlogis(.2) mod.pars #* define prior for latent class analysis lca_prior <- function(Theta,Etable){ TP <- nrow(Theta) if ( is.null(Etable) ){ prior <- rep( 1/TP, TP ) } if ( ! is.null(Etable) ){ prior <- ( rowSums( Etable[, seq(1,2*I,2)] ) + rowSums( Etable[, seq(2,2*I,2)] ) )/I } prior <- prior / sum(prior) return(prior) } #* estimate model mod13c <- mirt(dat, mirtmodel, technical=list( customTheta=Theta, customPriorFun=lca_prior), pars=mod.pars, verbose=TRUE ) # estimated parameters from the user customized prior distribution. mod13c@nest <- as.integer(sum(mod.pars$est) + 2) #* extract item parameters coef13c <- sirt::mirt.wrapper.coef(mod13c)$coef #* inspect estimated distribution mod13c@Theta mod13c@Prior[[1]] #-* comparisons of estimated parameters # extract guessing and slipping parameters from din dfr <- coef(mod13a)[, c("guess","slip") ] colnames(dfr) <- paste0("din.",c("guess","slip") ) # estimated parameters from gdm dfr$gdm.guess <- stats::plogis(mod13b$item$b) dfr$gdm.slip <- 1 - stats::plogis( rowSums(mod13b$item[,c("b.Cat1","a.F1","a.F2","a.F3")] ) ) # estimated parameters from mirt dfr$mirt.guess <- stats::plogis( coef13c$d ) dfr$mirt.slip <- 1 - stats::plogis( rowSums(coef13c[,c("d","a1","a2","a3")]) ) # comparison round(dfr[, c(1,3,5,2,4,6)],3) ## din.guess gdm.guess mirt.guess din.slip gdm.slip mirt.slip ## A1 0.691 0.684 0.686 0.000 0.000 0.000 ## A2 0.491 0.489 0.489 0.031 0.038 0.036 ## A3 0.302 0.300 0.300 0.184 0.193 0.190 ## A4 0.244 0.239 0.240 0.337 0.340 0.339 ## B1 0.568 0.579 0.577 0.163 0.148 0.151 ## B2 0.329 0.344 0.340 0.344 0.326 0.329 ## B3 0.817 0.827 0.825 0.014 0.007 0.009 ## B4 0.431 0.463 0.456 0.104 0.089 0.092 ## C1 0.188 0.191 0.189 0.013 0.013 0.013 ## C2 0.050 0.050 0.050 0.239 0.238 0.239 ## C3 0.000 0.002 0.001 0.065 0.065 0.065 ## C4 0.000 0.004 0.000 0.212 0.212 0.212 # estimated class sizes dfr <- data.frame( "Theta"=Theta, "din"=mod13a$attribute.patt$class.prob, "gdm"=mod13b$pi.k, "mirt"=mod13c@Prior[[1]]) # comparison round(dfr,3) ## Theta.1 Theta.2 Theta.3 din gdm mirt ## 1 0 0 0 0.039 0.041 0.040 ## 2 1 0 0 0.008 0.009 0.009 ## 3 0 1 0 0.009 0.007 0.008 ## 4 0 0 1 0.394 0.417 0.412 ## 5 1 1 0 0.011 0.011 0.011 ## 6 1 0 1 0.017 0.042 0.037 ## 7 0 1 1 0.042 0.008 0.016 ## 8 1 1 1 0.480 0.465 0.467 #***************************************************** # Model 14: DINA model with two skills #***************************************************** # define some simple Q-Matrix (does not really make in this application) Q <- matrix( 0, nrow=12,ncol=2) Q[1:4,1] <- 1 Q[5:8,2] <- 1 Q[9:12,1:2] <- 1 # define discrete theta distribution with 3 dimensions Theta <- scan(what="character",nlines=1) 00 10 01 11 Theta <- as.numeric( unlist( lapply( Theta, strsplit, split="") ) ) Theta <- matrix(Theta, 4, 2, byrow=TRUE ) Theta #---- Model 14a: din (in CDM) mod14a <- CDM::din( dat, q.matrix=Q, rule="DINA") summary(mod14a) # compare used Theta distributions cbind( Theta, mod14a$attribute.patt.splitted) #---- Model 14b: mirt (in mirt) # define mirt model I <- ncol(dat) # I=12 mirtmodel <- mirt::mirt.model(" F1=1-4 F2=5-8 (F1*F2)=9-12 ") #-> constructions like (F1*F2*F3) are also allowed in mirt.model # get parameters mod.pars <- mirt(dat, model=mirtmodel, pars="values") # starting values d parameters (transformed guessing parameters) ind <- which( mod.pars$name=="d" ) mod.pars[ind,"value"] <- stats::qlogis(.2) # starting values transformed slipping parameters ind <- which( ( mod.pars$name %in% paste0("a",1:3) ) & ( mod.pars$est ) ) mod.pars[ind,"value"] <- stats::qlogis(.8) - stats::qlogis(.2) mod.pars #* use above defined prior lca_prior # lca_prior <- function(prior,Etable) ... #* estimate model mod14b <- mirt(dat, mirtmodel, technical=list( customTheta=Theta, customPriorFun=lca_prior), pars=mod.pars, verbose=TRUE ) # estimated parameters from the user customized prior distribution. mod14b@nest <- as.integer(sum(mod.pars$est) + 2) #* extract item parameters coef14b <- sirt::mirt.wrapper.coef(mod14b)$coef #-* comparisons of estimated parameters # extract guessing and slipping parameters from din dfr <- coef(mod14a)[, c("guess","slip") ] colnames(dfr) <- paste0("din.",c("guess","slip") ) # estimated parameters from mirt dfr$mirt.guess <- stats::plogis( coef14b$d ) dfr$mirt.slip <- 1 - stats::plogis( rowSums(coef14b[,c("d","a1","a2","a3")]) ) # comparison round(dfr[, c(1,3,2,4)],3) ## din.guess mirt.guess din.slip mirt.slip ## A1 0.674 0.671 0.030 0.030 ## A2 0.423 0.420 0.049 0.050 ## A3 0.258 0.255 0.224 0.225 ## A4 0.245 0.243 0.394 0.395 ## B1 0.534 0.543 0.166 0.164 ## B2 0.338 0.347 0.382 0.380 ## B3 0.796 0.802 0.016 0.015 ## B4 0.421 0.436 0.142 0.140 ## C1 0.850 0.851 0.000 0.000 ## C2 0.480 0.480 0.097 0.097 ## C3 0.746 0.746 0.026 0.026 ## C4 0.575 0.577 0.136 0.137 # estimated class sizes dfr <- data.frame( "Theta"=Theta, "din"=mod13a$attribute.patt$class.prob, "mirt"=mod14b@Prior[[1]]) # comparison round(dfr,3) ## Theta.1 Theta.2 din mirt ## 1 0 0 0.357 0.369 ## 2 1 0 0.044 0.049 ## 3 0 1 0.047 0.031 ## 4 1 1 0.553 0.551 #***************************************************** # Model 15: Rasch model with non-normal distribution #***************************************************** # A non-normal theta distributed is specified by log-linear smoothing # the distribution as described in # Xu, X., & von Davier, M. (2008). Fitting the structured general diagnostic model # to NAEP data. ETS Research Report ETS RR-08-27. Princeton, ETS. # define theta grid theta.k <- matrix( seq(-4,4,len=15), ncol=1 ) # define design matrix for smoothing (up to cubic moments) delta.designmatrix <- cbind( 1, theta.k, theta.k^2, theta.k^3 ) # constrain item difficulty of fifth item (item B1) to zero b.constraint <- matrix( c(5,1,0), ncol=3 ) #---- Model 15a: gdm (in CDM) mod15a <- CDM::gdm( dat, irtmodel="1PL", theta.k=theta.k, b.constraint=b.constraint ) summary(mod15a) # plot estimated distribution graphics::barplot( mod15a$pi.k[,1], space=0, names.arg=round(theta.k[,1],2), main="Estimated Skewed Distribution (gdm function)") #---- Model 15b: mirt (in mirt) # define mirt model mirtmodel <- mirt::mirt.model(" F=1-12 ") # get parameters mod.pars <- mirt::mirt(dat, model=mirtmodel, pars="values", itemtype="Rasch") # fix variance (just for correct counting of parameters) mod.pars[ mod.pars$name=="COV_11", "est"] <- FALSE # fix item difficulty ind <- which( ( mod.pars$item=="B1" ) & ( mod.pars$name=="d" ) ) mod.pars[ ind, "value"] <- 0 mod.pars[ ind, "est"] <- FALSE # define prior loglinear_prior <- function(Theta,Etable){ TP <- nrow(Theta) if ( is.null(Etable) ){ prior <- rep( 1/TP, TP ) } # process Etable (this is correct for datasets without missing data) if ( ! is.null(Etable) ){ # sum over correct and incorrect expected responses prior <- ( rowSums( Etable[, seq(1,2*I,2)] ) + rowSums( Etable[, seq(2,2*I,2)] ) )/I # smooth prior using the above design matrix and a log-linear model # see Xu & von Davier (2008). y <- log( prior + 1E-15 ) lm1 <- lm( y ~ 0 + delta.designmatrix, weights=prior ) prior <- exp(fitted(lm1)) # smoothed prior } prior <- prior / sum(prior) return(prior) } #* estimate model mod15b <- mirt::mirt(dat, mirtmodel, technical=list( customTheta=theta.k, customPriorFun=loglinear_prior ), pars=mod.pars, verbose=TRUE ) # estimated parameters from the user customized prior distribution. mod15b@nest <- as.integer(sum(mod.pars$est) + 3) #* extract item parameters coef1 <- sirt::mirt.wrapper.coef(mod15b)$coef #** compare estimated item parameters dfr <- data.frame( "gdm"=mod15a$item$b.Cat1, "mirt"=coef1$d ) rownames(dfr) <- colnames(dat) round(t(dfr),4) ## A1 A2 A3 A4 B1 B2 B3 B4 C1 C2 C3 C4 ## gdm 0.9818 0.1538 -0.7837 -1.3197 0 -1.0902 1.6088 -0.170 1.9778 0.006 1.1859 0.135 ## mirt 0.9829 0.1548 -0.7826 -1.3186 0 -1.0892 1.6099 -0.169 1.9790 0.007 1.1870 0.136 # compare estimated theta distribution dfr <- data.frame( "gdm"=mod15a$pi.k, "mirt"=mod15b@Prior[[1]] ) round(t(dfr),4) ## 1 2 3 4 5 6 7 8 9 10 11 12 13 ## gdm 0 0 1e-04 9e-04 0.0056 0.0231 0.0652 0.1299 0.1881 0.2038 0.1702 0.1129 0.0612 ## mirt 0 0 1e-04 9e-04 0.0056 0.0232 0.0653 0.1300 0.1881 0.2038 0.1702 0.1128 0.0611 ## 14 15 ## gdm 0.0279 0.011 ## mirt 0.0278 0.011 ## End(Not run)
Please choose more modern alternatives, such as Google Chrome or Mozilla Firefox.