Multiple Group Estimation
multipleGroup
performs a full-information
maximum-likelihood multiple group analysis for any combination of dichotomous and polytomous
data under the item response theory paradigm using either Cai's (2010)
Metropolis-Hastings Robbins-Monro (MHRM) algorithm or with an EM algorithm approach. This
function may be used for detecting differential item functioning (DIF), thought the
DIF
function may provide a more convenient approach. If the grouping
variable is not specified then the dentype
input can be modified to fit
mixture models to estimate any latent group components.
multipleGroup( data, model, group, invariance = "", method = "EM", dentype = "Gaussian", ... )
data |
a |
model |
string to be passed to, or a model object returned from, |
group |
a |
invariance |
a character vector containing the following possible options:
Additionally, specifying specific item name bundles (from |
method |
a character object that is either |
dentype |
type of density form to use for the latent trait parameters. Current options include
all of the methods described in
|
... |
additional arguments to be passed to the estimation engine. See |
By default the estimation in multipleGroup
assumes that the models are maximally
independent, and therefore could initially be performed by sub-setting the data and running
identical models with mirt
and aggregating the results (e.g., log-likelihood).
However, constrains may be automatically imposed across groups by invoking various
invariance
keywords. Users may also supply a list of parameter equality constraints
to by constrain
argument, of define equality constraints using the
mirt.model
syntax (recommended).
function returns an object of class MultipleGroupClass
(MultipleGroupClass-class).
Phil Chalmers rphilip.chalmers@gmail.com
Chalmers, R., P. (2012). mirt: A Multidimensional Item Response Theory Package for the R Environment. Journal of Statistical Software, 48(6), 1-29. doi: 10.18637/jss.v048.i06
mirt
, DIF
, extract.group
, DRF
## Not run: #single factor set.seed(12345) a <- matrix(abs(rnorm(15,1,.3)), ncol=1) d <- matrix(rnorm(15,0,.7),ncol=1) itemtype <- rep('2PL', nrow(a)) N <- 1000 dataset1 <- simdata(a, d, N, itemtype) dataset2 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5)) dat <- rbind(dataset1, dataset2) group <- c(rep('D1', N), rep('D2', N)) mod_configural <- multipleGroup(dat, 1, group = group) #completely separate analyses #limited information fit statistics M2(mod_configural) mod_metric <- multipleGroup(dat, 1, group = group, invariance=c('slopes')) #equal slopes #equal intercepts, free variance and means mod_scalar2 <- multipleGroup(dat, 1, group = group, invariance=c('slopes', 'intercepts', 'free_var','free_means')) mod_scalar1 <- multipleGroup(dat, 1, group = group, #fixed means invariance=c('slopes', 'intercepts', 'free_var')) mod_fullconstrain <- multipleGroup(dat, 1, group = group, invariance=c('slopes', 'intercepts')) extract.mirt(mod_fullconstrain, 'time') #time of estimation components #optionally use Newton-Raphson for (generally) faster convergence in the M-step's mod_fullconstrain <- multipleGroup(dat, 1, group = group, optimizer = 'NR', invariance=c('slopes', 'intercepts')) extract.mirt(mod_fullconstrain, 'time') #time of estimation components summary(mod_scalar2) coef(mod_scalar2, simplify=TRUE) residuals(mod_scalar2) plot(mod_configural) plot(mod_configural, type = 'info') plot(mod_configural, type = 'trace') plot(mod_configural, type = 'trace', which.items = 1:4) itemplot(mod_configural, 2) itemplot(mod_configural, 2, type = 'RE') anova(mod_metric, mod_configural) #equal slopes only anova(mod_scalar2, mod_metric) #equal intercepts, free variance and mean anova(mod_scalar1, mod_scalar2) #fix mean anova(mod_fullconstrain, mod_scalar1) #fix variance #test whether first 6 slopes should be equal across groups values <- multipleGroup(dat, 1, group = group, pars = 'values') values constrain <- list(c(1, 63), c(5,67), c(9,71), c(13,75), c(17,79), c(21,83)) equalslopes <- multipleGroup(dat, 1, group = group, constrain = constrain) anova(equalslopes, mod_configural) #same as above, but using mirt.model syntax newmodel <- ' F = 1-15 CONSTRAINB = (1-6, a1)' equalslopes <- multipleGroup(dat, newmodel, group = group) coef(equalslopes, simplify=TRUE) ############ # vertical scaling (i.e., equating when groups answer items others do not) dat2 <- dat dat2[group == 'D1', 1:2] <- dat2[group != 'D1', 14:15] <- NA head(dat2) tail(dat2) # items with missing responses need to be constrained across groups for identification nms <- colnames(dat2) mod <- multipleGroup(dat2, 1, group, invariance = nms[c(1:2, 14:15)]) # this will throw an error without proper constraints (SEs cannot be computed either) # mod <- multipleGroup(dat2, 1, group) # model still does not have anchors, therefore need to add a few (here use items 3-5) mod_anchor <- multipleGroup(dat2, 1, group, invariance = c(nms[c(1:5, 14:15)], 'free_means', 'free_var')) coef(mod_anchor, simplify=TRUE) # check if identified by computing information matrix mod_anchor <- multipleGroup(dat2, 1, group, pars = mod2values(mod_anchor), TOL=NaN, SE=TRUE, invariance = c(nms[c(1:5, 14:15)], 'free_means', 'free_var')) mod_anchor coef(mod_anchor) coef(mod_anchor, printSE=TRUE) ############# #DIF test for each item (using all other items as anchors) itemnames <- colnames(dat) refmodel <- multipleGroup(dat, 1, group = group, SE=TRUE, invariance=c('free_means', 'free_var', itemnames)) #loop over items (in practice, run in parallel to increase speed). May be better to use ?DIF estmodels <- vector('list', ncol(dat)) for(i in 1:ncol(dat)) estmodels[[i]] <- multipleGroup(dat, 1, group = group, verbose = FALSE, invariance=c('free_means', 'free_var', itemnames[-i])) (anovas <- lapply(estmodels, anova, object2=refmodel, verbose=FALSE)) #family-wise error control p <- do.call(rbind, lapply(anovas, function(x) x[2, 'p'])) p.adjust(p, method = 'BH') #same as above, except only test if slopes vary (1 df) #constrain all intercepts estmodels <- vector('list', ncol(dat)) for(i in 1:ncol(dat)) estmodels[[i]] <- multipleGroup(dat, 1, group = group, verbose = FALSE, invariance=c('free_means', 'free_var', 'intercepts', itemnames[-i])) (anovas <- lapply(estmodels, anova, object2=refmodel, verbose=FALSE)) #quickly test with Wald test using DIF() mod_configural2 <- multipleGroup(dat, 1, group = group, SE=TRUE) DIF(mod_configural2, which.par = c('a1', 'd'), Wald=TRUE, p.adjust = 'fdr') ############# # Three group model where the latent variable parameters are constrained to # be equal in the focal groups set.seed(12345) a <- matrix(abs(rnorm(15,1,.3)), ncol=1) d <- matrix(rnorm(15,0,.7),ncol=1) itemtype <- rep('2PL', nrow(a)) N <- 1000 dataset1 <- simdata(a, d, N, itemtype) dataset2 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5)) dataset3 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5)) dat <- rbind(dataset1, dataset2, dataset3) group <- rep(c('D1', 'D2', 'D3'), each=N) model <- 'F1 = 1-15 FREE[D2, D3] = (GROUP, MEAN_1), (GROUP, COV_11) CONSTRAINB[D2,D3] = (GROUP, MEAN_1), (GROUP, COV_11)' mod <- multipleGroup(dat, model, group = group, invariance = colnames(dat)) coef(mod, simplify=TRUE) ############# #multiple factors a <- matrix(c(abs(rnorm(5,1,.3)), rep(0,15),abs(rnorm(5,1,.3)), rep(0,15),abs(rnorm(5,1,.3))), 15, 3) d <- matrix(rnorm(15,0,.7),ncol=1) mu <- c(-.4, -.7, .1) sigma <- matrix(c(1.21,.297,1.232,.297,.81,.252,1.232,.252,1.96),3,3) itemtype <- rep('2PL', nrow(a)) N <- 1000 dataset1 <- simdata(a, d, N, itemtype) dataset2 <- simdata(a, d, N, itemtype, mu = mu, sigma = sigma) dat <- rbind(dataset1, dataset2) group <- c(rep('D1', N), rep('D2', N)) #group models model <- ' F1 = 1-5 F2 = 6-10 F3 = 11-15' #define mirt cluster to use parallel architecture mirtCluster() #EM approach (not as accurate with 3 factors, but generally good for quick model comparisons) mod_configural <- multipleGroup(dat, model, group = group) #completely separate analyses mod_metric <- multipleGroup(dat, model, group = group, invariance=c('slopes')) #equal slopes mod_fullconstrain <- multipleGroup(dat, model, group = group, #equal means, slopes, intercepts invariance=c('slopes', 'intercepts')) anova(mod_metric, mod_configural) anova(mod_fullconstrain, mod_metric) #same as above, but with MHRM (generally more accurate with 3+ factors, but slower) mod_configural <- multipleGroup(dat, model, group = group, method = 'MHRM') mod_metric <- multipleGroup(dat, model, group = group, invariance=c('slopes'), method = 'MHRM') mod_fullconstrain <- multipleGroup(dat, model, group = group, method = 'MHRM', invariance=c('slopes', 'intercepts')) anova(mod_metric, mod_configural) anova(mod_fullconstrain, mod_metric) ############ #polytomous item example set.seed(12345) a <- matrix(abs(rnorm(15,1,.3)), ncol=1) d <- matrix(rnorm(15,0,.7),ncol=1) d <- cbind(d, d-1, d-2) itemtype <- rep('graded', nrow(a)) N <- 1000 dataset1 <- simdata(a, d, N, itemtype) dataset2 <- simdata(a, d, N, itemtype, mu = .1, sigma = matrix(1.5)) dat <- rbind(dataset1, dataset2) group <- c(rep('D1', N), rep('D2', N)) model <- 'F1 = 1-15' mod_configural <- multipleGroup(dat, model, group = group) plot(mod_configural) plot(mod_configural, type = 'SE') itemplot(mod_configural, 1) itemplot(mod_configural, 1, type = 'info') plot(mod_configural, type = 'trace') # messy, score function typically better plot(mod_configural, type = 'itemscore') fs <- fscores(mod_configural, full.scores = FALSE) head(fs[["D1"]]) fscores(mod_configural, method = 'EAPsum', full.scores = FALSE) # constrain slopes within each group to be equal (but not across groups) model2 <- 'F1 = 1-15 CONSTRAIN = (1-15, a1)' mod_configural2 <- multipleGroup(dat, model2, group = group) plot(mod_configural2, type = 'SE') plot(mod_configural2, type = 'RE') itemplot(mod_configural2, 10) ############ ## empirical histogram example (normal and bimodal groups) set.seed(1234) a <- matrix(rlnorm(50, .2, .2)) d <- matrix(rnorm(50)) ThetaNormal <- matrix(rnorm(2000)) ThetaBimodal <- scale(matrix(c(rnorm(1000, -2), rnorm(1000,2)))) #bimodal Theta <- rbind(ThetaNormal, ThetaBimodal) dat <- simdata(a, d, 4000, itemtype = '2PL', Theta=Theta) group <- rep(c('G1', 'G2'), each=2000) EH <- multipleGroup(dat, 1, group=group, dentype="empiricalhist", invariance = colnames(dat)) coef(EH, simplify=TRUE) plot(EH, type = 'empiricalhist', npts = 60) #DIF test for item 1 EH1 <- multipleGroup(dat, 1, group=group, dentype="empiricalhist", invariance = colnames(dat)[-1]) anova(EH, EH1) #-------------------------------- # Mixture model (no prior group variable specified) set.seed(12345) nitems <- 20 a1 <- matrix(.75, ncol=1, nrow=nitems) a2 <- matrix(1.25, ncol=1, nrow=nitems) d1 <- matrix(rnorm(nitems,0,1),ncol=1) d2 <- matrix(rnorm(nitems,0,1),ncol=1) itemtype <- rep('2PL', nrow(a1)) N1 <- 500 N2 <- N1*2 # second class twice as large dataset1 <- simdata(a1, d1, N1, itemtype) dataset2 <- simdata(a2, d2, N2, itemtype) dat <- rbind(dataset1, dataset2) # group <- c(rep('D1', N1), rep('D2', N2)) # Mixture Rasch model (Rost, 1990) models <- 'F1 = 1-20 CONSTRAIN = (1-20, a1)' mod_mix <- multipleGroup(dat, models, dentype = 'mixture-2', GenRandomPars = TRUE) coef(mod_mix, simplify=TRUE) summary(mod_mix) plot(mod_mix) plot(mod_mix, type = 'trace') itemplot(mod_mix, 1, type = 'info') head(fscores(mod_mix)) # theta estimates head(fscores(mod_mix, method = 'classify')) # classification probability itemfit(mod_mix) # Mixture 2PL model mod_mix2 <- multipleGroup(dat, 1, dentype = 'mixture-2', GenRandomPars = TRUE) anova(mod_mix2, mod_mix) coef(mod_mix2, simplify=TRUE) itemfit(mod_mix2) # Zero-inflated 2PL IRT model model <- "F = 1-20 START [MIXTURE_1] = (GROUP, MEAN_1, -100), (GROUP, COV_11, .00001), (1-20, a1, 1.0), (1-20, d, 0.0) FIXED [MIXTURE_1] = (GROUP, MEAN_1), (GROUP, COV_11), (1-20, a1), (1-20, d)" zip <- multipleGroup(dat, model, dentype = 'mixture-2') coef(zip, simplify=TRUE) ## End(Not run)
Please choose more modern alternatives, such as Google Chrome or Mozilla Firefox.