Simulate response patterns
Simulates response patterns for compensatory and noncompensatory MIRT models from multivariate normally distributed factor (θ) scores, or from a user input matrix of θ's.
simdata( a, d, N, itemtype, sigma = NULL, mu = NULL, guess = 0, upper = 1, nominal = NULL, t = NULL, Theta = NULL, gpcm_mats = list(), returnList = FALSE, model = NULL, equal.K = TRUE, which.items = NULL, mins = 0, lca_cats = NULL, prob.list = NULL )
a |
a matrix/vector of slope parameters. If slopes are to be constrained to
zero then use |
d |
a matrix/vector of intercepts. The matrix should have as many columns as
the item with the largest number of categories, and filled empty locations
with |
N |
sample size |
itemtype |
a character vector of length If the internal class of the object is specified instead, the inputs can
be |
sigma |
a covariance matrix of the underlying distribution. Default is
the identity matrix. Used when |
mu |
a mean vector of the underlying distribution. Default is a vector
of zeros. Used when |
guess |
a vector of guessing parameters for each item; only applicable for dichotomous items. Must be either a scalar value that will affect all of the dichotomous items, or a vector with as many values as to be simulated items |
upper |
same as |
nominal |
a matrix of specific item category slopes for nominal models.
Should be the dimensions as the intercept specification with one less column, with |
t |
matrix of t-values for the 'ggum' itemtype, where each row corresponds to a given item.
Also determines the number of categories, where |
Theta |
a user specified matrix of the underlying ability parameters,
where |
gpcm_mats |
a list of matrices specifying the scoring scheme for generalized partial
credit models (see |
returnList |
logical; return a list containing the data, item objects defined
by |
model |
a single group object, typically returned by functions such as |
equal.K |
logical; when a |
which.items |
an integer vector used to indicate which items to simulate when a
|
mins |
an integer vector (or single value to be used for each item) indicating what
the lowest category should be. If |
lca_cats |
a vector indicating how many categories each lca item should have. If not supplied then it is assumed that 2 categories should be generated for each item |
prob.list |
an optional list containing matrix/data.frames of probabilities values for each category to be simulated. This is useful when creating customized probability functions to be sampled from |
Returns a data matrix simulated from the parameters, or a list containing the data, item objects, and Theta matrix.
Phil Chalmers rphilip.chalmers@gmail.com
Chalmers, R., P. (2012). mirt: A Multidimensional Item Response Theory Package for the R Environment. Journal of Statistical Software, 48(6), 1-29. doi: 10.18637/jss.v048.i06
Reckase, M. D. (2009). Multidimensional Item Response Theory. New York: Springer.
### Parameters from Reckase (2009), p. 153 set.seed(1234) a <- matrix(c( .7471, .0250, .1428, .4595, .0097, .0692, .8613, .0067, .4040, 1.0141, .0080, .0470, .5521, .0204, .1482, 1.3547, .0064, .5362, 1.3761, .0861, .4676, .8525, .0383, .2574, 1.0113, .0055, .2024, .9212, .0119, .3044, .0026, .0119, .8036, .0008, .1905,1.1945, .0575, .0853, .7077, .0182, .3307,2.1414, .0256, .0478, .8551, .0246, .1496, .9348, .0262, .2872,1.3561, .0038, .2229, .8993, .0039, .4720, .7318, .0068, .0949, .6416, .3073, .9704, .0031, .1819, .4980, .0020, .4115,1.1136, .2008, .1536,1.7251, .0345, .1530, .6688, .0020, .2890,1.2419, .0220, .1341,1.4882, .0050, .0524, .4754, .0012, .2139, .4612, .0063, .1761,1.1200, .0870),30,3,byrow=TRUE)*1.702 d <- matrix(c(.1826,-.1924,-.4656,-.4336,-.4428,-.5845,-1.0403, .6431,.0122,.0912,.8082,-.1867,.4533,-1.8398,.4139, -.3004,-.1824,.5125,1.1342,.0230,.6172,-.1955,-.3668, -1.7590,-.2434,.4925,-.3410,.2896,.006,.0329),ncol=1)*1.702 mu <- c(-.4, -.7, .1) sigma <- matrix(c(1.21,.297,1.232,.297,.81,.252,1.232,.252,1.96),3,3) dataset1 <- simdata(a, d, 2000, itemtype = '2PL') dataset2 <- simdata(a, d, 2000, itemtype = '2PL', mu = mu, sigma = sigma) #mod <- mirt(dataset1, 3, method = 'MHRM') #coef(mod) ## Not run: ### Unidimensional graded response model with 5 categories each a <- matrix(rlnorm(20,.2,.3)) # for the graded model, ensure that there is enough space between the intercepts, # otherwise closer categories will not be selected often (minimum distance of 0.3 here) diffs <- t(apply(matrix(runif(20*4, .3, 1), 20), 1, cumsum)) diffs <- -(diffs - rowMeans(diffs)) d <- diffs + rnorm(20) dat <- simdata(a, d, 500, itemtype = 'graded') # mod <- mirt(dat, 1) ### An example of a mixed item, bifactor loadings pattern with correlated specific factors a <- matrix(c( .8,.4,NA, .4,.4,NA, .7,.4,NA, .8,NA,.4, .4,NA,.4, .7,NA,.4),ncol=3,byrow=TRUE) d <- matrix(c( -1.0,NA,NA, 1.5,NA,NA, 0.0,NA,NA, 0.0,-1.0,1.5, #the first 0 here is the recommended constraint for nominal 0.0,1.0,-1, #the first 0 here is the recommended constraint for gpcm 2.0,0.0,NA),ncol=3,byrow=TRUE) nominal <- matrix(NA, nrow(d), ncol(d)) #the first 0 and last (ncat - 1) = 2 values are the recommended constraints nominal[4, ] <- c(0,1.2,2) sigma <- diag(3) sigma[2,3] <- sigma[3,2] <- .25 items <- c('2PL','2PL','2PL','nominal','gpcm','graded') dataset <- simdata(a,d,2000,items,sigma=sigma,nominal=nominal) #mod <- bfactor(dataset, c(1,1,1,2,2,2), itemtype=c(rep('2PL', 3), 'nominal', 'gpcm','graded')) #coef(mod) #### Convert standardized factor loadings to slopes F2a <- function(F, D=1.702){ h2 <- rowSums(F^2) a <- (F / sqrt(1 - h2)) * D a } (F <- matrix(c(rep(.7, 5), rep(.5,5)))) (a <- F2a(F)) d <- rnorm(10) dat <- simdata(a, d, 5000, itemtype = '2PL') mod <- mirt(dat, 1) coef(mod, simplify=TRUE)$items summary(mod) mod2 <- mirt(dat, 'F1 = 1-10 CONSTRAIN = (1-5, a1), (6-10, a1)') summary(mod2) anova(mod, mod2) #### Unidimensional nonlinear factor pattern theta <- rnorm(2000) Theta <- cbind(theta,theta^2) a <- matrix(c( .8,.4, .4,.4, .7,.4, .8,NA, .4,NA, .7,NA),ncol=2,byrow=TRUE) d <- matrix(rnorm(6)) itemtype <- rep('2PL',6) nonlindata <- simdata(a=a, d=d, itemtype=itemtype, Theta=Theta) #model <- ' #F1 = 1-6 #(F1 * F1) = 1-3' #mod <- mirt(nonlindata, model) #coef(mod) #### 2PLNRM model for item 4 (with 4 categories), 2PL otherwise a <- matrix(rlnorm(4,0,.2)) #first column of item 4 is the intercept for the correct category of 2PL model, # otherwise nominal model configuration d <- matrix(c( -1.0,NA,NA,NA, 1.5,NA,NA,NA, 0.0,NA,NA,NA, 1, 0.0,-0.5,0.5),ncol=4,byrow=TRUE) nominal <- matrix(NA, nrow(d), ncol(d)) nominal[4, ] <- c(NA,0,.5,.6) items <- c(rep('2PL',3),'nestlogit') dataset <- simdata(a,d,2000,items,nominal=nominal) #mod <- mirt(dataset, 1, itemtype = c('2PL', '2PL', '2PL', '2PLNRM'), key=c(NA,NA,NA,1)) #coef(mod) #itemplot(mod,4) #return list of simulation parameters listobj <- simdata(a,d,2000,items,nominal=nominal, returnList=TRUE) str(listobj) # generate dataset from converged model mod <- mirt(Science, 1, itemtype = c(rep('gpcm', 3), 'nominal')) sim <- simdata(model=mod, N=1000) head(sim) Theta <- matrix(rnorm(100)) sim <- simdata(model=mod, Theta=Theta) head(sim) # alternatively, define a suitable object with functions from the mirtCAT package # help(generate.mirt_object) library(mirtCAT) nitems <- 50 a1 <- rlnorm(nitems, .2,.2) d <- rnorm(nitems) g <- rbeta(nitems, 20, 80) pars <- data.frame(a1=a1, d=d, g=g) head(pars) obj <- generate.mirt_object(pars, '3PL') dat <- simdata(N=200, model=obj) #### 10 item GGUMs test with 4 categories each a <- rlnorm(10, .2, .2) b <- rnorm(10) #passed to d= input, but used as the b parameters diffs <- t(apply(matrix(runif(10*3, .3, 1), 10), 1, cumsum)) t <- -(diffs - rowMeans(diffs)) dat <- simdata(a, b, 1000, 'ggum', t=t) apply(dat, 2, table) # mod <- mirt(dat, 1, 'ggum') # coef(mod) ###### # prob.list example # custom probabilty function that returns a matrix fun <- function(a, b, theta){ P <- 1 / (1 + exp(-a * (theta-b))) cbind(1-P, P) } set.seed(1) theta <- matrix(rnorm(100)) prob.list <- list() nitems <- 5 a <- rlnorm(nitems, .2, .2); b <- rnorm(nitems, 0, 1/2) for(i in 1:nitems) prob.list[[i]] <- fun(a[i], b[i], theta) str(prob.list) dat <- simdata(prob.list=prob.list) head(dat) # prob.list input is useful when defining custom items as well name <- 'old2PL' par <- c(a = .5, b = -2) est <- c(TRUE, TRUE) P.old2PL <- function(par,Theta, ncat){ a <- par[1] b <- par[2] P1 <- 1 / (1 + exp(-1*a*(Theta - b))) cbind(1-P1, P1) } x <- createItem(name, par=par, est=est, P=P.old2PL) prob.list[[1]] <- x@P(x@par, theta) ## End(Not run)
Please choose more modern alternatives, such as Google Chrome or Mozilla Firefox.