# Simulate response patterns under the framework of IRT

Using mirt package

R
Simulation
Mirt
Author

Jihong Zhang

Published

April 15, 2024

## 1 Research question:

1. How different methods of cutting item response affecting parameter estimation accuracy?
• Merge first two categories
• Merge the middle category into the neighboring category

We need to pre-specify `a` (item slopes) and `b` (item intercepts). `mu` (factor mean) and `sigma` (factor correlation) are optional.

``````library(mirt)
# Simulation for 6 5-category items
set.seed(1234)
J = 6 # number of items
N = 1000 # number of participants
a <- matrix(rlnorm(J, mean = 0, sd = 1), ncol = 1)
a``````
``````           [,1]
[1,] 0.29907355
[2,] 1.31973273
[3,] 2.95778648
[4,] 0.09578035
[5,] 1.53591253
[6,] 1.65873604``````
``````# for the graded model, ensure that there is enough space between the intercepts,
# otherwise closer categories will not be selected often (minimum distance of 0.3 here)
diffs <- t(apply(matrix(runif(J*4, .3, 1), nrow = J), 1, cumsum))
d <- -diffs + rnorm(J) + 4
d``````
``````         [,1]     [,2]     [,3]      [,4]
[1,] 2.664915 2.234209 1.781049 1.1617851
[2,] 5.469432 5.006874 4.139455 3.6538239
[3,] 3.629467 3.107838 2.439850 1.9265796
[4,] 2.623207 2.111322 1.171061 0.5159463
[5,] 3.059096 2.647764 1.765822 1.3390547
[6,] 3.972815 3.644818 3.312779 2.4810091``````

## 2 Data generation

### 2.1 Model 1: population Data

``````## Population data: 5-category: 0-4
dat1 <- simdata(a, d, N, itemtype = rep('graded', 6))
``````     Item_1 Item_2 Item_3 Item_4 Item_5 Item_6
[1,]      4      4      0      2      2      3
[2,]      4      4      0      3      1      1
[3,]      4      4      4      3      4      4
[4,]      4      4      4      4      3      4
[5,]      4      4      2      2      4      4
[6,]      4      4      0      2      4      4``````
``apply(dat1, 2, min)``
``````Item_1 Item_2 Item_3 Item_4 Item_5 Item_6
0      0      0      0      0      0 ``````
``apply(dat1, 2, max)``
``````Item_1 Item_2 Item_3 Item_4 Item_5 Item_6
4      4      4      4      4      4 ``````

### 2.2 Model 2: 4-category data with first two categories merged

``````## Data2: Merged-category data: 4-category, 0-1->0,2->1,3->2,4->3
dat2 <- dat1
dat2[dat2%in%0:1] <- 0
dat2[dat2==2] <- 1
dat2[dat2==3] <- 2
dat2[dat2==4] <- 3
apply(dat2, 2, min)``````
``````Item_1 Item_2 Item_3 Item_4 Item_5 Item_6
0      0      0      0      0      0 ``````
``apply(dat2, 2, max)``
``````Item_1 Item_2 Item_3 Item_4 Item_5 Item_6
3      3      3      3      3      3 ``````

### 2.3 Model 3: 4-category data with middle 2 categories merged

``````## Data3: Merged-category data: 4-category, 0->0,1-2->1,3->2,4->3
dat3 <- dat1
dat3[dat3%in%1:2] <- 1
dat3[dat3==3] <- 2
dat3[dat3==4] <- 3
apply(dat3, 2, min)``````
``````Item_1 Item_2 Item_3 Item_4 Item_5 Item_6
0      0      0      0      0      0 ``````
``apply(dat3, 2, max)``
``````Item_1 Item_2 Item_3 Item_4 Item_5 Item_6
3      3      3      3      3      3 ``````

## 3 Data analysis

``````## Population model
mod1 <- mirt(dat1, model = 1)
mod1_param <- coef(mod1, simplify = TRUE)\$items
mod1_itemfit <- itemfit(mod1)

## Tail merge model: model2
mod2 <- mirt(dat2, model = 1)
mod2_param <- coef(mod2, simplify = TRUE)\$items
mod2_itemfit <- itemfit(mod2)

## Middle category merge model: model3
mod3 <- mirt(dat3, model = 1)
mod3_param <- coef(mod3, simplify = TRUE)\$items
mod3_itemfit <- itemfit(mod3)``````

## 4 Parameter recovery

``d``
``````         [,1]     [,2]     [,3]      [,4]
[1,] 2.664915 2.234209 1.781049 1.1617851
[2,] 5.469432 5.006874 4.139455 3.6538239
[3,] 3.629467 3.107838 2.439850 1.9265796
[4,] 2.623207 2.111322 1.171061 0.5159463
[5,] 3.059096 2.647764 1.765822 1.3390547
[6,] 3.972815 3.644818 3.312779 2.4810091``````
``kableExtra::kable(mod1_param[,2:5], digits = 3)``
d1 d2 d3 d4
Item_1 2.736 2.385 1.904 1.305
Item_2 4.859 4.593 3.841 3.362
Item_3 3.313 2.689 2.066 1.585
Item_4 2.701 2.203 1.191 0.533
Item_5 2.984 2.647 1.815 1.395
Item_6 3.998 3.611 3.188 2.445
``````rev_bias <- function(true, est) {
diffs = abs(true - est)
mean(diffs)
}

rmse <- function(true, est) {
sqrt(mean((true - est)^2))
}

Results <- data.frame(
Bias = c(
rev_bias(true = as.numeric(a), est = mod1_param[,'a1']),
rev_bias(true = as.numeric(a), est = mod2_param[,'a1']),
rev_bias(true = as.numeric(a), est = mod3_param[,'a1'])
),
RMSE = c(
rmse(true = as.numeric(a), est = mod1_param[,'a1']),
rmse(true = as.numeric(a), est = mod2_param[,'a1']),
rmse(true = as.numeric(a), est = mod3_param[,'a1'])
)
)
rownames(Results) <- paste0("Model ", 1:3)
kableExtra::kable(Results, digits = 3)``````
Bias RMSE
Model 1 0.138 0.181
Model 2 0.148 0.192
Model 3 0.144 0.199

## 5 Item fit

Model 1 (population model) seems to have overall best item fits. Model 2 (tail merged model) seems to have most worst-fitting items. Model 3 (middle-range merged model) seems to have the worst-fitting item (item 6).

``````RMSEA_mat <- rbind(
mod1 = mod1_itemfit\$RMSEA.S_X2,
mod2 = mod2_itemfit\$RMSEA.S_X2,
mod3 = mod3_itemfit\$RMSEA.S_X2
)
colnames(RMSEA_mat) <- paste0('Item', 1:6)
rownames(RMSEA_mat) <- paste0('Model ', 1:3)
kableExtra::kable(RMSEA_mat, digits = 3)``````
Item1 Item2 Item3 Item4 Item5 Item6
Model 1 0 0 0.007 0.000 0.000 0.015
Model 2 0 0 0.000 0.014 0.014 0.018
Model 3 0 0 0.000 0.000 0.000 0.026

## 6 Global fit

``````M2_df <- rbind(
M2(mod1, "C2"),
M2(mod2, "C2"),
M2(mod3, "C2")
)
rownames(M2_df) <- paste0("Model ", 1:3)
kableExtra::kable(M2_df, digits = 3)``````
M2 df p RMSEA RMSEA_5 RMSEA_95 SRMSR TLI CFI
Model 1 7.446 9 0.591 0.000 0 0.031 0.024 1.006 1.000
Model 2 9.723 9 0.373 0.009 0 0.037 0.027 0.997 0.998
Model 3 9.265 9 0.413 0.005 0 0.036 0.027 0.999 0.999

## 7 MCMC simulation

``````library(parSim)
Results <- parSim(
J = c(6, 10, 20),
N = c(300, 500, 1000),
# Number of repititions?
reps = 100, # more is better!

# Parallel?
nCores = 8,
export = c("rev_bias"),
expression = {
library(mirt)
a <- matrix(rlnorm(J, mean = 0, sd = 1), ncol = 1)
diffs <- t(apply(matrix(runif(J*4, .3, 1), nrow = J), 1, cumsum))
d <- -diffs + rnorm(J) + 4
dat1 <- simdata(a, d, N, itemtype = rep('graded', J))
dat2 <- dat1
dat2[dat2%in%0:1] <- 0
dat2[dat2==2] <- 1
dat2[dat2==3] <- 2
dat2[dat2==4] <- 3
dat3 <- dat1
dat3[dat3%in%1:2] <- 1
dat3[dat3==3] <- 2
dat3[dat3==4] <- 3
## Population model
mod1 <- mirt(dat1, model = 1)
mod1_param <- coef(mod1, simplify = TRUE)\$items
mod1_gfi <- M2(mod1, "C2")

## Tail merge model: model2
mod2 <- mirt(dat2, model = 1)
mod2_param <- coef(mod2, simplify = TRUE)\$items
mod2_gfi <- M2(mod2, "C2")

## middle category merge model: model3
mod3 <- mirt(dat3, model = 1)
mod3_param <- coef(mod3, simplify = TRUE)\$items
mod3_gfi <- M2(mod3, "C2")
## output
data.frame(
bias_mod1 = rev_bias(true = as.numeric(a), est = mod1_param[,'a1']),
bias_mod2 = rev_bias(true = as.numeric(a), est = mod2_param[,'a1']),
bias_mod3 = rev_bias(true = as.numeric(a), est = mod3_param[,'a1']),
RMSEA_mod1 = mod1_gfi\$RMSEA,
RMSEA_mod2 = mod2_gfi\$RMSEA,
RMSEA_mod3 = mod3_gfi\$RMSEA,
M2_mod1 = mod1_gfi\$M2,
M2_mod2 = mod2_gfi\$M2,
M2_mod3 = mod3_gfi\$M2,
SRMSR_mod1 = mod1_gfi\$SRMSR,
SRMSR_mod2 = mod2_gfi\$SRMSR,
SRMSR_mod3 = mod3_gfi\$SRMSR,
TLI_mod1 = mod1_gfi\$TLI,
TLI_mod2 = mod2_gfi\$TLI,
TLI_mod3 = mod3_gfi\$TLI
)
}
)``````
``````library(ggplot2)
library(dplyr)
library(tidyr)
Results_plot <- Results |>
pivot_longer(c(starts_with("bias"),
starts_with("RMSEA"),
starts_with("SRMSR"),
starts_with("TLI")),
names_to = 'Model', values_to = 'Value') |>
separate(Model, into = c("Measure", "Model")) |>
group_by(J, N, Model, Measure) |>
summarise(Value = mean(Value, na.rm = T)) |>
mutate(N = factor(N, levels = c(300, 500, 1000)))

Results_plot |>
ggplot() +
geom_path(aes(x = N, y = Value, col = Model, group = Model)) +
geom_point(aes(x = N, y = Value, col = Model)) +
facet_grid(Measure ~ J, scales = "free") +
theme_bw()``````

## 8 Conclusion

1. There are not much differences in global model fits regarding varied cutting methods of Likert-scale item responses under the conditions in the simulation.
2. Increases of test length and sample sizes will increase estimation accuracy for both cutting methods.
3. Item fits will differ by varied cutting methods.
4. Goodness-of-fit may not perform well evaluating cutting methods of item responses.