Introduction

This document contains the replication code for:

Schalet, B. D., Lim, S., Cella, D., Choi, S. W. Linking scores with patient-reported health outcome instruments: A validation study and comparison of three linking methods. Psychometrika.

File descriptions

The root path contains 4 .R files and 3 folders.

  • data: contains main datasets used in the study.
  • figures: contains figures and tables used in the study and the code used to create them.
  • results: contains raw simulation output.
  • sim_eval_table.R: contains a helper function used in the main simulation.
  • sim_get_data.R: contains a helper function used in the main simulation.
  • sim_get_ipar.R: contains a helper function used in the main simulation.
  • sim_main.R: contains the main simulation code.

Simulation

Helper functions

Helper functions necessary for the main simulation are first loaded.

gen_theta() is a function that generates 1000 \(\theta\) values from a normal distribution.

  • mu_sigma contains the distribution parameters.

gen_data() is a function that generates a sample dataset from supplied \(\theta\) values.

  • true_theta contains \(\theta\) values.
  • ipar_a contains a-parameters.
  • ipar_d contains d-parameters.
  • The function uses graded response model.
# sim_get_data.R

gen_theta <- function(mu_sigma) {

  if (all(mu_sigma$sigma == 1)) {
    true_theta <- rmvn(1000, mu_sigma$mu[1], mu_sigma$sigma[1, 1])
    true_theta <- cbind(true_theta, true_theta)
  } else {
    true_theta <- rmvn(1000, mu_sigma$mu, mu_sigma$sigma)
    true_theta <- as.matrix(true_theta)
  }

  return(true_theta)

}

gen_data <- function(true_theta, ipar_a, ipar_d) {

  X <- mirt::simdata(ipar_a, ipar_d, itemtype = "graded", Theta = true_theta)
  colnames(X) <- rownames(ipar_a)
  X <- as.data.frame(X)

  return(X)

}

eval_table() aggregates results from a single simulation task.

# sim_eval_table.R

eval_table <- function(
  X_lhs, true_theta, lhs_scale, rhs_scale,
  table_UNI, table_EQP, table_CP) {

  out <- data.frame(
    lhs_raw     = apply(X_lhs, 1, sum),
    lhs_theta   = true_theta[, lhs_scale],
    rhs_theta   = true_theta[, rhs_scale])

  out <- merge(out, table_UNI, by.x = "lhs_raw", by.y = sprintf("raw_%s", lhs_scale))
  out <- out[, 1:4]
  colnames(out)[4] <- "rhs_theta_uni"

  out <- merge(out, table_EQP, by.x = "lhs_raw", by.y = sprintf("raw_%s", lhs_scale))
  out <- out[, 1:5]
  colnames(out)[5] <- "rhs_theta_eqp"

  if (!is.null(table_CP)) {
    out <- merge(out, table_CP, by.x = "lhs_raw", by.y = sprintf("raw_%s", lhs_scale))
    out <- out[, c(
      "lhs_raw",
      "lhs_theta",
      "rhs_theta",
      "rhs_theta_uni",
      "rhs_theta_eqp",
      sprintf("eap_dim%s", rhs_scale)
    )]
    colnames(out)[6] <- "rhs_theta_cp"
  } else {
    out[["rhs_theta_cp"]] <- NA
  }

  return(out)

}

Item parameters

Before the main simulation, item parameters are obtained from PHQ9 validation dataset.

The dataset contains 20 PROMIS items and 9 PHQ items.

# sim_get_ipar.R

d <- loadData(
  response  = "dat_dePHQ9_validation.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)

d <- getCompleteData(d)

set.seed(1)
calib <- runCalibration(d, dimensions = 1, fixedpar = TRUE, technical = list(NCYCLES = 1000))
ipar  <- mirt::coef(calib, IRTpars = FALSE, simplify = TRUE)

ipar   <- ipar$items
ipar_a <- ipar[, c(1, 1)]
ipar_d <- ipar[, 2:5]
colnames(ipar_a) <- paste0("a", 1:2)

pro_items  <- getItemNames(d, 1)
phq_items  <- getItemNames(d, 2)

ipar_a[pro_items, 2] <- 0
ipar_a[phq_items, 1] <- 0

Main simulation

The main simulation uses the PROsetta package.

The simulation consists of 8 conditions * 20 trials = 160 tasks.

# Code: Sangdon Lim

library(PROsetta)
library(mvnfast)
library(mirt)
library(stringr)
library(parallel)
library(foreach)
library(doParallel)

n_cores <- detectCores() - 2
cl      <- makeCluster(n_cores)
registerDoParallel(cl)

source("sim_get_ipar.R")
source("sim_get_data.R")
source("sim_eval_table.R")

d <- loadData(
  response  = "dat_dePHQ9_validation.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)

lhs_scale <- 2
rhs_scale <- 1

c1      <- seq(1.0, 0.6, -.05)
c2      <- 1:20
tasks   <- expand.grid(c1, c2)
n_tasks <- nrow(tasks)

out <- foreach(
  idx_task = 1:n_tasks,
  .packages = c("mvnfast", "PROsetta", "mirt", "stringr")) %dopar% {

  # Fetch task information
  true_corr <- tasks[idx_task, 1]
  idx_trial <- tasks[idx_task, 2]

  # Generate thetas and response data
  set.seed(idx_trial)

  mu_sigma       <- list()
  mu_sigma$mu    <- rep(0, 2)
  mu_sigma$sigma <- matrix(true_corr, 2, 2)
  diag(mu_sigma$sigma) <- 1

  true_theta  <- gen_theta(mu_sigma)
  X           <- gen_data(true_theta, ipar_a, ipar_d)
  dX          <- d
  dX@response <- X
  dX@response[[dX@person_id]] <- 990000 + 1:dim(X)[1]

  # Unidimensional
  set.seed(1)
  tmp_calib <- runLinking(dX, method = "FIXEDPAR")
  tmp_rsss  <- runRSSS(
    dX, tmp_calib, min_score = 0,
    min_theta = -4.0, max_theta = 4.0, inc = 0.05
  )
  table_UNI <- tmp_rsss[[as.character(lhs_scale)]]
  table_UNI <- table_UNI[, c(1, 4:5)]

  # Equipercentile
  set.seed(1)
  tmp <- runEquateObserved(
    dX, smooth = "loglinear",
    scale_from = lhs_scale,
    scale_to   = rhs_scale,
    type_to    = "theta",
    rsss       = tmp_rsss,
    degrees    = list(6, 1),
    reps       = 1000
  )
  table_EQP <- tmp$concordance

  # Calibrated projection
  if (true_corr != 1) {
    set.seed(1)
    tmp_calib  <- runLinking(dX, method = "CP")
    tmp_rsss   <- runRSSS(
      dX, tmp_calib, min_score = 0,
      min_theta = -4.0, max_theta = 4.0, inc = 0.05
    )
    table_CP   <- tmp_rsss[[as.character(lhs_scale)]]
  } else {
    table_CP   <- NULL
  }

  # Evaluate
  X_lhs      <- getResponse(dX, lhs_scale)
  table_out  <- eval_table(
    X_lhs, true_theta,
    lhs_scale, rhs_scale,
    table_UNI, table_EQP, table_CP
  )

  # Write output
  fn <- sprintf("out_%s_%s.csv", true_corr, idx_trial)
  write.csv(table_out, file.path("results", fn), row.names = FALSE)

  NULL

}

stopCluster(cl)

Results

Figure 2

# Code: Sangdon Lim

rstudioapi::restartSession()

library(PROsetta)
library(mirt)
library(mvnfast)
library(stringr)
library(devEMF)

d_original <- loadData(
  response  = "dat_dePHQ9_original.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)
d_validation <- loadData(
  response  = "dat_dePHQ9_validation.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)

# Figure 2

set.seed(1)
ipar_original   <- runCalibration(d_original, fixedpar = TRUE)
set.seed(1)
ipar_validation <- runCalibration(d_validation, fixedpar = TRUE)

theta_grid <- seq(-4, 4, .1)

for (img_format in c("eps", "emf", "svg")) {

  setEPS()
  if (img_format == "eps") postscript("figures/Figure 2.eps", width = 9, height = 9)
  if (img_format == "emf") emf(file = "figures/Figure 2.emf", width = 9, height = 9)
  if (img_format == "svg") svg(file = "figures/Figure 2.svg", width = 9, height = 9)

  par(
    mfrow = c(3, 3),
    mai = c(0.5, 0.5, 0.2, 0.2),
    oma = c(3, 3, 3, 3)
  )

  for (phq_item in getItemNames(d_original, 2)) {

    item_original   <- extract.item(ipar_original, phq_item)
    item_validation <- extract.item(ipar_validation, phq_item)
    scores_original   <- expected.item(item_original  , theta_grid)
    scores_validation <- expected.item(item_validation, theta_grid)

    plot(
      0, 0, type = "n",
      xlim = range(theta_grid), ylim = c(0, 3),
      xlab = "", ylab = "",
      axes = F)

    axis(1)
    axis(2, at = seq(0, 3, 1))

    lines(theta_grid, scores_original  , col = "blue")
    lines(theta_grid, scores_validation, col = "red", lty = 2)

    box(lwd = 1)

    if (phq_item == "PHQ91") item_text = "Little interest"
    if (phq_item == "PHQ92") item_text = "Feeling down"
    if (phq_item == "PHQ93") item_text = "Sleep"
    if (phq_item == "PHQ94") item_text = "Feeling tired"
    if (phq_item == "PHQ95") item_text = "Eating"
    if (phq_item == "PHQ96") item_text = "Feeling bad"
    if (phq_item == "PHQ97") item_text = "Concentration"
    if (phq_item == "PHQ98") item_text = "Slow or fidgety"
    if (phq_item == "PHQ99") item_text = "Suicidal thoughts"

    item_text <- sprintf("%s: %s", phq_item, item_text)
    mtext(item_text, side = 3, outer = FALSE)

    if (phq_item == "PHQ91") {
      legend(
        "topleft",
        c("Original", "Validation"),
        col = c("blue", "red"),
        lty = c(1, 2)
      )
    }

  }

  mtext("Theta", side = 1, cex = 1.2, outer = TRUE)
  mtext("Expected score", side = 2, cex = 1.2, outer = TRUE)

  dev.off()

}

Figure 3

# Code: Sangdon Lim

rstudioapi::restartSession()

library(PROsetta)
library(mirt)
library(mvnfast)
library(stringr)
library(devEMF)

d_original <- loadData(
  response  = "dat_dePHQ9_original.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)
d_validation <- loadData(
  response  = "dat_dePHQ9_validation.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)

# Figure 3

set.seed(1)
ipar_original   <- runCalibration(d_original, fixedpar = TRUE)
set.seed(1)
ipar_validation <- runCalibration(d_validation, fixedpar = TRUE)

for (img_format in c("eps", "emf", "svg")) {

  setEPS()
  if (img_format == "eps") postscript("figures/Figure 3.eps", width = 9, height = 4.5)
  if (img_format == "emf") emf(file = "figures/Figure 3.emf", width = 9, height = 4.5)
  if (img_format == "svg") svg(file = "figures/Figure 3.svg", width = 9, height = 4.5)

  theta_grid <- seq(-4, 4, .1)

  scores_original   <- theta_grid * 0
  scores_validation <- theta_grid * 0

  for (phq_item in getItemNames(d_original, 2)) {

    item_original   <- extract.item(ipar_original  , phq_item)
    item_validation <- extract.item(ipar_validation, phq_item)
    scores_original   <- scores_original   + expected.item(item_original  , theta_grid)
    scores_validation <- scores_validation + expected.item(item_validation, theta_grid)

  }

  par(mfrow = c(1, 2), mar = c(5, 4, 4, 4) + .1)

  # Test score
  plot(
    0, 0, type = "n",
    xlim = range(theta_grid), ylim = c(0, 27),
    xlab = "Theta", ylab = "Expected Test Score",
    axes = F)

  axis(1)
  axis(2, at = c(seq(0, 27, 5), 27))

  lines(theta_grid, scores_original  , col = "blue")
  lines(theta_grid, scores_validation, col = "red", lty = 2)

  legend(
    "topleft",
    c("Original", "Validation"),
    col = c("blue", "red"),
    lty = c(1, 2),
    pch = NA,
    inset = c(0.02, 0),
    bty = "n"
  )

  box(lwd = 1)

  # Test information
  plot(
    0, 0, type = "n",
    xlim = range(theta_grid), ylim = c(0, 15),
    xlab = "Theta", ylab = "Test Information",
    axes = F)

  axis(1)
  axis(2, at = seq(0, 15, 5))

  phq_items <- 21:29

  info_original   <- testinfo(ipar_original  , theta_grid, which.items = phq_items)
  info_validation <- testinfo(ipar_validation, theta_grid, which.items = phq_items)

  lines(theta_grid, info_original  , col = "blue")
  lines(theta_grid, info_validation, col = "red", lty = 2)

  par(new = TRUE)
  plot(
    0, 0, type = "n",
    xlim = range(theta_grid), ylim = c(0, 10),
    xlab = "", ylab = "",
    axes = FALSE
  )

  axis(4, at = seq(0, 10, 2))

  theta_grid <- seq(-4, 4, 0.2)
  info_original   <- testinfo(ipar_original  , theta_grid, which.items = phq_items)
  info_validation <- testinfo(ipar_validation, theta_grid, which.items = phq_items)
  SE_original     <- 1 / sqrt(info_original)
  SE_validation   <- 1 / sqrt(info_validation)

  lines(theta_grid, SE_original  , col = "blue")
  lines(theta_grid, SE_validation, col = "red", lty = 2)
  points(theta_grid, SE_original  , col = "blue", bg = "blue", pch = 21, cex = 0.5)
  points(theta_grid, SE_validation, col = "red" , bg = "red" , pch = 21, cex = 0.5)

  legend(
    "topleft",
    c("Information", "Standard Error"),
    col    = "black",
    lty    = 1,
    pch    = c(NA, 21),
    pt.bg  = c(NA, "black"),
    pt.cex = 0.5,
    inset  = c(0.02, 0),
    bty    = "n"
  )

  mtext("Standard Error", side = 4, line = 2)
  box(lwd = 1)

  dev.off()

}

Figure 4a

# Code: Sangdon Lim

rstudioapi::restartSession()

library(PROsetta)
library(mirt)
library(mvnfast)
library(stringr)
library(devEMF)

d_original <- loadData(
  response  = "dat_dePHQ9_original.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)
d_validation <- loadData(
  response  = "dat_dePHQ9_validation.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)

# Figure 4a

set.seed(1)
fp_original   <- runLinking(d_original  , method = "FIXEDPAR")
set.seed(1)
fp_validation <- runLinking(d_validation, method = "FIXEDPAR")

rsss_fp_original   <- runRSSS(d_original  , fp_original  , min_score = 0)
rsss_fp_validation <- runRSSS(d_validation, fp_validation, min_score = 0)

for (img_format in c("eps", "emf", "svg")) {

  setEPS()
  if (img_format == "eps") postscript("figures/Figure 4a.eps", width = 6, height = 6)
  if (img_format == "emf") emf(file = "figures/Figure 4a.emf", width = 6, height = 6)
  if (img_format == "svg") svg(file = "figures/Figure 4a.svg", width = 6, height = 6)

  lhs_scale <- 2
  lhs_raw   <- rsss_fp_original[[lhs_scale]]$raw_2

  # Main plot
  par(mai = c(1, 0.8, 0.1, 0.1) + 0.02)
  par(fig = c(0, 0.8, 0, 0.8))
  plot(0, 0,
    xlab = "Summed Score", ylab = "PROMIS Depression T-score", type = "n",
    xlim = range(lhs_raw), ylim = c(35, 85))
  grid()
  lines(lhs_raw, rsss_fp_original[[lhs_scale]]$tscore  , lty = 1, col = "blue")
  lines(lhs_raw, rsss_fp_validation[[lhs_scale]]$tscore, lty = 2, col = "red")
  legend("topleft",
    c("Original", "Validation"),
    lty = c(1, 2), col = c("blue", "red"),
    bg = "white")
  box()

  # Density (right)
  par(mai = c(1, 0, 0.1, 0.4) + 0.02)
  par(fig = c(0.8, 1, 0, 0.8), new = TRUE)
  plot(
    0, 0, type = "n", axes = F,
    xlim = c(0, 0.04),
    ylim = c(35, 85), xlab = NA, ylab = NA
  )
  # Density from original dataset
  x1 <- getScaleSum(d_original, 2)
  x2 <- rsss_fp_original[[lhs_scale]][, 1:2]
  x  <- merge(x1, x2, by = "raw_2")
  dd <- density(x$tscore)
  lines(dd$y, dd$x, col = "blue")
  # Density from validation dataset
  x1 <- getScaleSum(d_validation, 2)
  x2 <- rsss_fp_validation[[lhs_scale]][, 1:2]
  x  <- merge(x1, x2, by = "raw_2")
  dd <- density(x$tscore)
  lines(dd$y, dd$x, col = "red", lty = 2)
  box()

  # Density (top)
  par(mai = c(0, 0.8, 0.4, 0.1) + 0.02)
  par(fig = c(0, 0.8, 0.8, 1), new = TRUE)
  plot(
    0, 0, type = "n", axes = F,
    xlim = range(lhs_raw),
    ylim = c(0, 0.12), xlab = NA, ylab = NA
  )
  # Density from original dataset
  x1 <- getScaleSum(d_original, 2)
  dd <- density(x1$raw_2)
  lines(dd$x, dd$y, col = "blue")
  # Density from validation dataset
  x1 <- getScaleSum(d_validation, 2)
  dd <- density(x1$raw_2)
  lines(dd$x, dd$y, col = "red", lty = 2)
  box()

  dev.off()

}

Figure 4b

# Code: Sangdon Lim

rstudioapi::restartSession()

library(PROsetta)
library(mirt)
library(mvnfast)
library(stringr)
library(devEMF)

d_original <- loadData(
  response  = "dat_dePHQ9_original.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)
d_validation <- loadData(
  response  = "dat_dePHQ9_validation.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)

# Figure 4b

set.seed(1)
ipar_original   <- runLinking(d_original  , method = "FIXEDPAR")
set.seed(1)
ipar_validation <- runLinking(d_validation, method = "FIXEDPAR")

rsss_original   <- runRSSS(d_original  , ipar_original)
rsss_validation <- runRSSS(d_validation, ipar_validation)

set.seed(1)
eqp_original <- runEquateObserved(
  d_original,
  scale_from = 2, scale_to = 1,
  type_to = "theta", rsss = rsss_original,
  degrees = list(6, 1),
  reps = 1000
)$concordance
set.seed(1)
eqp_validation <- runEquateObserved(
  d_validation,
  scale_from = 2, scale_to = 1,
  type_to = "theta", rsss = rsss_validation,
  degrees = list(6, 1),
  reps = 1000
)$concordance

eqp_original$theta_1   <- eqp_original$theta_1 * 10 + 50
eqp_validation$theta_1 <- eqp_validation$theta_1 * 10 + 50

for (img_format in c("eps", "emf", "svg")) {

  setEPS()
  if (img_format == "eps") postscript("figures/Figure 4b.eps", width = 6, height = 6)
  if (img_format == "emf") emf(file = "figures/Figure 4b.emf", width = 6, height = 6)
  if (img_format == "svg") svg(file = "figures/Figure 4b.svg", width = 6, height = 6)

  lhs_scale <- 2
  lhs_raw   <- eqp_original$raw_2

  # Main plot
  par(mai = c(1, 0.8, 0.1, 0.1) + 0.02)
  par(fig = c(0, 0.8, 0, 0.8))
  plot(0, 0,
    xlab = "Summed Score", ylab = "PROMIS Depression T-score", type = "n",
    xlim = range(lhs_raw), ylim = c(35, 85))
  grid()
  lines(lhs_raw, eqp_original$theta_1  , lty = 1, col = "blue")
  lines(lhs_raw, eqp_validation$theta_1, lty = 2, col = "red")
  legend("topleft",
    c("Original", "Validation"),
    lty = c(1, 2), col = c("blue", "red"),
    bg = "white")
  box()

  # Density (right)
  par(mai = c(1, 0, 0.1, 0.4) + 0.02)
  par(fig = c(0.8, 1, 0, 0.8), new = TRUE)
  plot(
    0, 0, type = "n", axes = F,
    xlim = c(0, 0.04),
    ylim = c(35, 85), xlab = NA, ylab = NA
  )
  # Density from original dataset
  x1 <- getScaleSum(d_original, 2)
  x2 <- eqp_original[, 1:2]
  x  <- merge(x1, x2, by = "raw_2")
  dd <- density(x$theta_1)
  lines(dd$y, dd$x, col = "blue")
  # Density from validation dataset
  x1 <- getScaleSum(d_validation, 2)
  x2 <- eqp_validation[, 1:2]
  x  <- merge(x1, x2, by = "raw_2")
  dd <- density(x$theta_1)
  lines(dd$y, dd$x, col = "red", lty = 2)
  box()

  # Density (top)
  par(mai = c(0, 0.8, 0.4, 0.1) + 0.02)
  par(fig = c(0, 0.8, 0.8, 1), new = TRUE)
  plot(
    0, 0, type = "n", axes = F,
    xlim = range(lhs_raw),
    ylim = c(0, 0.12), xlab = NA, ylab = NA
  )
  # Density from original dataset
  x1 <- getScaleSum(d_original, 2)
  dd <- density(x1$raw_2)
  lines(dd$x, dd$y, col = "blue")
  # Density from validation dataset
  x1 <- getScaleSum(d_validation, 2)
  dd <- density(x1$raw_2)
  lines(dd$x, dd$y, col = "red", lty = 2)
  box()

  dev.off()

}

Figure 4c

# Code: Sangdon Lim

rstudioapi::restartSession()

library(PROsetta)
library(mirt)
library(mvnfast)
library(stringr)
library(devEMF)

d_original <- loadData(
  response  = "dat_dePHQ9_original.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)
d_validation <- loadData(
  response  = "dat_dePHQ9_validation.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)

# Figure 4c

set.seed(1)
cp_original   <- runLinking(d_original  , method = "CP")
set.seed(1)
cp_validation <- runLinking(d_validation, method = "CP")

rsss_cp_original   <- runRSSS(d_original  , cp_original  , min_score = 0)
rsss_cp_validation <- runRSSS(d_validation, cp_validation, min_score = 0)

for (img_format in c("eps", "emf", "svg")) {

  setEPS()
  if (img_format == "eps") postscript("figures/Figure 4c.eps", width = 6, height = 6)
  if (img_format == "emf") emf(file = "figures/Figure 4c.emf", width = 6, height = 6)
  if (img_format == "svg") svg(file = "figures/Figure 4c.svg", width = 6, height = 6)

  lhs_scale <- 2
  lhs_raw   <- rsss_cp_original[[lhs_scale]]$raw_2

  # Main plot
  par(mai = c(1, 0.8, 0.1, 0.1) + 0.02)
  par(fig = c(0, 0.8, 0, 0.8))
  plot(0, 0,
    xlab = "Summed Score", ylab = "PROMIS Depression T-score", type = "n",
    xlim = range(lhs_raw), ylim = c(35, 85))
  grid()
  lines(lhs_raw, rsss_cp_original[[lhs_scale]]$tscore_dim1  , lty = 1, col = "blue")
  lines(lhs_raw, rsss_cp_validation[[lhs_scale]]$tscore_dim1, lty = 2, col = "red")
  legend("topleft",
    c("Original", "Validation"),
    lty = c(1, 2), col = c("blue", "red"),
    bg = "white")
  box()

  # Density (right)
  par(mai = c(1, 0, 0.1, 0.4) + 0.02)
  par(fig = c(0.8, 1, 0, 0.8), new = TRUE)
  plot(
    0, 0, type = "n", axes = F,
    xlim = c(0, 0.04),
    ylim = c(35, 85), xlab = NA, ylab = NA
  )
  # Density from original dataset
  x1 <- getScaleSum(d_original, 2)
  x2 <- rsss_cp_original[[lhs_scale]][, 1:2]
  x  <- merge(x1, x2, by = "raw_2")
  dd <- density(x$tscore_dim1)
  lines(dd$y, dd$x, col = "blue")
  # Density from validation dataset
  x1 <- getScaleSum(d_validation, 2)
  x2 <- rsss_cp_validation[[lhs_scale]][, 1:2]
  x  <- merge(x1, x2, by = "raw_2")
  dd <- density(x$tscore_dim1)
  lines(dd$y, dd$x, col = "red", lty = 2)
  box()

  # Density (top)
  par(mai = c(0, 0.8, 0.4, 0.1) + 0.02)
  par(fig = c(0, 0.8, 0.8, 1), new = TRUE)
  plot(
    0, 0, type = "n", axes = F,
    xlim = range(lhs_raw),
    ylim = c(0, 0.12), xlab = NA, ylab = NA
  )
  # Density from original dataset
  x1 <- getScaleSum(d_original, 2)
  dd <- density(x1$raw_2)
  lines(dd$x, dd$y, col = "blue")
  # Density from validation dataset
  x1 <- getScaleSum(d_validation, 2)
  dd <- density(x1$raw_2)
  lines(dd$x, dd$y, col = "red", lty = 2)
  box()

  dev.off()

}

Figure 5

# Code: Sangdon Lim

rstudioapi::restartSession()

library(PROsetta)
library(mirt)
library(mvnfast)
library(devEMF)

d <- loadData(
  response  = "dat_dePHQ9_validation.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)

# Figure 5

# The theta quadrature is seq(-4, 4, 0.05)

set.seed(1)
uni_link   <- runLinking(d, method = "FIXEDPAR")
uni_rsss   <- runRSSS(d, uni_link, min_score = c(1, 0, 1))
uni_tscore <- uni_rsss[[2]]$tscore

set.seed(1)
uni_link   <- runLinking(d, method = "FIXEDPAR")
uni_rsss   <- runRSSS(d, uni_link, min_score = c(1, 0, 1))
eqp_out    <- runEquateObserved(
  d,
  scale_from = 2, scale_to = 1,
  type_to = "theta", rsss = uni_rsss,
  degrees = list(6, 1),
  reps = 1000
)
eqp_out    <- eqp_out$concordance
eqp_tscore <- eqp_out$theta_1 * 10 + 50

set.seed(1)
cp_link    <- runLinking(d, method = "CP")
cp_rsss    <- runRSSS(d, cp_link, min_score = c(1, 0, 1))
cp_tscore  <- cp_rsss[[2]]$tscore_dim1
cp_corr    <- cp_link$mu_sigma$corr[1, 2]

for (img_format in c("eps", "emf", "svg")) {

  setEPS()
  if (img_format == "eps") postscript("figures/Figure 5.eps", width = 7, height = 7)
  if (img_format == "emf") emf(file = "figures/Figure 5.emf", width = 7, height = 7)
  if (img_format == "svg") svg(file = "figures/Figure 5.svg", width = 7, height = 7)

  lhs_range <- 0:27

  plot(0, 0,
    xlab = "Summed Score", ylab = "T-score", type = "n",
    xlim = range(lhs_range), ylim = c(30, 90))
  lines(lhs_range, uni_tscore, lty = 3, col = "black")      # UNI
  lines(lhs_range, eqp_tscore, lty = 2, col = "magenta")    # EQP
  lines(lhs_range, cp_tscore , lty = 1, col = "lime green") # CP

  legend("topleft",
    c("Unidimensional IRT", "Equipercentile",
      sprintf("Calibrated projection (r = %1.3f)", cp_corr)),
    lty = c(3, 2, 1), col = c("black", "magenta", "lime green"))
  box()

  dev.off()

}

Figure 6

# Code: Sangdon Lim

rstudioapi::restartSession()

library(PROsetta)
library(mirt)
library(mvnfast)
library(devEMF)

d <- loadData(
  response  = "dat_dePHQ9_validation.csv",
  itemmap   = "imap_dePHQ9.csv",
  anchor    = "anchor_dePHQ9.csv",
  input_dir = "data"
)

# Figure 6

# The theta quadrature is seq(-4, 4, 0.05)

source("figures/cp_hypothetical.R")

set.seed(1)
uni_link   <- runLinking(d, method = "FIXEDPAR")
uni_rsss   <- runRSSS(d, uni_link, min_score = c(1, 0, 1))
uni_tscore <- uni_rsss[[2]]$tscore

set.seed(1)
uni_link   <- runLinking(d, method = "FIXEDPAR")
uni_rsss   <- runRSSS(d, uni_link, min_score = c(1, 0, 1))
eqp_out    <- runEquateObserved(
  d,
  scale_from = 2, scale_to = 1,
  type_to = "theta", rsss = uni_rsss,
  degrees = list(6, 1),
  reps = 1000
)
eqp_out    <- eqp_out$concordance
eqp_tscore <- eqp_out$theta_1 * 10 + 50

cp_tscore_list <- list()

for (r in c(0.99, 0.90, 0.80, 0.70)) {

  message(r)

  set.seed(1)
  calib_r    <- CP_hypothetical(d, r)
  cp_out     <- runRSSS(d, calib_r, min_score = 0)
  cp_tscore_list[[as.character(r)]] <- cp_out[[2]]$tscore_dim1

}

for (img_format in c("eps", "emf", "svg")) {

  setEPS()
  if (img_format == "eps") postscript("figures/Figure 6.eps", width = 9, height = 9)
  if (img_format == "emf") emf(file = "figures/Figure 6.emf", width = 9, height = 9)
  if (img_format == "svg") svg(file = "figures/Figure 6.svg", width = 9, height = 9)

  par(mfrow = c(2, 2))

  for (r in c(0.99, 0.90, 0.80, 0.70)) {

    lhs_range <- 0:27
    cp_tscore <- cp_tscore_list[[as.character(r)]]

    plot(0, 0,
      xlab = "Summed Score", ylab = "T-score", type = "n",
      xlim = range(lhs_range), ylim = c(30, 90))
    lines(lhs_range, uni_tscore, lty = 3, col = "black")      # UNI
    lines(lhs_range, eqp_tscore, lty = 2, col = "magenta")    # EQP
    lines(lhs_range, cp_tscore , lty = 1, col = "lime green") # CP

    legend("topleft",
      c("Unidimensional IRT", "Equipercentile",
        sprintf("Calibrated projection (r = %1.2f)", r)),
      lty = c(3, 2, 1), col = c("black", "magenta", "lime green"))
    box()

  }

  dev.off()

}

Figure 7

# Code: Sangdon Lim

rstudioapi::restartSession()

library(stringr)
library(devEMF)

fp    <- file.path("results")
f_all <- list.files(fp)
corr  <- seq(1.0, 0.6, -0.05)
out_all <- as.data.frame(matrix(NA, 0, 4))

for (r in corr) {

  tmp <- sprintf("out_%s_", r)
  idx <- str_detect(f_all, tmp)
  fs  <- f_all[idx]

  out <- as.data.frame(matrix(NA, length(fs), 4))

  for (i in 1:length(fs)) {

    d <- read.csv(file.path(fp, fs[i]))
    diff_uni  <- d$rhs_theta_uni  - d$rhs_theta
    diff_eqp  <- d$rhs_theta_eqp  - d$rhs_theta
    diff_cp   <- d$rhs_theta_cp   - d$rhs_theta
    rmse_uni  <- sqrt(mean((diff_uni )**2))
    rmse_eqp  <- sqrt(mean((diff_eqp )**2))
    rmse_cp   <- sqrt(mean((diff_cp  )**2))
    out[i, ] <- c(r, rmse_uni, rmse_eqp, rmse_cp)

  }

  out_all <- rbind(out_all, out)

}

out_avg <- aggregate(
  out_all, by = list(out_all[, 1]),
  function(x) sqrt(mean(x**2))
)[, -1]

colnames(out_avg) <- c("corr", "uni", "eqp", "cp")

idx <- order(out_avg$corr, decreasing = TRUE)
out_avg <- out_avg[idx, ]

# Figure 7

for (img_format in c("eps", "emf", "svg")) {

  setEPS()
  if (img_format == "eps") postscript("figures/Figure 7.eps", width = 7, height = 7)
  if (img_format == "emf") emf(file = "figures/Figure 7.emf", width = 7, height = 7)
  if (img_format == "svg") svg(file = "figures/Figure 7.svg", width = 7, height = 7)

  plot(
    out_avg[, 1], out_avg[, 1], type = "n",
    xlab = "Latent correlation", ylab = "RMSE",
    xlim = c(1.0, 0.6),
    ylim = c(0.0, 1.0))

  grid()

  lines(out_avg$corr, out_avg$uni , lty = 3, col = "black")
  lines(out_avg$corr, out_avg$eqp , lty = 2, col = "magenta")
  lines(out_avg$corr, out_avg$cp  , lty = 1, col = "lime green")

  legend("topleft",
    c("Unidimensional IRT", "Equipercentile", "Calibrated projection"),
    col = c("black", "magenta", "lime green"),
    lty = c(3, 2, 1),
    lwd = 1,
    bg = "white")

  box()

  dev.off()

}

Figure 8

# Code: Sangdon Lim

rstudioapi::restartSession()

library(stringr)
library(devEMF)

fp    <- file.path("results")
f_all <- list.files(fp)
corr  <- seq(1.0, 0.6, -0.05)
out_all <- as.data.frame(matrix(NA, 0, 4))

for (r in corr) {

  tmp <- sprintf("out_%s_", r)
  idx <- str_detect(f_all, tmp)
  fs  <- f_all[idx]

  out <- as.data.frame(matrix(NA, length(fs), 4))

  for (i in 1:length(fs)) {

    d <- read.csv(file.path(fp, fs[i]))
    diff_uni  <- d$rhs_theta_uni  - d$rhs_theta
    diff_eqp  <- d$rhs_theta_eqp  - d$rhs_theta
    diff_cp   <- d$rhs_theta_cp   - d$rhs_theta
    rmse_uni  <- sqrt(mean((diff_uni )**2))
    rmse_eqp  <- sqrt(mean((diff_eqp )**2))
    rmse_cp   <- sqrt(mean((diff_cp  )**2))
    out[i, ] <- c(r, rmse_uni, rmse_eqp, rmse_cp)

  }

  out_all <- rbind(out_all, out)

}

out_avg <- aggregate(
  out_all, by = list(out_all[, 1]),
  function(x) sqrt(mean(x**2))
)[, -1]

colnames(out_avg) <- c("corr", "uni", "eqp", "cp")

idx <- order(out_avg$corr, decreasing = TRUE)
out_avg <- out_avg[idx, ]

# Perform RMSE prediction
RMSE_theta_2 <- out_avg$uni[1]
RMSE_theta_1 <- sqrt(1 - ((out_avg$corr ** 2) * (1 - (RMSE_theta_2 ** 2))))

out_avg$pred <- RMSE_theta_1

# Figure 8

for (img_format in c("eps", "emf", "svg")) {

  setEPS()
  if (img_format == "eps") postscript("figures/Figure 8.eps", width = 7, height = 7)
  if (img_format == "emf") emf(file = "figures/Figure 8.emf", width = 7, height = 7)
  if (img_format == "svg") svg(file = "figures/Figure 8.svg", width = 7, height = 7)

  plot(
    out_avg[, 1], out_avg[, 1], type = "n",
    xlab = "Latent correlation", ylab = "RMSE",
    xlim = c(1.0, 0.6),
    ylim = c(0.0, 1.0))

  grid()

  lines(out_avg$corr, out_avg$uni , lty = 3, col = "black")
  lines(out_avg$corr, out_avg$eqp , lty = 2, col = "magenta")
  lines(out_avg$corr, out_avg$cp  , lty = 1, col = "lime green")
  lines(out_avg$corr, out_avg$pred, lty = 2, col = "#c0c0c0", lwd = 4)

  legend("topleft",
    c("Unidimensional IRT", "Equipercentile", "Calibrated projection", "Predicted"),
    col = c("black", "magenta", "lime green", "#c0c0c0"),
    lty = c(3, 2, 1, 2),
    lwd = c(1, 1, 1, 4),
    bg = "white")

  box()

  dev.off()

}