Preparation

Load necessary packages:

library("brms")
options(mc.cores = parallel::detectCores()) # use multiple CPU cores in running brm()
library("gridExtra")
library("grid")
library("devtools") # for session_info()
library("tidyverse")

Here is the environment in which the analysis was conducted.

session_info()
## ─ Session info ──────────────────────────────────────────────────────────
##  setting  value                       
##  version  R version 3.5.2 (2018-12-20)
##  os       macOS Mojave 10.14.6        
##  system   x86_64, darwin15.6.0        
##  ui       X11                         
##  language (EN)                        
##  collate  en_GB.UTF-8                 
##  ctype    en_GB.UTF-8                 
##  tz       Europe/London               
##  date     2019-11-07                  
## 
## ─ Packages ──────────────────────────────────────────────────────────────
##  package        * version date       lib source        
##  abind            1.4-5   2016-07-21 [1] CRAN (R 3.5.0)
##  assertthat       0.2.0   2017-04-11 [1] CRAN (R 3.5.0)
##  backports        1.1.3   2018-12-14 [1] CRAN (R 3.5.0)
##  base64enc        0.1-3   2015-07-28 [1] CRAN (R 3.5.0)
##  bayesplot        1.6.0   2018-08-02 [1] CRAN (R 3.5.0)
##  bindr            0.1.1   2018-03-13 [1] CRAN (R 3.5.0)
##  bindrcpp         0.2.2   2018-03-29 [1] CRAN (R 3.5.0)
##  bridgesampling   0.6-0   2018-10-21 [1] CRAN (R 3.5.0)
##  brms           * 2.10.0  2019-08-29 [1] CRAN (R 3.5.2)
##  Brobdingnag      1.2-6   2018-08-13 [1] CRAN (R 3.5.0)
##  broom            0.5.1   2018-12-05 [1] CRAN (R 3.5.0)
##  callr            3.1.1   2018-12-21 [1] CRAN (R 3.5.0)
##  cellranger       1.1.0   2016-07-27 [1] CRAN (R 3.5.0)
##  cli              1.0.1   2018-09-25 [1] CRAN (R 3.5.0)
##  coda             0.19-2  2018-10-08 [1] CRAN (R 3.5.0)
##  colorspace       1.3-2   2016-12-14 [1] CRAN (R 3.5.0)
##  colourpicker     1.0     2017-09-27 [1] CRAN (R 3.5.0)
##  crayon           1.3.4   2017-09-16 [1] CRAN (R 3.5.0)
##  crosstalk        1.0.0   2016-12-21 [1] CRAN (R 3.5.0)
##  desc             1.2.0   2018-05-01 [1] CRAN (R 3.5.0)
##  devtools       * 2.0.1   2018-10-26 [1] CRAN (R 3.5.2)
##  digest           0.6.18  2018-10-10 [1] CRAN (R 3.5.0)
##  dplyr          * 0.7.8   2018-11-10 [1] CRAN (R 3.5.0)
##  DT               0.5     2018-11-05 [1] CRAN (R 3.5.0)
##  dygraphs         https://protect-eu.mimecast.com/s/ebDMC16N6tqw5vyh3PKn4?domain=1.1.1.6 2018-07-11 [1] CRAN (R 3.5.0)
##  evaluate         0.12    2018-10-09 [1] CRAN (R 3.5.0)
##  forcats        * 0.3.0   2018-02-19 [1] CRAN (R 3.5.0)
##  fs               1.2.6   2018-08-23 [1] CRAN (R 3.5.0)
##  generics         0.0.2   2018-11-29 [1] CRAN (R 3.5.0)
##  ggplot2        * 3.1.0   2018-10-25 [1] CRAN (R 3.5.0)
##  ggridges         0.5.1   2018-09-27 [1] CRAN (R 3.5.0)
##  glue             1.3.0   2018-07-17 [1] CRAN (R 3.5.0)
##  gridExtra      * 2.3     2017-09-09 [1] CRAN (R 3.5.0)
##  gtable           0.2.0   2016-02-26 [1] CRAN (R 3.5.0)
##  gtools           3.8.1   2018-06-26 [1] CRAN (R 3.5.0)
##  haven            2.0.0   2018-11-22 [1] CRAN (R 3.5.0)
##  hms              0.4.2   2018-03-10 [1] CRAN (R 3.5.0)
##  htmltools        0.3.6   2017-04-28 [1] CRAN (R 3.5.0)
##  htmlwidgets      1.3     2018-09-30 [1] CRAN (R 3.5.0)
##  httpuv           https://protect-eu.mimecast.com/s/RLLOC28N8f8oOnqfOJmpR?domain=1.4.5.1 2018-12-18 [1] CRAN (R 3.5.0)
##  httr             1.4.0   2018-12-11 [1] CRAN (R 3.5.0)
##  igraph           1.2.2   2018-07-27 [1] CRAN (R 3.5.0)
##  inline           0.3.15  2018-05-18 [1] CRAN (R 3.5.0)
##  jsonlite         1.6     2018-12-07 [1] CRAN (R 3.5.0)
##  knitr            1.21    2018-12-10 [1] CRAN (R 3.5.2)
##  later            0.7.5   2018-09-18 [1] CRAN (R 3.5.0)
##  lattice          0.20-38 2018-11-04 [1] CRAN (R 3.5.2)
##  lazyeval         0.2.1   2017-10-29 [1] CRAN (R 3.5.0)
##  loo              2.1.0   2019-03-13 [1] CRAN (R 3.5.2)
##  lubridate        1.7.4   2018-04-11 [1] CRAN (R 3.5.0)
##  magrittr         1.5     2014-11-22 [1] CRAN (R 3.5.0)
##  markdown         0.9     2018-12-07 [1] CRAN (R 3.5.0)
##  Matrix           1.2-15  2018-11-01 [1] CRAN (R 3.5.2)
##  matrixStats      0.54.0  2018-07-23 [1] CRAN (R 3.5.0)
##  memoise          1.1.0   2017-04-21 [1] CRAN (R 3.5.0)
##  mime             0.6     2018-10-05 [1] CRAN (R 3.5.0)
##  miniUI           https://protect-eu.mimecast.com/s/fwmtC36X6t2Y0L1T6L-ex?domain=0.1.1.1 2018-05-18 [1] CRAN (R 3.5.0)
##  modelr           0.1.2   2018-05-11 [1] CRAN (R 3.5.0)
##  munsell          0.5.0   2018-06-12 [1] CRAN (R 3.5.0)
##  mvtnorm          1.0-8   2018-05-31 [1] CRAN (R 3.5.0)
##  nlme             3.1-137 2018-04-07 [1] CRAN (R 3.5.2)
##  pillar           1.3.1   2018-12-15 [1] CRAN (R 3.5.0)
##  pkgbuild         1.0.2   2018-10-16 [1] CRAN (R 3.5.0)
##  pkgconfig        2.0.2   2018-08-16 [1] CRAN (R 3.5.0)
##  pkgload          1.0.2   2018-10-29 [1] CRAN (R 3.5.0)
##  plyr             1.8.4   2016-06-08 [1] CRAN (R 3.5.0)
##  prettyunits      1.0.2   2015-07-13 [1] CRAN (R 3.5.0)
##  processx         3.2.1   2018-12-05 [1] CRAN (R 3.5.0)
##  promises         1.0.1   2018-04-13 [1] CRAN (R 3.5.0)
##  ps               1.3.0   2018-12-21 [1] CRAN (R 3.5.0)
##  purrr          * 0.2.5   2018-05-29 [1] CRAN (R 3.5.0)
##  R6               2.3.0   2018-10-04 [1] CRAN (R 3.5.0)
##  Rcpp           * 1.0.0   2018-11-07 [1] CRAN (R 3.5.0)
##  readr          * 1.3.1   2018-12-21 [1] CRAN (R 3.5.0)
##  readxl           1.2.0   2018-12-19 [1] CRAN (R 3.5.0)
##  remotes          2.0.2   2018-10-30 [1] CRAN (R 3.5.0)
##  reshape2         1.4.3   2017-12-11 [1] CRAN (R 3.5.0)
##  rlang            https://protect-eu.mimecast.com/s/ehnNC4636tlA3LNtJ5Piq?domain=0.3.0.1 2018-10-25 [1] CRAN (R 3.5.0)
##  rmarkdown        1.11    2018-12-08 [1] CRAN (R 3.5.0)
##  rprojroot        1.3-2   2018-01-03 [1] CRAN (R 3.5.0)
##  rsconnect        0.8.12  2018-12-05 [1] CRAN (R 3.5.0)
##  rstan            2.19.2  2019-07-09 [1] CRAN (R 3.5.2)
##  rstantools       1.5.1   2018-08-22 [1] CRAN (R 3.5.0)
##  rstudioapi       0.8     2018-10-02 [1] CRAN (R 3.5.0)
##  rvest            0.3.2   2016-06-17 [1] CRAN (R 3.5.0)
##  scales           1.0.0   2018-08-09 [1] CRAN (R 3.5.0)
##  sessioninfo      1.1.1   2018-11-05 [1] CRAN (R 3.5.0)
##  shiny            1.2.0   2018-11-02 [1] CRAN (R 3.5.0)
##  shinyjs          1.0     2018-01-08 [1] CRAN (R 3.5.0)
##  shinystan        2.5.0   2018-05-01 [1] CRAN (R 3.5.0)
##  shinythemes      1.1.2   2018-11-06 [1] CRAN (R 3.5.0)
##  StanHeaders      2.19.0  2019-09-07 [1] CRAN (R 3.5.2)
##  stringi          1.2.4   2018-07-20 [1] CRAN (R 3.5.0)
##  stringr        * 1.3.1   2018-05-10 [1] CRAN (R 3.5.0)
##  threejs          0.3.1   2017-08-13 [1] CRAN (R 3.5.0)
##  tibble         * 2.0.0   2019-01-04 [1] CRAN (R 3.5.2)
##  tidyr          * 0.8.2   2018-10-28 [1] CRAN (R 3.5.0)
##  tidyselect       0.2.5   2018-10-11 [1] CRAN (R 3.5.0)
##  tidyverse      * 1.2.1   2017-11-14 [1] CRAN (R 3.5.0)
##  usethis        * 1.4.0   2018-08-14 [1] CRAN (R 3.5.0)
##  withr            2.1.2   2018-03-15 [1] CRAN (R 3.5.0)
##  xfun             0.4     2018-10-23 [1] CRAN (R 3.5.0)
##  xml2             1.2.0   2018-01-24 [1] CRAN (R 3.5.0)
##  xtable           1.8-3   2018-08-29 [1] CRAN (R 3.5.0)
##  xts              0.11-2  2018-11-05 [1] CRAN (R 3.5.0)
##  yaml             2.2.0   2018-07-25 [1] CRAN (R 3.5.0)
##  zoo              1.8-4   2018-09-19 [1] CRAN (R 3.5.0)
## 
## [1] /Library/Frameworks/R.framework/Versions/3.5/Resources/library

Read the data and convert them into the long format:

comp <- read_csv("L2SpeechData.csv") %>% 
  gather(key, value, -c(ParticipantID:F11)) %>% 
  separate(key, c("RaterID", "var")) %>% 
  spread(var, value) %>% 
  mutate_at(.vars = c("ParticipantID", "RaterID"), .funs = factor)

Here is how the data look like:

comp

Data Visualization (Figure 1 in Supporting Information-E)

Factor labels used in strip text in ggplot panels:

factor.labels <- c(
  "Factor 1:\nExperience Quantity", 
  "Factor 2:\nCurrent L2 Use",
  "Factor 3:\nNativeness Awareness",
  "Factor 4:\nAge of Immersion",
  "Factor 5:\nMotivation",
  "Factor 6:\nNativeness Attitude",
  "Factor 7:\nEFL Experience",
  "Factor 8:\nSpecial Past Experience",
  "Factor 9:\nForeign Accents Attitude",
  "Factor 10:\nComprehensibility Orientation",
  "Factor 11:\nL1 Influence"
)

Restructuring of data to draw a figure:

comp.long <- comp %>% 
  gather(factor, fs, -c(ParticipantID, RaterID, CompRatings, NativelikeRatings)) %>% 
  mutate(
    factor = case_when(
      factor == "F1" ~ factor.labels[1],
      factor == "F2" ~ factor.labels[2],
      factor == "F3" ~ factor.labels[3],
      factor == "F4" ~ factor.labels[4],
      factor == "F5" ~ factor.labels[5],
      factor == "F6" ~ factor.labels[6],
      factor == "F7" ~ factor.labels[7],
      factor == "F8" ~ factor.labels[8],
      factor == "F9" ~ factor.labels[9],
      factor == "F10" ~ factor.labels[10],
      factor == "F11" ~ factor.labels[11]
    ) %>% factor(levels = factor.labels)
  )

Layers used in both comprehensibility and nativelikeness panels:

common.layers <- list(
  geom_jitter(alpha = 0.2, height = 0, width = 0.2),
  geom_boxplot(notch = TRUE, color = "blue", outlier.shape = " ", alpha = 0),
  stat_summary(fun.y = "mean", colour = "red", size = 1.5, geom = "point"),
  coord_flip(),
  facet_wrap(~ factor, scales = "free_x", ncol = 3),
  theme_bw(),
  theme(
    panel.grid.major = element_blank(), 
    panel.grid.minor = element_blank(),
    text = element_text(size = 12)
  ) 
)

Draw a figure:

desc.comp <- ggplot(comp.long, aes(CompRatings %>% factor, fs)) +
  common.layers +
  labs(x = "", y = "", title = "Comprehensibility")

desc.ntv <- ggplot(comp.long, aes(NativelikeRatings %>% factor, fs)) +
  common.layers +
  labs(x = "", y = "", title = "Nativelikeness")

grid.arrange(desc.comp, desc.ntv, nrow = 1,
             bottom = textGrob("Factor Score", gp = gpar(fontsize = 20)),
             left = textGrob("Rating", gp = gpar(fontsize = 20), rot = 90)
)

Bayesian Multivariate Mixed-Effects Ordinal Regression Model

Build a model:

m.normal.prior <- brm(
  mvbind(CompRatings, NativelikeRatings) ~ 
    F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + 
    (1 | p | RaterID) + (1 | q | ParticipantID),
  data = comp,
  family = cumulative(),
  prior = prior(normal(0, 1), class = b),
  iter = 10000,
  seed = 1
)

Posterior predictive checks:

pp_check(m.normal.prior, resp = "CompRatings")

pp_check(m.normal.prior, resp = "NativelikeRatings")

Both figures look reasonable.

Posterior distribution (Figures 1 through 3 in Supporting Information-G)

Define the functions that are useful in this section:

calc.stats <- function(x) {
  means <- x %>% 
    group_by(key) %>% 
    summarize(
      mean = mean(value),
      x = quantile(value, probs = 0.025),
      xend = quantile(value, probs = 0.975),
      y = 500,
      yend = 500,
      xintercept = 0
    )
  means$xintercept[grep("^b_.+Ratings_F\\d", means$key, invert = TRUE)] <- NA
  return(means)
}

label.properly <- function(x) {
  x <- x %>% 
    str_replace_all("b_.+Ratings_", "") %>% 
    str_replace_all("Intercept\\[1\\]", "Threshold (1 vs 2)") %>% 
    str_replace_all("Intercept\\[2\\]", "Threshold (2 vs 3)") %>% 
    str_replace_all("Intercept\\[3\\]", "Threshold (3 vs 4)") %>% 
    str_replace_all("Intercept\\[4\\]", "Threshold (4 vs 5)") %>% 
    str_replace_all("Intercept\\[5\\]", "Threshold (5 vs 6)") %>% 
    str_replace_all("Intercept\\[6\\]", "Threshold (6 vs 7)") %>% 
    str_replace_all("Intercept\\[7\\]", "Threshold (7 vs 8)") %>% 
    str_replace_all("Intercept\\[8\\]", "Threshold (8 vs 9)") %>% 
    str_replace_all("^F(\\d+)", "Factor \\1") %>% 
    str_replace_all(
      "sd_ParticipantID__CompRatings_Intercept", 
      "Comprehensibility:\nSD of By-Participant\nRandom Intercepts"
    ) %>% 
    str_replace_all(
      "sd_ParticipantID__NativelikeRatings_Intercept", 
      "Nativelikeness:\nSD of By-Participant\nRandom Intercepts"
    ) %>% 
    str_replace_all(
      "sd_RaterID__CompRatings_Intercept", 
      "Comprehensibility:\nSD of By-Rater\nRandom Intercepts"
    ) %>% 
    str_replace_all(
      "sd_RaterID__NativelikeRatings_Intercept", 
      "Nativelikeness:\nSD of By-Rater\nRandom Intercepts"
    ) %>% 
    str_replace_all(
      "cor_ParticipantID__CompRatings_Intercept__NativelikeRatings_Intercept", 
      "Correlation of\nBy-Participant\nRandom Intercepts"
    ) %>% 
    str_replace_all(
      "cor_RaterID__CompRatings_Intercept__NativelikeRatings_Intercept", 
      "Correlation of\nBy-Rater\nRandom Intercepts"
    ) %>% 
    factor(
      levels = c(
        paste0("Threshold (", 1:8, " vs ", 2:9, ")"),
        paste("Factor", 1:11), 
        "Comprehensibility:\nSD of By-Participant\nRandom Intercepts", 
        "Comprehensibility:\nSD of By-Rater\nRandom Intercepts",
        "Nativelikeness:\nSD of By-Participant\nRandom Intercepts",
        "Nativelikeness:\nSD of By-Rater\nRandom Intercepts",
        "Correlation of\nBy-Participant\nRandom Intercepts",
        "Correlation of\nBy-Rater\nRandom Intercepts"
      ),
      labels = c(
        paste0("Threshold (", 1:8, " vs ", 2:9, ")"),
        factor.labels,
        "Comprehensibility:\nSD of By-Participant\nRandom Intercepts", 
        "Comprehensibility:\nSD of By-Rater\nRandom Intercepts",
        "Nativelikeness:\nSD of By-Participant\nRandom Intercepts",
        "Nativelikeness:\nSD of By-Rater\nRandom Intercepts",
        "Correlation of\nBy-Participant\nRandom Intercepts",
        "Correlation of\nBy-Rater\nRandom Intercepts"
      )
    )
}

Extract posterior distribution:

post.sub.long <- posterior_samples(m.normal.prior)[ , 1:44] %>% 
  gather(key, value) %>% 
  as_tibble

Subset of the data including fixed effects of comprehensibility:

post.sub.long.group1 <- post.sub.long[grep("^b_CompRatings", post.sub.long$key), ]
means1 <- calc.stats(post.sub.long.group1)
post.sub.long.group1$key <- label.properly(post.sub.long.group1$key)
means1$key <- label.properly(means1$key)

Subset of the data including fixed effects of nativelikeness:

post.sub.long.group2 <- post.sub.long[grep("^b_NativelikeRatings", post.sub.long$key), ]
means2 <- calc.stats(post.sub.long.group2)
post.sub.long.group2$key <- label.properly(post.sub.long.group2$key)
means2$key <- label.properly(means2$key)

Subset of the data including random effects and correlations:

post.sub.long.group3 <- post.sub.long[grep("^(sd|cor)_", post.sub.long$key), ]
means3 <- calc.stats(post.sub.long.group3)
post.sub.long.group3$key <- label.properly(post.sub.long.group3$key)
means3$key <- label.properly(means3$key)

Define the function that draws figures demonstrating posterior distributions:

gg.draw <- function(df1, df2, title) {
  ggplot(df1, aes(value)) +
    geom_histogram(alpha = 0.5) +
    geom_vline(data = df2, aes(xintercept = xintercept), color = "red", linetype = 2) +
    geom_segment(data = df2, aes(x = x, xend = xend, y = y, yend = yend), color = "blue") +
    geom_point(data = df2, aes(x = mean, y = y), color = "blue") +
    facet_wrap(~ key, scales = "free") +
    coord_cartesian(ylim = c(0, 4000)) +
    theme_bw() +
    theme(
      axis.title.y = element_blank(),
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      text = element_text(size = 15)
    ) +
    labs(x = "Estimate", y = "", title = title)
}

Draw figures. Blue horizontal lines represent 95% CIs, while red vertical lines (only drawn for slope parameters) represent 0 (i.e., null effect).

# Fixed Effects of Comprehensibility Ratings
gg.draw(post.sub.long.group1, means1, "")

# Fixed Effects of Nativelikeness Ratings
gg.draw(post.sub.long.group2, means2, "")

# Random Effects and Correlations
gg.draw(post.sub.long.group3, means3, "")

Classification Accuracy

Ccalculate classification accuracy:

# Fitted probabilities
predicted <- fitted(m.normal.prior, comp)
# Predicted ratings
predicted.ratings <- c(apply(predicted[ , 1, 1:9], 1, which.max), apply(predicted[ , 1, 10:18], 1, which.max)) %>% factor(levels = 1:9)
# Cross-tabulation between observed and predicted ratings
cont.tbl <- table(c(comp$CompRatings, comp$NativelikeRatings), predicted.ratings)
cont.tbl
##    predicted.ratings
##       1   2   3   4   5   6   7   8   9
##   1  40  27  26  13   4   1   0   1   0
##   2  20  43  60  28  11   7   2   0   0
##   3  12  25  91  69  28  23   7   0   0
##   4   4   8  71  95  48  43  21   2   0
##   5   0   2  26  92  56  72  31   7   0
##   6   0   1  12  46  43  98  76  24   1
##   7   0   1   1  14  24  67 117  63  13
##   8   0   0   1   3   5  35  70 118  45
##   9   0   0   0   1   0   2  20  78 105
# 763 cases were accurately classified.
sum(diag(cont.tbl)) 
## [1] 763
# The classification accuracy was 34.7%.
sum(diag(cont.tbl))/ sum(cont.tbl) 
## [1] 0.3468182

Classification accuracy with 11 factors alone:

# Model with 11 factors
m.normal.prior2 <- brm(
  mvbind(CompRatings, NativelikeRatings) ~ 
    F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11,
  data = comp,
  family = cumulative(),
  prior = prior(normal(0, 1), class = b),
  iter = 10000,
  seed = 1
)


# Calculate classification accuracy
predicted2 <- fitted(m.normal.prior2, comp)
predicted.ratings2 <- c(apply(predicted2[ , 1, 1:9], 1, which.max), apply(predicted2[ , 1, 10:18], 1, which.max)) %>% factor(levels = 1:9)
cont.tbl2 <- table(c(comp$CompRatings, comp$NativelikeRatings), predicted.ratings2)
# 456 cases were accurately classified.
sum(diag(cont.tbl2))
## [1] 456
# The classification accuracy was 20.7%.
sum(diag(cont.tbl2))/ sum(cont.tbl2) 
## [1] 0.2072727
# Baseline accuracy (383 ratings, or 17.4%)
baseline <- max(table(comp$CompRatings)) + max(table(comp$NativelikeRatings)) 
baseline
## [1] 383
baseline/2200
## [1] 0.1740909
# 456/2200 and 383/2200 are significantly different
matrix(c(456, 2200, 383, 2200), nco = 2) %>% chisq.test
## 
##  Pearson's Chi-squared test with Yates' continuity correction
## 
## data:  .
## X-squared = 5.1628, df = 1, p-value = 0.02308

Classification accuracy when an error by one rating is allowed:

num <- 0
for (i in 2:8) {
  num <- num + sum(cont.tbl2[i, c(i - 1, i + 1)])
}
num <- num + cont.tbl2[1, 2] + cont.tbl2[9, 8]
# 1156 cases were accurately classified.
sum(diag(cont.tbl2)) + num
## [1] 1156
# The classification accuracy was 52.5%
(sum(diag(cont.tbl2)) + num) / sum(cont.tbl2) 
## [1] 0.5254545
# Baseline
# Distribution of comprehensibility ratings
table(comp$CompRatings)
## 
##   1   2   3   4   5   6   7   8   9 
##  19  38  78 113 137 155 204 196 160
# Distribution of nativelikeness ratings
table(comp$NativelikeRatings)
## 
##   1   2   3   4   5   6   7   8   9 
##  93 133 177 179 149 146  96  81  46
# The classification accuracy was 48.2%
(
  155 + # Comprehensibility: Rating = 6
  204 + # Comprehensibility: Rating = 7 (the largest category)
  196 + # Comprehensibility: Rating = 8
  177 + # Nativelikeness: Rating = 3
  179 + # Nativelikeness: Rating = 4 (the largest category)
  149   # Nativelikeness: Rating = 5
  ) / 2200
## [1] 0.4818182

Summary of the model (Table 1):

summary(m.normal.prior)
##  Family: MV(cumulative, cumulative) 
##   Links: mu = logit; disc = identity
##          mu = logit; disc = identity 
## Formula: CompRatings ~ F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + (1 | p | RaterID) + (1 | q | ParticipantID) 
##          NativelikeRatings ~ F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + (1 | p | RaterID) + (1 | q | ParticipantID) 
##    Data: comp (Number of observations: 1100) 
## Samples: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
##          total post-warmup samples = 20000
## 
## Group-Level Effects: 
## ~ParticipantID (Number of levels: 110) 
##                                                        Estimate Est.Error
## sd(CompRatings_Intercept)                                  1.60      0.14
## sd(NativelikeRatings_Intercept)                            1.95      0.16
## cor(CompRatings_Intercept,NativelikeRatings_Intercept)     0.98      0.01
##                                                        l-95% CI u-95% CI
## sd(CompRatings_Intercept)                                  1.35     1.90
## sd(NativelikeRatings_Intercept)                            1.65     2.29
## cor(CompRatings_Intercept,NativelikeRatings_Intercept)     0.95     1.00
##                                                        Rhat Bulk_ESS
## sd(CompRatings_Intercept)                              1.00     5625
## sd(NativelikeRatings_Intercept)                        1.00     5334
## cor(CompRatings_Intercept,NativelikeRatings_Intercept) 1.00     4767
##                                                        Tail_ESS
## sd(CompRatings_Intercept)                                 10681
## sd(NativelikeRatings_Intercept)                            9405
## cor(CompRatings_Intercept,NativelikeRatings_Intercept)     8610
## 
## ~RaterID (Number of levels: 10) 
##                                                        Estimate Est.Error
## sd(CompRatings_Intercept)                                  1.96      0.61
## sd(NativelikeRatings_Intercept)                            1.26      0.39
## cor(CompRatings_Intercept,NativelikeRatings_Intercept)    -0.19      0.31
##                                                        l-95% CI u-95% CI
## sd(CompRatings_Intercept)                                  1.16     3.46
## sd(NativelikeRatings_Intercept)                            0.74     2.25
## cor(CompRatings_Intercept,NativelikeRatings_Intercept)    -0.73     0.46
##                                                        Rhat Bulk_ESS
## sd(CompRatings_Intercept)                              1.00     6153
## sd(NativelikeRatings_Intercept)                        1.00     7370
## cor(CompRatings_Intercept,NativelikeRatings_Intercept) 1.00     7765
##                                                        Tail_ESS
## sd(CompRatings_Intercept)                                  8483
## sd(NativelikeRatings_Intercept)                            9748
## cor(CompRatings_Intercept,NativelikeRatings_Intercept)    10220
## 
## Population-Level Effects: 
##                                Estimate Est.Error l-95% CI u-95% CI Rhat
## CompRatings_Intercept[1]          -5.94      0.72    -7.42    -4.53 1.00
## CompRatings_Intercept[2]          -4.60      0.69    -6.03    -3.23 1.00
## CompRatings_Intercept[3]          -3.39      0.69    -4.79    -2.04 1.00
## CompRatings_Intercept[4]          -2.33      0.68    -3.73    -0.98 1.00
## CompRatings_Intercept[5]          -1.31      0.68    -2.70     0.05 1.00
## CompRatings_Intercept[6]          -0.22      0.68    -1.60     1.12 1.00
## CompRatings_Intercept[7]           1.26      0.68    -0.13     2.61 1.00
## CompRatings_Intercept[8]           3.15      0.68     1.75     4.52 1.00
## NativelikeRatings_Intercept[1]    -4.05      0.47    -4.98    -3.11 1.00
## NativelikeRatings_Intercept[2]    -2.50      0.46    -3.42    -1.59 1.00
## NativelikeRatings_Intercept[3]    -1.14      0.46    -2.04    -0.23 1.00
## NativelikeRatings_Intercept[4]     0.09      0.46    -0.80     1.00 1.00
## NativelikeRatings_Intercept[5]     1.17      0.46     0.27     2.09 1.00
## NativelikeRatings_Intercept[6]     2.48      0.46     1.57     3.41 1.00
## NativelikeRatings_Intercept[7]     3.72      0.47     2.79     4.66 1.00
## NativelikeRatings_Intercept[8]     5.51      0.50     4.53     6.52 1.00
## CompRatings_F1                     0.09      0.17    -0.24     0.42 1.00
## CompRatings_F2                     0.43      0.17     0.10     0.76 1.00
## CompRatings_F3                    -0.01      0.16    -0.33     0.32 1.00
## CompRatings_F4                    -0.44      0.16    -0.75    -0.11 1.00
## CompRatings_F5                     0.08      0.16    -0.24     0.41 1.00
## CompRatings_F6                     0.39      0.16     0.07     0.72 1.00
## CompRatings_F7                    -0.17      0.17    -0.49     0.16 1.00
## CompRatings_F8                    -0.44      0.17    -0.77    -0.11 1.00
## CompRatings_F9                     0.01      0.17    -0.32     0.34 1.00
## CompRatings_F10                    0.05      0.16    -0.26     0.37 1.00
## CompRatings_F11                   -0.13      0.17    -0.46     0.20 1.00
## NativelikeRatings_F1               0.15      0.20    -0.25     0.54 1.00
## NativelikeRatings_F2               0.55      0.20     0.15     0.94 1.00
## NativelikeRatings_F3               0.06      0.19    -0.32     0.44 1.00
## NativelikeRatings_F4              -0.64      0.20    -1.02    -0.25 1.00
## NativelikeRatings_F5              -0.01      0.19    -0.39     0.38 1.00
## NativelikeRatings_F6               0.41      0.20     0.03     0.80 1.00
## NativelikeRatings_F7              -0.31      0.20    -0.69     0.08 1.00
## NativelikeRatings_F8              -0.44      0.20    -0.83    -0.05 1.00
## NativelikeRatings_F9               0.02      0.20    -0.37     0.42 1.00
## NativelikeRatings_F10             -0.04      0.19    -0.41     0.33 1.00
## NativelikeRatings_F11             -0.42      0.20    -0.80    -0.03 1.00
##                                Bulk_ESS Tail_ESS
## CompRatings_Intercept[1]           4249     6307
## CompRatings_Intercept[2]           4022     6201
## CompRatings_Intercept[3]           3957     5979
## CompRatings_Intercept[4]           3927     5989
## CompRatings_Intercept[5]           3921     6098
## CompRatings_Intercept[6]           3925     6121
## CompRatings_Intercept[7]           3943     6192
## CompRatings_Intercept[8]           4016     6415
## NativelikeRatings_Intercept[1]     5268     8714
## NativelikeRatings_Intercept[2]     5131     8501
## NativelikeRatings_Intercept[3]     5090     8216
## NativelikeRatings_Intercept[4]     5111     8773
## NativelikeRatings_Intercept[5]     5150     8704
## NativelikeRatings_Intercept[6]     5297     9228
## NativelikeRatings_Intercept[7]     5526     8937
## NativelikeRatings_Intercept[8]     6113    10317
## CompRatings_F1                     6030     9882
## CompRatings_F2                     4529     7655
## CompRatings_F3                     4558     8745
## CompRatings_F4                     4760     7837
## CompRatings_F5                     4526     8397
## CompRatings_F6                     4315     7456
## CompRatings_F7                     5141     8862
## CompRatings_F8                     4425     7792
## CompRatings_F9                     4274     7517
## CompRatings_F10                    4370     8367
## CompRatings_F11                    4461     7735
## NativelikeRatings_F1               6037    10443
## NativelikeRatings_F2               4620     7572
## NativelikeRatings_F3               4430     8188
## NativelikeRatings_F4               4812     8256
## NativelikeRatings_F5               4532     8079
## NativelikeRatings_F6               4444     7736
## NativelikeRatings_F7               5200     8677
## NativelikeRatings_F8               4420     7882
## NativelikeRatings_F9               4318     7014
## NativelikeRatings_F10              4351     7770
## NativelikeRatings_F11              4510     7370
## 
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample 
## is a crude measure of effective sample size, and Rhat is the potential 
## scale reduction factor on split chains (at convergence, Rhat = 1).

Sensitivity Analysis (reported in Supporting Information-H)

Default priors:

get_prior(
  mvbind(CompRatings, NativelikeRatings) ~ F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + 
    (1 | p | RaterID) + (1 | q | ParticipantID),
  family = cumulative(),
  data = comp
)
##                  prior     class      coef         group              resp
## 1                              b                                          
## 2               lkj(1)       cor                                          
## 3                            cor           ParticipantID                  
## 4                            cor                 RaterID                  
## 5                      Intercept                                          
## 6                              b                               CompRatings
## 7                              b        F1                     CompRatings
## 8                              b       F10                     CompRatings
## 9                              b       F11                     CompRatings
## 10                             b        F2                     CompRatings
## 11                             b        F3                     CompRatings
## 12                             b        F4                     CompRatings
## 13                             b        F5                     CompRatings
## 14                             b        F6                     CompRatings
## 15                             b        F7                     CompRatings
## 16                             b        F8                     CompRatings
## 17                             b        F9                     CompRatings
## 18 student_t(3, 0, 10) Intercept                               CompRatings
## 19                     Intercept         1                     CompRatings
## 20                     Intercept         2                     CompRatings
## 21                     Intercept         3                     CompRatings
## 22                     Intercept         4                     CompRatings
## 23                     Intercept         5                     CompRatings
## 24                     Intercept         6                     CompRatings
## 25                     Intercept         7                     CompRatings
## 26                     Intercept         8                     CompRatings
## 27 student_t(3, 0, 10)        sd                               CompRatings
## 28                            sd           ParticipantID       CompRatings
## 29                            sd Intercept ParticipantID       CompRatings
## 30                            sd                 RaterID       CompRatings
## 31                            sd Intercept       RaterID       CompRatings
## 32                             b                         NativelikeRatings
## 33                             b        F1               NativelikeRatings
## 34                             b       F10               NativelikeRatings
## 35                             b       F11               NativelikeRatings
## 36                             b        F2               NativelikeRatings
## 37                             b        F3               NativelikeRatings
## 38                             b        F4               NativelikeRatings
## 39                             b        F5               NativelikeRatings
## 40                             b        F6               NativelikeRatings
## 41                             b        F7               NativelikeRatings
## 42                             b        F8               NativelikeRatings
## 43                             b        F9               NativelikeRatings
## 44 student_t(3, 0, 10) Intercept                         NativelikeRatings
## 45                     Intercept         1               NativelikeRatings
## 46                     Intercept         2               NativelikeRatings
## 47                     Intercept         3               NativelikeRatings
## 48                     Intercept         4               NativelikeRatings
## 49                     Intercept         5               NativelikeRatings
## 50                     Intercept         6               NativelikeRatings
## 51                     Intercept         7               NativelikeRatings
## 52                     Intercept         8               NativelikeRatings
## 53 student_t(3, 0, 10)        sd                         NativelikeRatings
## 54                            sd           ParticipantID NativelikeRatings
## 55                            sd Intercept ParticipantID NativelikeRatings
## 56                            sd                 RaterID NativelikeRatings
## 57                            sd Intercept       RaterID NativelikeRatings
##    dpar nlpar bound
## 1                  
## 2                  
## 3                  
## 4                  
## 5                  
## 6                  
## 7                  
## 8                  
## 9                  
## 10                 
## 11                 
## 12                 
## 13                 
## 14                 
## 15                 
## 16                 
## 17                 
## 18                 
## 19                 
## 20                 
## 21                 
## 22                 
## 23                 
## 24                 
## 25                 
## 26                 
## 27                 
## 28                 
## 29                 
## 30                 
## 31                 
## 32                 
## 33                 
## 34                 
## 35                 
## 36                 
## 37                 
## 38                 
## 39                 
## 40                 
## 41                 
## 42                 
## 43                 
## 44                 
## 45                 
## 46                 
## 47                 
## 48                 
## 49                 
## 50                 
## 51                 
## 52                 
## 53                 
## 54                 
## 55                 
## 56                 
## 57

Gradually change the SD of prior normal distribution:

sds <- c(0.8, 0.9, 1:3)
sa <- vector("list", length(sds) + 1)
for (i in seq(length(sds))) {
  # SD = 1
  if (i == 3) {
    sa[[3]] <- summary(m.normal.prior)$fixed[17:38, c(1, 3, 4)]
    next
  }
  iter <- sds[i]
  normal.prior <- prior(normal(0, iter), class = "b")
  stanvars <- stanvar(iter, name = "iter")

  m <- brm(
    mvbind(CompRatings, NativelikeRatings) ~ 
      F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 +
      (1 | p | RaterID) + (1 | q | ParticipantID),
    data = comp,
    family = cumulative(),
    prior = normal.prior,
    stanvars = stanvars,
    iter = 10000,
    seed = 1
  ) %>% 
    summary 
  sa[[i]] <- m$fixed[17:38, c(1, 3, 4)]
}

Flat prior:

m <- brm(
  mvbind(CompRatings, NativelikeRatings) ~ 
    F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + 
    (1 | p | RaterID) + (1 | q | ParticipantID),
  data = comp,
  family = cumulative(),
  iter = 10000,
  seed = 1
) %>% 
  summary   
sa[[length(sa)]] <- m$fixed[17:38, c(1, 3, 4)]

Draw a figure:

sa.df <- sa %>% 
  lapply(data.frame) %>% 
  bind_rows %>% 
  mutate(
    outcome = c("Comprehensibility", "Nativelikeness") %>% rep(each = 11) %>% rep(length(sa)),
    factor = rep(1:11) %>% rep(2) %>% rep(length(sa)) %>% paste("Factor", .) %>% factor(levels = paste("Factor", 1:11), labels = factor.labels),
    prior = c(paste("SD =", c("0.8", "0.9", 1:3)), "Flat") %>% rep(each = 22) %>% factor(levels = c(paste("SD =", c("0.8", "0.9", 1:3)), "Flat"))
  ) %>% 
  rename(estimate = 1, lower = 2, upper = 3) %>% 
  mutate(
    lower.posneg = ifelse(lower < 0, 0, 1) %>% factor,
    upper.posneg = ifelse(upper < 0, 0, 1) %>% factor
  ) %>% 
  as_tibble

sa.comp.gg <- ggplot(sa.df %>% filter(outcome == "Comprehensibility"), aes(prior, estimate)) +
  geom_pointrange(aes(ymin = lower, ymax = upper)) +
  geom_hline(yintercept = 0, linetype = 2, alpha = 0.7) +
  geom_text(aes(y = lower, label = lower %>% round(2) %>% format(nsmall = 2), color = lower.posneg), vjust = 1, size = 3) +
  geom_text(aes(y = upper, label = upper %>% round(2) %>% format(nsmall = 2), color = upper.posneg), vjust = 1, size = 3) +
  facet_wrap(~ factor) +
  scale_y_continuous(breaks = c(-1, 0, 1)) +
  scale_color_manual(values = c("red", "blue")) +
  coord_flip(ylim = c(-1.2, 1.2)) +
  theme_bw() +
  theme(
    text = element_text(size = 13),
    legend.position = "none"
  ) +
  labs(x = "Prior Distribution", y = "Posterior Mean and 95% Credible Interval")

sa.comp.gg

sa.comp.gg %+% filter(sa.df, outcome == "Nativelikeness")

Secret Weapon (Figure 1 in Supporting Information-I)

Build Bayesian mixed-effects logistic regression models targeting the observations with two adjacent ratings:

coefs.comp <- ci.comp <- coefs.ntv <- ci.ntv <- vector("list", 8)
for (i in 1:8) {
  # Comprehensibility
  m1 <- brm(
    CompRatings ~ F2 + F4 + F6 + F8 + (1 | RaterID) + (1 | ParticipantID),
    family = "bernoulli",
    seed = 1,
    iter = 10000,
    data = comp %>% filter(CompRatings %in% i:(i+1)) %>% mutate(CompRatings = factor(CompRatings)),
  )
  # Fixed-effects posterior means
  coefs.comp[[i]] <- summary(m1)$fixed[2:5 , 1]
  # Their 95% CIs
  post1 <- posterior_samples(m1)
  ci.comp[[i]] <- apply(post1[ , 2:5], 2, quantile, probs = c(0.025, 0.975))
  
  # Nativelikeness
  m2 <- brm(
    NativelikeRatings ~  F2  + F4 + F6 + F8 + F11 + (1 | RaterID) + (1 | ParticipantID),
    family = "bernoulli",
    seed = 1,
    iter = 10000,
    data = comp %>% filter(NativelikeRatings %in% i:(i+1)) %>% mutate(NativelikeRatings = factor(NativelikeRatings)),
  )
  coefs.ntv[[i]] <- summary(m2)$fixed[2:6 , 1]
  post2 <- posterior_samples(m2)
  ci.ntv[[i]] <- apply(post2[ , 2:6], 2, quantile, probs = c(0.025, 0.975))
}

Restructure and modify the data to draw a figure:

# Comprehensibility
coefs.comp.df <- unlist(coefs.comp) %>% 
  matrix(byrow = TRUE, ncol = 4) %>% 
  as.data.frame
colnames(coefs.comp.df) <- factor.labels[c(2, 4, 6, 8)]
coefs.comp.df$diff <- paste(1:8, 2:9, sep = " vs ")
ci.comp.df <- ci.comp %>% 
  lapply(as.data.frame) %>% 
  bind_rows %>% 
  mutate(
    diff = rep(paste(1:8, 2:9, sep = " vs "), each = 2),
    lh = rep(c("lb", "ub"), 8)
  ) %>% 
  gather(parameter, coef, -c(diff, lh)) %>% 
  spread(lh, coef) %>% 
  mutate(parameter = recode(parameter, 
                            b_F2 = factor.labels[2], 
                            b_F4 = factor.labels[4], 
                            b_F6 = factor.labels[6], 
                            b_F8 = factor.labels[8]))

sw.comp <- gather(coefs.comp.df, parameter, coef, -diff) %>% 
  left_join(ci.comp.df) %>% 
  filter(diff != "1 vs 2") %>% 
  mutate(
    diff = factor(diff, levels = paste(1:8, 2:9, sep = " vs ")),
    parameter = factor(parameter, levels = factor.labels[c(2, 4, 6, 8)])
  ) %>% 
  rbind(tibble(
  diff = "1 vs 2",
  parameter = factor.labels[c(2, 4, 6, 8)],
  coef = NA,
  lb = NA,
  ub = NA
  ))

# Nativelikeness
coefs.ntv.df <- unlist(coefs.ntv) %>% matrix(byrow = TRUE, ncol = 5) %>% as.data.frame
colnames(coefs.ntv.df) <- factor.labels[c(2, 4, 6, 8, 11)]
coefs.ntv.df$diff <- paste(1:8, 2:9, sep = " vs ")
ci.ntv.df <- ci.ntv %>% 
  lapply(as.data.frame) %>% 
  bind_rows %>% 
  mutate(
    diff = rep(paste(1:8, 2:9, sep = " vs "), each = 2),
    lh = rep(c("lb", "ub"), 8)
  ) %>% 
  gather(parameter, coef, -c(diff, lh)) %>% 
  spread(lh, coef) %>% 
  mutate(parameter = recode(parameter, 
                            b_F2 = factor.labels[2], 
                            b_F4 = factor.labels[4], 
                            b_F6 = factor.labels[6], 
                            b_F8 = factor.labels[8], 
                            b_F11 = factor.labels[11]))

sw.ntv <- gather(coefs.ntv.df, parameter, coef, -diff) %>% 
  left_join(ci.ntv.df) %>% 
  mutate(parameter = factor(parameter, levels = factor.labels[c(2, 4, 6, 8, 11)]))

Draw the figure:

sw.comp.gg <- ggplot(sw.comp, aes(diff, coef)) +
  geom_pointrange(aes(ymin = lb, ymax = ub)) +
  geom_hline(yintercept = 0, linetype = 2) +
  facet_wrap(~ parameter, nrow = 2) +
  coord_flip() +
  theme_bw() +
  theme(text = element_text(size = 13)) +
  labs(x = "", y = "", title = "Comprehensibility")

sw.ntv.gg <- sw.comp.gg %+% sw.ntv +
  labs(x = "", y = "", title = "Nativelikeness")

grid.arrange(sw.comp.gg, sw.ntv.gg, nrow = 1, widths = c(415, 585),
             bottom = textGrob("Posterior Mean and 95% Credible Interval", gp = gpar(fontsize = 20)),
             left = textGrob("Contrasted Ratings", gp = gpar(fontsize = 20), rot = 90)
)