Load necessary packages:
library("brms")
options(mc.cores = parallel::detectCores()) # use multiple CPU cores when running brm()
library("gridExtra")
library("grid")
library("devtools") # for session_info()
library("tidyverse")
Here is the environment in which the analysis was conducted.
session_info()
## ─ Session info ──────────────────────────────────────────────────────────
## setting value
## version R version 3.5.2 (2018-12-20)
## os macOS Mojave 10.14.6
## system x86_64, darwin15.6.0
## ui X11
## language (EN)
## collate en_GB.UTF-8
## ctype en_GB.UTF-8
## tz Europe/London
## date 2019-11-07
##
## ─ Packages ──────────────────────────────────────────────────────────────
## package * version date lib source
## abind 1.4-5 2016-07-21 [1] CRAN (R 3.5.0)
## assertthat 0.2.0 2017-04-11 [1] CRAN (R 3.5.0)
## backports 1.1.3 2018-12-14 [1] CRAN (R 3.5.0)
## base64enc 0.1-3 2015-07-28 [1] CRAN (R 3.5.0)
## bayesplot 1.6.0 2018-08-02 [1] CRAN (R 3.5.0)
## bindr 0.1.1 2018-03-13 [1] CRAN (R 3.5.0)
## bindrcpp 0.2.2 2018-03-29 [1] CRAN (R 3.5.0)
## bridgesampling 0.6-0 2018-10-21 [1] CRAN (R 3.5.0)
## brms * 2.10.0 2019-08-29 [1] CRAN (R 3.5.2)
## Brobdingnag 1.2-6 2018-08-13 [1] CRAN (R 3.5.0)
## broom 0.5.1 2018-12-05 [1] CRAN (R 3.5.0)
## callr 3.1.1 2018-12-21 [1] CRAN (R 3.5.0)
## cellranger 1.1.0 2016-07-27 [1] CRAN (R 3.5.0)
## cli 1.0.1 2018-09-25 [1] CRAN (R 3.5.0)
## coda 0.19-2 2018-10-08 [1] CRAN (R 3.5.0)
## colorspace 1.3-2 2016-12-14 [1] CRAN (R 3.5.0)
## colourpicker 1.0 2017-09-27 [1] CRAN (R 3.5.0)
## crayon 1.3.4 2017-09-16 [1] CRAN (R 3.5.0)
## crosstalk 1.0.0 2016-12-21 [1] CRAN (R 3.5.0)
## desc 1.2.0 2018-05-01 [1] CRAN (R 3.5.0)
## devtools * 2.0.1 2018-10-26 [1] CRAN (R 3.5.2)
## digest 0.6.18 2018-10-10 [1] CRAN (R 3.5.0)
## dplyr * 0.7.8 2018-11-10 [1] CRAN (R 3.5.0)
## DT 0.5 2018-11-05 [1] CRAN (R 3.5.0)
## dygraphs 1.1.1.6 2018-07-11 [1] CRAN (R 3.5.0)
## evaluate 0.12 2018-10-09 [1] CRAN (R 3.5.0)
## forcats * 0.3.0 2018-02-19 [1] CRAN (R 3.5.0)
## fs 1.2.6 2018-08-23 [1] CRAN (R 3.5.0)
## generics 0.0.2 2018-11-29 [1] CRAN (R 3.5.0)
## ggplot2 * 3.1.0 2018-10-25 [1] CRAN (R 3.5.0)
## ggridges 0.5.1 2018-09-27 [1] CRAN (R 3.5.0)
## glue 1.3.0 2018-07-17 [1] CRAN (R 3.5.0)
## gridExtra * 2.3 2017-09-09 [1] CRAN (R 3.5.0)
## gtable 0.2.0 2016-02-26 [1] CRAN (R 3.5.0)
## gtools 3.8.1 2018-06-26 [1] CRAN (R 3.5.0)
## haven 2.0.0 2018-11-22 [1] CRAN (R 3.5.0)
## hms 0.4.2 2018-03-10 [1] CRAN (R 3.5.0)
## htmltools 0.3.6 2017-04-28 [1] CRAN (R 3.5.0)
## htmlwidgets 1.3 2018-09-30 [1] CRAN (R 3.5.0)
## httpuv 1.4.5.1 2018-12-18 [1] CRAN (R 3.5.0)
## httr 1.4.0 2018-12-11 [1] CRAN (R 3.5.0)
## igraph 1.2.2 2018-07-27 [1] CRAN (R 3.5.0)
## inline 0.3.15 2018-05-18 [1] CRAN (R 3.5.0)
## jsonlite 1.6 2018-12-07 [1] CRAN (R 3.5.0)
## knitr 1.21 2018-12-10 [1] CRAN (R 3.5.2)
## later 0.7.5 2018-09-18 [1] CRAN (R 3.5.0)
## lattice 0.20-38 2018-11-04 [1] CRAN (R 3.5.2)
## lazyeval 0.2.1 2017-10-29 [1] CRAN (R 3.5.0)
## loo 2.1.0 2019-03-13 [1] CRAN (R 3.5.2)
## lubridate 1.7.4 2018-04-11 [1] CRAN (R 3.5.0)
## magrittr 1.5 2014-11-22 [1] CRAN (R 3.5.0)
## markdown 0.9 2018-12-07 [1] CRAN (R 3.5.0)
## Matrix 1.2-15 2018-11-01 [1] CRAN (R 3.5.2)
## matrixStats 0.54.0 2018-07-23 [1] CRAN (R 3.5.0)
## memoise 1.1.0 2017-04-21 [1] CRAN (R 3.5.0)
## mime 0.6 2018-10-05 [1] CRAN (R 3.5.0)
## miniUI 0.1.1.1 2018-05-18 [1] CRAN (R 3.5.0)
## modelr 0.1.2 2018-05-11 [1] CRAN (R 3.5.0)
## munsell 0.5.0 2018-06-12 [1] CRAN (R 3.5.0)
## mvtnorm 1.0-8 2018-05-31 [1] CRAN (R 3.5.0)
## nlme 3.1-137 2018-04-07 [1] CRAN (R 3.5.2)
## pillar 1.3.1 2018-12-15 [1] CRAN (R 3.5.0)
## pkgbuild 1.0.2 2018-10-16 [1] CRAN (R 3.5.0)
## pkgconfig 2.0.2 2018-08-16 [1] CRAN (R 3.5.0)
## pkgload 1.0.2 2018-10-29 [1] CRAN (R 3.5.0)
## plyr 1.8.4 2016-06-08 [1] CRAN (R 3.5.0)
## prettyunits 1.0.2 2015-07-13 [1] CRAN (R 3.5.0)
## processx 3.2.1 2018-12-05 [1] CRAN (R 3.5.0)
## promises 1.0.1 2018-04-13 [1] CRAN (R 3.5.0)
## ps 1.3.0 2018-12-21 [1] CRAN (R 3.5.0)
## purrr * 0.2.5 2018-05-29 [1] CRAN (R 3.5.0)
## R6 2.3.0 2018-10-04 [1] CRAN (R 3.5.0)
## Rcpp * 1.0.0 2018-11-07 [1] CRAN (R 3.5.0)
## readr * 1.3.1 2018-12-21 [1] CRAN (R 3.5.0)
## readxl 1.2.0 2018-12-19 [1] CRAN (R 3.5.0)
## remotes 2.0.2 2018-10-30 [1] CRAN (R 3.5.0)
## reshape2 1.4.3 2017-12-11 [1] CRAN (R 3.5.0)
## rlang 0.3.0.1 2018-10-25 [1] CRAN (R 3.5.0)
## rmarkdown 1.11 2018-12-08 [1] CRAN (R 3.5.0)
## rprojroot 1.3-2 2018-01-03 [1] CRAN (R 3.5.0)
## rsconnect 0.8.12 2018-12-05 [1] CRAN (R 3.5.0)
## rstan 2.19.2 2019-07-09 [1] CRAN (R 3.5.2)
## rstantools 1.5.1 2018-08-22 [1] CRAN (R 3.5.0)
## rstudioapi 0.8 2018-10-02 [1] CRAN (R 3.5.0)
## rvest 0.3.2 2016-06-17 [1] CRAN (R 3.5.0)
## scales 1.0.0 2018-08-09 [1] CRAN (R 3.5.0)
## sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 3.5.0)
## shiny 1.2.0 2018-11-02 [1] CRAN (R 3.5.0)
## shinyjs 1.0 2018-01-08 [1] CRAN (R 3.5.0)
## shinystan 2.5.0 2018-05-01 [1] CRAN (R 3.5.0)
## shinythemes 1.1.2 2018-11-06 [1] CRAN (R 3.5.0)
## StanHeaders 2.19.0 2019-09-07 [1] CRAN (R 3.5.2)
## stringi 1.2.4 2018-07-20 [1] CRAN (R 3.5.0)
## stringr * 1.3.1 2018-05-10 [1] CRAN (R 3.5.0)
## threejs 0.3.1 2017-08-13 [1] CRAN (R 3.5.0)
## tibble * 2.0.0 2019-01-04 [1] CRAN (R 3.5.2)
## tidyr * 0.8.2 2018-10-28 [1] CRAN (R 3.5.0)
## tidyselect 0.2.5 2018-10-11 [1] CRAN (R 3.5.0)
## tidyverse * 1.2.1 2017-11-14 [1] CRAN (R 3.5.0)
## usethis * 1.4.0 2018-08-14 [1] CRAN (R 3.5.0)
## withr 2.1.2 2018-03-15 [1] CRAN (R 3.5.0)
## xfun 0.4 2018-10-23 [1] CRAN (R 3.5.0)
## xml2 1.2.0 2018-01-24 [1] CRAN (R 3.5.0)
## xtable 1.8-3 2018-08-29 [1] CRAN (R 3.5.0)
## xts 0.11-2 2018-11-05 [1] CRAN (R 3.5.0)
## yaml 2.2.0 2018-07-25 [1] CRAN (R 3.5.0)
## zoo 1.8-4 2018-09-19 [1] CRAN (R 3.5.0)
##
## [1] /Library/Frameworks/R.framework/Versions/3.5/Resources/library
Read the data and convert them to long format, with one row per participant and rater:
comp <- read_csv("L2SpeechData.csv") %>%
gather(key, value, -c(ParticipantID:F11)) %>%
separate(key, c("RaterID", "var")) %>%
spread(var, value) %>%
mutate_at(.vars = c("ParticipantID", "RaterID"), .funs = factor)
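To see what this reshaping does, here is a minimal sketch on toy data; the column names (e.g., R1_Comp) are hypothetical stand-ins for the actual rater columns:
# Hypothetical two-participant, two-rater example (illustration only)
toy <- tibble(
  ParticipantID = c("P1", "P2"),
  F1 = c(0.5, -0.3),
  R1_Comp = c(7, 4),
  R2_Comp = c(6, 5)
)
toy %>%
  gather(key, value, -c(ParticipantID:F1)) %>% # one row per participant, rater, and variable
  separate(key, c("RaterID", "var")) %>%       # split "R1_Comp" into RaterID = "R1", var = "Comp"
  spread(var, value)                           # back to one column per rated variable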
Here is what the actual data look like:
comp
Factor labels used as strip text in the ggplot panels:
factor.labels <- c(
"Factor 1:\nExperience Quantity",
"Factor 2:\nCurrent L2 Use",
"Factor 3:\nNativeness Awareness",
"Factor 4:\nAge of Immersion",
"Factor 5:\nMotivation",
"Factor 6:\nNativeness Attitude",
"Factor 7:\nEFL Experience",
"Factor 8:\nSpecial Past Experience",
"Factor 9:\nForeign Accents Attitude",
"Factor 10:\nComprehensibility Orientation",
"Factor 11:\nL1 Influence"
)
Restructure the data to draw a figure:
comp.long <- comp %>%
gather(factor, fs, -c(ParticipantID, RaterID, CompRatings, NativelikeRatings)) %>%
mutate(
factor = case_when(
factor == "F1" ~ factor.labels[1],
factor == "F2" ~ factor.labels[2],
factor == "F3" ~ factor.labels[3],
factor == "F4" ~ factor.labels[4],
factor == "F5" ~ factor.labels[5],
factor == "F6" ~ factor.labels[6],
factor == "F7" ~ factor.labels[7],
factor == "F8" ~ factor.labels[8],
factor == "F9" ~ factor.labels[9],
factor == "F10" ~ factor.labels[10],
factor == "F11" ~ factor.labels[11]
) %>% factor(levels = factor.labels)
)
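An equivalent and more compact recoding (a sketch, not what was run above) maps the level names straight to the display labels:
comp.long.alt <- comp %>%
  gather(factor, fs, -c(ParticipantID, RaterID, CompRatings, NativelikeRatings)) %>%
  mutate(factor = factor(factor, levels = paste0("F", 1:11), labels = factor.labels))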
Layers used in both comprehensibility and nativelikeness panels:
common.layers <- list(
  geom_jitter(alpha = 0.2, height = 0, width = 0.2),
  # outlier.shape = " " draws outliers as blanks; the individual points are
  # already plotted by the jitter layer
  geom_boxplot(notch = TRUE, color = "blue", outlier.shape = " ", alpha = 0),
  # red point = mean
  stat_summary(fun.y = "mean", colour = "red", size = 1.5, geom = "point"),
  coord_flip(),
  facet_wrap(~ factor, scales = "free_x", ncol = 3),
  theme_bw(),
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    text = element_text(size = 12)
  )
)
Draw a figure:
desc.comp <- ggplot(comp.long, aes(CompRatings %>% factor, fs)) +
common.layers +
labs(x = "", y = "", title = "Comprehensibility")
desc.ntv <- ggplot(comp.long, aes(NativelikeRatings %>% factor, fs)) +
common.layers +
labs(x = "", y = "", title = "Nativelikeness")
grid.arrange(desc.comp, desc.ntv, nrow = 1,
bottom = textGrob("Factor Score", gp = gpar(fontsize = 20)),
left = textGrob("Rating", gp = gpar(fontsize = 20), rot = 90)
)
Build the multivariate ordinal (cumulative logit) model with correlated by-rater and by-participant random intercepts:
m.normal.prior <- brm(
mvbind(CompRatings, NativelikeRatings) ~
F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 +
(1 | p | RaterID) + (1 | q | ParticipantID),
data = comp,
family = cumulative(),
prior = prior(normal(0, 1), class = b),
iter = 10000,
seed = 1
)
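Before interpreting the model, convergence can be spot-checked; a minimal sketch (not part of the original script) using the rhat() and neff_ratio() accessors from brms:
# All Rhat values should be close to 1, and effective-sample-size ratios should not be tiny
max(rhat(m.normal.prior))
summary(neff_ratio(m.normal.prior))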
Posterior predictive checks:
pp_check(m.normal.prior, resp = "CompRatings")
pp_check(m.normal.prior, resp = "NativelikeRatings")
Both figures look reasonable.
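For ordinal outcomes, a bar-style check comparing observed and replicated category counts is another option; a possible variant (not run here):
pp_check(m.normal.prior, resp = "CompRatings", type = "bars", nsamples = 100)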
Define two helper functions used in the rest of this section:
calc.stats <- function(x) {
means <- x %>%
group_by(key) %>%
summarize(
mean = mean(value),
x = quantile(value, probs = 0.025),
xend = quantile(value, probs = 0.975),
y = 500,
yend = 500,
xintercept = 0
)
means$xintercept[grep("^b_.+Ratings_F\\d", means$key, invert = TRUE)] <- NA
return(means)
}
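A quick demonstration of calc.stats() on fake draws (hypothetical input, purely illustrative):
fake <- tibble(
  key = rep(c("b_CompRatings_F1", "b_CompRatings_Intercept[1]"), each = 100),
  value = rnorm(200)
)
calc.stats(fake) # xintercept stays 0 for the slope parameter and is set to NA for the threshold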
label.properly <- function(x) {
x <- x %>%
str_replace_all("b_.+Ratings_", "") %>%
str_replace_all("Intercept\\[1\\]", "Threshold (1 vs 2)") %>%
str_replace_all("Intercept\\[2\\]", "Threshold (2 vs 3)") %>%
str_replace_all("Intercept\\[3\\]", "Threshold (3 vs 4)") %>%
str_replace_all("Intercept\\[4\\]", "Threshold (4 vs 5)") %>%
str_replace_all("Intercept\\[5\\]", "Threshold (5 vs 6)") %>%
str_replace_all("Intercept\\[6\\]", "Threshold (6 vs 7)") %>%
str_replace_all("Intercept\\[7\\]", "Threshold (7 vs 8)") %>%
str_replace_all("Intercept\\[8\\]", "Threshold (8 vs 9)") %>%
str_replace_all("^F(\\d+)", "Factor \\1") %>%
str_replace_all(
"sd_ParticipantID__CompRatings_Intercept",
"Comprehensibility:\nSD of By-Participant\nRandom Intercepts"
) %>%
str_replace_all(
"sd_ParticipantID__NativelikeRatings_Intercept",
"Nativelikeness:\nSD of By-Participant\nRandom Intercepts"
) %>%
str_replace_all(
"sd_RaterID__CompRatings_Intercept",
"Comprehensibility:\nSD of By-Rater\nRandom Intercepts"
) %>%
str_replace_all(
"sd_RaterID__NativelikeRatings_Intercept",
"Nativelikeness:\nSD of By-Rater\nRandom Intercepts"
) %>%
str_replace_all(
"cor_ParticipantID__CompRatings_Intercept__NativelikeRatings_Intercept",
"Correlation of\nBy-Participant\nRandom Intercepts"
) %>%
str_replace_all(
"cor_RaterID__CompRatings_Intercept__NativelikeRatings_Intercept",
"Correlation of\nBy-Rater\nRandom Intercepts"
) %>%
factor(
levels = c(
paste0("Threshold (", 1:8, " vs ", 2:9, ")"),
paste("Factor", 1:11),
"Comprehensibility:\nSD of By-Participant\nRandom Intercepts",
"Comprehensibility:\nSD of By-Rater\nRandom Intercepts",
"Nativelikeness:\nSD of By-Participant\nRandom Intercepts",
"Nativelikeness:\nSD of By-Rater\nRandom Intercepts",
"Correlation of\nBy-Participant\nRandom Intercepts",
"Correlation of\nBy-Rater\nRandom Intercepts"
),
labels = c(
paste0("Threshold (", 1:8, " vs ", 2:9, ")"),
factor.labels,
"Comprehensibility:\nSD of By-Participant\nRandom Intercepts",
"Comprehensibility:\nSD of By-Rater\nRandom Intercepts",
"Nativelikeness:\nSD of By-Participant\nRandom Intercepts",
"Nativelikeness:\nSD of By-Rater\nRandom Intercepts",
"Correlation of\nBy-Participant\nRandom Intercepts",
"Correlation of\nBy-Rater\nRandom Intercepts"
)
)
}
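For example (hypothetical input), a slope name collapses to its factor label and an intercept name to its threshold label:
label.properly(c("b_CompRatings_F2", "b_CompRatings_Intercept[1]"))
# returns the labels "Factor 2: Current L2 Use" and "Threshold (1 vs 2)"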
Extract the posterior draws:
post.sub.long <- posterior_samples(m.normal.prior)[ , 1:44] %>% # first 44 columns: 16 thresholds, 22 slopes, 4 SDs, 2 correlations
  gather(key, value) %>%
  as_tibble
Subset of the data including fixed effects of comprehensibility:
post.sub.long.group1 <- post.sub.long[grep("^b_CompRatings", post.sub.long$key), ]
means1 <- calc.stats(post.sub.long.group1)
post.sub.long.group1$key <- label.properly(post.sub.long.group1$key)
means1$key <- label.properly(means1$key)
Subset of the data including fixed effects of nativelikeness:
post.sub.long.group2 <- post.sub.long[grep("^b_NativelikeRatings", post.sub.long$key), ]
means2 <- calc.stats(post.sub.long.group2)
post.sub.long.group2$key <- label.properly(post.sub.long.group2$key)
means2$key <- label.properly(means2$key)
Subset of the data including random effects and correlations:
post.sub.long.group3 <- post.sub.long[grep("^(sd|cor)_", post.sub.long$key), ]
means3 <- calc.stats(post.sub.long.group3)
post.sub.long.group3$key <- label.properly(post.sub.long.group3$key)
means3$key <- label.properly(means3$key)
Define the function that draws the posterior-distribution figures:
gg.draw <- function(df1, df2, title) {
ggplot(df1, aes(value)) +
geom_histogram(alpha = 0.5) +
geom_vline(data = df2, aes(xintercept = xintercept), color = "red", linetype = 2) +
geom_segment(data = df2, aes(x = x, xend = xend, y = y, yend = yend), color = "blue") +
geom_point(data = df2, aes(x = mean, y = y), color = "blue") +
facet_wrap(~ key, scales = "free") +
coord_cartesian(ylim = c(0, 4000)) +
theme_bw() +
theme(
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
text = element_text(size = 15)
) +
labs(x = "Estimate", y = "", title = title)
}
Draw the figures. Blue horizontal lines represent 95% credible intervals, while red vertical lines (drawn only for slope parameters) mark 0 (i.e., a null effect).
# Fixed Effects of Comprehensibility Ratings
gg.draw(post.sub.long.group1, means1, "")
# Fixed Effects of Nativelikeness Ratings
gg.draw(post.sub.long.group2, means2, "")
# Random Effects and Correlations
gg.draw(post.sub.long.group3, means3, "")
Calculate classification accuracy:
# Fitted probabilities: an observations x summaries x categories array;
# dimension 3 stacks the 9 comprehensibility and then the 9 nativelikeness categories
predicted <- fitted(m.normal.prior, comp)
# Predicted ratings: for each observation, the category with the highest
# posterior-mean probability (column 1 of the summary dimension)
predicted.ratings <- c(
  apply(predicted[ , 1, 1:9], 1, which.max),
  apply(predicted[ , 1, 10:18], 1, which.max)
) %>% factor(levels = 1:9)
# Cross-tabulation between observed and predicted ratings
cont.tbl <- table(c(comp$CompRatings, comp$NativelikeRatings), predicted.ratings)
cont.tbl
## predicted.ratings
## 1 2 3 4 5 6 7 8 9
## 1 40 27 26 13 4 1 0 1 0
## 2 20 43 60 28 11 7 2 0 0
## 3 12 25 91 69 28 23 7 0 0
## 4 4 8 71 95 48 43 21 2 0
## 5 0 2 26 92 56 72 31 7 0
## 6 0 1 12 46 43 98 76 24 1
## 7 0 1 1 14 24 67 117 63 13
## 8 0 0 1 3 5 35 70 118 45
## 9 0 0 0 1 0 2 20 78 105
# 763 cases were accurately classified.
sum(diag(cont.tbl))
## [1] 763
# The classification accuracy was 34.7%.
sum(diag(cont.tbl)) / sum(cont.tbl)
## [1] 0.3468182
Classification accuracy with the 11 factors alone (i.e., without the random effects):
# Model with the 11 factors only, no random intercepts
m.normal.prior2 <- brm(
mvbind(CompRatings, NativelikeRatings) ~
F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11,
data = comp,
family = cumulative(),
prior = prior(normal(0, 1), class = b),
iter = 10000,
seed = 1
)
# Calculate classification accuracy
predicted2 <- fitted(m.normal.prior2, comp)
predicted.ratings2 <- c(
  apply(predicted2[ , 1, 1:9], 1, which.max),
  apply(predicted2[ , 1, 10:18], 1, which.max)
) %>% factor(levels = 1:9)
cont.tbl2 <- table(c(comp$CompRatings, comp$NativelikeRatings), predicted.ratings2)
# 456 cases were accurately classified.
sum(diag(cont.tbl2))
## [1] 456
# The classification accuracy was 20.7%.
sum(diag(cont.tbl2)) / sum(cont.tbl2)
## [1] 0.2072727
# Baseline accuracy from always guessing each scale's modal rating (383 ratings, or 17.4%)
baseline <- max(table(comp$CompRatings)) + max(table(comp$NativelikeRatings))
baseline
## [1] 383
baseline / 2200
## [1] 0.1740909
# 456/2200 and 383/2200 are significantly different
matrix(c(456, 2200, 383, 2200), ncol = 2) %>% chisq.test
##
## Pearson's Chi-squared test with Yates' continuity correction
##
## data: .
## X-squared = 5.1628, df = 1, p-value = 0.02308
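As a rough cross-check (not in the original analysis), the same comparison can be run as a two-proportion test; note that this treats the 2,200 ratings as independent trials:
prop.test(c(456, 383), c(2200, 2200))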
Classification accuracy when an error of one rating step is allowed:
num <- 0
# Count predictions that miss the observed rating by exactly one category
for (i in 2:8) {
  num <- num + sum(cont.tbl2[i, c(i - 1, i + 1)])
}
# The edge categories (1 and 9) each have only one adjacent category
num <- num + cont.tbl2[1, 2] + cont.tbl2[9, 8]
# 1156 cases were accurately classified.
sum(diag(cont.tbl2)) + num
## [1] 1156
# The classification accuracy was 52.5%.
(sum(diag(cont.tbl2)) + num) / sum(cont.tbl2)
## [1] 0.5254545
# Baseline
# Distribution of comprehensibility ratings
table(comp$CompRatings)
##
## 1 2 3 4 5 6 7 8 9
## 19 38 78 113 137 155 204 196 160
# Distribution of nativelikeness ratings
table(comp$NativelikeRatings)
##
## 1 2 3 4 5 6 7 8 9
## 93 133 177 179 149 146 96 81 46
# The baseline accuracy was 48.2%.
(
155 + # Comprehensibility: Rating = 6
204 + # Comprehensibility: Rating = 7 (the largest category)
196 + # Comprehensibility: Rating = 8
177 + # Nativelikeness: Rating = 3
179 + # Nativelikeness: Rating = 4 (the largest category)
149 # Nativelikeness: Rating = 5
) / 2200
## [1] 0.4818182
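The same baseline can be computed programmatically; a sketch, where the helper one.off.baseline() is not part of the original code:
# Always guess the modal rating; count the modal category and its two
# neighbours as correct
one.off.baseline <- function(ratings) {
  tab <- table(ratings)
  k <- which.max(tab)
  sum(tab[max(1, k - 1):min(length(tab), k + 1)])
}
(one.off.baseline(comp$CompRatings) + one.off.baseline(comp$NativelikeRatings)) / 2200 # should reproduce the 0.4818182 above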
Summary of the model (Table 1):
summary(m.normal.prior)
## Family: MV(cumulative, cumulative)
## Links: mu = logit; disc = identity
## mu = logit; disc = identity
## Formula: CompRatings ~ F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + (1 | p | RaterID) + (1 | q | ParticipantID)
## NativelikeRatings ~ F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 + (1 | p | RaterID) + (1 | q | ParticipantID)
## Data: comp (Number of observations: 1100)
## Samples: 4 chains, each with iter = 10000; warmup = 5000; thin = 1;
## total post-warmup samples = 20000
##
## Group-Level Effects:
## ~ParticipantID (Number of levels: 110)
## Estimate Est.Error
## sd(CompRatings_Intercept) 1.60 0.14
## sd(NativelikeRatings_Intercept) 1.95 0.16
## cor(CompRatings_Intercept,NativelikeRatings_Intercept) 0.98 0.01
## l-95% CI u-95% CI
## sd(CompRatings_Intercept) 1.35 1.90
## sd(NativelikeRatings_Intercept) 1.65 2.29
## cor(CompRatings_Intercept,NativelikeRatings_Intercept) 0.95 1.00
## Rhat Bulk_ESS
## sd(CompRatings_Intercept) 1.00 5625
## sd(NativelikeRatings_Intercept) 1.00 5334
## cor(CompRatings_Intercept,NativelikeRatings_Intercept) 1.00 4767
## Tail_ESS
## sd(CompRatings_Intercept) 10681
## sd(NativelikeRatings_Intercept) 9405
## cor(CompRatings_Intercept,NativelikeRatings_Intercept) 8610
##
## ~RaterID (Number of levels: 10)
## Estimate Est.Error
## sd(CompRatings_Intercept) 1.96 0.61
## sd(NativelikeRatings_Intercept) 1.26 0.39
## cor(CompRatings_Intercept,NativelikeRatings_Intercept) -0.19 0.31
## l-95% CI u-95% CI
## sd(CompRatings_Intercept) 1.16 3.46
## sd(NativelikeRatings_Intercept) 0.74 2.25
## cor(CompRatings_Intercept,NativelikeRatings_Intercept) -0.73 0.46
## Rhat Bulk_ESS
## sd(CompRatings_Intercept) 1.00 6153
## sd(NativelikeRatings_Intercept) 1.00 7370
## cor(CompRatings_Intercept,NativelikeRatings_Intercept) 1.00 7765
## Tail_ESS
## sd(CompRatings_Intercept) 8483
## sd(NativelikeRatings_Intercept) 9748
## cor(CompRatings_Intercept,NativelikeRatings_Intercept) 10220
##
## Population-Level Effects:
## Estimate Est.Error l-95% CI u-95% CI Rhat
## CompRatings_Intercept[1] -5.94 0.72 -7.42 -4.53 1.00
## CompRatings_Intercept[2] -4.60 0.69 -6.03 -3.23 1.00
## CompRatings_Intercept[3] -3.39 0.69 -4.79 -2.04 1.00
## CompRatings_Intercept[4] -2.33 0.68 -3.73 -0.98 1.00
## CompRatings_Intercept[5] -1.31 0.68 -2.70 0.05 1.00
## CompRatings_Intercept[6] -0.22 0.68 -1.60 1.12 1.00
## CompRatings_Intercept[7] 1.26 0.68 -0.13 2.61 1.00
## CompRatings_Intercept[8] 3.15 0.68 1.75 4.52 1.00
## NativelikeRatings_Intercept[1] -4.05 0.47 -4.98 -3.11 1.00
## NativelikeRatings_Intercept[2] -2.50 0.46 -3.42 -1.59 1.00
## NativelikeRatings_Intercept[3] -1.14 0.46 -2.04 -0.23 1.00
## NativelikeRatings_Intercept[4] 0.09 0.46 -0.80 1.00 1.00
## NativelikeRatings_Intercept[5] 1.17 0.46 0.27 2.09 1.00
## NativelikeRatings_Intercept[6] 2.48 0.46 1.57 3.41 1.00
## NativelikeRatings_Intercept[7] 3.72 0.47 2.79 4.66 1.00
## NativelikeRatings_Intercept[8] 5.51 0.50 4.53 6.52 1.00
## CompRatings_F1 0.09 0.17 -0.24 0.42 1.00
## CompRatings_F2 0.43 0.17 0.10 0.76 1.00
## CompRatings_F3 -0.01 0.16 -0.33 0.32 1.00
## CompRatings_F4 -0.44 0.16 -0.75 -0.11 1.00
## CompRatings_F5 0.08 0.16 -0.24 0.41 1.00
## CompRatings_F6 0.39 0.16 0.07 0.72 1.00
## CompRatings_F7 -0.17 0.17 -0.49 0.16 1.00
## CompRatings_F8 -0.44 0.17 -0.77 -0.11 1.00
## CompRatings_F9 0.01 0.17 -0.32 0.34 1.00
## CompRatings_F10 0.05 0.16 -0.26 0.37 1.00
## CompRatings_F11 -0.13 0.17 -0.46 0.20 1.00
## NativelikeRatings_F1 0.15 0.20 -0.25 0.54 1.00
## NativelikeRatings_F2 0.55 0.20 0.15 0.94 1.00
## NativelikeRatings_F3 0.06 0.19 -0.32 0.44 1.00
## NativelikeRatings_F4 -0.64 0.20 -1.02 -0.25 1.00
## NativelikeRatings_F5 -0.01 0.19 -0.39 0.38 1.00
## NativelikeRatings_F6 0.41 0.20 0.03 0.80 1.00
## NativelikeRatings_F7 -0.31 0.20 -0.69 0.08 1.00
## NativelikeRatings_F8 -0.44 0.20 -0.83 -0.05 1.00
## NativelikeRatings_F9 0.02 0.20 -0.37 0.42 1.00
## NativelikeRatings_F10 -0.04 0.19 -0.41 0.33 1.00
## NativelikeRatings_F11 -0.42 0.20 -0.80 -0.03 1.00
## Bulk_ESS Tail_ESS
## CompRatings_Intercept[1] 4249 6307
## CompRatings_Intercept[2] 4022 6201
## CompRatings_Intercept[3] 3957 5979
## CompRatings_Intercept[4] 3927 5989
## CompRatings_Intercept[5] 3921 6098
## CompRatings_Intercept[6] 3925 6121
## CompRatings_Intercept[7] 3943 6192
## CompRatings_Intercept[8] 4016 6415
## NativelikeRatings_Intercept[1] 5268 8714
## NativelikeRatings_Intercept[2] 5131 8501
## NativelikeRatings_Intercept[3] 5090 8216
## NativelikeRatings_Intercept[4] 5111 8773
## NativelikeRatings_Intercept[5] 5150 8704
## NativelikeRatings_Intercept[6] 5297 9228
## NativelikeRatings_Intercept[7] 5526 8937
## NativelikeRatings_Intercept[8] 6113 10317
## CompRatings_F1 6030 9882
## CompRatings_F2 4529 7655
## CompRatings_F3 4558 8745
## CompRatings_F4 4760 7837
## CompRatings_F5 4526 8397
## CompRatings_F6 4315 7456
## CompRatings_F7 5141 8862
## CompRatings_F8 4425 7792
## CompRatings_F9 4274 7517
## CompRatings_F10 4370 8367
## CompRatings_F11 4461 7735
## NativelikeRatings_F1 6037 10443
## NativelikeRatings_F2 4620 7572
## NativelikeRatings_F3 4430 8188
## NativelikeRatings_F4 4812 8256
## NativelikeRatings_F5 4532 8079
## NativelikeRatings_F6 4444 7736
## NativelikeRatings_F7 5200 8677
## NativelikeRatings_F8 4420 7882
## NativelikeRatings_F9 4318 7014
## NativelikeRatings_F10 4351 7770
## NativelikeRatings_F11 4510 7370
##
## Samples were drawn using sampling(NUTS). For each parameter, Eff.Sample
## is a crude measure of effective sample size, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).
Default priors:
get_prior(
mvbind(CompRatings, NativelikeRatings) ~ F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 +
(1 | p | RaterID) + (1 | q | ParticipantID),
family = cumulative(),
data = comp
)
## prior class coef group resp
## 1 b
## 2 lkj(1) cor
## 3 cor ParticipantID
## 4 cor RaterID
## 5 Intercept
## 6 b CompRatings
## 7 b F1 CompRatings
## 8 b F10 CompRatings
## 9 b F11 CompRatings
## 10 b F2 CompRatings
## 11 b F3 CompRatings
## 12 b F4 CompRatings
## 13 b F5 CompRatings
## 14 b F6 CompRatings
## 15 b F7 CompRatings
## 16 b F8 CompRatings
## 17 b F9 CompRatings
## 18 student_t(3, 0, 10) Intercept CompRatings
## 19 Intercept 1 CompRatings
## 20 Intercept 2 CompRatings
## 21 Intercept 3 CompRatings
## 22 Intercept 4 CompRatings
## 23 Intercept 5 CompRatings
## 24 Intercept 6 CompRatings
## 25 Intercept 7 CompRatings
## 26 Intercept 8 CompRatings
## 27 student_t(3, 0, 10) sd CompRatings
## 28 sd ParticipantID CompRatings
## 29 sd Intercept ParticipantID CompRatings
## 30 sd RaterID CompRatings
## 31 sd Intercept RaterID CompRatings
## 32 b NativelikeRatings
## 33 b F1 NativelikeRatings
## 34 b F10 NativelikeRatings
## 35 b F11 NativelikeRatings
## 36 b F2 NativelikeRatings
## 37 b F3 NativelikeRatings
## 38 b F4 NativelikeRatings
## 39 b F5 NativelikeRatings
## 40 b F6 NativelikeRatings
## 41 b F7 NativelikeRatings
## 42 b F8 NativelikeRatings
## 43 b F9 NativelikeRatings
## 44 student_t(3, 0, 10) Intercept NativelikeRatings
## 45 Intercept 1 NativelikeRatings
## 46 Intercept 2 NativelikeRatings
## 47 Intercept 3 NativelikeRatings
## 48 Intercept 4 NativelikeRatings
## 49 Intercept 5 NativelikeRatings
## 50 Intercept 6 NativelikeRatings
## 51 Intercept 7 NativelikeRatings
## 52 Intercept 8 NativelikeRatings
## 53 student_t(3, 0, 10) sd NativelikeRatings
## 54 sd ParticipantID NativelikeRatings
## 55 sd Intercept ParticipantID NativelikeRatings
## 56 sd RaterID NativelikeRatings
## 57 sd Intercept RaterID NativelikeRatings
## dpar nlpar bound
## (the dpar, nlpar, and bound columns are empty for all 57 rows and are omitted here)
Gradually change the SD of the normal prior on the slopes:
sds <- c(0.8, 0.9, 1:3)
sa <- vector("list", length(sds) + 1) # one slot per SD, plus one for the flat prior
for (i in seq_along(sds)) {
  # SD = 1: reuse the model fitted above instead of refitting it
  if (i == 3) {
    sa[[3]] <- summary(m.normal.prior)$fixed[17:38, c(1, 3, 4)]
    next
  }
  iter <- sds[i]
  normal.prior <- prior(normal(0, iter), class = "b")
  stanvars <- stanvar(iter, name = "iter") # pass the value of iter into the Stan code
  m <- brm(
    mvbind(CompRatings, NativelikeRatings) ~
      F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 +
      (1 | p | RaterID) + (1 | q | ParticipantID),
    data = comp,
    family = cumulative(),
    prior = normal.prior,
    stanvars = stanvars,
    iter = 10000,
    seed = 1
  ) %>%
    summary
  sa[[i]] <- m$fixed[17:38, c(1, 3, 4)] # the 22 slopes: estimate, l-95% CI, u-95% CI
}
Flat (default) prior on the slopes:
m <- brm(
mvbind(CompRatings, NativelikeRatings) ~
F1 + F2 + F3 + F4 + F5 + F6 + F7 + F8 + F9 + F10 + F11 +
(1 | p | RaterID) + (1 | q | ParticipantID),
data = comp,
family = cumulative(),
iter = 10000,
seed = 1
) %>%
summary
sa[[length(sa)]] <- m$fixed[17:38, c(1, 3, 4)]
Draw a figure:
sa.df <- sa %>%
  lapply(data.frame) %>%
  bind_rows %>%
  mutate(
    outcome = c("Comprehensibility", "Nativelikeness") %>% rep(each = 11) %>% rep(length(sa)),
    factor = rep(1:11, 2) %>% rep(length(sa)) %>% paste("Factor", .) %>%
      factor(levels = paste("Factor", 1:11), labels = factor.labels),
    prior = c(paste("SD =", c("0.8", "0.9", 1:3)), "Flat") %>% rep(each = 22) %>%
      factor(levels = c(paste("SD =", c("0.8", "0.9", 1:3)), "Flat"))
  ) %>%
  rename(estimate = 1, lower = 2, upper = 3) %>%
  mutate(
    lower.posneg = ifelse(lower < 0, 0, 1) %>% factor,
    upper.posneg = ifelse(upper < 0, 0, 1) %>% factor
  ) %>%
  as_tibble
sa.comp.gg <- ggplot(sa.df %>% filter(outcome == "Comprehensibility"), aes(prior, estimate)) +
geom_pointrange(aes(ymin = lower, ymax = upper)) +
geom_hline(yintercept = 0, linetype = 2, alpha = 0.7) +
geom_text(aes(y = lower, label = lower %>% round(2) %>% format(nsmall = 2), color = lower.posneg), vjust = 1, size = 3) +
geom_text(aes(y = upper, label = upper %>% round(2) %>% format(nsmall = 2), color = upper.posneg), vjust = 1, size = 3) +
facet_wrap(~ factor) +
scale_y_continuous(breaks = c(-1, 0, 1)) +
scale_color_manual(values = c("red", "blue")) +
coord_flip(ylim = c(-1.2, 1.2)) +
theme_bw() +
theme(
text = element_text(size = 13),
legend.position = "none"
) +
labs(x = "Prior Distribution", y = "Posterior Mean and 95% Credible Interval")
sa.comp.gg
# %+% swaps in a new data set while keeping the rest of the plot specification
sa.comp.gg %+% filter(sa.df, outcome == "Nativelikeness")
Build Bayesian mixed-effects logistic regression models, one for each pair of adjacent ratings:
coefs.comp <- ci.comp <- coefs.ntv <- ci.ntv <- vector("list", 8)
for (i in 1:8) {
  # Comprehensibility: contrast ratings i and i + 1; with family = "bernoulli"
  # and a two-level factor outcome, brms models the probability of the second
  # (i.e., higher) rating
  m1 <- brm(
    CompRatings ~ F2 + F4 + F6 + F8 + (1 | RaterID) + (1 | ParticipantID),
    family = "bernoulli",
    seed = 1,
    iter = 10000,
    data = comp %>% filter(CompRatings %in% i:(i + 1)) %>% mutate(CompRatings = factor(CompRatings))
  )
  # Fixed-effects posterior means
  coefs.comp[[i]] <- summary(m1)$fixed[2:5, 1]
  # Their 95% CIs
  post1 <- posterior_samples(m1)
  ci.comp[[i]] <- apply(post1[ , 2:5], 2, quantile, probs = c(0.025, 0.975))
  # Nativelikeness: same contrast, with F11 as an additional predictor
  m2 <- brm(
    NativelikeRatings ~ F2 + F4 + F6 + F8 + F11 + (1 | RaterID) + (1 | ParticipantID),
    family = "bernoulli",
    seed = 1,
    iter = 10000,
    data = comp %>% filter(NativelikeRatings %in% i:(i + 1)) %>% mutate(NativelikeRatings = factor(NativelikeRatings))
  )
  coefs.ntv[[i]] <- summary(m2)$fixed[2:6, 1]
  post2 <- posterior_samples(m2)
  ci.ntv[[i]] <- apply(post2[ , 2:6], 2, quantile, probs = c(0.025, 0.975))
}
Restructure the results to draw a figure:
# Comprehensibility
coefs.comp.df <- unlist(coefs.comp) %>%
matrix(byrow = TRUE, ncol = 4) %>%
as.data.frame
colnames(coefs.comp.df) <- factor.labels[c(2, 4, 6, 8)]
coefs.comp.df$diff <- paste(1:8, 2:9, sep = " vs ")
ci.comp.df <- ci.comp %>%
lapply(as.data.frame) %>%
bind_rows %>%
mutate(
diff = rep(paste(1:8, 2:9, sep = " vs "), each = 2),
lh = rep(c("lb", "ub"), 8)
) %>%
gather(parameter, coef, -c(diff, lh)) %>%
spread(lh, coef) %>%
mutate(parameter = recode(parameter,
b_F2 = factor.labels[2],
b_F4 = factor.labels[4],
b_F6 = factor.labels[6],
b_F8 = factor.labels[8]))
sw.comp <- gather(coefs.comp.df, parameter, coef, -diff) %>%
  left_join(ci.comp.df) %>%
  # Drop the "1 vs 2" estimates and re-add them as NAs so that the panel row
  # is kept in the figure but left blank
  filter(diff != "1 vs 2") %>%
  mutate(
    diff = factor(diff, levels = paste(1:8, 2:9, sep = " vs ")),
    parameter = factor(parameter, levels = factor.labels[c(2, 4, 6, 8)])
  ) %>%
  rbind(tibble(
    diff = "1 vs 2",
    parameter = factor.labels[c(2, 4, 6, 8)],
    coef = NA,
    lb = NA,
    ub = NA
  ))
# Nativelikeness
coefs.ntv.df <- unlist(coefs.ntv) %>%
  matrix(byrow = TRUE, ncol = 5) %>%
  as.data.frame
colnames(coefs.ntv.df) <- factor.labels[c(2, 4, 6, 8, 11)]
coefs.ntv.df$diff <- paste(1:8, 2:9, sep = " vs ")
ci.ntv.df <- ci.ntv %>%
lapply(as.data.frame) %>%
bind_rows %>%
mutate(
diff = rep(paste(1:8, 2:9, sep = " vs "), each = 2),
lh = rep(c("lb", "ub"), 8)
) %>%
gather(parameter, coef, -c(diff, lh)) %>%
spread(lh, coef) %>%
mutate(parameter = recode(parameter,
b_F2 = factor.labels[2],
b_F4 = factor.labels[4],
b_F6 = factor.labels[6],
b_F8 = factor.labels[8],
b_F11 = factor.labels[11]))
sw.ntv <- gather(coefs.ntv.df, parameter, coef, -diff) %>%
left_join(ci.ntv.df) %>%
mutate(parameter = factor(parameter, levels = factor.labels[c(2, 4, 6, 8, 11)]))
Draw the figure:
sw.comp.gg <- ggplot(sw.comp, aes(diff, coef)) +
geom_pointrange(aes(ymin = lb, ymax = ub)) +
geom_hline(yintercept = 0, linetype = 2) +
facet_wrap(~ parameter, nrow = 2) +
coord_flip() +
theme_bw() +
theme(text = element_text(size = 13)) +
labs(x = "", y = "", title = "Comprehensibility")
sw.ntv.gg <- sw.comp.gg %+% sw.ntv +
labs(x = "", y = "", title = "Nativelikeness")
grid.arrange(sw.comp.gg, sw.ntv.gg, nrow = 1, widths = c(415, 585),
bottom = textGrob("Posterior Mean and 95% Credible Interval", gp = gpar(fontsize = 20)),
left = textGrob("Contrasted Ratings", gp = gpar(fontsize = 20), rot = 90)
)