library(tidyverse) library(lme4) library(lattice) library(lmerTest) library(reshape2) library(plyr) library(psych) library(BlandAltmanLeh) library(cowplot) library(irr) library(cAIC4) library(tidyr) library(RePsychLing) #----Functions---- #collinearity diagnostics vif.mer <- function (fit) { ## adapted from rms::vif v <- vcov(fit) nam <- names(fixef(fit)) ## exclude intercepts ns <- sum(1 * (nam == "Intercept" | nam == "(Intercept)")) if (ns > 0) { v <- v[-(1:ns), -(1:ns), drop = FALSE] nam <- nam[-(1:ns)] } d <- diag(v)^0.5 v <- diag(solve(v/(d %o% d))) names(v) <- nam v } kappa.mer <- function (fit, scale = TRUE, center = FALSE, add.intercept = TRUE, exact = FALSE) { X <- fit@pp$X nam <- names(fixef(fit)) ## exclude intercepts nrp <- sum(1 * (nam == "(Intercept)")) if (nrp > 0) { X <- X[, -(1:nrp), drop = FALSE] nam <- nam[-(1:nrp)] } if (add.intercept) { X <- cbind(rep(1), scale(X, scale = scale, center = center)) kappa(X, exact = exact) } else { kappa(scale(X, scale = scale, center = scale), exact = exact) } } #centring continuous variables c. = function (x) scale(x, scale = FALSE) #Wilcox effect size rFromWilcox <-function(wilcoxModel, N) { z <- qnorm(wilcoxModel$p.value/2) r <-z /sqrt (N) cat(wilcoxModel$data.name, "Effect Size, r = ", r) } #----- Descriptive statistics---- dane.mixed = read.csv("data.mixed.csv", header = TRUE) dane.proste = read.csv("Dane-proste.csv", header = TRUE) describeBy(subset (dane.proste, dane.proste$talker.gr =="MONO")) describeBy(subset (dane.proste, dane.proste$talker.gr =="BI")) describeBy(dane.proste) #----Study 1 analyses (CLI) - Wilcoxon ---- Wilcox.syllables = wilcox.test(AL.syllables ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.stress = wilcox.test(AL.stress ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.consontants = wilcox.test(AL.consontants ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.vowel.qual = wilcox.test(AL.vowel.qual ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.vowel.quan = wilcox.test(AL.vowel.quan ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.VOT = wilcox.test(AL.VOT ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.V.assimilation = wilcox.test(AL.V.assimilation ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.nasals = wilcox.test(AL.nasals ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.palatalisation = wilcox.test(AL.palatalisation ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.V.reduction = wilcox.test(AL.V.reduction ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.cluster.reduction = wilcox.test(AL.cluster.reduction ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.cluster.sub = wilcox.test(AL.cluster.sub ~ talker.gr, paired = FALSE, data = dane.proste) Wilcox.TOTAL = wilcox.test(AL.TOTAL ~ talker.gr, paired = FALSE, data = dane.proste) rFromWilcox(Wilcox.syllables, 42) rFromWilcox(Wilcox.stress, 42) rFromWilcox(Wilcox.consontants, 42) rFromWilcox(Wilcox.vowel.qual, 42) rFromWilcox(Wilcox.vowel.quan, 42) rFromWilcox(Wilcox.vowel.qual, 42) rFromWilcox(Wilcox.V.assimilation, 42) rFromWilcox(Wilcox.nasals, 42) rFromWilcox(Wilcox.palatalisation, 42) rFromWilcox(Wilcox.V.reduction, 42) rFromWilcox(Wilcox.cluster.reduction, 42) rFromWilcox(Wilcox.cluster.sub, 42) rFromWilcox(Wilcox.TOTAL, 42) Wilcox.syllables Wilcox.stress Wilcox.consontants Wilcox.vowel.qual Wilcox.vowel.quan Wilcox.VOT Wilcox.V.assimilation Wilcox.nasals Wilcox.palatalisation Wilcox.V.reduction Wilcox.cluster.reduction Wilcox.cluster.sub Wilcox.TOTAL p.adjust( 0.2629, method = "bonferroni", n = 13) p.adjust( 0.1158, method = "bonferroni", n = 13) p.adjust( 0.1514, method = "bonferroni", n = 13) p.adjust( 7.17e-05, method = "bonferroni", n = 13) p.adjust( 0.008404, method = "bonferroni", n = 13) p.adjust( 0.258, method = "bonferroni", n = 13) p.adjust( 0.00143, method = "bonferroni", n = 13) p.adjust( 0.05473, method = "bonferroni", n = 13) p.adjust( 0.01077, method = "bonferroni", n = 13) p.adjust( 0.003084, method = "bonferroni", n = 13) p.adjust( 0.0003602, method = "bonferroni", n = 13) p.adjust( 0.07926, method = "bonferroni", n = 13) p.adjust( 3.497e-05, method = "bonferroni", n = 13) #----Study 2: Analysis A (reliability statistics) ---- #Accentedness dataA.Accent = subset(dane.mixed, dane.mixed$batch == "A")[,c(1:3,7,46)] dataA.Accent <- spread(dataA.Accent, rater.id, Accentedness) alpha (subset(dataA.Accent, dataA.Accent$task=="NAR")[,-c(1:3)]) alpha (subset(dataA.Accent, dataA.Accent$task=="SRT")[,-c(1:3)]) kripp.alpha (t(subset(dataA.Accent, dataA.Accent$task=="NAR")[,-c(1:3)]), method = "interval") kripp.alpha (t(subset(dataA.Accent, dataA.Accent$task=="SRT")[,-c(1:3)]), method = "interval") dataB.Accent = subset(dane.mixed, dane.mixed$batch == "B")[,c(1:3,7,46)] dataB.Accent <- spread(dataB.Accent, rater.id, Accentedness) alpha (subset(dataB.Accent, dataB.Accent$task=="NAR")[,-c(1:3)]) alpha (subset(dataB.Accent, dataB.Accent$task=="SRT")[,-c(1:3)]) kripp.alpha (t(subset(dataB.Accent, dataB.Accent$task=="NAR")[,-c(1:3)]), method = "interval") kripp.alpha (t(subset(dataB.Accent, dataB.Accent$task=="SRT")[,-c(1:3)]), method = "interval") #Acceptability dataA.Accept = subset(dane.mixed, dane.mixed$batch == "A")[,c(1:3,7,47)] dataA.Accept <- spread(dataA.Accept, rater.id, Acceptability) alpha (subset(dataA.Accept, dataA.Accept$task=="NAR")[,-c(1:3)]) alpha (subset(dataA.Accept, dataA.Accept$task=="SRT")[,-c(1:3)]) kripp.alpha (t(subset(dataA.Accept, dataA.Accept$task=="NAR")[,-c(1:3)]), method = "interval") kripp.alpha (t(subset(dataA.Accept, dataA.Accept$task=="SRT")[,-c(1:3)]), method = "interval") dataB.Accept = subset(dane.mixed, dane.mixed$batch == "B")[,c(1:3,7,47)] dataB.Accept <- spread(dataB.Accept, rater.id, Acceptability) alpha (subset(dataB.Accept, dataB.Accept$task=="NAR")[,-c(1:3)]) alpha (subset(dataB.Accept, dataB.Accept$task=="SRT")[,-c(1:3)]) kripp.alpha (t(subset(dataB.Accept, dataB.Accept$task=="NAR")[,-c(1:3)]), method = "interval") kripp.alpha (t(subset(dataB.Accept, dataB.Accept$task=="SRT")[,-c(1:3)]), method = "interval") #Intelligibility dataA.Intel = subset(dane.mixed, dane.mixed$batch == "A")[,c(1:3,7,49)] dataA.Intel <- spread(dataA.Intel, rater.id, Intelligibility) alpha (subset(dataA.Intel, dataA.Intel$task=="NAR")[,-c(1:3)]) alpha (subset(dataA.Intel, dataA.Intel$task=="SRT")[,-c(1:3)]) kripp.alpha (t(subset(dataA.Intel, dataA.Intel$task=="NAR")[,-c(1:3)]), method = "interval") kripp.alpha (t(subset(dataA.Intel, dataA.Intel$task=="SRT")[,-c(1:3)]), method = "interval") dataB.Intel = subset(dane.mixed, dane.mixed$batch == "B")[,c(1:3,7,49)] dataB.Intel <- spread(dataB.Intel, rater.id, Intelligibility) alpha (subset(dataB.Intel, dataB.Intel$task=="NAR")[,-c(1:3)]) alpha (subset(dataB.Intel, dataB.Intel$task=="SRT")[,-c(1:3)]) kripp.alpha (t(subset(dataB.Intel, dataB.Intel$task=="NAR")[,-c(1:3)]), method = "interval") kripp.alpha (t(subset(dataB.Intel, dataB.Intel$task=="SRT")[,-c(1:3)]), method = "interval") #Perceived Age dataA.Age = subset(dane.mixed, dane.mixed$batch == "A")[,c(1:3,7,48)] dataA.Age <- spread(dataA.Age, rater.id, Perceived.age) alpha (subset(dataA.Age, dataA.Age$task=="NAR")[,-c(1:3)]) alpha (subset(dataA.Age, dataA.Age$task=="SRT")[,-c(1:3)]) kripp.alpha (t(subset(dataA.Age, dataA.Age$task=="NAR")[,-c(1:3)]), method = "interval") kripp.alpha (t(subset(dataA.Age, dataA.Age$task=="SRT")[,-c(1:3)]), method = "interval") dataB.Age = subset(dane.mixed, dane.mixed$batch == "B")[,c(1:3,7,48)] dataB.Age <- spread(dataB.Age, rater.id, Perceived.age) alpha (subset(dataB.Age, dataB.Age$task=="NAR")[,-c(1:3)]) alpha (subset(dataB.Age, dataB.Age$task=="SRT")[,-c(1:3)]) kripp.alpha (t(subset(dataB.Age, dataB.Age$task=="NAR")[,-c(1:3)]), method = "interval") kripp.alpha (t(subset(dataB.Age, dataB.Age$task=="SRT")[,-c(1:3)]), method = "interval") #----Study 2: Analysis B (relationship between parameters)---- #Correlations between different parameters #BILINGUALS cor.test(dane.proste$AccentALL[dane.proste$talker.gr== "BI"], dane.proste$AcceptALL[dane.proste$talker.gr=="BI"]) cor.test(dane.proste$AccentALL[dane.proste$talker.gr== "BI"], dane.proste$IntelALL[dane.proste$talker.gr== "BI"]) cor.test(dane.proste$AcceptALL[dane.proste$talker.gr== "BI"], dane.proste$IntelALL[dane.proste$talker.gr== "BI"]) cor.test(dane.proste$AcceptALL[dane.proste$talker.gr== "BI"], dane.proste$AgeALL[dane.proste$talker.gr== "BI"]) cor.test(dane.proste$IntelALL[dane.proste$talker.gr== "BI"], dane.proste$AgeALL[dane.proste$talker.gr== "BI"]) cor.test(dane.proste$AccentALL[dane.proste$talker.gr== "BI"], dane.proste$AgeALL[dane.proste$talker.gr== "BI"]) #MONOLINGUALS cor.test(dane.proste$AccentALL[dane.proste$talker.gr== "MONO"], dane.proste$AcceptALL[dane.proste$talker.gr=="MONO"]) cor.test(dane.proste$AccentALL[dane.proste$talker.gr== "MONO"], dane.proste$IntelALL[dane.proste$talker.gr== "MONO"]) cor.test(dane.proste$AcceptALL[dane.proste$talker.gr== "MONO"], dane.proste$IntelALL[dane.proste$talker.gr== "MONO"]) cor.test(dane.proste$AcceptALL[dane.proste$talker.gr== "MONO"], dane.proste$AgeALL[dane.proste$talker.gr== "MONO"]) cor.test(dane.proste$IntelALL[dane.proste$talker.gr== "MONO"], dane.proste$AgeALL[dane.proste$talker.gr== "MONO"]) cor.test(dane.proste$AccentALL[dane.proste$talker.gr== "MONO"], dane.proste$AgeALL[dane.proste$talker.gr== "MONO"]) #Bland-Altman Plots Accent_Accept.BI = bland.altman.plot(dane.proste$AccentALL[dane.proste$talker.gr== "BI"], dane.proste$AcceptALL[dane.proste$talker.gr== "BI"], graph.sys = "ggplot2") Accept_Intel.BI = bland.altman.plot(dane.proste$AcceptALL[dane.proste$talker.gr== "BI"], dane.proste$IntelALL[dane.proste$talker.gr== "BI"], graph.sys = "ggplot2") Accent_Intel.BI = bland.altman.plot(dane.proste$AccentALL[dane.proste$talker.gr== "BI"], dane.proste$IntelALL[dane.proste$talker.gr== "BI"], graph.sys = "ggplot2") bland.bi <- plot_grid (Accent_Accept.BI, Accept_Intel.BI, Accent_Intel.BI, labels = c("Native accent vs. Acceptability", "Acceptability vs. Intelligiblity", "Native accent vs. Intelligibility", align = "h")) ggsave("bland.bi.tiff", bland.bi, dpi = 600, width = 10, height = 7) Accent_Accept.MONO = bland.altman.plot(dane.proste$AccentALL[dane.proste$talker.gr== "MONO"], dane.proste$AcceptALL[dane.proste$talker.gr== "MONO"], graph.sys = "ggplot2") Accept_Intel.MONO = bland.altman.plot(dane.proste$AcceptALL[dane.proste$talker.gr== "MONO"], dane.proste$IntelALL[dane.proste$talker.gr== "MONO"], graph.sys = "ggplot2") Accent_Intel.MONO = bland.altman.plot(dane.proste$AccentALL[dane.proste$talker.gr== "MONO"], dane.proste$IntelALL[dane.proste$talker.gr== "MONO"], graph.sys = "ggplot2") bland.mono <- plot_grid (Accent_Accept.MONO, Accept_Intel.MONO, Accent_Intel.MONO, labels = c("Native accent vs. Acceptability", "Acceptability vs. Intelligiblity", "Native accent vs. Intelligibility", align = "h")) ggsave("bland.mono.tiff", bland.mono, dpi = 600, width = 10, height = 7) #Factor analyses on 3 parameters dane.mixed = dane.mixed[!is.na(dane.mixed$Accentedness),] fit = factanal(dane.mixed[,c(46:47,49)], factors = 1, scores = "regression", rotation = "varimax") dane.mixed = cbind(dane.mixed, fit$scores) #----Study 2: Analysis C (mixed models - do bilinguals and monolinguals differ on accentedness? controlled for effects of task and raters) ---- # Making sure that both random variables are factors str(dane.mixed) dane.mixed$rater.id = as.factor(dane.mixed$rater.id) #Recoding and centring categorial variables dane.mixed$taskC=ifelse(dane.mixed$task=="SRT", 0.5, -0.5) dane.mixed$talker.grC=ifelse(dane.mixed$talker.gr=="BI", 0.5, -0.5) dane.mixed$rater.phon.trC=ifelse(dane.mixed$rater.phon.tr=="tak", 0.5, -0.5) #model on the EFA factor MAXmodel = lmer(Factor1 ~ talker.grC*taskC*rater.phon.trC + (1|talker.id) + (0+rater.phon.trC|talker.id) + (0+taskC|talker.id) + (0+taskC:rater.phon.trC|talker.id) + (1|rater.id) + (0+talker.grC|rater.id) + (0+taskC|rater.id) + (0+talker.grC:taskC|rater.id), dane.mixed) OPmodel = lmer(Factor1 ~ talker.grC*taskC*rater.phon.trC + (1|talker.id) + (0+taskC|talker.id) + (1|rater.id) + (0+talker.grC|rater.id) + (0+taskC|rater.id) + (0+talker.grC:taskC|rater.id), dane.mixed) summary(OPmodel) anova(MAXmodel,OPmodel) summary(rePCA(OPmodel)) #diagnostics: plot(OPmodel) qqnorm(residuals(OPmodel)) qqline(residuals(OPmodel)) max(vif.mer(OPmodel)) kappa.mer(OPmodel) #collinearrity within norms #----Study 2: Analysis D (regression with study 1)---- dane.mixed.bi = subset (dane.mixed, dane.mixed$talker.gr == "BI") #model on grouped speech errors dane.mixed.bi$AL.CON = dane.mixed.bi$AL.consontants + dane.mixed.bi$AL.VOT + dane.mixed.bi$AL.palatalisation + dane.mixed.bi$AL.V.assimilation + dane.mixed.bi$AL.cluster.reduction + dane.mixed.bi$AL.cluster.sub dane.mixed.bi$AL.VOW = dane.mixed.bi$AL.vowel.qual + dane.mixed.bi$AL.vowel.quan + dane.mixed.bi$AL.V.reduction + dane.mixed.bi$AL.nasals dane.mixed.bi$AL.PROS = dane.mixed.bi$AL.syllables + dane.mixed.bi$AL.stress model1 = lmer(Factor1 ~ c.(AL.CON) + c.(AL.VOW) + c.(AL.PROS) + (1|talker.id) + (1|rater.id) + (0+c.(AL.CON)|rater.id) + (0+c.(AL.VOW)|rater.id) + (0+c.(AL.PROS)|rater.id), dane.mixed.bi) summary(model1) max(vif.mer(model1)) kappa.mer(model1) #degree of collinearity signaled by VIF over the values model2 = lmer(Factor1 ~ c.(AL.CON) + c.(AL.PROS) + (1|talker.id) + (1|rater.id) + (0+c.(AL.CON)|rater.id) + (0+c.(AL.VOW)|rater.id) + (0+c.(AL.PROS)|rater.id), dane.mixed.bi) summary(model2) anova(model1, model2) max(vif.mer(model2)) kappa.mer(model2) model3 = lmer(Factor1 ~ c.(AL.PROS) + (1|talker.id) + (1|rater.id) + (0+c.(AL.CON)|rater.id) + (0+c.(AL.VOW)|rater.id) + (0+c.(AL.PROS)|rater.id), dane.mixed.bi) summary(model3) anova(model3, model2) max(vif.mer(model3)) kappa.mer(model3) modelP = lmer(Factor1 ~ c.(AL.PROS) + (1|talker.id) + (1|rater.id), dane.mixed.bi) anova(model3, modelP) plot(modelP) qqnorm(residuals(modelP)) qqline(residuals(modelP)) #----Study 2: Analysis E (background regression)---- #analysis on the EFA factor mFull = lmer (Factor1 ~ c.(age) + c.(input.pl) + c.(input.en) + c.(age.contact.en) + (1|talker.id) + (1|rater.id) + (0+c.(age)|rater.id) + (0+c.(input.pl)|rater.id) + (0+c.(input.en)|rater.id) + (0+c.(age.contact.en)|rater.id), dane.mixed.bi) summary(mFull) max(vif.mer(mFull)) kappa.mer(mFull) # little collinearity mOpt = lmer (Factor1 ~ c.(age) + c.(input.pl) + c.(input.en) + c.(age.contact.en) + (1|talker.id) + (1|rater.id), dane.mixed.bi) summary(mOpt) anova(mFull, mOpt) plot(mOpt) qqnorm(residuals(mOpt)) qqline(residuals(mOpt))