This R markdown document provides a detailed account of the data analysis and visualization used in the paper An acoustic study of Tetso????t?????n?? stress in Phonology (Jaker & Howson, 2022).
The markdown is orientated in the same way as the paper. First, the analysis and visualization for the duration results are provided. Following this, we present the analysis and visualization for Intensity, F0, and then and finally the the LDA analysis.
1. Duration
a. 2-Syllable Words
require(lmerTest)
## Loading required package: lmerTest
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
require(MuMIn)
## Loading required package: MuMIn
require(emmeans)
## Loading required package: emmeans
dur.two = read.csv("duration_two_syllables_Jaker_Howson.txt", header=T, sep="\t", encoding="UTF-8", stringsAsFactors = TRUE)
summary(dur.two)
## Participant Word Stress Duration Tone
## A01:388 nEyaN : 114 stressed :788 Min. :0.03833 High:650
## A02:409 shEkiN : 82 unstressed:755 1st Qu.:0.14377 Low :893
## A03:376 hEya : 78 Median :0.18728
## A04:370 nIjaN : 78 Mean :0.19441
## hegha : 77 3rd Qu.:0.23528
## hedaN : 72 Max. :0.45635
## (Other):1042
## Syllable Vowel
## FINAL :789 e :571
## NON-FINAL:754 i :291
## a :268
## an :190
## in :119
## u : 85
## (Other): 19
fit.two = lmer(Duration ~ Stress * Syllable + Tone + Vowel + (1 | Participant) + (1 | Word), data = dur.two)
anova(fit.two)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Stress 0.064400 0.064400 1 554.94 36.3747 2.974e-09 ***
## Syllable 0.304258 0.304258 1 702.52 171.8540 < 2.2e-16 ***
## Tone 0.033910 0.033910 1 203.23 19.1531 1.928e-05 ***
## Vowel 0.120786 0.017255 7 932.24 9.7462 9.386e-12 ***
## Stress:Syllable 0.000007 0.000007 1 44.94 0.0040 0.9496
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit.two)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Duration ~ Stress * Syllable + Tone + Vowel + (1 | Participant) +
## (1 | Word)
## Data: dur.two
##
## REML criterion at convergence: -5216.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7721 -0.6227 -0.0785 0.5732 4.5787
##
## Random effects:
## Groups Name Variance Std.Dev.
## Word (Intercept) 0.000354 0.01881
## Participant (Intercept) 0.001142 0.03380
## Residual 0.001770 0.04208
## Number of obs: 1543, groups: Word, 39; Participant, 4
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 2.505e-01 1.777e-02 3.637e+00 14.094
## Stressunstressed -2.461e-02 7.909e-03 6.648e+01 -3.111
## SyllableNON-FINAL -6.002e-02 8.148e-03 6.971e+01 -7.367
## ToneLow -2.495e-02 5.701e-03 2.032e+02 -4.376
## Vowelan 1.711e-02 6.138e-03 1.159e+03 2.787
## Vowele -7.854e-03 4.988e-03 1.081e+03 -1.575
## Voweli -1.405e-02 6.761e-03 8.824e+02 -2.079
## Vowelin 1.346e-02 7.171e-03 1.012e+03 1.877
## Vowelu -1.255e-02 8.296e-03 1.181e+03 -1.513
## Vowelui 1.642e-01 2.364e-02 5.903e+02 6.945
## Vowelun -1.860e-02 1.567e-02 8.557e+02 -1.187
## Stressunstressed:SyllableNON-FINAL 8.821e-04 1.387e-02 4.494e+01 0.064
## Pr(>|t|)
## (Intercept) 0.000264 ***
## Stressunstressed 0.002743 **
## SyllableNON-FINAL 2.72e-10 ***
## ToneLow 1.93e-05 ***
## Vowelan 0.005404 **
## Vowele 0.115642
## Voweli 0.037941 *
## Vowelin 0.060763 .
## Vowelu 0.130533
## Vowelui 1.00e-11 ***
## Vowelun 0.235611
## Stressunstressed:SyllableNON-FINAL 0.949578
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Strssn SNON-F ToneLw Vowelan Vowele Voweli Vowelin Vowelu
## Strssnstrss -0.053
## SyNON-FINAL -0.104 0.582
## ToneLow -0.137 -0.292 0.282
## Vowelan -0.075 -0.153 -0.090 -0.191
## Vowele -0.148 -0.044 -0.302 0.036 0.429
## Voweli -0.118 -0.031 -0.361 0.009 0.386 0.745
## Vowelin -0.062 -0.179 -0.147 -0.211 0.475 0.466 0.329
## Vowelu -0.080 0.001 -0.230 -0.008 0.180 0.556 0.548 0.241
## Vowelui -0.024 -0.113 -0.060 -0.026 0.139 0.115 0.068 0.139 0.047
## Vowelun 0.004 0.077 -0.153 -0.243 0.096 0.181 0.211 0.129 0.195
## S:SNON-FINA 0.114 -0.862 -0.830 -0.094 0.133 0.028 0.024 0.148 -0.058
## Vowelui Vowelun
## Strssnstrss
## SyNON-FINAL
## ToneLow
## Vowelan
## Vowele
## Voweli
## Vowelin
## Vowelu
## Vowelui
## Vowelun 0.019
## S:SNON-FINA 0.096 -0.015
r.squaredGLMM(fit.two)
## Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help page.
## R2m R2c
## [1,] 0.3688811 0.6579695
warp.emm = emmeans(fit.two, "Stress")
## NOTE: Results may be misleading due to involvement in interactions
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## stressed - unstressed 0.0242 0.00404 618 5.984 <.0001
##
## Results are averaged over some or all of the levels of: Syllable, Tone, Vowel
## Degrees-of-freedom method: kenward-roger
warp.emm = emmeans(fit.two, "Syllable")
## NOTE: Results may be misleading due to involvement in interactions
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## FINAL - (NON-FINAL) 0.0596 0.00458 769 13.016 <.0001
##
## Results are averaged over some or all of the levels of: Stress, Tone, Vowel
## Degrees-of-freedom method: kenward-roger
warp.emm = emmeans(fit.two, "Tone")
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## High - Low 0.025 0.00578 236 4.316 <.0001
##
## Results are averaged over some or all of the levels of: Stress, Syllable, Vowel
## Degrees-of-freedom method: kenward-roger
warp.emm = emmeans(fit.two, "Vowel")
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## a - an -0.01711 0.00617 1206 -2.772 0.1031
## a - e 0.00785 0.00502 1134 1.565 0.7710
## a - i 0.01405 0.00681 946 2.064 0.4393
## a - in -0.01346 0.00722 1069 -1.865 0.5753
## a - u 0.01255 0.00834 1225 1.506 0.8045
## a - ui -0.16419 0.02381 654 -6.897 <.0001
## a - un 0.01860 0.01578 920 1.179 0.9379
## an - e 0.02496 0.00605 1341 4.126 0.0010
## an - i 0.03116 0.00720 1351 4.331 0.0004
## an - in 0.00364 0.00692 1217 0.527 0.9995
## an - u 0.02966 0.00944 1268 3.143 0.0363
## an - ui -0.14708 0.02375 654 -6.193 <.0001
## an - un 0.03571 0.01638 920 2.180 0.3650
## e - i 0.00620 0.00454 978 1.365 0.8729
## e - in -0.02132 0.00659 1227 -3.235 0.0273
## e - u 0.00470 0.00694 1103 0.677 0.9976
## e - ui -0.17204 0.02376 652 -7.241 <.0001
## e - un 0.01074 0.01568 792 0.685 0.9974
## i - in -0.02752 0.00813 1076 -3.385 0.0168
## i - u -0.00150 0.00733 1026 -0.205 1.0000
## i - ui -0.17824 0.02432 625 -7.329 <.0001
## i - un 0.00455 0.01582 843 0.287 1.0000
## in - u 0.02602 0.00962 1236 2.704 0.1222
## in - ui -0.15073 0.02390 659 -6.307 <.0001
## in - un 0.03206 0.01648 932 1.945 0.5200
## u - ui -0.17674 0.02486 669 -7.110 <.0001
## u - un 0.00605 0.01635 981 0.370 1.0000
## ui - un 0.18279 0.02831 697 6.457 <.0001
##
## Results are averaged over some or all of the levels of: Stress, Syllable, Tone
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 8 estimates
require(ggplot2)
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.0.5
dur.two$Syllable = factor(dur.two$Syllable, levels = c("NON-FINAL", "FINAL"))
p = ggplot(dur.two, aes(x = Stress, y = Duration, fill = Stress)) +
geom_violin(trim = TRUE) +
geom_boxplot(width = 0.25) +
facet_wrap(~Syllable) +
labs(y = expression(paste("Duration (seconds)")))
p + scale_y_continuous(breaks = seq(-1, 1, 0.1), limits = c(0, 0.5)) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
strip.text.x = element_text(size = 9, family = "serif"),
strip.text.y = element_text(size = 9, family = "serif"),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
legend.position = "none",
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_rect(colour = "black", fill = "grey"))
b. 3-Syllable Words
dur.three = read.csv("duration_three_syllables_Jaker_Howson.txt", header=T, sep="\t", encoding="UTF-8", stringsAsFactors = TRUE)
summary(dur.three)
## Participant Word Stress Duration Tone
## A01:352 heneye : 81 stressed :902 Min. :0.03551 High:654
## A02:398 henEke : 74 unstressed:624 1st Qu.:0.11135 Low :872
## A03:352 ?edets'Er: 64 Median :0.14764
## A04:424 henEyaN : 63 Mean :0.15919
## tunedIN : 63 3rd Qu.:0.19161
## nInIdEl : 61 Max. :0.40199
## (Other) :1120
## Syllable Vowel
## FINAL : 511 a :278
## NON-FINAL:1015 an: 22
## e :702
## i :263
## in:212
## n : 1
## u : 48
fit.three = lmer(Duration ~ Stress * Syllable + Tone + Vowel + (1 | Participant) + (1 | Word), data = dur.three)
anova(fit.three)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Stress 0.16542 0.16542 1 1428.5 100.5615 < 2.2e-16 ***
## Syllable 0.89950 0.89950 1 1487.2 546.8084 < 2.2e-16 ***
## Tone 0.04134 0.04134 1 1353.2 25.1278 6.076e-07 ***
## Vowel 0.01260 0.00210 6 1329.3 1.2764 0.2651
## Stress:Syllable 0.00121 0.00121 1 1452.2 0.7329 0.3921
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(fit.three)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Duration ~ Stress * Syllable + Tone + Vowel + (1 | Participant) +
## (1 | Word)
## Data: dur.three
##
## REML criterion at convergence: -5257.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3245 -0.6471 -0.0930 0.5512 4.3384
##
## Random effects:
## Groups Name Variance Std.Dev.
## Word (Intercept) 0.0004794 0.02190
## Participant (Intercept) 0.0007372 0.02715
## Residual 0.0016450 0.04056
## Number of obs: 1526, groups: Word, 46; Participant, 4
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 2.274e-01 1.443e-02 3.776e+00 15.760
## Stressunstressed -3.284e-02 5.585e-03 1.351e+03 -5.881
## SyllableNON-FINAL -6.478e-02 2.913e-03 1.478e+03 -22.236
## ToneLow -1.605e-02 3.201e-03 1.353e+03 -5.013
## Vowelan -1.984e-02 1.182e-02 1.480e+03 -1.678
## Vowele -6.491e-03 4.112e-03 1.273e+03 -1.579
## Voweli -2.934e-04 4.646e-03 1.351e+03 -0.063
## Vowelin 6.126e-04 4.405e-03 1.511e+03 0.139
## Voweln 5.180e-02 4.436e-02 1.090e+03 1.168
## Vowelu -9.433e-03 7.907e-03 1.412e+03 -1.193
## Stressunstressed:SyllableNON-FINAL -4.922e-03 5.749e-03 1.452e+03 -0.856
## Pr(>|t|)
## (Intercept) 0.000139 ***
## Stressunstressed 5.14e-09 ***
## SyllableNON-FINAL < 2e-16 ***
## ToneLow 6.08e-07 ***
## Vowelan 0.093587 .
## Vowele 0.114643
## Voweli 0.949653
## Vowelin 0.889418
## Voweln 0.243125
## Vowelu 0.233084
## Stressunstressed:SyllableNON-FINAL 0.392079
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) Strssn SNON-F ToneLw Vowelan Vowele Voweli Vowelin Voweln
## Strssnstrss -0.059
## SyNON-FINAL -0.103 0.325
## ToneLow -0.047 -0.445 0.081
## Vowelan -0.050 -0.206 0.024 -0.049
## Vowele -0.144 -0.009 -0.140 -0.170 0.331
## Voweli -0.106 0.133 -0.173 -0.101 0.074 0.492
## Vowelin -0.129 0.191 -0.055 -0.124 0.116 0.445 0.354
## Voweln -0.031 0.021 0.039 0.013 0.018 0.049 0.029 0.053
## Vowelu -0.087 0.071 0.031 -0.103 0.100 0.334 0.393 0.266 0.027
## S:SNON-FINA 0.070 -0.843 -0.506 0.167 0.212 0.027 -0.216 -0.052 -0.020
## Vowelu
## Strssnstrss
## SyNON-FINAL
## ToneLow
## Vowelan
## Vowele
## Voweli
## Vowelin
## Voweln
## Vowelu
## S:SNON-FINA -0.113
r.squaredGLMM(fit.three)
## R2m R2c
## [1,] 0.3755636 0.6410463
warp.emm = emmeans(fit.three, "Tone")
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## High - Low 0.016 0.00321 1389 4.997 <.0001
##
## Results are averaged over some or all of the levels of: Stress, Syllable, Vowel
## Degrees-of-freedom method: kenward-roger
warp.emm = emmeans(fit.three, "Vowel")
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## a - an 0.019836 0.01185 1488 1.674 0.6338
## a - e 0.006491 0.00413 1324 1.572 0.7003
## a - i 0.000293 0.00466 1388 0.063 1.0000
## a - in -0.000613 0.00441 1511 -0.139 1.0000
## a - n -0.051802 0.04447 1171 -1.165 0.9071
## a - u 0.009433 0.00793 1436 1.189 0.8983
## an - e -0.013345 0.01118 1510 -1.194 0.8966
## an - i -0.019543 0.01241 1456 -1.574 0.6988
## an - in -0.020449 0.01216 1486 -1.682 0.6284
## an - n -0.071638 0.04581 1201 -1.564 0.7056
## an - u -0.010404 0.01359 1464 -0.766 0.9881
## e - i -0.006198 0.00446 1172 -1.389 0.8079
## e - in -0.007104 0.00451 1366 -1.574 0.6989
## e - n -0.058293 0.04446 1172 -1.311 0.8468
## e - u 0.002942 0.00762 1416 0.386 0.9997
## i - in -0.000906 0.00516 1439 -0.176 1.0000
## i - n -0.052095 0.04458 1165 -1.169 0.9058
## i - u 0.009139 0.00744 1512 1.228 0.8834
## in - n -0.051189 0.04445 1180 -1.152 0.9118
## in - u 0.010045 0.00798 1462 1.258 0.8708
## n - u 0.061235 0.04496 1174 1.362 0.8220
##
## Results are averaged over some or all of the levels of: Stress, Syllable, Tone
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 7 estimates
warp.emm = emmeans(fit.three, "Syllable")
## NOTE: Results may be misleading due to involvement in interactions
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## FINAL - (NON-FINAL) 0.0672 0.00288 1493 23.338 <.0001
##
## Results are averaged over some or all of the levels of: Stress, Tone, Vowel
## Degrees-of-freedom method: kenward-roger
warp.emm = emmeans(fit.three, "Stress")
## NOTE: Results may be misleading due to involvement in interactions
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## stressed - unstressed 0.0353 0.00353 1448 10.001 <.0001
##
## Results are averaged over some or all of the levels of: Syllable, Tone, Vowel
## Degrees-of-freedom method: kenward-roger
dur.three$Syllable = factor(dur.three$Syllable, levels = c("NON-FINAL", "FINAL"))
p = ggplot(dur.three, aes(x = Stress, y = Duration, fill = Stress)) +
geom_violin(trim = TRUE) +
geom_boxplot(width = 0.25) +
facet_wrap(~Syllable) +
labs(y = expression(paste("Duration (seconds)")))
p + scale_y_continuous(breaks = seq(-1, 1, 0.1), limits = c(0, 0.5)) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
strip.text.x = element_text(size = 9, family = "serif"),
strip.text.y = element_text(size = 9, family = "serif"),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
legend.position = "none",
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_rect(colour = "black", fill = "grey"))
2. Intensity
a. 2-Syllable Words
require(mgcv)
## Loading required package: mgcv
## Loading required package: nlme
##
## Attaching package: 'nlme'
## The following object is masked from 'package:lme4':
##
## lmList
## This is mgcv 1.8-33. For overview type 'help("mgcv-package")'.
two.syllables = read.csv("intensity_two_syllables_Jaker_Howson.txt", header=T, sep="\t", encoding="UTF-8", stringsAsFactors = TRUE)
summary(two.syllables)
## Participant Word Stress Syllable Interval
## A01:3784 nEyaN : 1107 stressed :7807 SYL1:7899 Min. : 1.000
## A02:4074 nIjaN : 793 unstressed:7661 SYL2:7569 1st Qu.: 3.000
## A03:3899 shEkiN : 791 Median : 5.000
## A04:3711 hegha : 780 Mean : 5.427
## hEya : 754 3rd Qu.: 8.000
## niyaN : 723 Max. :10.000
## (Other):10520
## Intensity
## Min. :43.51
## 1st Qu.:64.69
## Median :68.91
## Mean :68.54
## 3rd Qu.:72.89
## Max. :82.50
##
g1 = two.syllables[!(two.syllables$Syllable != "SYL1"),]
two.syl.one = data.frame(lapply(g1, function(g1) if (is.factor(g1)){ factor(g1)} else {g1}))
g1 = two.syllables[!(two.syllables$Syllable != "SYL2"),]
two.syl.two = data.frame(lapply(g1, function(g1) if (is.factor(g1)){ factor(g1)} else {g1}))
bam.two.one = bam(Intensity ~ Stress + s(Interval, bs = "cr", k = 10) +
s(Interval, by = Stress, k = 10, bs = "cr") +
s(Interval, Stress, by = Participant, bs = "fs", k = 10, m = 1), data = two.syl.one)
summary(bam.two.one)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Intensity ~ Stress + s(Interval, bs = "cr", k = 10) + s(Interval,
## by = Stress, k = 10, bs = "cr") + s(Interval, Stress, by = Participant,
## bs = "fs", k = 10, m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.0457 0.1249 568.624 <2e-16 ***
## Stressunstressed -0.1579 0.1549 -1.019 0.308
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Interval) 6.558e+00 7.535 70.658 <2e-16 ***
## s(Interval):Stressstressed 1.271e+00 1.727 2.275 0.0862 .
## s(Interval):Stressunstressed 1.000e+00 1.000 5.995 0.0144 *
## s(Interval,Stress):ParticipantA01 2.605e-04 20.000 0.000 0.5940
## s(Interval,Stress):ParticipantA02 8.472e+00 20.000 32.675 <2e-16 ***
## s(Interval,Stress):ParticipantA03 1.456e+01 20.000 37.866 <2e-16 ***
## s(Interval,Stress):ParticipantA04 6.141e+00 20.000 267.233 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 108/109
## R-sq.(adj) = 0.626 Deviance explained = 62.8%
## fREML = 20595 Scale est. = 10.623 n = 7899
bam.two.two = bam(Intensity ~ Stress + s(Interval, bs = "cr", k = 10) +
s(Interval, by = Stress, k = 10, bs = "cr") +
s(Interval, Stress, by = Participant, bs = "fs", k = 10, m = 1), data = two.syl.two)
summary(bam.two.two)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Intensity ~ Stress + s(Interval, bs = "cr", k = 10) + s(Interval,
## by = Stress, k = 10, bs = "cr") + s(Interval, Stress, by = Participant,
## bs = "fs", k = 10, m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.3828 0.2829 252.368 < 2e-16 ***
## Stressunstressed -1.8993 0.4562 -4.163 3.18e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Interval) 7.050 7.821 67.687 < 2e-16 ***
## s(Interval):Stressstressed 2.044 2.514 4.848 0.00461 **
## s(Interval):Stressunstressed 1.000 1.001 0.047 0.82924
## s(Interval,Stress):ParticipantA01 10.772 20.000 2.773 < 2e-16 ***
## s(Interval,Stress):ParticipantA02 5.242 20.000 0.894 7.5e-06 ***
## s(Interval,Stress):ParticipantA03 11.045 20.000 11.487 < 2e-16 ***
## s(Interval,Stress):ParticipantA04 6.984 20.000 77.598 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 108/109
## R-sq.(adj) = 0.651 Deviance explained = 65.3%
## fREML = 20417 Scale est. = 12.698 n = 7569
bp.two.one = ggplot(two.syl.one, aes(x = Interval, y = Intensity, colour = Stress, linetype = Stress)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
labs(y = expression(paste("Intensity")))
bp.two.one + scale_y_continuous(breaks = seq(0, 100, 5), limits = c(55, 75)) +
scale_x_continuous(breaks = c(1, 3.33, 5.5, 7.66, 10),
labels=c("0%", "25%", "50%", "75%", "100%")) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Removed 1229 rows containing non-finite values (stat_smooth).
bp.two.two = ggplot(two.syl.two, aes(x = Interval, y = Intensity, colour = Stress, linetype = Stress)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
labs(y = expression(paste("Intensity")))
bp.two.two + scale_y_continuous(breaks = seq(0, 100, 5), limits = c(55, 75)) +
scale_x_continuous(breaks = c(1, 3.33, 5.5, 7.66, 10),
labels=c("0%", "25%", "50%", "75%", "100%")) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Removed 1065 rows containing non-finite values (stat_smooth).
b. 3-Syllable Words
three.syllables = read.csv("intensity_three_syllables_Jaker_Howson.txt", header=T, sep="\t", encoding="UTF-8", stringsAsFactors = TRUE)
summary(three.syllables)
## Participant Word Stress Syllable Interval
## A01:3523 heneye : 833 stressed :8879 SYL1:5206 Min. : 1.000
## A02:4013 henEke : 762 unstressed:6511 SYL2:5300 1st Qu.: 3.000
## A03:3605 ?edets'Er: 689 SYL3:4884 Median : 5.000
## A04:4249 henEyaN : 629 Mean : 5.465
## tunedIN : 625 3rd Qu.: 8.000
## nInIdEl : 617 Max. :10.000
## (Other) :11235
## Intensity
## Min. :47.82
## 1st Qu.:64.76
## Median :68.46
## Mean :68.27
## 3rd Qu.:72.20
## Max. :82.59
##
g1 = three.syllables[!(three.syllables$Syllable != "SYL1"),]
three.syl.one = data.frame(lapply(g1, function(g1) if (is.factor(g1)){ factor(g1)} else {g1}))
g1 = three.syllables[!(three.syllables$Syllable != "SYL2"),]
three.syl.two = data.frame(lapply(g1, function(g1) if (is.factor(g1)){ factor(g1)} else {g1}))
g1 = three.syllables[!(three.syllables$Syllable != "SYL3"),]
three.syl.three = data.frame(lapply(g1, function(g1) if (is.factor(g1)){ factor(g1)} else {g1}))
bam.three.one = bam(Intensity ~ Stress + s(Interval, bs = "cr", k = 10) +
s(Interval, by = Stress, k = 10, bs = "cr") +
s(Interval, Stress, by = Participant, bs = "fs", k = 10, m = 1), data = three.syl.one)
summary(bam.three.one)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Intensity ~ Stress + s(Interval, bs = "cr", k = 10) + s(Interval,
## by = Stress, k = 10, bs = "cr") + s(Interval, Stress, by = Participant,
## bs = "fs", k = 10, m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 71.2938 0.8686 82.079 <2e-16 ***
## Stressunstressed 0.2737 1.2225 0.224 0.823
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Interval) 4.297e+00 5.176e+00 35.849 <2e-16 ***
## s(Interval):Stressstressed 3.251e+00 3.960e+00 14.311 <2e-16 ***
## s(Interval):Stressunstressed 3.627e-05 4.696e-05 0.040 0.9989
## s(Interval,Stress):ParticipantA01 3.769e+00 2.000e+01 0.318 0.0126 *
## s(Interval,Stress):ParticipantA02 1.111e+00 2.000e+01 0.124 <2e-16 ***
## s(Interval,Stress):ParticipantA03 9.706e+00 2.000e+01 3.226 <2e-16 ***
## s(Interval,Stress):ParticipantA04 5.801e+00 2.000e+01 5.593 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 108/109
## R-sq.(adj) = 0.513 Deviance explained = 51.6%
## fREML = 13666 Scale est. = 11.002 n = 5206
bam.three.two = bam(Intensity ~ Stress + s(Interval, bs = "cr", k = 10) +
s(Interval, by = Stress, k = 10, bs = "cr") +
s(Interval, Stress, by = Participant, bs = "fs", k = 10, m = 1), data = three.syl.two)
summary(bam.three.two)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Intensity ~ Stress + s(Interval, bs = "cr", k = 10) + s(Interval,
## by = Stress, k = 10, bs = "cr") + s(Interval, Stress, by = Participant,
## bs = "fs", k = 10, m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.3984 0.1108 653.23 <2e-16 ***
## Stressunstressed -2.4911 0.2281 -10.92 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Interval) 5.4941901 6.4982 68.312 <2e-16 ***
## s(Interval):Stressstressed 0.3027883 0.4774 0.713 0.560
## s(Interval):Stressunstressed 1.0000474 1.0001 47.580 <2e-16 ***
## s(Interval,Stress):ParticipantA01 0.0004597 20.0000 0.000 0.141
## s(Interval,Stress):ParticipantA02 6.3774018 20.0000 2.375 <2e-16 ***
## s(Interval,Stress):ParticipantA03 5.8221681 20.0000 40.140 <2e-16 ***
## s(Interval,Stress):ParticipantA04 9.3843124 20.0000 195.345 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 108/109
## R-sq.(adj) = 0.581 Deviance explained = 58.3%
## fREML = 14110 Scale est. = 11.85 n = 5300
bam.three.three = bam(Intensity ~ Stress + s(Interval, bs = "cr", k = 10) +
s(Interval, by = Stress, k = 10, bs = "cr") +
s(Interval, Stress, by = Participant, bs = "fs", k = 10, m = 1), data = three.syl.three)
summary(bam.three.three)
##
## Family: gaussian
## Link function: identity
##
## Formula:
## Intensity ~ Stress + s(Interval, bs = "cr", k = 10) + s(Interval,
## by = Stress, k = 10, bs = "cr") + s(Interval, Stress, by = Participant,
## bs = "fs", k = 10, m = 1)
##
## Parametric coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69.8449 0.1897 368.135 < 2e-16 ***
## Stressunstressed -0.8194 0.3072 -2.667 0.00768 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Approximate significance of smooth terms:
## edf Ref.df F p-value
## s(Interval) 6.486 7.499 117.770 < 2e-16 ***
## s(Interval):Stressstressed 2.669 3.432 8.597 5.51e-06 ***
## s(Interval):Stressunstressed 1.000 1.000 15.656 7.71e-05 ***
## s(Interval,Stress):ParticipantA01 7.184 20.000 2.358 < 2e-16 ***
## s(Interval,Stress):ParticipantA02 4.844 20.000 0.886 9.48e-05 ***
## s(Interval,Stress):ParticipantA03 1.986 20.000 13.090 < 2e-16 ***
## s(Interval,Stress):ParticipantA04 1.998 20.000 91.009 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Rank: 108/109
## R-sq.(adj) = 0.524 Deviance explained = 52.6%
## fREML = 13562 Scale est. = 14.895 n = 4884
bp.three.one = ggplot(three.syl.one, aes(x = Interval, y = Intensity, colour = Stress, linetype = Stress)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
labs(y = expression(paste("Intensity")))
bp.three.one + scale_y_continuous(breaks = seq(0, 100, 5), limits = c(55, 75)) +
scale_x_continuous(breaks = c(1, 3.33, 5.5, 7.66, 10),
labels=c("0%", "25%", "50%", "75%", "100%")) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Removed 624 rows containing non-finite values (stat_smooth).
bp.three.two = ggplot(three.syl.two, aes(x = Interval, y = Intensity, colour = Stress, linetype = Stress)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
labs(y = expression(paste("Intensity")))
bp.three.two + scale_y_continuous(breaks = seq(0, 100, 5), limits = c(55, 75)) +
scale_x_continuous(breaks = c(1, 3.33, 5.5, 7.66, 10),
labels=c("0%", "25%", "50%", "75%", "100%")) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Removed 749 rows containing non-finite values (stat_smooth).
bp.three.three = ggplot(three.syl.three, aes(x = Interval, y = Intensity, colour = Stress, linetype = Stress)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
labs(y = expression(paste("Intensity")))
bp.three.three + scale_y_continuous(breaks = seq(0, 100, 5), limits = c(55, 75)) +
scale_x_continuous(breaks = c(1, 3.33, 5.5, 7.66, 10),
labels=c("0%", "25%", "50%", "75%", "100%")) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
## Warning: Removed 416 rows containing non-finite values (stat_smooth).
Note here there is a warning indicating rows containing non-finite values that have been removed from the intensity plots. This warning disappears if we zoom out to a y-axis with limits = c(0, 100), but nothing changes on the actual plot. Therefore, we opted to zoom in to limits = c(55, 75) so that it would be easier for the reader to see differences in the plots.
However, we have opted to plot them below with the increased limits to demonstrate there are no differences in the plots that we present in the paper and the plots that have no non-finite values.
bp.two.one = ggplot(two.syl.one, aes(x = Interval, y = Intensity, colour = Stress, linetype = Stress)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
labs(y = expression(paste("Intensity")))
bp.two.one + scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 100)) +
scale_x_continuous(breaks = c(1, 3.33, 5.5, 7.66, 10),
labels=c("0%", "25%", "50%", "75%", "100%")) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
bp.two.two = ggplot(two.syl.two, aes(x = Interval, y = Intensity, colour = Stress, linetype = Stress)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
labs(y = expression(paste("Intensity")))
bp.two.two + scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 100)) +
scale_x_continuous(breaks = c(1, 3.33, 5.5, 7.66, 10),
labels=c("0%", "25%", "50%", "75%", "100%")) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
bp.three.one = ggplot(three.syl.one, aes(x = Interval, y = Intensity, colour = Stress, linetype = Stress)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
labs(y = expression(paste("Intensity")))
bp.three.one + scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 100)) +
scale_x_continuous(breaks = c(1, 3.33, 5.5, 7.66, 10),
labels=c("0%", "25%", "50%", "75%", "100%")) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
bp.three.two = ggplot(three.syl.two, aes(x = Interval, y = Intensity, colour = Stress, linetype = Stress)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
labs(y = expression(paste("Intensity")))
bp.three.two + scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 100)) +
scale_x_continuous(breaks = c(1, 3.33, 5.5, 7.66, 10),
labels=c("0%", "25%", "50%", "75%", "100%")) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
bp.three.three = ggplot(three.syl.three, aes(x = Interval, y = Intensity, colour = Stress, linetype = Stress)) +
stat_smooth(method = "gam", formula = y ~ s(x, k = 10, bs = "cs")) +
labs(y = expression(paste("Intensity")))
bp.three.three + scale_y_continuous(breaks = seq(0, 100, 25), limits = c(0, 100)) +
scale_x_continuous(breaks = c(1, 3.33, 5.5, 7.66, 10),
labels=c("0%", "25%", "50%", "75%", "100%")) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.title = element_text(size = 9, family = "serif"),
legend.text = element_text(size = 9, family = "serif"),
panel.background = element_rect(fill = "white", colour = "black"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
3. F0
a. 2-Syllable Words
F0.two.syllables = read.csv("F0_two_syllables_Jaker_Howson.txt", header=T, sep="\t", encoding="UTF-8", stringsAsFactors = TRUE)
summary(F0.two.syllables)
## Participant Word Syllable F0 Tone Vowel
## A01:191 hegha : 77 SYL1:386 Min. : 80.36 high-high:271 a :157
## A02:233 hedaN : 72 SYL2:409 1st Qu.:123.65 low-low :524 an: 78
## A03:186 niyaN : 72 Median :158.76 e :328
## A04:185 hesa : 69 Mean :155.46 i :154
## neye : 48 3rd Qu.:180.60 in: 20
## hiya : 44 Max. :393.99 u : 57
## (Other):413 ui: 1
F0.two.fit = lmer(F0 ~ Syllable + Tone + Vowel + (1 | Participant) + (1 | Word), data = F0.two.syllables)
anova(F0.two.fit)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Syllable 2442 2442 1 360.54 3.3277 0.068949 .
## Tone 64880 64880 1 32.99 88.4302 7.391e-11 ***
## Vowel 16693 2782 6 148.27 3.7920 0.001524 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(F0.two.fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: F0 ~ Syllable + Tone + Vowel + (1 | Participant) + (1 | Word)
## Data: F0.two.syllables
##
## REML criterion at convergence: 7476.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6847 -0.4581 -0.0839 0.3854 10.4649
##
## Random effects:
## Groups Name Variance Std.Dev.
## Word (Intercept) 13.06 3.614
## Participant (Intercept) 651.94 25.533
## Residual 733.69 27.087
## Number of obs: 795, groups: Word, 21; Participant, 4
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 174.503 13.949 4.220 12.510 0.000171 ***
## SyllableSYL2 -6.047 3.315 360.542 -1.824 0.068949 .
## Tonelow-low -31.663 3.367 32.993 -9.404 7.39e-11 ***
## Vowelan 7.596 4.452 98.411 1.706 0.091106 .
## Vowele 1.153 3.616 127.192 0.319 0.750254
## Voweli 7.207 5.107 124.527 1.411 0.160739
## Vowelin 20.362 7.318 107.214 2.783 0.006375 **
## Vowelu 16.607 6.691 216.314 2.482 0.013822 *
## Vowelui -19.036 27.604 754.638 -0.690 0.490660
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SySYL2 Tnlw-l Vowelan Vowele Voweli Vowelin Vowelu
## SyllablSYL2 -0.328
## Tonelow-low -0.273 0.429
## Vowelan -0.064 -0.059 -0.131
## Vowele -0.306 0.600 0.225 0.353
## Voweli -0.339 0.716 0.442 0.246 0.722
## Vowelin -0.027 -0.058 -0.121 0.207 0.211 0.097
## Vowelu -0.319 0.688 0.494 0.122 0.638 0.678 0.053
## Vowelui -0.043 0.045 0.087 0.041 0.084 0.087 0.021 0.076
r.squaredGLMM(F0.two.fit)
## R2m R2c
## [1,] 0.183748 0.5718288
warp.emm = emmeans(F0.two.fit, "Tone")
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## (high-high) - (low-low) 31.7 3.4 26.9 9.308 <.0001
##
## Results are averaged over some or all of the levels of: Syllable, Vowel
## Degrees-of-freedom method: kenward-roger
warp.emm = emmeans(F0.two.fit, "Vowel")
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## a - an -7.596 4.67 81.5 -1.627 0.6655
## a - e -1.153 3.74 106.1 -0.308 0.9999
## a - i -7.207 5.29 103.8 -1.362 0.8205
## a - in -20.362 7.57 89.0 -2.690 0.1130
## a - u -16.607 6.87 184.6 -2.417 0.1975
## a - ui 19.036 27.62 748.2 0.689 0.9932
## an - e 6.442 4.83 108.2 1.334 0.8344
## an - i 0.389 6.08 195.9 0.064 1.0000
## an - in -12.766 8.02 82.9 -1.591 0.6884
## an - u -9.011 7.79 179.5 -1.157 0.9089
## an - ui 26.631 27.81 741.6 0.958 0.9627
## e - i -6.053 3.66 95.8 -1.655 0.6475
## e - in -19.209 7.69 98.0 -2.498 0.1715
## e - u -15.454 5.30 301.8 -2.916 0.0578
## e - ui 20.189 27.55 750.3 0.733 0.9905
## i - in -13.156 8.80 83.8 -1.495 0.7467
## i - u -9.401 5.09 96.0 -1.846 0.5210
## i - ui 26.242 27.65 739.6 0.949 0.9643
## in - u 3.755 9.95 115.9 0.377 0.9998
## in - ui 39.398 28.48 692.5 1.383 0.8108
## u - ui 35.643 27.93 735.7 1.276 0.8629
##
## Results are averaged over some or all of the levels of: Syllable, Tone
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 7 estimates
levels(F0.two.syllables$Syllable) = c("Syllable One", "Syllable Two")
F0.two.sylllables.plot = ggplot(F0.two.syllables, aes(x = Syllable, y = F0, fill = Syllable)) +
geom_violin(trim = TRUE) +
geom_boxplot(width = 0.25) +
facet_wrap(~Tone) +
labs(y = expression(paste("F0")))
F0.two.sylllables.plot + scale_y_continuous(breaks = seq(0, 3000, 50), limits = c(50, 425)) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
strip.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.position = "none",
panel.background = element_rect(fill = "white", colour = "black"),
strip.background = element_rect(colour = "black", fill = "grey"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
b. 3-Syllable Words
F0.three.syllables = read.csv("F0_three_syllables_Jaker_Howson.txt", header=T, sep="\t", encoding="UTF-8", stringsAsFactors = TRUE)
summary(F0.three.syllables)
## Participant Word Syllable F0 Tone
## A01: 61 heneye :81 SYL1:121 Min. : 89.39 high-high-high:128
## A02:139 nInIdEl :61 SYL2:133 1st Qu.:117.39 low-low-low :255
## A03: 97 nathiNja :57 SYL3:129 Median :156.83
## A04: 86 nathiNkiN:54 Mean :152.38
## nInIyU :39 3rd Qu.:186.55
## nInIshU :25 Max. :242.17
## (Other) :66
## Vowel
## a : 63
## an: 5
## e :135
## i : 90
## in: 67
## u : 23
##
F0.three.fit = lmer(F0 ~ Syllable + Tone + Vowel + (1 | Participant) + (1 | Word), data = F0.three.syllables)
anova(F0.three.fit)
## Type III Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## Syllable 6842.1 3421.0 2 370.93 13.1623 3.002e-06 ***
## Tone 20669.5 20669.5 1 20.36 79.5250 1.801e-08 ***
## Vowel 5761.9 1152.4 5 68.51 4.4337 0.001477 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(F0.three.fit)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: F0 ~ Syllable + Tone + Vowel + (1 | Participant) + (1 | Word)
## Data: F0.three.syllables
##
## REML criterion at convergence: 3197.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.8109 -0.3959 0.0110 0.4535 6.4687
##
## Random effects:
## Groups Name Variance Std.Dev.
## Word (Intercept) 9.023 3.004
## Participant (Intercept) 1500.697 38.739
## Residual 259.912 16.122
## Number of obs: 383, groups: Word, 15; Participant, 4
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 186.6275 20.0892 3.4491 9.290 0.001467 **
## SyllableSYL2 -10.1589 2.3451 370.9980 -4.332 1.90e-05 ***
## SyllableSYL3 -11.8103 2.5455 370.9615 -4.640 4.85e-06 ***
## Tonelow-low-low -39.4139 4.4197 20.3584 -8.918 1.80e-08 ***
## Vowelan -12.1242 8.4850 98.1904 -1.429 0.156207
## Vowele 0.9828 3.6571 14.6708 0.269 0.791865
## Voweli -4.8451 5.1011 59.7590 -0.950 0.346029
## Vowelin 11.7811 3.3695 327.0066 3.496 0.000537 ***
## Vowelu -6.4289 6.0943 62.0145 -1.055 0.295560
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) SySYL2 SySYL3 Tnlw-- Vowelan Vowele Voweli Vowelin
## SyllablSYL2 -0.024
## SyllablSYL3 -0.086 0.535
## Tonlw-lw-lw -0.226 0.035 0.222
## Vowelan -0.030 -0.121 -0.231 -0.017
## Vowele -0.141 -0.289 -0.179 0.229 0.328
## Voweli -0.226 -0.134 0.214 0.671 0.142 0.617
## Vowelin -0.055 -0.497 -0.285 -0.012 0.222 0.515 0.329
## Vowelu -0.178 -0.149 -0.120 0.567 0.187 0.541 0.695 0.301
r.squaredGLMM(F0.three.fit)
## R2m R2c
## [1,] 0.1297469 0.8721829
warp.emm = emmeans(F0.three.fit, "Syllable")
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## SYL1 - SYL2 10.16 2.36 371 4.301 0.0001
## SYL1 - SYL3 11.81 2.56 371 4.620 <.0001
## SYL2 - SYL3 1.65 2.37 371 0.696 0.7663
##
## Results are averaged over some or all of the levels of: Tone, Vowel
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 3 estimates
warp.emm = emmeans(F0.three.fit, "Tone")
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## (high-high-high) - (low-low-low) 39.4 4.67 19.3 8.434 <.0001
##
## Results are averaged over some or all of the levels of: Syllable, Vowel
## Degrees-of-freedom method: kenward-roger
warp.emm = emmeans(F0.three.fit, "Vowel")
pairs(warp.emm, simple = "each", combine = TRUE, options = get_emm_option("contrast"), adjust = "Tukey")
## contrast estimate SE df t.ratio p.value
## a - an 12.124 8.74 94.0 1.387 0.7345
## a - e -0.983 4.04 13.9 -0.243 0.9999
## a - i 4.845 5.39 56.9 0.900 0.9450
## a - in -11.781 3.45 324.5 -3.411 0.0094
## a - u 6.429 6.51 59.1 0.987 0.9204
## an - e -13.107 8.31 170.6 -1.578 0.6145
## an - i -7.279 9.49 188.7 -0.767 0.9726
## an - in -23.905 8.64 101.7 -2.765 0.0716
## an - u -5.695 9.78 175.0 -0.582 0.9921
## e - i 5.828 4.15 272.1 1.405 0.7243
## e - in -10.798 3.82 15.2 -2.824 0.1069
## e - u 7.412 5.43 148.3 1.364 0.7483
## i - in -16.626 5.35 76.4 -3.107 0.0306
## i - u 1.584 4.59 302.5 0.345 0.9994
## in - u 18.210 6.41 68.9 2.842 0.0626
##
## Results are averaged over some or all of the levels of: Syllable, Tone
## Degrees-of-freedom method: kenward-roger
## P value adjustment: tukey method for comparing a family of 6 estimates
levels(F0.three.syllables$Syllable) = c("Syllable One", "Syllable Two", "Syllable Three")
F0.three.sylllables.plot = ggplot(F0.three.syllables, aes(x = Syllable, y = F0, fill = Syllable)) +
geom_violin(trim = TRUE) +
geom_boxplot(width = 0.25) +
facet_wrap(~Tone) +
labs(y = expression(paste("F0")))
F0.three.sylllables.plot + scale_y_continuous(breaks = seq(0, 3000, 50), limits = c(0, 300)) +
theme(axis.text.y = element_text(size = 9, family = "serif"),
axis.text.x = element_text(size = 9, family = "serif"),
strip.text.x = element_text(size = 9, family = "serif"),
axis.title.y = element_text(size = 9, family = "serif"),
axis.title.x = element_blank(),
legend.position = "none",
panel.background = element_rect(fill = "white", colour = "black"),
strip.background = element_rect(colour = "black", fill = "grey"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
4. Linear discriminant analysis: comparing Duration, Intensity, and F0
a. 2-Syllable Words
require(flipMultivariates)
## Loading required package: flipMultivariates
## Registered S3 methods overwritten by 'car':
## method from
## influence.merMod lme4
## cooks.distance.influence.merMod lme4
## dfbeta.influence.merMod lme4
## dfbetas.influence.merMod lme4
## Registered S3 method overwritten by 'pryr':
## method from
## print.bytes Rcpp
## Registered S3 method overwritten by 'flipRegression':
## method from
## print.Anova MASS
LDA.data.two.syllables = read.csv("LDA_two_syllables_Jaker_Howson.txt", header=T, sep="\t", encoding="UTF-8", stringsAsFactors = TRUE)
summary(LDA.data.two.syllables)
## part word Stress Stresses
## A01:401 nEyaN : 114 stressed :799 stressed-unstressed: 538
## A02:410 nIjaN : 82 unstressed:785 unstressed-stressed:1046
## A03:401 shEkiN : 82
## A04:372 hegha : 79
## hEya : 78
## hedaN : 74
## (Other):1075
## Syllable Tones Tone Syllables Rep
## SYL1:795 high-high:276 High:666 Two:1584 Min. : 1.0
## SYL2:789 high-low :538 Low :918 1st Qu.: 598.8
## low-high :233 Median :1569.5
## low-low :537 Mean :1772.2
## 3rd Qu.:2572.5
## Max. :3614.0
##
## Intensity Duration F0
## Min. :-1.5235 Min. :-2.08461 Min. :-3.30934
## 1st Qu.: 0.2634 1st Qu.:-0.63263 1st Qu.:-0.58388
## Median : 0.7841 Median :-0.01944 Median :-0.13853
## Mean : 0.7641 Mean : 0.09877 Mean : 0.09297
## 3rd Qu.: 1.2895 3rd Qu.: 0.74987 3rd Qu.: 0.67916
## Max. : 3.1059 Max. : 4.15385 Max. : 8.80533
##
lda.two.syllables = LDA(Stress ~ Duration + Intensity + F0, data = LDA.data.two.syllables)
lda.two.syllables
lda.pred.two.syllables = LDA(Stress ~ Duration + Intensity + F0, data = LDA.data.two.syllables, output = "Prediction-Accuracy Table")
lda.pred.two.syllables
b. 3-Syllable Words
LDA.data.three.syllables = read.csv("LDA_three_syllables_Jaker_Howson.txt", header=T, sep="\t", encoding="UTF-8", stringsAsFactors = TRUE)
summary(LDA.data.three.syllables)
## part word Stress
## A01:369 heneye : 85 stressed :904
## A02:405 henEke : 79 unstressed:668
## A03:370 ?edets'Er: 74
## A04:428 henEyaN : 66
## nirINkA : 63
## tunedIN : 63
## (Other) :1142
## Stresses Syllable Tones Tone
## stressed-unstressed-stressed :343 SYL1:531 low-high-low :274 High:661
## unstressed-stressed-stressed :792 SYL2:530 low-low-low :264 Low :911
## unstressed-stressed-unstressed:437 SYL3:511 high-low-low :259
## low-low-high :256
## high-high-low:163
## low-high-high:141
## (Other) :215
## Syllables Rep Intensity Duration
## Three:1572 Min. : 601 Min. :-1.8075 Min. :-2.28547
## 1st Qu.:1624 1st Qu.: 0.0696 1st Qu.:-1.11982
## Median :2618 Median : 0.6372 Median :-0.58302
## Mean :2333 Mean : 0.6318 Mean :-0.44668
## 3rd Qu.:3288 3rd Qu.: 1.1965 3rd Qu.: 0.08198
## Max. :4021 Max. : 3.7526 Max. : 3.61683
##
## F0
## Min. :-3.37086
## 1st Qu.:-0.52913
## Median :-0.09657
## Mean :-0.01283
## 3rd Qu.: 0.42981
## Max. :11.03728
##
lda.three.syllables = LDA(Stress ~ Duration + Intensity + F0, data = LDA.data.three.syllables)
lda.three.syllables
lda.pred.three.syllables = LDA(Stress ~ Duration + Intensity + F0, data = LDA.data.three.syllables, output = "Prediction-Accuracy Table")
lda.pred.three.syllables