Set random-number seed:
# Fix the random-number seed so that any randomised step gives the same result on every run.
set.seed(123)
Packages that will be needed:
# Package setup. For each dependency: install it if it is not available yet,
# then attach it with library(), which stops with an error if the package is
# still missing (require() only returns FALSE and lets the script carry on).
# The earlier `require(pkg) || install.packages(pkg)` form fails right after a
# fresh install, because install.packages() returns NULL and `||` cannot
# evaluate NULL.
# Lines starting with "##" are console output kept from an earlier run.
if (!requireNamespace("languageR", quietly = TRUE)) install.packages("languageR")
## Loading required package: languageR
## Warning: package 'languageR' was built under R version 3.4.4
## [1] TRUE
library(languageR)
if (!requireNamespace("lme4", quietly = TRUE)) install.packages("lme4")
## Loading required package: lme4
## Loading required package: Matrix
## [1] TRUE
library(lme4)
if (!requireNamespace("arm", quietly = TRUE)) install.packages("arm")
## Loading required package: arm
## Loading required package: MASS
##
## arm (Version 1.9-3, built: 2016-11-21)
## Working directory is C:/Users/Kie/Dropbox/PROJECT_Tongan/Tongan_ZOW_PhonologySubmission/RevisionIII/ResubmissionIIIMaterialsSubmitted
## [1] TRUE
library(arm)
if (!requireNamespace("multcomp", quietly = TRUE)) install.packages("multcomp")
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
##
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
##
## geyser
## [1] TRUE
library(multcomp)
if (!requireNamespace("MuMIn", quietly = TRUE)) install.packages("MuMIn")
## Loading required package: MuMIn
## Warning: package 'MuMIn' was built under R version 3.4.4
## [1] TRUE
library(MuMIn)
# grid ships with R itself, so there is nothing to install for it.
## Loading required package: grid
## [1] TRUE
library(grid)
if (!requireNamespace("vcd", quietly = TRUE)) install.packages("vcd")
## Loading required package: vcd
## [1] TRUE
library(vcd)
if (!requireNamespace("gridExtra", quietly = TRUE)) install.packages("gridExtra")
## Loading required package: gridExtra
## Warning: package 'gridExtra' was built under R version 3.4.4
## [1] TRUE
library(gridExtra)
if (!requireNamespace("car", quietly = TRUE)) install.packages("car")
## Loading required package: car
##
## Attaching package: 'car'
## The following object is masked from 'package:arm':
##
## logit
## [1] TRUE
library(car)
Some settings for the plots:
# Plot settings shared by the figures in this script.
myFontFamily <- "serif"
# Font family for most (base-graphics) plots. `family` is set directly: the
# earlier par(font = list(family = ...)) only worked because par() unwraps a
# single list argument and ignores the name it was passed under.
par(family = myFontFamily)
# Unfortunately this does not carry over when a .png file is written, so
#   family = myFontFamily
# has to be included as one of the arguments to png().
# It does not carry over to mosaic plots either: they take their fonts from
# gpar(), and gpar cannot be set like this for the whole script. Instead, we
# create this object:
gpSerif <- gpar(fontfamily = myFontFamily)
# and every call to mosaic() has to include
#   gp_varnames = gpSerif, gp_labels = gpSerif,
# For adding the numbers to the tiles of a mosaic plot with mtext(), use
#   fontfamily = myFontFamily
# in the list of gpar() arguments; for the font of the main title, pass
#   main_gp = gpar(fontfamily = myFontFamily)
# as an argument to mosaic().
# Resolution multiplier: the default is 72 ppi, so using this factor in every
# call to png() makes it 360.
myResMultiplier <- 5
Setting for html knitting:
# Let printed console output run up to 100 characters per line.
options(width=100)
Read in the data and take a look
# Read the deletion data (whitespace-delimited text with a header row).
# stringsAsFactors = TRUE is spelled out because R >= 4.0 reads text columns as
# character by default, while the code further down relies on factors
# (levels(), relevel(), per-level counts in summary()).
tongan_deletion <- read.table("deleted_i_Jul2018.txt", header = TRUE,
                              stringsAsFactors = TRUE)
# Per-column overview: level counts for factors, quartiles for numeric columns.
summary(tongan_deletion)
## speaker position is_it_footed V_status_wrt_English deleted
## S1:103 could_be_tertiary : 3 Min. :0.0000 epenthetic :209 Min. :0.000
## S2: 87 secondary__primary :80 1st Qu.:1.0000 notstressed: 77 1st Qu.:0.000
## S3:106 secondary__tertiary :58 Median :1.0000 stressed : 9 Median :0.000
## secondary__unstressed:92 Mean :0.8567 NA's : 1 Mean :0.353
## tertiary__primary :21 3rd Qu.:1.0000 3rd Qu.:1.000
## tertiary__unstressed : 1 Max. :1.0000 Max. :1.000
## unstressed__primary :41 NA's :3
## deleted_binary dictionary_entry definition C_environment footed_or_not
## Min. :0.000 `amipasitoa: 6 ambassador: 6 s_t : 54 beginning: 58
## 1st Qu.:0.000 `atimosifia: 6 atmosphere: 6 n_s : 43 end :238
## Median :0.000 `enisinia : 6 dictionary: 6 n_t : 40
## Mean :0.372 tikisinale : 6 engineer : 6 l_k : 12
## 3rd Qu.:1.000 `esitimeti : 5 estimate : 5 m_p : 12
## Max. :1.000 sakilifisio: 5 sacrifice : 5 s_p : 12
## NA's :3 (Other) :262 (Other) :262 (Other):123
## English_transition_types
## nas_stop :69
## fric_stop:63
## son_fric :38
## stop_fric:11
## fric_son : 9
## (Other) :19
## NA's :87
# Column names of the data frame.
names(tongan_deletion)
## [1] "speaker" "position" "is_it_footed"
## [4] "V_status_wrt_English" "deleted" "deleted_binary"
## [7] "dictionary_entry" "definition" "C_environment"
## [10] "footed_or_not" "English_transition_types"
List items that are in various metrical positions:
# The distinct metrical-position codes (the factor levels of `position`).
levels(tongan_deletion$position)
## [1] "could_be_tertiary" "secondary__primary" "secondary__tertiary" "secondary__unstressed"
## [5] "tertiary__primary" "tertiary__unstressed" "unstressed__primary"
# Show the tokens whose vowel is in one of the "secondary__*" positions.
# %in% (rather than chained ==) never yields NA, so a missing position cannot
# add an all-NA row, and the columns are picked by name instead of by index.
secondary_positions <- c("secondary__primary", "secondary__tertiary",
                         "secondary__unstressed")
tongan_deletion[tongan_deletion$position %in% secondary_positions,
                c("position", "is_it_footed", "V_status_wrt_English",
                  "dictionary_entry")]
## position is_it_footed V_status_wrt_English dictionary_entry
## 1 secondary__tertiary 1 epenthetic `amipasitoa
## 2 secondary__tertiary 1 epenthetic `amipasitoa
## 3 secondary__tertiary 1 epenthetic `amipasitoa
## 7 secondary__unstressed 1 epenthetic `anitelope
## 8 secondary__unstressed 1 epenthetic `anitelope
## 9 secondary__primary 1 epenthetic `anitema
## 10 secondary__primary 1 epenthetic `anitema
## 11 secondary__primary 1 epenthetic `anitema
## 12 secondary__primary 1 epenthetic `apenitiki
## 13 secondary__primary 1 epenthetic `apenitiki
## 14 secondary__primary 1 epenthetic `apenitiki
## 15 secondary__primary 1 notstressed `?pelikoti
## 16 secondary__primary 1 notstressed `?pelikoti
## 18 secondary__tertiary 1 epenthetic `asipesitosi
## 20 secondary__unstressed 1 epenthetic `asipulini
## 21 secondary__unstressed 1 epenthetic `asipulini
## 22 secondary__unstressed 1 epenthetic `asipulini
## 23 secondary__primary 1 epenthetic `asipol?
## 24 secondary__unstressed 1 epenthetic `asipol?
## 25 secondary__unstressed 1 epenthetic `asipol?
## 26 secondary__unstressed 1 epenthetic `asital?noma
## 27 secondary__unstressed 1 epenthetic `asital?noma
## 28 secondary__unstressed 1 epenthetic `asital?noma
## 35 secondary__tertiary 1 epenthetic `atimosifia
## 36 secondary__tertiary 1 epenthetic `atimosifia
## 37 secondary__tertiary 1 epenthetic `atimosifia
## 38 secondary__unstressed 1 epenthetic `Aositel?lia
## 39 secondary__unstressed 1 epenthetic `Aositel?lia
## 40 secondary__unstressed 1 epenthetic penisi?
## 41 secondary__unstressed 1 epenthetic penisi?
## 42 secondary__unstressed 1 epenthetic penisi?
## 44 secondary__primary 1 epenthetic pelekifasi
## 45 secondary__primary 1 epenthetic pelekifasi
## 46 secondary__unstressed 1 stressed pilikatia_seniale
## 47 secondary__unstressed 1 stressed pilikatia_seniale
## 48 secondary__tertiary 1 notstressed Pilitania
## 49 secondary__tertiary 1 notstressed Pilitania
## 50 secondary__primary 1 epenthetic k?piteni
## 53 secondary__primary 1 notstressed k?ponika_q?sita
## 54 secondary__tertiary 1 epenthetic senituli?
## 55 secondary__tertiary 1 epenthetic senituli?
## 56 secondary__tertiary 1 epenthetic senituli?
## 57 secondary__unstressed 1 epenthetic Kalisitiane
## 58 secondary__unstressed 1 epenthetic Kalisitiane
## 59 secondary__primary 1 epenthetic Kilisimasi
## 60 secondary__primary 1 epenthetic Kilisimasi
## 61 secondary__primary 1 epenthetic Kilisimasi
## 62 secondary__unstressed 1 epenthetic Sinitalela
## 63 secondary__unstressed 1 epenthetic Sinitalela
## 64 secondary__unstressed 1 epenthetic Sinitalela
## 65 secondary__primary 1 stressed faka_siviliani
## 66 secondary__primary 1 notstressed sivilaise
## 67 secondary__primary 1 notstressed sivilaise
## 68 secondary__unstressed 1 notstressed komiunisi
## 69 secondary__unstressed 1 notstressed komiunisi
## 70 secondary__unstressed 1 epenthetic komipauni
## 71 secondary__unstressed 1 epenthetic komipauni
## 72 secondary__unstressed 1 epenthetic komipauni
## 73 secondary__tertiary 1 epenthetic konifelenisi
## 74 secondary__tertiary 1 epenthetic konifelenisi
## 75 secondary__tertiary 1 epenthetic konifelenisi
## 76 secondary__tertiary 1 epenthetic kofilimasio
## 77 secondary__tertiary 1 epenthetic kofilimasio
## 78 secondary__tertiary 1 epenthetic kofilimasio
## 79 secondary__tertiary 1 epenthetic konis?nisi
## 80 secondary__tertiary 1 epenthetic konis?nisi
## 81 secondary__tertiary 1 epenthetic konis?nisi
## 82 secondary__unstressed 1 epenthetic konisinanite
## 83 secondary__unstressed 1 epenthetic konisinanite
## 84 secondary__tertiary 1 notstressed konisinanite
## 85 secondary__tertiary 0 notstressed konisinanite
## 86 secondary__unstressed 1 epenthetic konitineniti
## 87 secondary__unstressed 1 epenthetic konitineniti
## 88 secondary__tertiary 1 notstressed konitineniti
## 89 secondary__primary 1 epenthetic kalisitala
## 90 secondary__primary 1 epenthetic kalisitala
## 91 secondary__primary 1 epenthetic kalisitala
## 95 secondary__unstressed 1 epenthetic kasitom?
## 96 secondary__unstressed 1 epenthetic kasitom?
## 97 secondary__primary 1 epenthetic kasitom?
## 98 secondary__primary 1 notstressed tenominato
## 100 secondary__primary 1 notstressed tip?sitoa
## 101 secondary__primary 1 notstressed tip?sitoa
## 102 secondary__primary 1 notstressed tip?sitoa
## 103 secondary__unstressed 1 epenthetic tikisinale
## 104 secondary__unstressed 1 epenthetic tikisinale
## 105 secondary__unstressed 1 epenthetic tikisinale
## 109 secondary__unstressed 1 epenthetic tisikaunite
## 110 secondary__tertiary 1 epenthetic tisikaunite
## 111 secondary__tertiary 1 epenthetic tisikaunite
## 112 secondary__unstressed 1 epenthetic `emipaea
## 113 secondary__unstressed 1 epenthetic `emipaea
## 114 secondary__unstressed 1 epenthetic `emipaea
## 115 secondary__unstressed 1 epenthetic `enisinia
## 116 secondary__unstressed 1 epenthetic `enisinia
## 117 secondary__unstressed 1 epenthetic `enisinia
## 121 secondary__tertiary 1 notstressed `Epikulio
## 123 secondary__tertiary 1 notstressed `epikolotisi
## 124 secondary__unstressed 1 epenthetic `esitimeti
## 125 secondary__unstressed 1 epenthetic `esitimeti
## 126 secondary__primary 1 notstressed `esitimeti
## 132 secondary__primary 1 epenthetic falakiseni
## 134 secondary__primary 1 epenthetic falakiseni
## 135 secondary__primary 1 epenthetic Falanis?
## 136 secondary__primary 1 epenthetic Falanis?
## 137 secondary__unstressed 1 epenthetic Senitaile
## 138 secondary__unstressed 1 epenthetic Senitaile
## 139 secondary__primary 1 epenthetic Senitaile
## 140 secondary__unstressed 1 epenthetic sinisapia
## 141 secondary__primary 1 notstressed kilisilini
## 142 secondary__primary 1 notstressed kilisilini
## 146 secondary__unstressed 1 notstressed helikopet?
## 147 secondary__unstressed 1 notstressed helikopet?
## 148 secondary__tertiary 1 notstressed helikopet?
## 149 secondary__unstressed 1 notstressed hemisefia
## 151 secondary__tertiary 1 epenthetic hisit?lia
## 152 secondary__tertiary 1 epenthetic hisit?lia
## 153 secondary__tertiary 1 epenthetic hisit?lia
## 157 secondary__tertiary 1 epenthetic `inis?nisi
## 158 secondary__tertiary 1 epenthetic `inis?nisi
## 159 secondary__tertiary 1 epenthetic `inis?nisi
## 160 secondary__tertiary 1 epenthetic `itulis?sia
## 161 secondary__tertiary 1 epenthetic `inis?kite
## 162 secondary__tertiary 1 epenthetic `inis?kite
## 163 secondary__tertiary 1 epenthetic `inis?kite
## 164 secondary__unstressed 1 epenthetic `inisip?kita
## 165 secondary__tertiary 1 epenthetic `inisip?kita
## 166 secondary__tertiary 1 epenthetic `inisip?kita
## 167 secondary__unstressed 1 epenthetic `inisulato
## 168 secondary__unstressed 1 epenthetic `inisulato
## 169 secondary__unstressed 1 epenthetic `inisuline
## 170 secondary__unstressed 1 epenthetic `inisuline
## 171 secondary__unstressed 1 epenthetic `inisuline
## 172 secondary__unstressed 1 epenthetic `inivoisi
## 173 secondary__unstressed 1 epenthetic `inivoisi
## 174 secondary__primary 1 epenthetic `inivoisi
## 175 secondary__unstressed 1 epenthetic kangikal?
## 176 secondary__unstressed 1 epenthetic kangikal?
## 177 secondary__unstressed 1 epenthetic kangikal?
## 178 secondary__primary 1 epenthetic lavenit?
## 179 secondary__primary 1 epenthetic lavenit?
## 181 secondary__unstressed 1 epenthetic Manisulia
## 182 secondary__unstressed 1 epenthetic Manisulia
## 183 secondary__unstressed 1 epenthetic menitelini
## 184 secondary__unstressed 1 epenthetic menitelini
## 185 secondary__unstressed 1 epenthetic menitelini
## 187 secondary__unstressed 1 epenthetic maniusikilipi
## 188 secondary__unstressed 1 notstressed Metiteleniane
## 189 secondary__unstressed 1 notstressed Metiteleniane
## 190 secondary__unstressed 1 stressed melitiane
## 191 secondary__unstressed 1 stressed melitiane
## 192 secondary__unstressed 1 stressed melitiane
## 197 secondary__unstressed 1 notstressed minisit?
## 198 secondary__primary 1 epenthetic minisit?
## 199 secondary__primary 1 epenthetic minisit?
## 201 secondary__tertiary 1 epenthetic monasiteli?
## 202 secondary__primary 1 epenthetic monasiteli?
## 203 secondary__unstressed 1 epenthetic monasiteli?
## 205 secondary__primary 1 epenthetic misiteli?
## 206 secondary__tertiary 1 epenthetic misiteli?
## 207 secondary__primary 1 epenthetic misiteli?
## 208 secondary__tertiary 1 epenthetic nasit?siume
## 209 secondary__tertiary 1 epenthetic nasit?siume
## 210 secondary__primary 1 notstressed `opelikato
## 211 secondary__primary 1 notstressed `opelikato
## 213 secondary__unstressed 1 epenthetic `ositalesi
## 214 secondary__unstressed 1 epenthetic `ositalesi
## 215 secondary__unstressed 1 epenthetic `ositalesi
## 216 secondary__tertiary 1 epenthetic penikiliasi
## 217 secondary__tertiary 1 epenthetic penikiliasi
## 218 secondary__tertiary 1 epenthetic penikiliasi
## 219 secondary__primary 1 epenthetic pasinipi
## 220 secondary__unstressed 1 epenthetic penikuini
## 221 secondary__unstressed 1 epenthetic penikuini
## 222 secondary__unstressed 1 epenthetic penikuini
## 223 secondary__unstressed 1 epenthetic Penitekosi
## 224 secondary__unstressed 1 epenthetic Penitekosi
## 225 secondary__unstressed 1 epenthetic Penitekosi
## 226 secondary__primary 1 epenthetic palasitika
## 227 secondary__primary 1 epenthetic palasitika
## 228 secondary__primary 1 epenthetic palasitika
## 230 secondary__unstressed 1 epenthetic poinisetia
## 231 secondary__tertiary 1 notstressed pomikanite
## 232 secondary__tertiary 1 notstressed pomikanite
## 233 secondary__tertiary 1 notstressed pomikanite
## 234 secondary__primary 1 notstressed paletikasi
## 235 secondary__primary 1 notstressed paletikasi
## 236 secondary__primary 1 notstressed palesiteni
## 238 secondary__primary 1 notstressed palesiteni
## 239 secondary__primary 1 epenthetic pilinisesi
## 240 secondary__primary 1 epenthetic pilinisesi
## 241 secondary__primary 1 epenthetic pilinisesi
## 242 secondary__primary 1 notstressed palofisai
## 243 secondary__primary 1 notstressed palofisai
## 244 secondary__primary 1 notstressed palofisai
## 245 secondary__primary 1 notstressed Pal?tisani
## 246 secondary__primary 1 notstressed Pal?tisani
## 247 secondary__primary 1 notstressed Pal?tisani
## 248 secondary__primary 1 epenthetic kolonitini
## 249 secondary__primary 1 epenthetic kolonitini
## 250 secondary__primary 1 epenthetic kolonitini
## 252 secondary__primary 1 notstressed l?sisita
## 253 secondary__unstressed 1 notstressed l?sisita
## 254 secondary__primary 1 epenthetic l?sisita
## 258 secondary__unstressed 1 notstressed sakilifisio
## 259 secondary__unstressed 1 notstressed sakilifisio
## 260 secondary__unstressed 1 epenthetic sakisefoni
## 261 secondary__unstressed 1 epenthetic sakisefoni
## 265 secondary__tertiary 1 notstressed seminalio
## 266 secondary__tertiary 1 notstressed seminalio
## 267 secondary__primary 1 epenthetic S?pitema
## 268 secondary__primary 1 epenthetic S?pitema
## 269 secondary__primary 1 epenthetic sepitema
## 270 secondary__unstressed 1 epenthetic silivapiti
## 271 secondary__primary 1 epenthetic sitenisila
## 272 secondary__primary 1 epenthetic sitenisila
## 273 secondary__primary 1 epenthetic sitenisila
## 274 secondary__primary 1 epenthetic palasit?
## 275 secondary__primary 1 epenthetic palasit?
## 276 secondary__primary 1 epenthetic palasit?
## 280 secondary__tertiary 1 epenthetic talanisimita
## 285 secondary__primary 1 notstressed iunisoni
## 288 secondary__tertiary 1 notstressed `univesiti
## 289 secondary__tertiary 1 notstressed `univesiti
## 290 secondary__tertiary 1 notstressed `univesiti
## 291 secondary__unstressed 1 epenthetic Uesiliana
## 293 secondary__primary 1 epenthetic uasik?
## 294 secondary__primary 1 epenthetic uasik?
## 295 secondary__primary 1 epenthetic uasik?
## 296 secondary__tertiary 1 epenthetic uisitelia
Plot i-deletion as function of the vowel’s status in the English word. Also do this separately for each speaker. We can see that all three speakers show the same pattern (stressed < unstressed < epenthetic), but with different overall rates.
# Order the vowel-status levels as stressed, unstressed, epenthetic and give
# them short labels. Stating levels and labels together keeps every label tied
# to its level (two relevel() calls followed by a positional `levels<-` depend
# on the resulting order) and also works if the column is character.
tongan_deletion$V_status_wrt_English <- factor(
  tongan_deletion$V_status_wrt_English,
  levels = c("stressed", "notstressed", "epenthetic"),
  labels = c("str", "unstr", "epenthetic")
)
# Make a table of counts, for the mosaic plot
counts_deletion <- table(
  tongan_deletion$V_status_wrt_English,
  tongan_deletion$deleted_binary,
  dnn = c("V status", "deletion")
)
mosaic(
  counts_deletion,
  direction = "v",
  pop = FALSE,
  # fonts and colors
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    # placement of the variable names (V status, deletion) and of the values
    # (str, unstr, epenthetic, not deleted, deleted)
    rot_labels = c(left = 0),
    offset_varnames = c(left = 1.5),
    offset_labels = c(left = 1),
    # draw the "V status" variable name, but not the "deletion" one
    varnames = c(TRUE, FALSE),
    # more informative than 0 and 1
    set_labels = list(deletion = c("not deleted", "deleted"))
  )
)
# Same plot, but separately for each speaker.

# Cross-tabulate vowel status against deletion for one speaker's tokens.
count_by_speaker <- function(speaker_id) {
  speaker_rows <- tongan_deletion[tongan_deletion$speaker == speaker_id, ]
  table(speaker_rows$V_status_wrt_English, speaker_rows$deleted_binary,
        dnn = c("V status", "deletion"))
}
counts_deletion_S1 <- count_by_speaker("S1")
counts_deletion_S2 <- count_by_speaker("S2")
counts_deletion_S3 <- count_by_speaker("S3")

# Draw one speaker's mosaic plot and capture it as a grob.
#   counts         table of counts to plot
#   labeling_extra labeling arguments specific to this panel
#   left_margin    left margin (0.1 is added to every margin)
#   subtitle       text shown under the panel
grab_speaker_panel <- function(counts, labeling_extra, left_margin, subtitle) {
  grid.grabExpr(mosaic(
    counts,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = c(
      list(
        rot_labels = c(left = 0),
        offset_varnames = c(left = 1.5),
        offset_labels = c(left = 1),
        varnames = c(FALSE, FALSE)
      ),
      labeling_extra
    ),
    margins = c(2, 0, 0, left_margin) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = subtitle
  ))
}

# Panels to be placed side by side. The result looks cramped here, but the
# .png file looks good. The extra spaces in the subtitles are a hack to get
# them centered under the main plot area.
p1 <- grab_speaker_panel(
  counts_deletion_S1,
  list(set_labels = list(deletion = c("not deleted", "deleted"))),
  4,
  " Speaker 1"
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
p2 <- grab_speaker_panel(
  counts_deletion_S2,
  list(labels = c(TRUE, FALSE)),
  -2,
  "Speaker 2 "
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
p3 <- grab_speaker_panel(
  counts_deletion_S3,
  list(labels = c(TRUE, FALSE)),
  -7.8,
  "Speaker 3 "
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
grid.arrange(p1, p2, p3, ncol = 3)
# Write the three-speaker figure to a file:
png(
  file = "Vowel_deletion_mosaic_plots2.png",
  width = myResMultiplier * 545,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
) # usu. 600, 350, 72
grid.arrange(p1, p2, p3, ncol = 3)
dev.off()
## png
## 2
Plot i-deletion as function of the vowel’s surrounding consonants, separately for each status in the English word. There are a lot of different consonant environments (so we’re going to want to group them):
# Count the tokens in each consonant environment.
table(tongan_deletion$C_environment)
##
## f_s k_f k_l k_s l_f l_k l_m l_s l_t l_v m_k m_n m_p m_s n_f n_k n_s n_t n_v ng_k
## 6 2 1 9 2 12 3 1 8 1 3 6 12 1 3 6 43 40 6 3
## p_k p_l p_t s_f s_k s_l s_m s_n s_p s_s s_t t_k t_m t_n t_s t_t v_l
## 4 1 6 4 9 5 5 9 12 2 54 2 6 1 3 2 3
Let’s group them:
# Collapse the fine-grained consonant environments into nine coarse classes.
# The classes are listed in the order wanted for the factor levels, so the
# ordering no longer depends on positional indices into alphabetically sorted
# levels (which would silently scramble if an unexpected value turned up).
coarse_environments <- list(
  sonorant_sonorant = c("l_m", "m_n"),
  sonorant_fric     = c("l_f", "l_s", "l_v", "m_s", "n_f", "n_s", "n_v"),
  sonorant_stop     = c("m_k", "m_p", "n_k", "n_t", "ng_k", "l_k", "l_t"),
  fric_sonorant     = c("s_m", "s_n", "s_l", "v_l"),
  fric_fric         = c("f_s", "s_f", "s_s"),
  fric_stop         = c("s_p", "s_t", "s_k"),
  stop_sonorant     = c("k_l", "p_l", "t_m", "t_n"),
  stop_fric         = c("k_f", "k_s", "t_s"),
  stop_stop         = c("p_k", "p_t", "t_k", "t_t")
)
# Named vector mapping each fine environment to its coarse class.
coarse_lookup <- setNames(
  rep(names(coarse_environments), lengths(coarse_environments)),
  unlist(coarse_environments, use.names = FALSE)
)
fine_environment <- as.character(tongan_deletion$C_environment)
# Report environments that have no class instead of hiding them behind a
# placeholder label; they end up as NA in the new column.
unclassified <- setdiff(unique(fine_environment), names(coarse_lookup))
if (length(unclassified) > 0) {
  warning("C_environment values without a coarse class (set to NA): ",
          paste(unclassified, collapse = ", "), call. = FALSE)
}
tongan_deletion$C_enviro_coarse <- factor(
  unname(coarse_lookup[fine_environment]),
  levels = names(coarse_environments)
)
table(tongan_deletion$C_enviro_coarse)
##
## sonorant_sonorant sonorant_fric sonorant_stop fric_sonorant fric_fric
## 9 57 84 22 12
## fric_stop stop_sonorant stop_fric stop_stop
## 75 9 14 14
Plot it:
# Tables of counts (coarse consonant environment by deletion), one for the
# unstressed vowels and one for the epenthetic vowels
unstressed_rows <- tongan_deletion[tongan_deletion$V_status_wrt_English == "unstr", ]
epenthetic_rows <- tongan_deletion[tongan_deletion$V_status_wrt_English == "epenthetic", ]
counts_deletion_unstressed <- table(
  unstressed_rows$C_enviro_coarse,
  unstressed_rows$deleted_binary,
  dnn = c("surrounding Cs", "deletion")
)
counts_deletion_epenthetic <- table(
  epenthetic_rows$C_enviro_coarse,
  epenthetic_rows$deleted_binary,
  dnn = c("surrounding Cs", "deletion")
)
# Capture the two mosaic plots so that they can be placed side by side
p1 <- grid.grabExpr(mosaic(
  counts_deletion_unstressed,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0, top = 90),
    offset_varnames = c(left = 1.5),
    offset_labels = c(left = 1, top = -0.5),
    just_labels = "left",
    varnames = c(FALSE, FALSE),
    set_labels = list(deletion = c("not deleted", "deleted"))
  ),
  margins = c(2, 0, 0, 4) + 0.1,
  sub_gp = gpar(fontfamily = myFontFamily),
  sub = " unstressed"
))
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
p2 <- grid.grabExpr(mosaic(
  counts_deletion_epenthetic,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0, top = 90),
    offset_varnames = c(left = 1.5),
    offset_labels = c(left = 1, top = -0.5),
    just_labels = "left",
    varnames = c(FALSE, FALSE),
    labels = c(TRUE, FALSE)
  ),
  margins = c(2, 0, 0, -2) + 0.1,
  sub_gp = gpar(fontfamily = myFontFamily),
  sub = "epenthetic "
))
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
grid.arrange(p1, p2, ncol = 2)
# Write the two-panel figure to a file:
png(
  file = "Vowel_deletion_mosaic_plots3.png",
  width = myResMultiplier * 545,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
) # usu. 600, 350, 72
grid.arrange(p1, p2, ncol = 2)
dev.off()
## png
## 2
A different way of thinking about it: what is the transition type in the English pronunciation? I have coded this by hand in another column. A few notes:
# Count the tokens for each English transition type.
table(tongan_deletion$English_transition_types)
##
## fric_fric fric_son fric_stop nas_stop son_fric son_son son_stop stop_fric stop_son stop_stop
## 4 9 63 69 38 3 1 11 5 6
# Table of counts: English transition type by deletion
counts_deletion_byEnglish <- table(
  tongan_deletion$English_transition_types,
  tongan_deletion$deleted_binary,
  dnn = c("English C-C transition type", "deletion")
)
# Capture the mosaic plot
p1 <- grid.grabExpr(mosaic(
  counts_deletion_byEnglish,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0, top = 90),
    offset_varnames = c(left = 1.5),
    offset_labels = c(left = 1, top = -0.5),
    just_labels = "left",
    varnames = c(FALSE, FALSE),
    set_labels = list(deletion = c("not deleted", "deleted"))
  ),
  margins = c(4, 0, 0, 4) + 0.1,
  sub_gp = gpar(fontfamily = myFontFamily),
  sub = ""
))
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
grid.arrange(p1, ncol = 1)
# Write the plot to a file:
png(
  file = "Vowel_deletion_mosaic_plots4.png",
  width = myResMultiplier * 545,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
) # usu. 600, 350, 72
grid.arrange(p1, ncol = 1)
dev.off()
## png
## 2
The three categories where there’s enough data are fricative_stop, nasal_stop, sonorant_fricative. Sonorant_fricative shows less deletion than the other two. There doesn’t seem to be any clear pattern here, so we don’t pursue this further.
A reviewer suggests testing the hypothesis that there will be more deletion when a homorganic consonant sequence results, on the theory that V deletion reflects bilingual speakers’ ease of producing various English consonant sequences, and homorganic sequences should be easier. First we code C_environment as homorganic or not:
# Consonant environments that are coded as homorganic; every other
# environment is coded as heterorganic.
homorganic_environments <- c(
  "l_s", "l_t", "m_p", "n_s", "n_t", "ng_k", "s_l",
  "s_n", "s_s", "s_t", "t_n", "t_s", "t_t"
)
is_homorganic <- tongan_deletion$C_environment %in% homorganic_environments
tongan_deletion$Cs_homorganic_or_not <- ifelse(is_homorganic, "homorganic", "heterorganic")
Plot effect of consonant homorganicity–it looks like there’s a trend in the raw data, but let’s see if it holds up in the regression model below:
# Tables of counts (homorganic or not, by deletion), one for the unstressed
# vowels and one for the epenthetic vowels
unstressed_tokens <- tongan_deletion[tongan_deletion$V_status_wrt_English == "unstr", ]
epenthetic_tokens <- tongan_deletion[tongan_deletion$V_status_wrt_English == "epenthetic", ]
counts_deletion_unstressed_homorg_or_not <- table(
  unstressed_tokens$Cs_homorganic_or_not,
  unstressed_tokens$deleted_binary,
  dnn = c("surrounding Cs homorganic?", "deletion")
)
counts_deletion_epenthetic_homorg_or_not <- table(
  epenthetic_tokens$Cs_homorganic_or_not,
  epenthetic_tokens$deleted_binary,
  dnn = c("surrounding Cs homorganic?", "deletion")
)
# Plot them one by one, since for some mysterious reason grid.grabExpr()
# isn't working here
mosaic(
  counts_deletion_unstressed_homorg_or_not,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0, top = 90),
    offset_varnames = c(left = 1.5),
    offset_labels = c(left = 1, top = -0.5),
    just_labels = "left",
    varnames = c(FALSE, FALSE),
    set_labels = list(deletion = c("not deleted", "deleted"))
  ),
  margins = c(2, 0, 0, 4) + 0.1,
  sub_gp = gpar(fontfamily = myFontFamily),
  sub = " unstressed"
)
mosaic(
  counts_deletion_epenthetic_homorg_or_not,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0, top = 90),
    offset_varnames = c(left = 1.5),
    offset_labels = c(left = 1, top = -0.5),
    just_labels = "left",
    varnames = c(FALSE, FALSE),
    labels = c(TRUE, FALSE)
  ),
  margins = c(2, 0, 0, -2) + 0.1,
  sub_gp = gpar(fontfamily = myFontFamily),
  sub = "epenthetic "
)
Because there are so few “stressed” cases, and none of them ever have deletion (separability issue), let’s just ignore them and only compare unstressed vs. epenthetic. This allows us to use glmer(), which doesn’t handle separability problems well but does allow random effects.
Do the subsetting:
# Keep only the rows whose vowel status is not "str" (subset() also drops rows
# where the status is NA).
tongan_deletion_subset <- subset(tongan_deletion, V_status_wrt_English != "str")
# Make new variables that separate C_environment into preceding C and
# following C (these didn't end up getting used, but readers may wish to play
# with them). The label is split at the underscore rather than at fixed
# character positions, so that "ng_k" gives "ng" and "k" (substr() at
# positions 1 and 3 gave "n" and "_").
environment_labels <- as.character(tongan_deletion_subset$C_environment)
tongan_deletion_subset$precedingC <- sub("_.*$", "", environment_labels)
tongan_deletion_subset$followingC <- sub("^.*_", "", environment_labels)
Here is the model we present in the paper. Speaker is treated as a fixed effect because it has only three levels.
#Mixed-effects logistic regression of deletion: random intercept for each consonant
#environment; speaker, footing and vowel status as fixed effects
deletion.glmer <- glmer(
  formula = deleted_binary ~ + (1|C_environment) + speaker + is_it_footed + V_status_wrt_English,
  data = tongan_deletion_subset,
  family = binomial
)
summary(deletion.glmer)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: deleted_binary ~ +(1 | C_environment) + speaker + is_it_footed + V_status_wrt_English
## Data: tongan_deletion_subset
##
## AIC BIC logLik deviance df.resid
## 264.9 286.8 -126.4 252.9 278
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7363 -0.4417 -0.1572 0.4323 4.7737
##
## Random effects:
## Groups Name Variance Std.Dev.
## C_environment (Intercept) 1.847 1.359
## Number of obs: 284, groups: C_environment, 37
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9272 0.7153 -4.092 4.27e-05 ***
## speakerS2 -2.7272 0.5298 -5.148 2.64e-07 ***
## speakerS3 1.5430 0.3905 3.951 7.77e-05 ***
## is_it_footed 0.5762 0.5500 1.048 0.29485
## V_status_wrt_Englishepenthetic 1.4922 0.5343 2.793 0.00522 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) spkrS2 spkrS3 is_t_f
## speakerS2 -0.029
## speakerS3 -0.454 0.216
## is_it_footd -0.624 -0.028 0.138
## V_stts_wr_E -0.327 -0.078 0.037 -0.181
#Type II Wald chi-square test for each fixed-effect term of the main model
Anova(deletion.glmer)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: deleted_binary
## Chisq Df Pr(>Chisq)
## speaker 53.4045 2 2.531e-12 ***
## is_it_footed 1.0974 1 0.294845
## V_status_wrt_English 7.8001 1 0.005224 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Have a look at the random intercept for each consonant environment. Here are the C__C environments, in ascending order of how often you get deletion:
#Extract the estimated random intercepts for C_environment (one row per consonant environment)
temp_frame <- ranef(deletion.glmer)$C_environment
#Print the intercepts in ascending order
temp_frame[order(temp_frame[,1]),]
## [1] -1.19386217 -1.14381941 -1.02774011 -0.96136729 -0.84805726 -0.80978299 -0.69109353 -0.63169264
## [9] -0.53166310 -0.50069246 -0.50069246 -0.50069246 -0.49574718 -0.49574718 -0.41717657 -0.40648106
## [17] -0.40648106 -0.25418530 -0.14986288 -0.14986288 -0.14114100 -0.11401593 0.01077065 0.08048181
## [25] 0.25624130 0.60681900 0.68595447 0.84039865 0.94372646 1.00998767 1.08709948 1.27261532
## [33] 1.28951061 1.34858070 1.35049853 1.85593292 1.95196688
#Print the environment labels, ordered by ascending random intercept
row.names(temp_frame)[order(temp_frame[,1])]
## [1] "n_v" "t_m" "l_m" "l_k" "l_v" "m_n" "n_k" "l_t" "p_t" "f_s" "m_k" "t_s" "t_k"
## [14] "t_t" "s_f" "l_s" "p_l" "p_k" "l_f" "v_l" "t_n" "n_s" "n_f" "s_n" "s_m" "k_l"
## [27] "s_s" "s_l" "k_s" "ng_k" "k_f" "n_t" "m_p" "s_k" "m_s" "s_p" "s_t"
#Labels and intercepts side by side; note that cbind() with a character column turns the intercepts into strings
cbind(row.names(temp_frame)[order(temp_frame[,1])], temp_frame[order(temp_frame[,1]),])
## [,1] [,2]
## [1,] "n_v" "-1.19386217062271"
## [2,] "t_m" "-1.14381940921684"
## [3,] "l_m" "-1.02774010875997"
## [4,] "l_k" "-0.96136729123742"
## [5,] "l_v" "-0.848057257998045"
## [6,] "m_n" "-0.80978299484009"
## [7,] "n_k" "-0.691093532365527"
## [8,] "l_t" "-0.63169264394912"
## [9,] "p_t" "-0.531663099671024"
## [10,] "f_s" "-0.500692456306816"
## [11,] "m_k" "-0.500692456306816"
## [12,] "t_s" "-0.500692456306816"
## [13,] "t_k" "-0.495747178448532"
## [14,] "t_t" "-0.495747178448532"
## [15,] "s_f" "-0.417176566849591"
## [16,] "l_s" "-0.406481060590826"
## [17,] "p_l" "-0.406481060590826"
## [18,] "p_k" "-0.254185298821497"
## [19,] "l_f" "-0.149862879794764"
## [20,] "v_l" "-0.149862879794764"
## [21,] "t_n" "-0.14114100286009"
## [22,] "n_s" "-0.11401592502542"
## [23,] "n_f" "0.0107706475570784"
## [24,] "s_n" "0.0804818080915829"
## [25,] "s_m" "0.256241302944647"
## [26,] "k_l" "0.606818997368661"
## [27,] "s_s" "0.685954469290349"
## [28,] "s_l" "0.84039865192298"
## [29,] "k_s" "0.943726455073432"
## [30,] "ng_k" "1.00998766987648"
## [31,] "k_f" "1.08709948310984"
## [32,] "n_t" "1.27261532172186"
## [33,] "m_p" "1.28951061044545"
## [34,] "s_k" "1.34858070439601"
## [35,] "m_s" "1.35049852978569"
## [36,] "s_p" "1.85593291513217"
## [37,] "s_t" "1.95196687814657"
We can compare this to a model that adds an interaction between speaker and V status–the interaction does not significantly improve the model:
#Same structure as the main model, but speaker and vowel status enter with their interaction
deletion.glmer_interaction <- glmer(
  formula = deleted_binary ~ + (1|C_environment) + is_it_footed + speaker * V_status_wrt_English,
  data = tongan_deletion_subset,
  family = binomial
)
summary(deletion.glmer_interaction)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: deleted_binary ~ +(1 | C_environment) + is_it_footed + speaker * V_status_wrt_English
## Data: tongan_deletion_subset
##
## AIC BIC logLik deviance df.resid
## 265.0 294.2 -124.5 249.0 276
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.2061 -0.3962 -0.1450 0.3885 3.4268
##
## Random effects:
## Groups Name Variance Std.Dev.
## C_environment (Intercept) 1.874 1.369
## Number of obs: 284, groups: C_environment, 37
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.4379 0.8125 -3.001 0.00269 **
## is_it_footed 0.4886 0.5595 0.873 0.38250
## speakerS2 -1.5939 1.2239 -1.302 0.19281
## speakerS3 0.5131 0.7973 0.644 0.51987
## V_status_wrt_Englishepenthetic 0.9629 0.7034 1.369 0.17097
## speakerS2:V_status_wrt_Englishepenthetic -1.2207 1.3422 -0.910 0.36309
## speakerS3:V_status_wrt_Englishepenthetic 1.3427 0.9073 1.480 0.13890
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) is_t_f spkrS2 spkrS3 V_s__E sS2:V_
## is_it_footd -0.599
## speakerS2 -0.313 0.012
## speakerS3 -0.580 0.138 0.331
## V_stts_wr_E -0.531 -0.068 0.402 0.575
## spkS2:V___E 0.314 -0.023 -0.905 -0.307 -0.444
## spkS3:V___E 0.453 -0.086 -0.300 -0.868 -0.645 0.325
#Type II Wald chi-square tests, including the speaker-by-vowel-status interaction term
Anova(deletion.glmer_interaction)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: deleted_binary
## Chisq Df Pr(>Chisq)
## is_it_footed 0.7626 1 0.382502
## speaker 50.2421 2 1.23e-11 ***
## V_status_wrt_English 7.3364 1 0.006757 **
## speaker:V_status_wrt_English 4.3497 2 0.113625
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Compare the model with the interaction against the model without it
anova(deletion.glmer_interaction, deletion.glmer)
## Data: tongan_deletion_subset
## Models:
## deletion.glmer: deleted_binary ~ +(1 | C_environment) + speaker + is_it_footed +
## deletion.glmer: V_status_wrt_English
## deletion.glmer_interaction: deleted_binary ~ +(1 | C_environment) + is_it_footed + speaker *
## deletion.glmer_interaction: V_status_wrt_English
## Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)
## deletion.glmer 6 264.88 286.78 -126.44 252.88
## deletion.glmer_interaction 8 264.97 294.16 -124.49 248.97 3.9121 2 0.1414
Get some predicted probabilities out of the model, for illustration
#Predicted deletion probabilities for a few illustrative cases, from the fixed effects only.
#Coefficients are looked up by name rather than by position, so each line keeps its meaning
#even if the order of terms in the model formula changes. local() keeps the helper
#variables out of the workspace; its value is printed as before.
local({
  coefs <- fixef(deletion.glmer)
  intercept <- coefs["(Intercept)"] # reference case: speaker S1, is_it_footed = 0, non-epenthetic vowel
  invlogit(c(
    intercept, # S1 baseline
    coefs["speakerS2"] + intercept, # S2 baseline
    coefs["is_it_footed"] + intercept, # S1, footed
    coefs["V_status_wrt_Englishepenthetic"] + intercept, # S1, epenthetic
    coefs["is_it_footed"] + coefs["V_status_wrt_Englishepenthetic"] + intercept # S1, footed and epenthetic
  ))
})
## (Intercept) speakerS2 is_it_footed
## 0.050826854 0.003489842 0.086988488
## V_status_wrt_Englishepenthetic is_it_footed
## 0.192322053 0.297590461
Above, the variable deleted_binary treats items that showed deletion in just one of the two repetitions as deleted (there were 9 such items). Here, we try two other options. First, treating such items as non-deleted:
#Same model as before, but the response is TRUE only when deleted exceeds 0.5,
#so an item with deleted equal to 0.5 counts as not deleted
deletion_treatAsNonDeleted.glmer <- glmer(
  formula = (deleted>0.5) ~ + (1|C_environment) + speaker + is_it_footed + V_status_wrt_English,
  data = tongan_deletion_subset,
  family = binomial
)
summary(deletion_treatAsNonDeleted.glmer)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: (deleted > 0.5) ~ +(1 | C_environment) + speaker + is_it_footed +
## V_status_wrt_English
## Data: tongan_deletion_subset
##
## AIC BIC logLik deviance df.resid
## 265.8 287.6 -126.9 253.8 278
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.9933 -0.3926 -0.1877 0.4975 5.2921
##
## Random effects:
## Groups Name Variance Std.Dev.
## C_environment (Intercept) 1.643 1.282
## Number of obs: 284, groups: C_environment, 37
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.6352 0.6887 -3.826 0.00013 ***
## speakerS2 -2.9039 0.5947 -4.883 1.05e-06 ***
## speakerS3 1.3792 0.3696 3.732 0.00019 ***
## is_it_footed 0.2913 0.5403 0.539 0.58986
## V_status_wrt_Englishepenthetic 1.2420 0.5243 2.369 0.01784 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) spkrS2 spkrS3 is_t_f
## speakerS2 -0.053
## speakerS3 -0.434 0.221
## is_it_footd -0.609 -0.001 0.107
## V_stts_wr_E -0.314 -0.062 0.019 -0.189
#Type II Wald chi-square tests for the treat-as-non-deleted variant
Anova(deletion_treatAsNonDeleted.glmer)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: (deleted > 0.5)
## Chisq Df Pr(>Chisq)
## speaker 48.1661 2 3.474e-11 ***
## is_it_footed 0.2906 1 0.58986
## V_status_wrt_English 5.6122 1 0.01784 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
And here, we simply exclude such items:
#Copy the deletion score, blanking out (NA) every item whose score is exactly 0.5
tongan_deletion_subset$deleted_excluding <- replace(
  tongan_deletion_subset$deleted,
  which(tongan_deletion_subset$deleted==0.5),
  NA
)
#Refit the main model on the new response; rows with an NA response are left out of the fit
deletion_exclude.glmer <- glmer(
  formula = deleted_excluding ~ + (1|C_environment) + speaker + is_it_footed + V_status_wrt_English,
  data = tongan_deletion_subset,
  family = binomial
)
summary(deletion_exclude.glmer)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: deleted_excluding ~ +(1 | C_environment) + speaker + is_it_footed +
## V_status_wrt_English
## Data: tongan_deletion_subset
##
## AIC BIC logLik deviance df.resid
## 247.5 269.2 -117.8 235.5 269
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8002 -0.3676 -0.1301 0.4251 5.5413
##
## Random effects:
## Groups Name Variance Std.Dev.
## C_environment (Intercept) 2.063 1.436
## Number of obs: 275, groups: C_environment, 37
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.9300 0.7390 -3.965 7.34e-05 ***
## speakerS2 -3.1427 0.6185 -5.081 3.75e-07 ***
## speakerS3 1.5287 0.3967 3.853 0.000117 ***
## is_it_footed 0.4848 0.5664 0.856 0.392057
## V_status_wrt_Englishepenthetic 1.4231 0.5477 2.598 0.009366 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) spkrS2 spkrS3 is_t_f
## speakerS2 -0.006
## speakerS3 -0.450 0.186
## is_it_footd -0.613 -0.025 0.132
## V_stts_wr_E -0.317 -0.083 0.044 -0.183
#Type II Wald chi-square tests for the variant that excludes the 0.5 items
Anova(deletion_exclude.glmer)
## Analysis of Deviance Table (Type II Wald chisquare tests)
##
## Response: deleted_excluding
## Chisq Df Pr(>Chisq)
## speaker 49.6696 2 1.638e-11 ***
## is_it_footed 0.7326 1 0.392057
## V_status_wrt_English 6.7518 1 0.009366 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Testing the consonant factors from above to see if they contribute significantly to the model. First, English C-C transition type (for epenthetic vowels only, since that’s the only case where there is an English transition). We can see that transition type does have a significant effect, but not in a way that’s particularly interpretable: the significant pairwise differences are that there is less deletion in son_fric than fric_stop or nas_stop:
#Ordinary logistic regression, fitted to the epenthetic-vowel rows only.
#Terms deliberately left out of the formula:
#  (1|C_environment)     no random effect on C_enviro (Hessian is numerically singular)
#  V_status_wrt_English  constant here, since the data are restricted to "epenthetic"
deletion_transitionType.glm <- glm(
  formula = deleted_binary ~ + speaker + is_it_footed + English_transition_types,
  family = binomial,
  data = tongan_deletion_subset[tongan_deletion_subset$V_status_wrt_English=="epenthetic",]
)
summary(deletion_transitionType.glm)
##
## Call:
## glm(formula = deleted_binary ~ +speaker + is_it_footed + English_transition_types,
## family = binomial, data = tongan_deletion_subset[tongan_deletion_subset$V_status_wrt_English ==
## "epenthetic", ])
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4402 -0.5625 -0.1269 0.5805 2.2526
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.5044 1.4229 -1.760 0.0784 .
## speakerS2 -2.7240 0.5640 -4.830 1.37e-06 ***
## speakerS3 1.9648 0.4766 4.123 3.74e-05 ***
## is_it_footed 0.5387 0.7013 0.768 0.4424
## English_transition_typesfric_son 0.2297 1.6480 0.139 0.8892
## English_transition_typesfric_stop 2.9260 1.3892 2.106 0.0352 *
## English_transition_typesnas_stop 2.2350 1.3733 1.628 0.1036
## English_transition_typesson_fric 0.2654 1.3891 0.191 0.8485
## English_transition_typesson_son -15.1410 1098.1188 -0.014 0.9890
## English_transition_typesson_stop -14.6003 2399.5451 -0.006 0.9951
## English_transition_typesstop_fric 2.5774 1.5383 1.675 0.0938 .
## English_transition_typesstop_son -0.1792 1.7830 -0.100 0.9200
## English_transition_typesstop_stop -0.1275 1.7920 -0.071 0.9433
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 287.41 on 207 degrees of freedom
## Residual deviance: 168.96 on 195 degrees of freedom
## (1 observation deleted due to missingness)
## AIC: 194.96
##
## Number of Fisher Scoring iterations: 15
#Type II likelihood-ratio test for each term of the transition-type model
Anova(deletion_transitionType.glm)
## Analysis of Deviance Table (Type II tests)
##
## Response: deleted_binary
## LR Chisq Df Pr(>Chisq)
## speaker 93.065 2 < 2.2e-16 ***
## is_it_footed 0.588 1 0.443
## English_transition_types 39.814 9 8.213e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#All pairwise (Tukey) contrasts among the English transition types
deletion_transitionType.glht_English_transition_types <- glht(
  deletion_transitionType.glm,
  linfct = mcp(English_transition_types = "Tukey")
)
summary(deletion_transitionType.glht_English_transition_types)
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glm(formula = deleted_binary ~ +speaker + is_it_footed + English_transition_types,
## family = binomial, data = tongan_deletion_subset[tongan_deletion_subset$V_status_wrt_English ==
## "epenthetic", ])
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## fric_son - fric_fric == 0 0.22968 1.64800 0.139 1.0000
## fric_stop - fric_fric == 0 2.92604 1.38923 2.106 0.4215
## nas_stop - fric_fric == 0 2.23505 1.37328 1.628 0.7652
## son_fric - fric_fric == 0 0.26543 1.38908 0.191 1.0000
## son_son - fric_fric == 0 -15.14099 1098.11881 -0.014 1.0000
## son_stop - fric_fric == 0 -14.60031 2399.54511 -0.006 1.0000
## stop_fric - fric_fric == 0 2.57740 1.53832 1.675 0.7336
## stop_son - fric_fric == 0 -0.17916 1.78298 -0.100 1.0000
## stop_stop - fric_fric == 0 -0.12750 1.79198 -0.071 1.0000
## fric_stop - fric_son == 0 2.69636 1.09440 2.464 0.2111
## nas_stop - fric_son == 0 2.00537 1.06579 1.882 0.5863
## son_fric - fric_son == 0 0.03575 1.08114 0.033 1.0000
## son_son - fric_son == 0 -15.37067 1098.11846 -0.014 1.0000
## son_stop - fric_son == 0 -14.82999 2399.54495 -0.006 1.0000
## stop_fric - fric_son == 0 2.34772 1.29296 1.816 0.6352
## stop_son - fric_son == 0 -0.40885 1.59243 -0.257 1.0000
## stop_stop - fric_son == 0 -0.35718 1.61532 -0.221 1.0000
## nas_stop - fric_stop == 0 -0.69099 0.49600 -1.393 0.8908
## son_fric - fric_stop == 0 -2.66061 0.63105 -4.216 <0.01 ***
## son_son - fric_stop == 0 -18.06704 1098.11813 -0.016 1.0000
## son_stop - fric_stop == 0 -17.52635 2399.54476 -0.007 1.0000
## stop_fric - fric_stop == 0 -0.34864 0.86741 -0.402 1.0000
## stop_son - fric_stop == 0 -3.10521 1.31717 -2.357 0.2649
## stop_stop - fric_stop == 0 -3.05354 1.34056 -2.278 0.3105
## son_fric - nas_stop == 0 -1.96962 0.58159 -3.387 0.0152 *
## son_son - nas_stop == 0 -17.37604 1098.11810 -0.016 1.0000
## son_stop - nas_stop == 0 -16.83536 2399.54475 -0.007 1.0000
## stop_fric - nas_stop == 0 0.34235 0.85123 0.402 1.0000
## stop_son - nas_stop == 0 -2.41421 1.29999 -1.857 0.6033
## stop_stop - nas_stop == 0 -2.36255 1.32624 -1.781 0.6593
## son_son - son_fric == 0 -15.40643 1098.11810 -0.014 1.0000
## son_stop - son_fric == 0 -14.86574 2399.54478 -0.006 1.0000
## stop_fric - son_fric == 0 2.31197 0.93267 2.479 0.2036
## stop_son - son_fric == 0 -0.44460 1.32180 -0.336 1.0000
## stop_stop - son_fric == 0 -0.39293 1.34907 -0.291 1.0000
## son_stop - son_son == 0 0.54069 2638.87821 0.000 1.0000
## stop_fric - son_son == 0 17.71839 1098.11835 0.016 1.0000
## stop_son - son_son == 0 14.96183 1098.11873 0.014 1.0000
## stop_stop - son_son == 0 15.01350 1098.11876 0.014 1.0000
## stop_fric - son_stop == 0 17.17771 2399.54485 0.007 1.0000
## stop_son - son_stop == 0 14.42114 2399.54507 0.006 1.0000
## stop_stop - son_stop == 0 14.47281 2399.54508 0.006 1.0000
## stop_son - stop_fric == 0 -2.75656 1.47478 -1.869 0.5960
## stop_stop - stop_fric == 0 -2.70490 1.49282 -1.812 0.6378
## stop_stop - stop_son == 0 0.05167 1.74736 0.030 1.0000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
How about the consonant manners in the Tongan word? They do matter, but only in that there’s more deletion in fric_stop than in sonorant_fric, and marginally more (p is above but close to 0.05) in fric_stop than in sonorant_stop or stop_stop (not surprising, since after a fricative is a common environment for devoicing/deletion in native words too):
#Ordinary logistic regression with the coarse consonant-manner environment as a predictor.
#Left out of the formula:
#  (1|C_environment)  no random effect on C_enviro (large eigenvalue ratio)
deletion_CEnviro.glm <- glm(
  formula = deleted_binary ~ + speaker + is_it_footed + C_enviro_coarse + V_status_wrt_English,
  family = binomial,
  data = tongan_deletion_subset
)
summary(deletion_CEnviro.glm)
##
## Call:
## glm(formula = deleted_binary ~ +speaker + is_it_footed + C_enviro_coarse +
## V_status_wrt_English, family = binomial, data = tongan_deletion_subset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.4162 -0.6638 -0.2040 0.6233 2.5312
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -19.3797 1093.5332 -0.018 0.986
## speakerS2 -2.7912 0.5361 -5.207 1.92e-07 ***
## speakerS3 1.4731 0.3730 3.950 7.83e-05 ***
## is_it_footed 0.6266 0.5097 1.229 0.219
## C_enviro_coarsesonorant_fric 15.9208 1093.5330 0.015 0.988
## C_enviro_coarsesonorant_stop 17.0583 1093.5330 0.016 0.988
## C_enviro_coarsefric_sonorant 16.8936 1093.5332 0.015 0.988
## C_enviro_coarsefric_fric 16.0567 1093.5334 0.015 0.988
## C_enviro_coarsefric_stop 18.3821 1093.5331 0.017 0.987
## C_enviro_coarsestop_sonorant 15.0519 1093.5336 0.014 0.989
## C_enviro_coarsestop_fric 17.3352 1093.5332 0.016 0.987
## C_enviro_coarsestop_stop 14.8838 1093.5336 0.014 0.989
## V_status_wrt_Englishepenthetic 1.7615 0.4504 3.911 9.18e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 378.23 on 283 degrees of freedom
## Residual deviance: 229.12 on 271 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 255.12
##
## Number of Fisher Scoring iterations: 16
#Type II likelihood-ratio test for each term of the consonant-environment model
Anova(deletion_CEnviro.glm)
## Analysis of Deviance Table (Type II tests)
##
## Response: deleted_binary
## LR Chisq Df Pr(>Chisq)
## speaker 95.092 2 < 2.2e-16 ***
## is_it_footed 1.537 1 0.215
## C_enviro_coarse 45.202 8 3.369e-07 ***
## V_status_wrt_English 17.305 1 3.184e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#All pairwise (Tukey) contrasts among the coarse consonant environments
deletion_CEnviro.glht_C_enviro_coarse <- glht(
  deletion_CEnviro.glm,
  linfct = mcp(C_enviro_coarse = "Tukey")
)
summary(deletion_CEnviro.glht_C_enviro_coarse)
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glm(formula = deleted_binary ~ +speaker + is_it_footed + C_enviro_coarse +
## V_status_wrt_English, family = binomial, data = tongan_deletion_subset)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## sonorant_fric - sonorant_sonorant == 0 15.9208 1093.5330 0.015 1.0000
## sonorant_stop - sonorant_sonorant == 0 17.0583 1093.5330 0.016 1.0000
## fric_sonorant - sonorant_sonorant == 0 16.8936 1093.5332 0.015 1.0000
## fric_fric - sonorant_sonorant == 0 16.0567 1093.5334 0.015 1.0000
## fric_stop - sonorant_sonorant == 0 18.3821 1093.5331 0.017 1.0000
## stop_sonorant - sonorant_sonorant == 0 15.0519 1093.5336 0.014 1.0000
## stop_fric - sonorant_sonorant == 0 17.3352 1093.5332 0.016 1.0000
## stop_stop - sonorant_sonorant == 0 14.8838 1093.5336 0.014 1.0000
## sonorant_stop - sonorant_fric == 0 1.1375 0.4665 2.439 0.2064
## fric_sonorant - sonorant_fric == 0 0.9728 0.7812 1.245 0.9243
## fric_fric - sonorant_fric == 0 0.1359 0.9951 0.137 1.0000
## fric_stop - sonorant_fric == 0 2.4613 0.5214 4.720 <0.001 ***
## stop_sonorant - sonorant_fric == 0 -0.8689 1.2119 -0.717 0.9977
## stop_fric - sonorant_fric == 0 1.4144 0.7533 1.878 0.5561
## stop_stop - sonorant_fric == 0 -1.0370 1.1792 -0.879 0.9908
## fric_sonorant - sonorant_stop == 0 -0.1647 0.7385 -0.223 1.0000
## fric_fric - sonorant_stop == 0 -1.0016 0.9724 -1.030 0.9747
## fric_stop - sonorant_stop == 0 1.3237 0.4575 2.894 0.0663 .
## stop_sonorant - sonorant_stop == 0 -2.0064 1.1955 -1.678 0.6978
## stop_fric - sonorant_stop == 0 0.2769 0.7197 0.385 1.0000
## stop_stop - sonorant_stop == 0 -2.1746 1.1604 -1.874 0.5586
## fric_fric - fric_sonorant == 0 -0.8369 1.1400 -0.734 0.9973
## fric_stop - fric_sonorant == 0 1.4885 0.7636 1.949 0.5041
## stop_sonorant - fric_sonorant == 0 -1.8417 1.3452 -1.369 0.8764
## stop_fric - fric_sonorant == 0 0.4416 0.9458 0.467 0.9999
## stop_stop - fric_sonorant == 0 -2.0098 1.3079 -1.537 0.7890
## fric_stop - fric_fric == 0 2.3254 0.9992 2.327 0.2624
## stop_sonorant - fric_fric == 0 -1.0048 1.4818 -0.678 0.9985
## stop_fric - fric_fric == 0 1.2785 1.1397 1.122 0.9579
## stop_stop - fric_fric == 0 -1.1730 1.4495 -0.809 0.9947
## stop_sonorant - fric_stop == 0 -3.3302 1.2188 -2.732 0.1021
## stop_fric - fric_stop == 0 -1.0468 0.7462 -1.403 0.8607
## stop_stop - fric_stop == 0 -3.4983 1.1837 -2.955 0.0553 .
## stop_fric - stop_sonorant == 0 2.2833 1.3322 1.714 0.6730
## stop_stop - stop_sonorant == 0 -0.1681 1.6077 -0.105 1.0000
## stop_stop - stop_fric == 0 -2.4515 1.3024 -1.882 0.5526
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
And finally, how about homorganicity? Iff we remove the random effect of consonant environment, then we do get a significant effect of homorganicity: more deletion if homorganic (the coefficient is about half the size of the vowel-status coefficient, and its likelihood-ratio chi-square is about one-third as large):
#Logistic regression with homorganicity as a predictor.
#Note: the object name ends in ".glmer", but the model is fitted with glm(), because the
#random effect (1|C_environment) is left out: remove it to give homorganicity its best shot
deletion_CsHomorganic.glmer <- glm(
  formula = deleted_binary ~ + speaker + is_it_footed + Cs_homorganic_or_not + V_status_wrt_English,
  family = binomial,
  data = tongan_deletion_subset
)
summary(deletion_CsHomorganic.glmer)
##
## Call:
## glm(formula = deleted_binary ~ +speaker + is_it_footed + Cs_homorganic_or_not +
## V_status_wrt_English, family = binomial, data = tongan_deletion_subset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8539 -0.6334 -0.3155 0.6287 2.7870
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.7730 0.5700 -4.865 1.15e-06 ***
## speakerS2 -2.3584 0.4889 -4.824 1.41e-06 ***
## speakerS3 1.2352 0.3314 3.727 0.000194 ***
## is_it_footed 0.3657 0.4606 0.794 0.427243
## Cs_homorganic_or_nothomorganic 0.9027 0.3247 2.780 0.005430 **
## V_status_wrt_Englishepenthetic 1.7902 0.3971 4.508 6.54e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 378.23 on 283 degrees of freedom
## Residual deviance: 266.33 on 278 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 278.33
##
## Number of Fisher Scoring iterations: 5
#Type II likelihood-ratio test for each term of the homorganicity model
Anova(deletion_CsHomorganic.glmer)
## Analysis of Deviance Table (Type II tests)
##
## Response: deleted_binary
## LR Chisq Df Pr(>Chisq)
## speaker 81.471 2 < 2.2e-16 ***
## is_it_footed 0.638 1 0.424595
## Cs_homorganic_or_not 7.997 1 0.004686 **
## V_status_wrt_English 23.805 1 1.066e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Interestingly, there is no significant interaction of homorganicity and vowel status. This suggests that deletion is about the resulting consonant cluster (and, independently, about the vowel status), not about being faithful to English when markedness allows it:
#Homorganicity model with a homorganicity-by-vowel-status interaction added.
#Note: the object name ends in ".glmer", but the model is fitted with glm(), because the
#random effect (1|C_environment) is left out: remove it to give homorganicity its best shot
deletion_CsHomorganic_interact.glmer <- glm(
  formula = deleted_binary ~ + speaker + is_it_footed + Cs_homorganic_or_not * V_status_wrt_English,
  family = binomial,
  data = tongan_deletion_subset
)
summary(deletion_CsHomorganic_interact.glmer)
##
## Call:
## glm(formula = deleted_binary ~ +speaker + is_it_footed + Cs_homorganic_or_not *
## V_status_wrt_English, family = binomial, data = tongan_deletion_subset)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.8126 -0.5480 -0.2954 0.6558 2.6751
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -3.5498 0.8620 -4.118 3.82e-05
## speakerS2 -2.3397 0.4868 -4.807 1.53e-06
## speakerS3 1.2080 0.3317 3.642 0.000271
## is_it_footed 0.4396 0.4677 0.940 0.347343
## Cs_homorganic_or_nothomorganic 1.9615 0.8472 2.315 0.020591
## V_status_wrt_Englishepenthetic 2.6718 0.7976 3.350 0.000808
## Cs_homorganic_or_nothomorganic:V_status_wrt_Englishepenthetic -1.3034 0.9236 -1.411 0.158179
##
## (Intercept) ***
## speakerS2 ***
## speakerS3 ***
## is_it_footed
## Cs_homorganic_or_nothomorganic *
## V_status_wrt_Englishepenthetic ***
## Cs_homorganic_or_nothomorganic:V_status_wrt_Englishepenthetic
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 378.23 on 283 degrees of freedom
## Residual deviance: 264.08 on 277 degrees of freedom
## (2 observations deleted due to missingness)
## AIC: 278.08
##
## Number of Fisher Scoring iterations: 5
#Type II likelihood-ratio tests, including the homorganicity-by-vowel-status interaction
Anova(deletion_CsHomorganic_interact.glmer)
## Analysis of Deviance Table (Type II tests)
##
## Response: deleted_binary
## LR Chisq Df Pr(>Chisq)
## speaker 80.076 2 < 2.2e-16 ***
## is_it_footed 0.894 1 0.344433
## Cs_homorganic_or_not 7.997 1 0.004686 **
## V_status_wrt_English 23.805 1 1.066e-06 ***
## Cs_homorganic_or_not:V_status_wrt_English 2.246 1 0.134003
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Read in the file and take a look at it:
#Read the data file; the first line holds the column names.
#TRUE is spelled out because T is an ordinary variable that can be reassigned.
#stringsAsFactors=TRUE makes explicit the behaviour that was the default before R 4.0,
#so text columns are read as factors and the summary below looks the same in newer R versions.
tongan_superset <- read.table("Tongan_forR_revised_July2016.txt", header=TRUE, stringsAsFactors=TRUE)
summary(tongan_superset)
## good_pattern good_pattern_binary order original_order speaker
## 1 :305 Min. :0.0000 139 : 2 139 : 5 S1:156
## V1_long : 66 1st Qu.:0.0000 144 : 2 124 : 4 S2:209
## V2_long : 65 Median :0.0000 147 : 2 125 : 4 S3:251
## V12_diphth: 59 Mean :0.4951 178 : 2 128 : 4
## V3_long : 53 3rd Qu.:1.0000 20 : 2 144 : 4
## V34_diphth: 29 Max. :1.0000 (Other):599 (Other):588
## (Other) : 39 NA's : 7 NA's : 7
## dictionary_entry definition first_V_long first_V_stressed_in_English
## sovaleni,_s?valeni: 5 sacrament : 6 Min. :0.0000 Min. :0.0000
## ‘?pelikoti : 4 sovereign_(coin): 5 1st Qu.:0.0000 1st Qu.:0.0000
## ‘inisulato : 4 apricot : 4 Median :0.0000 Median :1.0000
## ‘univesiti : 4 continent : 4 Mean :0.1033 Mean :0.6136
## hemisefia : 4 hemisphere : 4 3rd Qu.:0.0000 3rd Qu.:1.0000
## hipopotama : 4 hippopotamus : 4 Max. :1.0000 Max. :1.0000
## (Other) :591 (Other) :589 NA's :1
## first_V_epenthetic_in_English first_V first_V_glide_in_English second_V_long
## Min. :0.0000 epenthetic:124 Min. :0.00000 Min. :0.000
## 1st Qu.:0.0000 stressed :378 1st Qu.:0.00000 1st Qu.:0.000
## Median :0.0000 unstressed:114 Median :0.00000 Median :0.000
## Mean :0.2013 Mean :0.04383 Mean :0.112
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.000
## Max. :1.0000 Max. :1.00000 Max. :1.000
##
## second_V_stressed_in_English second_V_epenthetic_in_English second_V
## Min. :0.0000 Min. :0.0000 epenthetic:181
## 1st Qu.:0.0000 1st Qu.:0.0000 stressed :200
## Median :0.0000 Median :0.0000 unstressed:234
## Mean :0.3263 Mean :0.2943 NA's : 1
## 3rd Qu.:1.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
## NA's :1
## second_V_glide_in_English secondV_short_and_third_V_long first_two_Vs_VV speaker_V1_quality
## Min. :0.0000 Min. :0.00000 Min. :0.0000 a :253
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000 e :137
## Median :0.0000 Median :0.00000 Median :0.0000 i :108
## Mean :0.1153 Mean :0.08604 Mean :0.1558 o : 95
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000 u : 21
## Max. :1.0000 Max. :1.00000 Max. :1.0000 NA's: 2
##
## speaker_V2_quality starts_with_possible_cluster first_two_Cs CVC_pattern final_V
## a :137 Min. :0.0000 pl : 62 CVCVCVCVCV :137 CV:484
## e :100 1st Qu.:0.0000 kl : 40 CVCVCVVCV : 47 V:: 51
## i :251 Median :0.0000 ‘n : 33 CVCV:CVCV : 37 VV: 81
## o : 96 Mean :0.2484 s0 : 22 CVCVCV:CVCV: 32
## u : 29 3rd Qu.:0.0000 kn : 21 CVCVCVCV: : 24
## NA's: 3 Max. :1.0000 sk : 20 CVCVCVCVV : 19
## (Other):418 (Other) :320
## final_V_type penult_V_type CVC_pattern_coarse is_1st_V_stressed is_2nd_V_stressed
## epenthetic:411 epenthetic: 36 CVCVCV-F :200 Min. :0.0000 Min. :0.0000
## stressed : 5 glide : 1 CVCV-F : 65 1st Qu.:0.0000 1st Qu.:0.0000
## tense : 42 stressed : 37 CVCVCVVCV: 46 Median :1.0000 Median :0.0000
## unstressed: 73 unstressed: 48 CVCV:-F : 43 Mean :0.6145 Mean :0.3278
## NA's : 85 NA's :494 CVVCV-F : 22 3rd Qu.:1.0000 3rd Qu.:1.0000
## CV:CV-F : 21 Max. :1.0000 Max. :1.0000
## (Other) :219 NA's :9 NA's :12
#List the column names of the full data set
names(tongan_superset)
## [1] "good_pattern" "good_pattern_binary"
## [3] "order" "original_order"
## [5] "speaker" "dictionary_entry"
## [7] "definition" "first_V_long"
## [9] "first_V_stressed_in_English" "first_V_epenthetic_in_English"
## [11] "first_V" "first_V_glide_in_English"
## [13] "second_V_long" "second_V_stressed_in_English"
## [15] "second_V_epenthetic_in_English" "second_V"
## [17] "second_V_glide_in_English" "secondV_short_and_third_V_long"
## [19] "first_two_Vs_VV" "speaker_V1_quality"
## [21] "speaker_V2_quality" "starts_with_possible_cluster"
## [23] "first_two_Cs" "CVC_pattern"
## [25] "final_V" "final_V_type"
## [27] "penult_V_type" "CVC_pattern_coarse"
## [29] "is_1st_V_stressed" "is_2nd_V_stressed"
Some clean-ups needed:
# Keep only the tokens whose English final V is unstressed; finals coded
# "tense" are tense but unstressed, so they are kept as well.
tongan_length <- subset(
  tongan_superset,
  final_V_type %in% c("unstressed", "tense")
)
# Remove the glide cases. subset() also drops rows where the condition is NA.
tongan_length <- subset(tongan_length, penult_V_type != "glide")
# Drop unused word levels, then list the words with their token counts.
tongan_length$definition <- factor(tongan_length$definition)
table(tongan_length$definition)
##
## alligator Aspro astronomer banjo barometer
## 3 2 3 3 2
## centurion chutney Cinderella commutator cucumber
## 3 1 3 2 3
## customer denominator diameter dictionary dynamo
## 3 2 2 3 1
## etcetera eternity February geography gladiolus_(flower)
## 2 3 1 3 3
## Glaxo helicopter hippopotamus inspector insulator
## 1 3 4 3 4
## January lavender_(scent) magnesia Mercury Mexico
## 1 3 1 1 2
## minister organdie_(fabric) phonography phylactery professor
## 3 2 3 2 2
## programmer register secretary September society
## 2 3 4 2 1
## station sticker-plaster swastika transmitter trustee
## 1 3 3 4 1
## unit university whisky
## 1 4 2
# Drop the factor levels left empty by the subsetting above.
tongan_length$final_V <- factor(tongan_length$final_V)
# Drop empty levels of penult_V_type, then move "unstressed" and afterwards
# "epenthetic" to the front (each relevel() puts its level first).
tongan_length$penult_V_type <- relevel(
  relevel(factor(tongan_length$penult_V_type), ref = "unstressed"),
  ref = "epenthetic"
)
# Now the order should be epenthetic-unstressed-stressed.
Plot the dependent variable (whether final V is long or not), by the main independent variable (what kind of English V the penult corresponds to)
# Cross-tabulate penult status (rows) against final-V length (columns).
length_counts <- with(
  tongan_length,
  table(
    penult_V_type,
    final_V,
    dnn = c("preceding V status", "final V length")
  )
)
length_counts
## final V length
## preceding V status CV V:
## epenthetic 9 27
## unstressed 28 15
## stressed 35 0
# Mosaic plot of final-V length by penult status on the current device.
mosaic(
  length_counts,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0),
    offset_varnames = c(left = 1),
    offset_labels = c(left = 0.1),
    varnames = c(TRUE, TRUE),
    set_labels = list("final V length" = c("short", "long"))
  ),
  margins = c(3, 0, 1, 2) + 0.1
)
# Write to file: open a .png device, draw the same plot again, close the device.
png(
  filename = "Vowel_length_overall_mosaic_plot2.png",
  width = myResMultiplier * 600,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
)
mosaic(
  length_counts,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0),
    offset_varnames = c(left = 1),
    offset_labels = c(left = 0.1),
    varnames = c(TRUE, TRUE),
    set_labels = list("final V length" = c("short", "long"))
  ),
  margins = c(3, 0, 1, 2) + 0.1
)
dev.off()
## png
## 2
# The same cross-tabulation, computed separately for each speaker.
length_counts_S1 <- with(
  tongan_length[tongan_length$speaker == "S1", ],
  table(
    penult_V_type,
    final_V,
    dnn = c("preceding V status", "final V length")
  )
)
length_counts_S2 <- with(
  tongan_length[tongan_length$speaker == "S2", ],
  table(
    penult_V_type,
    final_V,
    dnn = c("preceding V status", "final V length")
  )
)
length_counts_S3 <- with(
  tongan_length[tongan_length$speaker == "S3", ],
  table(
    penult_V_type,
    final_V,
    dnn = c("preceding V status", "final V length")
  )
)
length_counts_S1
## final V length
## preceding V status CV V:
## epenthetic 2 7
## unstressed 9 4
## stressed 11 0
length_counts_S2
## final V length
## preceding V status CV V:
## epenthetic 4 6
## unstressed 10 4
## stressed 12 0
length_counts_S3
## final V length
## preceding V status CV V:
## epenthetic 3 14
## unstressed 9 7
## stressed 12 0
# One mosaic plot per speaker, captured as a grob so that the three can be
# arranged side by side. Only the Speaker 1 panel shows the final-V-length
# labels; the other two panels blank them and use a different fourth margin.
p1_length <- grid.grabExpr(
  mosaic(
    length_counts_S1,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 60),
      offset_varnames = c(left = 1),
      offset_labels = c(left = 0.1, top = 0.9),
      varnames = c(FALSE, TRUE),
      set_labels = list("final V length" = c("short", "long"))
    ),
    margins = c(3, 0, 0, 2) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = " Speaker 1"
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
p2_length <- grid.grabExpr(
  mosaic(
    length_counts_S2,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 60),
      offset_varnames = c(left = 1),
      offset_labels = c(left = 0.1, top = 0.9),
      varnames = c(FALSE, FALSE),
      set_labels = list("final V length" = c("", ""))
    ),
    margins = c(3, 0, 0, -4) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "Speaker 2 "
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
p3_length <- grid.grabExpr(
  mosaic(
    length_counts_S3,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 60),
      offset_varnames = c(left = 1),
      offset_labels = c(left = 0.1, top = 0.9),
      varnames = c(FALSE, FALSE),
      set_labels = list("final V length" = c("", ""))
    ),
    margins = c(3, 0, 0, -12) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "Speaker 3 "
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
grid.arrange(p1_length, p2_length, p3_length, ncol = 3)
# Write the three-panel plot to file for use in the paper:
png(
  filename = "Vowel_length_individual_speaker_mosaic_plots2.png",
  width = myResMultiplier * 600,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
)
grid.arrange(p1_length, p2_length, p3_length, ncol = 3)
dev.off()
## png
## 2
Now split up the data according to whether the final English vowel is schwa or a tense V (/i/ or /o/):
# Tokens whose English final V is coded "unstressed" (the schwa cases).
tongan_length_schwa <- tongan_length[
  which(tongan_length$final_V_type == "unstressed"),
]
length_counts_schwa <- with(
  tongan_length_schwa,
  table(
    penult_V_type,
    final_V,
    dnn = c("preceding V status", "final V length")
  )
)
# Tokens whose English final V is coded "tense".
tongan_length_tense <- tongan_length[
  which(tongan_length$final_V_type == "tense"),
]
length_counts_tense <- with(
  tongan_length_tense,
  table(
    penult_V_type,
    final_V,
    dnn = c("preceding V status", "final V length")
  )
)
length_counts_schwa
## final V length
## preceding V status CV V:
## epenthetic 7 16
## unstressed 14 8
## stressed 27 0
length_counts_tense
## final V length
## preceding V status CV V:
## epenthetic 2 11
## unstressed 14 7
## stressed 8 0
# One mosaic plot per English final-V type, captured as grobs for a
# side-by-side layout. Only the schwa panel shows the final-V-length labels.
pschwa <- grid.grabExpr(
  mosaic(
    length_counts_schwa,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 60),
      offset_varnames = c(left = 1),
      offset_labels = c(left = 0.1, top = 0.9),
      varnames = c(FALSE, TRUE),
      set_labels = list("final V length" = c("short", "long"))
    ),
    margins = c(3, 0, 0, 2) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = " English final schwa"
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
ptense <- grid.grabExpr(
  mosaic(
    length_counts_tense,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 60),
      offset_varnames = c(left = 1),
      offset_labels = c(left = 0.1, top = 0.9),
      varnames = c(FALSE, FALSE),
      set_labels = list("final V length" = c("", ""))
    ),
    margins = c(3, 0, 0, -4) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "English final tense V "
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
grid.arrange(pschwa, ptense, ncol = 2)
# Write the two-panel plot to file.
png(
  filename = "Vowel_length_mosaic_plot_by_V_type2.png",
  width = myResMultiplier * 500,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
)
grid.arrange(pschwa, ptense, ncol = 2)
dev.off()
## png
## 2
Here we can use bayesglm() without difficulty to handle the separation in the data (a stressed penult is never followed by a long final V), because there are no random effects. First, we fit the model with maximal interactions:
# Make "unstressed" the reference level of both vowel-type predictors.
tongan_length$penult_V_type <- relevel(
  tongan_length$penult_V_type,
  ref = "unstressed"
)
tongan_length$final_V_type <- relevel(
  tongan_length$final_V_type,
  ref = "unstressed"
)
# Maximal model: speaker, penult type and final-V type with all of their
# two-way and three-way interactions.
final_V.bayesGLM <- bayesglm(
  final_V ~ speaker * penult_V_type * final_V_type,
  family = binomial,
  data = tongan_length
)
Then, we use the function step() to choose the best sub-model:
# Stepwise selection by AIC, starting from the maximal model. No scope is
# given, so (as the trace below shows) step() only considers dropping terms.
final_V.best <- step(final_V.bayesGLM)
## Start: AIC=163.93
## final_V ~ speaker * penult_V_type * final_V_type
##
## Df Deviance AIC
## - speaker:penult_V_type:final_V_type 22 91.732 119.73
## <none> 91.928 163.93
##
## Step: AIC=140.68
## final_V ~ speaker + penult_V_type + final_V_type + speaker:penult_V_type +
## speaker:final_V_type + penult_V_type:final_V_type
##
## Df Deviance AIC
## - speaker:penult_V_type 14 91.998 112.00
## - penult_V_type:final_V_type 12 92.415 116.42
## - speaker:final_V_type 12 92.672 116.67
## <none> 92.681 140.68
##
## Step: AIC=132.9
## final_V ~ speaker + penult_V_type + final_V_type + speaker:final_V_type +
## penult_V_type:final_V_type
##
## Df Deviance AIC
## - penult_V_type:final_V_type 12 92.679 108.68
## - speaker:final_V_type 12 92.983 108.98
## <none> 92.904 132.90
##
## Step: AIC=121.55
## final_V ~ speaker + penult_V_type + final_V_type + speaker:final_V_type
##
## Df Deviance AIC
## - speaker:final_V_type 8 93.687 105.69
## <none> 93.547 121.55
## - penult_V_type 8 145.031 157.03
##
## Step: AIC=110.38
## final_V ~ speaker + penult_V_type + final_V_type
##
## Df Deviance AIC
## - speaker 4 95.892 103.89
## - final_V_type 3 93.900 103.90
## <none> 94.382 110.38
## - penult_V_type 4 145.686 153.69
##
## Step: AIC=108.58
## final_V ~ penult_V_type + final_V_type
##
## Df Deviance AIC
## - final_V_type 3 96.106 102.11
## <none> 96.578 108.58
## - penult_V_type 4 149.022 153.02
##
## Step: AIC=102.79
## final_V ~ penult_V_type
##
## Df Deviance AIC
## <none> 96.789 102.79
## - penult_V_type 2 150.049 152.05
summary(final_V.best)
##
## Call:
## bayesglm(formula = final_V ~ penult_V_type, family = binomial,
## data = tongan_length)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.6419 -0.9281 -0.1377 0.7756 1.4491
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.6193 0.3153 -1.964 0.049512 *
## penult_V_typeepenthetic 1.6665 0.4833 3.448 0.000565 ***
## penult_V_typestressed -4.0339 1.5842 -2.546 0.010887 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 150.049 on 113 degrees of freedom
## Residual deviance: 96.789 on 111 degrees of freedom
## AIC: 102.79
##
## Number of Fisher Scoring iterations: 19
anova(final_V.best, test="Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: final_V
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 113 150.049
## penult_V_type 2 53.26 111 96.789 2.72e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
This actually comes first in the paper.
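Select the relevant subset of the data (good_pattern_binary == 1, and exactly one of the first two vowels stressed) and take a look: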
# Keep tokens with good_pattern_binary == 1 in which exactly one of the first
# two vowels is stressed (the two 0/1 indicators sum to 1).
tongan_stress <- subset(
  tongan_superset,
  (is_1st_V_stressed + is_2nd_V_stressed) == 1 & good_pattern_binary == 1
)
summary(tongan_stress)
## good_pattern good_pattern_binary order original_order speaker
## 1 :293 Min. :1 788 : 2 350 : 4 S1: 97
## 4moras : 0 1st Qu.:1 971 : 2 10 : 3 S2: 92
## antepenult_but_long : 0 Median :1 m167 : 2 100 : 3 S3:104
## antepenult_main_stress: 0 Mean :1 10 : 1 108 : 3
## glide : 0 3rd Qu.:1 100 : 1 12 : 3
## semi_duplicate : 0 Max. :1 (Other):279 (Other):271
## (Other) : 0 NA's : 6 NA's : 6
## dictionary_entry definition first_V_long first_V_stressed_in_English
## ‘amipasitoa : 3 alcohol : 3 Min. :0 Min. :0.0000
## ‘apenitiki : 3 ambassador: 3 1st Qu.:0 1st Qu.:0.0000
## ‘aposetolo : 3 apostle : 3 Median :0 Median :1.0000
## ‘asipulini : 3 appendix : 3 Mean :0 Mean :0.5631
## ‘asital?noma: 3 aspirin : 3 3rd Qu.:0 3rd Qu.:1.0000
## ‘Atalanitiki: 3 astronomer: 3 Max. :0 Max. :1.0000
## (Other) :275 (Other) :275
## first_V_epenthetic_in_English first_V first_V_glide_in_English second_V_long
## Min. :0.0000 epenthetic: 66 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.0000 stressed :165 1st Qu.:0.00000 1st Qu.:0.000000
## Median :0.0000 unstressed: 62 Median :0.00000 Median :0.000000
## Mean :0.2253 Mean :0.04778 Mean :0.003413
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.0000 Max. :1.00000 Max. :1.000000
##
## second_V_stressed_in_English second_V_epenthetic_in_English second_V
## Min. :0.0000 Min. :0.000 epenthetic:109
## 1st Qu.:0.0000 1st Qu.:0.000 stressed :112
## Median :0.0000 Median :0.000 unstressed: 72
## Mean :0.3823 Mean :0.372
## 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :1.0000 Max. :1.000
##
## second_V_glide_in_English secondV_short_and_third_V_long first_two_Vs_VV speaker_V1_quality
## Min. :0.00000 Min. :0 Min. :0.00000 a :108
## 1st Qu.:0.00000 1st Qu.:0 1st Qu.:0.00000 e : 69
## Median :0.00000 Median :0 Median :0.00000 i : 63
## Mean :0.03413 Mean :0 Mean :0.06143 o : 47
## 3rd Qu.:0.00000 3rd Qu.:0 3rd Qu.:0.00000 u : 4
## Max. :1.00000 Max. :0 Max. :1.00000 NA's: 2
##
## speaker_V2_quality starts_with_possible_cluster first_two_Cs CVC_pattern final_V
## a : 46 Min. :0.0000 pl : 32 CVCVCVCVCV :132 CV:221
## e : 42 1st Qu.:0.0000 kl : 25 CVCVCVCV: : 23 V:: 38
## i :134 Median :0.0000 ‘n : 15 CVCVCVVCV : 23 VV: 34
## o : 52 Mean :0.2765 ‘s : 15 CVCVCVCVV : 16
## u : 17 3rd Qu.:1.0000 pn : 15 CVCVCVCVVCV : 15
## NA's: 2 Max. :1.0000 kn : 13 CVCVCVCV:CVCV: 14
## (Other):178 (Other) : 70
## final_V_type penult_V_type CVC_pattern_coarse is_1st_V_stressed is_2nd_V_stressed
## epenthetic:183 epenthetic: 27 CVCVCV-F :189 Min. :0.0000 Min. :0.0000
## stressed : 3 glide : 1 CVCVCVVCV : 22 1st Qu.:0.0000 1st Qu.:0.0000
## tense : 20 stressed : 21 CVCV-F : 19 Median :1.0000 Median :0.0000
## unstressed: 52 unstressed: 29 CVCVCVCVVCV: 15 Mean :0.6416 Mean :0.3584
## NA's : 35 NA's :215 CVVCV-F : 8 3rd Qu.:1.0000 3rd Qu.:1.0000
## CVCVV-F : 7 Max. :1.0000 Max. :1.0000
## (Other) : 33
names(tongan_stress)
## [1] "good_pattern" "good_pattern_binary"
## [3] "order" "original_order"
## [5] "speaker" "dictionary_entry"
## [7] "definition" "first_V_long"
## [9] "first_V_stressed_in_English" "first_V_epenthetic_in_English"
## [11] "first_V" "first_V_glide_in_English"
## [13] "second_V_long" "second_V_stressed_in_English"
## [15] "second_V_epenthetic_in_English" "second_V"
## [17] "second_V_glide_in_English" "secondV_short_and_third_V_long"
## [19] "first_two_Vs_VV" "speaker_V1_quality"
## [21] "speaker_V2_quality" "starts_with_possible_cluster"
## [23] "first_two_Cs" "CVC_pattern"
## [25] "final_V" "final_V_type"
## [27] "penult_V_type" "CVC_pattern_coarse"
## [29] "is_1st_V_stressed" "is_2nd_V_stressed"
Take a look at the actual words, and how many have each pattern:
# How many word types are there? Count tokens per word (after dropping unused
# levels) and sort by increasing count.
defs_table <- sort(table(factor(tongan_stress$definition)))
defs_table
##
## asbestos battalion blancmange brigadier
## 1 1 1 1
## broadcast cauliflower chutney civilian
## 1 1 1 1
## confirmation denominator depositor elocution
## 1 1 1 1
## epiglottis etcetera France Glaxo
## 1 1 1 1
## glycerine indulgence linoleum Mercury
## 1 1 1 1
## metronome nasturtium nectarine organdie_(fabric)
## 1 1 1 1
## penitance Pleiades Protestant republic
## 1 1 1 1
## sacrilege secretary seminary spaniel
## 1 1 1 1
## tabernacle television unit university
## 1 1 1 1
## aluminum antelope apricot Aspro
## 2 2 2 2
## commutator consonant constitution,_charter crucify
## 2 2 2 2
## eucalyptus_oil evolution fraction_(in_arithmetic) geranium_(flower)
## 2 2 2 2
## gramophone heliograph immaculate jonquil_(flower)
## 2 2 2 2
## lithograph magnesium Manchuria manuscript
## 2 2 2 2
## Mediterranean Mexico parachute paragraph
## 2 2 2 2
## pendulum petroleum plastic porcupine
## 2 2 2 2
## potassium predicate programmer register
## 2 2 2 2
## sacrifice sticker-plaster transmitter trombone
## 2 2 2 2
## unison alcohol ambassador apostle
## 2 3 3 3
## appendix aspirin astronomer Atlantic
## 3 3 3 3
## atmosphere banjo breakfast centurion
## 3 3 3 3
## Christian Christmas Cinderella communist
## 3 3 3 3
## conference continent crocodile crystal
## 3 3 3 3
## cucumber customer democracy dictionary
## 3 3 3 3
## engineer estimate gladiolus_(flower) hallelujah
## 3 3 3 3
## helicopter hemisphere hippopotamus inspector
## 3 3 3 3
## insulator insulin kangaroo lavender_(scent)
## 3 3 3 3
## locomotive mandolin math,_matriculation meridian
## 3 3 3 3
## millionaire minister monogram obbligato_(music)
## 3 3 3 3
## ostrich pancreas penguin Pentecost
## 3 3 3 3
## phonography president princess problem
## 3 3 3 3
## prophesy quarantine sacrament Sanhedrin
## 3 3 3 3
## saxophone scholarship stencil swastika
## 3 3 3 3
## telegraph trade_union
## 3 3
dim(defs_table)
## [1] 134
##how many with each stress pattern?
table(tongan_stress$is_1st_V_stressed, tongan_stress$is_2nd_V_stressed)
##
## 0 1
## 0 0 105
## 1 188 0
#make a table of "possible cluster" items
# definition keeps all of its factor levels after the row subsetting, so words
# with no qualifying tokens are still listed, with a count of 0
table(tongan_stress[tongan_stress$starts_with_possible_cluster == 1,]$definition)
##
## absolution admiral alcohol
## 0 0 0
## alligator aluminum ambassador
## 0 0 0
## analyse antarctic antelope
## 0 0 0
## apostle appendix apricot
## 0 0 0
## arctic arsenic asbestos
## 0 0 0
## aspirin Aspro astronomer
## 0 0 0
## Atlantic atmosphere Australia
## 0 0 0
## banjo barbarian baritone
## 0 0 0
## barometer battalion bayonet
## 0 0 0
## blancmange blotting-paper boomerang
## 1 0 0
## braces Brazil breakfast
## 0 0 3
## brigadier Britain broadcast
## 1 0 1
## bronze captain carbolic_acid,_carbolic_soap
## 0 0 0
## carnation cauliflower centurion
## 0 1 0
## chlorodyne Christian Christmas
## 0 3 3
## chromatic chutney Cinderella
## 0 0 3
## civilian civilization,_culture claret
## 0 0 0
## clarinet clarkia_(flower) climate
## 0 0 0
## cocaine communist commutator
## 0 0 0
## compound conference confirmation
## 0 0 0
## conscience consonant constitution,_charter
## 0 0 0
## continent crocodile crucify
## 0 3 2
## crystal cucumber customer
## 3 0 0
## cypress democracy denominator
## 0 0 0
## deposit depositor diagonal
## 0 0 0
## diagram diameter diamond
## 0 0 0
## dictionary diocese disciple
## 0 0 0
## discount doctrine dragon
## 0 0 0
## dynamics dynamo ebonite
## 0 0 0
## element elephant elevation
## 0 0 0
## elocution empire enamel
## 0 0 0
## engineer Epicurean epiglottis
## 0 0 0
## estimate etcetera eternity
## 0 0 0
## eucalyptus_oil evolution February
## 0 0 0
## firmament,_sky florin fountain
## 0 0 0
## fraction_(in_arithmetic) France frost
## 2 1 0
## Gentile geography geranium_(flower)
## 0 0 2
## giant gladiolus_(flower) Glaxo
## 0 3 1
## glory glycerine gospel
## 0 1 0
## gramophone granite grenade
## 2 0 0
## grenadilla_(passion_fruit) hallelujah helicopter
## 0 0 0
## heliograph hemisphere hippopotamus
## 0 0 0
## history hyacinth_(flower) hydrangea_(flower)
## 0 0 0
## hydrogen hypotenuse immaculate
## 0 0 0
## incense indulgence insect
## 0 0 0
## inspector insulator insulin
## 0 0 0
## invoice jacket January
## 0 0 0
## Jerusalem jonquil_(flower) kangaroo
## 0 0 0
## lavender_(scent) linoleum lithograph
## 0 0 0
## locomotive magnesia magnesium
## 0 0 0
## malaria Manchuria mandolin
## 0 0 0
## manuscript margarine marigold
## 0 0 0
## math,_matriculation mayonnaise Mediterranean
## 0 0 0
## merchant Mercury meridian
## 0 0 0
## Messiah metronome Mexico
## 0 0 0
## microphone millionaire minister
## 0 0 0
## molasses mongoose monogram
## 0 0 0
## muslin nasturtium nectarine
## 0 0 0
## Norway obbligato_(music) organdie_(fabric)
## 0 0 0
## ostrich pancreas parachute
## 0 0 2
## Paradise paragraph parallel
## 0 2 0
## patriarch pendulum penguin
## 0 0 0
## penitance Pentecost petroleum
## 0 0 0
## phonography phylactery pioneer
## 0 0 0
## plastic Pleiades pneumonia
## 2 1 0
## poinsettia pomegranate porcupine
## 0 0 0
## potassium predicate president
## 0 2 3
## princess private problem
## 3 0 3
## professor programmer promise
## 0 2 0
## prophesy prophet Protestant
## 3 0 1
## proverb quarantine radium
## 0 3 0
## register republic rhinoceros
## 0 0 0
## rosary sacrament sacrifice
## 0 0 0
## sacrilege sandwich Sanhedrin
## 0 0 0
## saxophone scholarship science
## 0 3 0
## scribe secretary seminary
## 0 1 0
## September seraphim servant
## 0 0 0
## serviette sirloin society
## 0 0 0
## sovereign_(coin) spaniel station
## 0 1 0
## stencil sticker-plaster stockings
## 3 2 0
## swastika Sweden tabernacle
## 0 0 0
## telegraph television trade_union
## 3 1 0
## transmitter triceps trifle
## 2 0 0
## trombone trousers trustee
## 2 0 0
## turpentine unison unit
## 0 0 0
## university vehicle violet
## 0 0 0
## violin waltz Wesleyan
## 0 0 0
## whisky widow willow_tree
## 0 0 0
## wisteria xylophone zodiac
## 0 0 0
table(tongan_stress$starts_with_possible_cluster, tongan_stress$first_V)
##
## epenthetic stressed unstressed
## 0 3 149 60
## 1 63 16 2
#How can there be 3 items that don't start with a possible cluster, and yet the first V is epenthetic? These are jonquil (twice) and chutney, where the English post-alveolar is rendered as [si].
#table for paper of V1/V2 status by stress type
# three-way table: rows are first_V, columns are second_V, and there is one
# layer per value of is_1st_V_stressed (0, then 1)
stressTable <- table(tongan_stress$first_V, tongan_stress$second_V, tongan_stress$is_1st_V_stressed)
stressTable
## , , = 0
##
##
## epenthetic stressed unstressed
## epenthetic 0 48 1
## stressed 16 0 22
## unstressed 2 16 0
##
## , , = 1
##
##
## epenthetic stressed unstressed
## epenthetic 0 17 0
## stressed 78 0 49
## unstressed 13 31 0
Can we make a nice mosaic plot? A 3-D plot is hard to read. We treat each combination of stress statuses as a separate category.
# Build one category per V1/V2 status combination, then reorder its levels.
# Each relevel() moves the named level to the front, so the levels are applied
# in reverse of the desired final order: the last one listed ends up first.
tongan_stress$combined_categories <- Reduce(
  function(categories, front_level) relevel(categories, ref = front_level),
  c(
    "V1 stressed V2 epenthetic",
    "V1 unstressed V2 epenthetic",
    "V1 stressed V2 unstressed",
    "V1 unstressed V2 stressed",
    "V1 epenthetic V2 unstressed",
    "V1 epenthetic V2 stressed"
  ),
  as.factor(
    paste("V1", tongan_stress$first_V, "V2", tongan_stress$second_V)
  )
)
levels(tongan_stress$combined_categories)
## [1] "V1 epenthetic V2 stressed" "V1 epenthetic V2 unstressed" "V1 unstressed V2 stressed"
## [4] "V1 stressed V2 unstressed" "V1 unstressed V2 epenthetic" "V1 stressed V2 epenthetic"
# Cross-tabulate the combined V1/V2 category against whether the first V
# carries the stress, and draw it as a mosaic plot.
stressTable2 <- with(
  tongan_stress,
  table(
    combined_categories,
    is_1st_V_stressed,
    dnn = c("Vs status", "secondary stress")
  )
)
# NOTE(review): set_labels presumably relabels the table levels in their order
# (0, then 1), so is_1st_V_stressed == 0 would be shown as "V1" -- confirm that
# this is the intended labelling.
mosaic(
  stressTable2,
  direction = "v",
  pop = FALSE,
  # fonts and colors
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0, top = 60),
    offset_varnames = c(left = 0.5),
    offset_labels = c(left = 0.5, top = 3),
    varnames = c(FALSE, FALSE),
    set_labels = list("secondary stress" = c("V1", "V2"))
  )
)
# Do the same thing separately for each speaker.
# Rows are selected with tongan_stress's own speaker column. Indexing
# tongan_stress with a logical mask built from a different data frame
# (tongan_deletion) does not pick out that speaker's rows of tongan_stress.
stressTable_S1 <- with(
  tongan_stress[tongan_stress$speaker == "S1", ],
  table(
    combined_categories,
    is_1st_V_stressed,
    dnn = c("Vs status", "secondary stress")
  )
)
stressTable_S2 <- with(
  tongan_stress[tongan_stress$speaker == "S2", ],
  table(
    combined_categories,
    is_1st_V_stressed,
    dnn = c("Vs status", "secondary stress")
  )
)
stressTable_S3 <- with(
  tongan_stress[tongan_stress$speaker == "S3", ],
  table(
    combined_categories,
    is_1st_V_stressed,
    dnn = c("Vs status", "secondary stress")
  )
)
# Plot the three per-speaker tables side by side. The result is cramped here,
# but it is coded this way in case we want to make a .png.
# NOTE(review): set_labels presumably relabels the table levels in their order
# (0, then 1), so is_1st_V_stressed == 0 would be shown as "V1" -- confirm that
# this is the intended labelling.
str1 <- grid.grabExpr(
  mosaic(
    stressTable_S1,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0),
      offset_varnames = c(left = 1.5),
      offset_labels = c(left = 1),
      varnames = c(FALSE, FALSE),
      set_labels = list("secondary stress" = c("V1", "V2"))
    ),
    margins = c(2, 0, 0, 4) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = " Speaker 1"
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
str2 <- grid.grabExpr(
  mosaic(
    stressTable_S2,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0),
      offset_varnames = c(left = 1.5),
      offset_labels = c(left = 1),
      varnames = c(FALSE, FALSE),
      labels = c(TRUE, FALSE)
    ),
    margins = c(2, 0, 0, -2) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "Speaker 2 "
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
str3 <- grid.grabExpr(
  mosaic(
    stressTable_S3,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0),
      offset_varnames = c(left = 1.5),
      offset_labels = c(left = 1),
      varnames = c(FALSE, FALSE),
      labels = c(TRUE, FALSE)
    ),
    margins = c(2, 0, 0, -7.8) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "Speaker 3 "
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
grid.arrange(str1, str2, str3, ncol = 3)
The problem with treating first_V and second_V as separate factors (possibly with an interaction) is that many combinations are missing. For example, there are no tokens where both Vs are epenthetic. Instead, we treat first_V*second_V as though it were a single factor. We make that factor:
# Single factor with one level per first_V x second_V combination.
tongan_stress$first_and_secondV <- with(
  tongan_stress,
  interaction(first_V, second_V)
)
# The 'blancmange' token is the only one in its level; set it to NA so that
# the level disappears, along with the empty ones, when the factor is rebuilt.
tongan_stress$first_and_secondV[tongan_stress$definition == "blancmange"] <- NA
# Rebuild the factor without unused levels and make V1 epenthetic / V2
# corresponding to stressed the reference level.
tongan_stress$first_and_secondV <- relevel(
  factor(tongan_stress$first_and_secondV),
  ref = "epenthetic.stressed"
)
# Check the resulting counts.
table(tongan_stress$first_and_secondV)
##
## epenthetic.stressed stressed.epenthetic unstressed.epenthetic unstressed.stressed
## 65 94 15 47
## stressed.unstressed
## 71
There is a binary variable for whether the first two Cs can correspond to a legal initial cluster in English, but no further coding of surrounding consonants. The rationale for this is that while fine details of surrounding consonants could be expected to influence vowel devoicing and deletion, we have no expectation that fine details of consonants will influence stressability, unless speakers have inferred that certain C_C environments attract or repel stress (because they tend to correspond to CVC or CC in English).
# Maximal-interactions model: speaker, the four predictors, and the
# interaction of speaker with each of those predictors.
tongan_stress.bayesglm_interactVs <- bayesglm(
  is_1st_V_stressed ~ speaker * (speaker_V1_quality + speaker_V2_quality +
    starts_with_possible_cluster + first_and_secondV),
  data = tongan_stress,
  family = binomial(link = "logit")
)
# Select the best model by stepwise AIC comparison.
tongan_stress.best_interactVs <- step(tongan_stress.bayesglm_interactVs)
## Start: AIC=354.68
## is_1st_V_stressed ~ speaker * (speaker_V1_quality + speaker_V2_quality +
## starts_with_possible_cluster + first_and_secondV)
##
## Df Deviance AIC
## - speaker:speaker_V2_quality 8 269.90 337.90
## - speaker:speaker_V1_quality 8 271.20 339.20
## - speaker:first_and_secondV 8 277.98 345.98
## - speaker:starts_with_possible_cluster 2 268.16 348.16
## <none> 270.68 354.68
##
## Step: AIC=341.42
## is_1st_V_stressed ~ speaker + speaker_V1_quality + speaker_V2_quality +
## starts_with_possible_cluster + first_and_secondV + speaker:speaker_V1_quality +
## speaker:starts_with_possible_cluster + speaker:first_and_secondV
##
## Df Deviance AIC
## - speaker:speaker_V1_quality 8 274.25 326.25
## - speaker:first_and_secondV 8 282.61 334.61
## - speaker_V2_quality 4 275.34 335.34
## - speaker:starts_with_possible_cluster 2 271.86 335.86
## <none> 273.42 341.42
##
## Step: AIC=329.04
## is_1st_V_stressed ~ speaker + speaker_V1_quality + speaker_V2_quality +
## starts_with_possible_cluster + first_and_secondV + speaker:starts_with_possible_cluster +
## speaker:first_and_secondV
##
## Df Deviance AIC
## - speaker:first_and_secondV 8 287.54 323.54
## - speaker_V2_quality 4 279.65 323.65
## - speaker:starts_with_possible_cluster 2 277.05 325.05
## <none> 277.05 329.04
## - speaker_V1_quality 4 293.35 337.35
##
## Step: AIC=324.5
## is_1st_V_stressed ~ speaker + speaker_V1_quality + speaker_V2_quality +
## starts_with_possible_cluster + first_and_secondV + speaker:starts_with_possible_cluster
##
## Df Deviance AIC
## - speaker_V2_quality 4 292.82 320.83
## - speaker:starts_with_possible_cluster 2 289.79 321.79
## <none> 288.50 324.50
## - first_and_secondV 4 306.26 334.26
## - speaker_V1_quality 4 306.43 334.43
##
## Step: AIC=321.64
## is_1st_V_stressed ~ speaker + speaker_V1_quality + starts_with_possible_cluster +
## first_and_secondV + speaker:starts_with_possible_cluster
##
## Df Deviance AIC
## - speaker:starts_with_possible_cluster 2 295.12 319.12
## <none> 293.64 321.64
## - speaker_V1_quality 4 311.17 331.16
## - first_and_secondV 4 313.46 333.46
##
## Step: AIC=319.88
## is_1st_V_stressed ~ speaker + speaker_V1_quality + starts_with_possible_cluster +
## first_and_secondV
##
## Df Deviance AIC
## - starts_with_possible_cluster 1 295.18 317.18
## <none> 295.88 319.88
## - speaker 2 302.13 322.13
## - speaker_V1_quality 4 312.95 328.95
## - first_and_secondV 4 315.80 331.80
##
## Step: AIC=317.56
## is_1st_V_stressed ~ speaker + speaker_V1_quality + first_and_secondV
##
## Df Deviance AIC
## <none> 295.56 317.56
## - speaker 2 302.19 320.19
## - speaker_V1_quality 4 313.24 327.24
## - first_and_secondV 4 342.42 356.42
summary(tongan_stress.best_interactVs)
##
## Call:
## bayesglm(formula = is_1st_V_stressed ~ speaker + speaker_V1_quality +
## first_and_secondV, family = binomial(link = "logit"), data = tongan_stress)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -2.2036 -0.8126 0.4618 0.7989 1.7989
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.0894 0.3668 -2.970 0.002982 **
## speakerS2 0.1508 0.3371 0.447 0.654616
## speakerS3 0.8166 0.3428 2.382 0.017224 *
## speaker_V1_qualitye 0.8987 0.4319 2.081 0.037435 *
## speaker_V1_qualityi -0.8429 0.3586 -2.351 0.018744 *
## speaker_V1_qualityo -0.4585 0.4093 -1.120 0.262668
## speaker_V1_qualityu 0.1222 1.0209 0.120 0.904692
## first_and_secondVstressed.epenthetic 2.3753 0.4152 5.721 1.06e-08 ***
## first_and_secondVunstressed.epenthetic 2.5968 0.7441 3.490 0.000483 ***
## first_and_secondVunstressed.stressed 1.6044 0.4373 3.669 0.000243 ***
## first_and_secondVstressed.unstressed 1.6278 0.4098 3.972 7.12e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 376.13 on 289 degrees of freedom
## Residual deviance: 295.56 on 279 degrees of freedom
## (3 observations deleted due to missingness)
## AIC: 317.56
##
## Number of Fisher Scoring iterations: 8
Anova(tongan_stress.best_interactVs)
## Analysis of Deviance Table (Type II tests)
##
## Response: is_1st_V_stressed
## LR Chisq Df Pr(>Chisq)
## speaker 6.623 2 0.036456 *
## speaker_V1_quality 17.675 4 0.001429 **
## first_and_secondV 46.860 4 1.631e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(tongan_stress.best_interactVs, test="Chisq")
## Analysis of Deviance Table
##
## Model: binomial, link: logit
##
## Response: is_1st_V_stressed
##
## Terms added sequentially (first to last)
##
##
## Df Deviance Resid. Df Resid. Dev Pr(>Chi)
## NULL 289 376.13
## speaker 2 3.535 287 372.60 0.1707
## speaker_V1_quality 4 30.176 283 342.42 4.506e-06 ***
## first_and_secondV 4 46.860 279 295.56 1.631e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Which levels of first_and_secondV are pairwise different?
# All pairwise (Tukey) comparisons between the levels of first_and_secondV.
tongan_stress_V1V2.glht_interactVs <- glht(
  tongan_stress.best_interactVs,
  linfct = mcp(first_and_secondV = "Tukey")
)
# epenth-str has lower rate of initial secondary stress than the other four
summary(tongan_stress_V1V2.glht_interactVs)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: bayesglm(formula = is_1st_V_stressed ~ speaker + speaker_V1_quality +
## first_and_secondV, family = binomial(link = "logit"), data = tongan_stress)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## stressed.epenthetic - epenthetic.stressed == 0 2.37529 0.41516 5.721 < 0.001 ***
## unstressed.epenthetic - epenthetic.stressed == 0 2.59677 0.74410 3.490 0.00400 **
## unstressed.stressed - epenthetic.stressed == 0 1.60443 0.43725 3.669 0.00204 **
## stressed.unstressed - epenthetic.stressed == 0 1.62780 0.40979 3.972 < 0.001 ***
## unstressed.epenthetic - stressed.epenthetic == 0 0.22148 0.76216 0.291 0.99834
## unstressed.stressed - stressed.epenthetic == 0 -0.77086 0.43446 -1.774 0.37389
## stressed.unstressed - stressed.epenthetic == 0 -0.74750 0.38719 -1.931 0.28752
## unstressed.stressed - unstressed.epenthetic == 0 -0.99234 0.77530 -1.280 0.69124
## stressed.unstressed - unstressed.epenthetic == 0 -0.96898 0.76083 -1.274 0.69527
## stressed.unstressed - unstressed.stressed == 0 0.02337 0.42511 0.055 1.00000
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)
If we instead treat surrounding C as a random effect, as we did for V deletion, we have to use glmer(). Unfortunately, the model won’t converge with any of the interactions we tried, so this model has no interactions at all. Nor would it converge when either speaker_V1_quality or speaker_V2_quality was included as a predictor.
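Here’s the fullest model we were able to fit (note that lme4 still reports a convergence warning for it, shown below, with a gradient slightly above the tolerance):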
#Mixed-effects logistic regression for is_1st_V_stressed with a random intercept
#for first_two_Cs; fixed effects are main effects only (no interaction terms).
#NOTE: lme4 emits a convergence warning for this fit (max|grad| slightly above tolerance).
tongan_stress.glmer_interactVs <- glmer(
  is_1st_V_stressed ~ (1 | first_two_Cs) +
    speaker + starts_with_possible_cluster +
    first_and_secondV,
  data = tongan_stress,
  family = binomial(link = "logit")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to
## converge with max|grad| = 0.00198649 (tol = 0.001, component 1)
#Print the fitted mixed model: random-effect variance, fixed-effect estimates, and their correlations
summary(tongan_stress.glmer_interactVs)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: is_1st_V_stressed ~ (1 | first_two_Cs) + speaker + starts_with_possible_cluster +
## first_and_secondV
## Data: tongan_stress
##
## AIC BIC logLik deviance df.resid
## 314.9 348.0 -148.5 296.9 283
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0352 -0.5166 0.2792 0.4949 2.2877
##
## Random effects:
## Groups Name Variance Std.Dev.
## first_two_Cs (Intercept) 1.754 1.324
## Number of obs: 292, groups: first_two_Cs, 51
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.35282 0.77306 -1.750 0.08013 .
## speakerS2 0.06629 0.38198 0.174 0.86222
## speakerS3 0.94783 0.39846 2.379 0.01737 *
## starts_with_possible_cluster 0.25163 0.82744 0.304 0.76105
## first_and_secondVstressed.epenthetic 3.35499 0.82748 4.054 5.02e-05 ***
## first_and_secondVunstressed.epenthetic 3.26331 1.16557 2.800 0.00511 **
## first_and_secondVunstressed.stressed 1.76523 0.83036 2.126 0.03351 *
## first_and_secondVstressed.unstressed 1.93672 0.67037 2.889 0.00386 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) spkrS2 spkrS3 str___ frst_nd_scndVstrssd.p frst_nd_scndVnstrssd.p
## speakerS2 -0.212
## speakerS3 -0.285 0.468
## strts_wth__ -0.608 -0.063 -0.006
## frst_nd_scndVstrssd.p -0.762 -0.027 0.130 0.519
## frst_nd_scndVnstrssd.p -0.563 -0.023 0.057 0.385 0.578
## frst_nd_scndVnstrssd.s -0.777 -0.035 0.025 0.507 0.682 0.522
## frst_nd_scndVstrssd.n -0.786 -0.007 0.079 0.496 0.695 0.500
## frst_nd_scndVnstrssd.s
## speakerS2
## speakerS3
## strts_wth__
## frst_nd_scndVstrssd.p
## frst_nd_scndVnstrssd.p
## frst_nd_scndVnstrssd.s
## frst_nd_scndVstrssd.n 0.676
## convergence code: 0
## Model failed to converge with max|grad| = 0.00198649 (tol = 0.001, component 1)
In that model, which levels of first_and_secondV are significantly different?
#Tukey all-pairs comparisons among the levels of first_and_secondV in the glmer fit
tongan_stress.glht_interactVs <- glht(
  tongan_stress.glmer_interactVs,
  linfct = mcp(first_and_secondV = "Tukey")
)
#Result: epenth-str < {str-unstr, unstr-epenth, str-epenth}
summary(tongan_stress.glht_interactVs)
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: glmer(formula = is_1st_V_stressed ~ (1 | first_two_Cs) + speaker +
## starts_with_possible_cluster + first_and_secondV, data = tongan_stress,
## family = binomial(link = "logit"))
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## stressed.epenthetic - epenthetic.stressed == 0 3.35499 0.82748 4.054 <0.001 ***
## unstressed.epenthetic - epenthetic.stressed == 0 3.26331 1.16557 2.800 0.0381 *
## unstressed.stressed - epenthetic.stressed == 0 1.76523 0.83036 2.126 0.1967
## stressed.unstressed - epenthetic.stressed == 0 1.93672 0.67037 2.889 0.0294 *
## unstressed.epenthetic - stressed.epenthetic == 0 -0.09168 0.96385 -0.095 1.0000
## unstressed.stressed - stressed.epenthetic == 0 -1.58976 0.66144 -2.403 0.1066
## stressed.unstressed - stressed.epenthetic == 0 -1.41827 0.60231 -2.355 0.1197
## unstressed.stressed - unstressed.epenthetic == 0 -1.49808 1.01868 -1.471 0.5643
## stressed.unstressed - unstressed.epenthetic == 0 -1.32660 1.01313 -1.309 0.6698
## stressed.unstressed - unstressed.stressed == 0 0.17148 0.62126 0.276 0.9986
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)