Setup

Set random-number seed:

# Fix the random-number seed so that any step that draws random numbers gives the same result on every run.
set.seed(123)

Packages that will be needed:

require(languageR) || install.packages("languageR")
## Loading required package: languageR
## Warning: package 'languageR' was built under R version 3.4.4
## [1] TRUE
require(languageR)
require(lme4) || install.packages("lme4")
## Loading required package: lme4
## Loading required package: Matrix
## [1] TRUE
require(lme4)
require(arm) || install.packages("arm")
## Loading required package: arm
## Loading required package: MASS
## 
## arm (Version 1.9-3, built: 2016-11-21)
## Working directory is C:/Users/Kie/Dropbox/PROJECT_Tongan/Tongan_ZOW_PhonologySubmission/RevisionIII/ResubmissionIIIMaterialsSubmitted
## [1] TRUE
require(arm)
require(multcomp) || install.packages("multcomp")
## Loading required package: multcomp
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## 
## Attaching package: 'TH.data'
## The following object is masked from 'package:MASS':
## 
##     geyser
## [1] TRUE
require(multcomp)
require(MuMIn) || install.packages("MuMIn")
## Loading required package: MuMIn
## Warning: package 'MuMIn' was built under R version 3.4.4
## [1] TRUE
require(MuMIn)
require(grid) || install.packages("grid") 
## Loading required package: grid
## [1] TRUE
require(grid)
require(vcd) || install.packages("vcd") 
## Loading required package: vcd
## [1] TRUE
require(vcd)
require(gridExtra) || install.packages("gridExtra") 
## Loading required package: gridExtra
## Warning: package 'gridExtra' was built under R version 3.4.4
## [1] TRUE
require(gridExtra)
require(car) || install.packages("car") 
## Loading required package: car
## 
## Attaching package: 'car'
## The following object is masked from 'package:arm':
## 
##     logit
## [1] TRUE
require(car)

Some settings for the plots:

# Set up plot parameters ----
# Font family shared by every figure produced by this script.
myFontFamily <- "serif"

# `family` is the par() setting that selects the font family for base graphics;
# `font` only takes an integer face code (1 = plain, 2 = bold, ...), so it
# cannot be used to pass a family name.
par(family = myFontFamily) # for most plots
# Unfortunately, this doesn't carry over when a .png file is written, so it is necessary to include
# family=myFontFamily
# as one of the arguments to the png() function

# Also unfortunately, this doesn't carry over to mosaic plots, which use gpar (and gpar can't be set like this for the whole script). Instead, we create this object:
gpSerif <- gpar(fontfamily = myFontFamily)
# and then each call to mosaic() has to include the line
# gp_varnames=gpSerif,gp_labels=gpSerif,
# Note that for adding the numbers to the tiles in the mosaic plots with mtext(), we just use
# , fontfamily=myFontFamily
# in the list of gpar() arguments
# and for the font of the main title, use main_gp=gpar(fontfamily=myFontFamily) as an argument to mosaic()


# Scale factor applied to width, height and res in every png() call.
myResMultiplier <- 5 # default is 72 ppi; using this in every call to png() will make it 360

Setting for html knitting:

# Let printed output run to 100 characters per line (setting used for the knitted HTML).
options(width = 100)

Vowel deletion

Read in the data and take a look

# Read the i-deletion data (whitespace-separated text file with a header row).
# stringsAsFactors = TRUE is spelled out because R >= 4.0 reads strings as
# character by default, whereas the levels()/relevel() calls further down
# need these columns to be factors.
tongan_deletion <- read.table(
  "deleted_i_Jul2018.txt",
  header = TRUE,
  stringsAsFactors = TRUE
)
summary(tongan_deletion)
##  speaker                   position   is_it_footed     V_status_wrt_English    deleted     
##  S1:103   could_be_tertiary    : 3   Min.   :0.0000   epenthetic :209       Min.   :0.000  
##  S2: 87   secondary__primary   :80   1st Qu.:1.0000   notstressed: 77       1st Qu.:0.000  
##  S3:106   secondary__tertiary  :58   Median :1.0000   stressed   :  9       Median :0.000  
##           secondary__unstressed:92   Mean   :0.8567   NA's       :  1       Mean   :0.353  
##           tertiary__primary    :21   3rd Qu.:1.0000                         3rd Qu.:1.000  
##           tertiary__unstressed : 1   Max.   :1.0000                         Max.   :1.000  
##           unstressed__primary  :41   NA's   :3                                             
##  deleted_binary     dictionary_entry      definition  C_environment   footed_or_not
##  Min.   :0.000   `amipasitoa:  6     ambassador:  6   s_t    : 54   beginning: 58  
##  1st Qu.:0.000   `atimosifia:  6     atmosphere:  6   n_s    : 43   end      :238  
##  Median :0.000   `enisinia  :  6     dictionary:  6   n_t    : 40                  
##  Mean   :0.372   tikisinale :  6     engineer  :  6   l_k    : 12                  
##  3rd Qu.:1.000   `esitimeti :  5     estimate  :  5   m_p    : 12                  
##  Max.   :1.000   sakilifisio:  5     sacrifice :  5   s_p    : 12                  
##  NA's   :3       (Other)    :262     (Other)   :262   (Other):123                  
##  English_transition_types
##  nas_stop :69            
##  fric_stop:63            
##  son_fric :38            
##  stop_fric:11            
##  fric_son : 9            
##  (Other)  :19            
##  NA's     :87
# List the column names of the data frame.
colnames(tongan_deletion)
##  [1] "speaker"                  "position"                 "is_it_footed"            
##  [4] "V_status_wrt_English"     "deleted"                  "deleted_binary"          
##  [7] "dictionary_entry"         "definition"               "C_environment"           
## [10] "footed_or_not"            "English_transition_types"

List items that are in various metrical positions:

# Show every value the metrical `position` factor can take.
levels(tongan_deletion[["position"]])
## [1] "could_be_tertiary"     "secondary__primary"    "secondary__tertiary"   "secondary__unstressed"
## [5] "tertiary__primary"     "tertiary__unstressed"  "unstressed__primary"
# List the tokens whose `position` is one of the three secondary__* levels.
# %in% (unlike ==) never returns NA, so a missing `position` cannot add an
# all-NA row to the listing. Columns are selected by name rather than by
# index, so the listing does not depend on column order.
secondary_positions <- c(
  "secondary__primary",
  "secondary__tertiary",
  "secondary__unstressed"
)
tongan_deletion[
  tongan_deletion$position %in% secondary_positions,
  c("position", "is_it_footed", "V_status_wrt_English", "dictionary_entry")
]
##                  position is_it_footed V_status_wrt_English  dictionary_entry
## 1     secondary__tertiary            1           epenthetic       `amipasitoa
## 2     secondary__tertiary            1           epenthetic       `amipasitoa
## 3     secondary__tertiary            1           epenthetic       `amipasitoa
## 7   secondary__unstressed            1           epenthetic        `anitelope
## 8   secondary__unstressed            1           epenthetic        `anitelope
## 9      secondary__primary            1           epenthetic          `anitema
## 10     secondary__primary            1           epenthetic          `anitema
## 11     secondary__primary            1           epenthetic          `anitema
## 12     secondary__primary            1           epenthetic        `apenitiki
## 13     secondary__primary            1           epenthetic        `apenitiki
## 14     secondary__primary            1           epenthetic        `apenitiki
## 15     secondary__primary            1          notstressed        `?pelikoti
## 16     secondary__primary            1          notstressed        `?pelikoti
## 18    secondary__tertiary            1           epenthetic      `asipesitosi
## 20  secondary__unstressed            1           epenthetic        `asipulini
## 21  secondary__unstressed            1           epenthetic        `asipulini
## 22  secondary__unstressed            1           epenthetic        `asipulini
## 23     secondary__primary            1           epenthetic          `asipol?
## 24  secondary__unstressed            1           epenthetic          `asipol?
## 25  secondary__unstressed            1           epenthetic          `asipol?
## 26  secondary__unstressed            1           epenthetic      `asital?noma
## 27  secondary__unstressed            1           epenthetic      `asital?noma
## 28  secondary__unstressed            1           epenthetic      `asital?noma
## 35    secondary__tertiary            1           epenthetic       `atimosifia
## 36    secondary__tertiary            1           epenthetic       `atimosifia
## 37    secondary__tertiary            1           epenthetic       `atimosifia
## 38  secondary__unstressed            1           epenthetic      `Aositel?lia
## 39  secondary__unstressed            1           epenthetic      `Aositel?lia
## 40  secondary__unstressed            1           epenthetic           penisi?
## 41  secondary__unstressed            1           epenthetic           penisi?
## 42  secondary__unstressed            1           epenthetic           penisi?
## 44     secondary__primary            1           epenthetic        pelekifasi
## 45     secondary__primary            1           epenthetic        pelekifasi
## 46  secondary__unstressed            1             stressed pilikatia_seniale
## 47  secondary__unstressed            1             stressed pilikatia_seniale
## 48    secondary__tertiary            1          notstressed         Pilitania
## 49    secondary__tertiary            1          notstressed         Pilitania
## 50     secondary__primary            1           epenthetic          k?piteni
## 53     secondary__primary            1          notstressed   k?ponika_q?sita
## 54    secondary__tertiary            1           epenthetic         senituli?
## 55    secondary__tertiary            1           epenthetic         senituli?
## 56    secondary__tertiary            1           epenthetic         senituli?
## 57  secondary__unstressed            1           epenthetic       Kalisitiane
## 58  secondary__unstressed            1           epenthetic       Kalisitiane
## 59     secondary__primary            1           epenthetic        Kilisimasi
## 60     secondary__primary            1           epenthetic        Kilisimasi
## 61     secondary__primary            1           epenthetic        Kilisimasi
## 62  secondary__unstressed            1           epenthetic        Sinitalela
## 63  secondary__unstressed            1           epenthetic        Sinitalela
## 64  secondary__unstressed            1           epenthetic        Sinitalela
## 65     secondary__primary            1             stressed    faka_siviliani
## 66     secondary__primary            1          notstressed         sivilaise
## 67     secondary__primary            1          notstressed         sivilaise
## 68  secondary__unstressed            1          notstressed         komiunisi
## 69  secondary__unstressed            1          notstressed         komiunisi
## 70  secondary__unstressed            1           epenthetic         komipauni
## 71  secondary__unstressed            1           epenthetic         komipauni
## 72  secondary__unstressed            1           epenthetic         komipauni
## 73    secondary__tertiary            1           epenthetic      konifelenisi
## 74    secondary__tertiary            1           epenthetic      konifelenisi
## 75    secondary__tertiary            1           epenthetic      konifelenisi
## 76    secondary__tertiary            1           epenthetic       kofilimasio
## 77    secondary__tertiary            1           epenthetic       kofilimasio
## 78    secondary__tertiary            1           epenthetic       kofilimasio
## 79    secondary__tertiary            1           epenthetic        konis?nisi
## 80    secondary__tertiary            1           epenthetic        konis?nisi
## 81    secondary__tertiary            1           epenthetic        konis?nisi
## 82  secondary__unstressed            1           epenthetic      konisinanite
## 83  secondary__unstressed            1           epenthetic      konisinanite
## 84    secondary__tertiary            1          notstressed      konisinanite
## 85    secondary__tertiary            0          notstressed      konisinanite
## 86  secondary__unstressed            1           epenthetic      konitineniti
## 87  secondary__unstressed            1           epenthetic      konitineniti
## 88    secondary__tertiary            1          notstressed      konitineniti
## 89     secondary__primary            1           epenthetic        kalisitala
## 90     secondary__primary            1           epenthetic        kalisitala
## 91     secondary__primary            1           epenthetic        kalisitala
## 95  secondary__unstressed            1           epenthetic          kasitom?
## 96  secondary__unstressed            1           epenthetic          kasitom?
## 97     secondary__primary            1           epenthetic          kasitom?
## 98     secondary__primary            1          notstressed        tenominato
## 100    secondary__primary            1          notstressed         tip?sitoa
## 101    secondary__primary            1          notstressed         tip?sitoa
## 102    secondary__primary            1          notstressed         tip?sitoa
## 103 secondary__unstressed            1           epenthetic        tikisinale
## 104 secondary__unstressed            1           epenthetic        tikisinale
## 105 secondary__unstressed            1           epenthetic        tikisinale
## 109 secondary__unstressed            1           epenthetic       tisikaunite
## 110   secondary__tertiary            1           epenthetic       tisikaunite
## 111   secondary__tertiary            1           epenthetic       tisikaunite
## 112 secondary__unstressed            1           epenthetic          `emipaea
## 113 secondary__unstressed            1           epenthetic          `emipaea
## 114 secondary__unstressed            1           epenthetic          `emipaea
## 115 secondary__unstressed            1           epenthetic         `enisinia
## 116 secondary__unstressed            1           epenthetic         `enisinia
## 117 secondary__unstressed            1           epenthetic         `enisinia
## 121   secondary__tertiary            1          notstressed         `Epikulio
## 123   secondary__tertiary            1          notstressed      `epikolotisi
## 124 secondary__unstressed            1           epenthetic        `esitimeti
## 125 secondary__unstressed            1           epenthetic        `esitimeti
## 126    secondary__primary            1          notstressed        `esitimeti
## 132    secondary__primary            1           epenthetic        falakiseni
## 134    secondary__primary            1           epenthetic        falakiseni
## 135    secondary__primary            1           epenthetic          Falanis?
## 136    secondary__primary            1           epenthetic          Falanis?
## 137 secondary__unstressed            1           epenthetic         Senitaile
## 138 secondary__unstressed            1           epenthetic         Senitaile
## 139    secondary__primary            1           epenthetic         Senitaile
## 140 secondary__unstressed            1           epenthetic         sinisapia
## 141    secondary__primary            1          notstressed        kilisilini
## 142    secondary__primary            1          notstressed        kilisilini
## 146 secondary__unstressed            1          notstressed        helikopet?
## 147 secondary__unstressed            1          notstressed        helikopet?
## 148   secondary__tertiary            1          notstressed        helikopet?
## 149 secondary__unstressed            1          notstressed         hemisefia
## 151   secondary__tertiary            1           epenthetic         hisit?lia
## 152   secondary__tertiary            1           epenthetic         hisit?lia
## 153   secondary__tertiary            1           epenthetic         hisit?lia
## 157   secondary__tertiary            1           epenthetic        `inis?nisi
## 158   secondary__tertiary            1           epenthetic        `inis?nisi
## 159   secondary__tertiary            1           epenthetic        `inis?nisi
## 160   secondary__tertiary            1           epenthetic       `itulis?sia
## 161   secondary__tertiary            1           epenthetic        `inis?kite
## 162   secondary__tertiary            1           epenthetic        `inis?kite
## 163   secondary__tertiary            1           epenthetic        `inis?kite
## 164 secondary__unstressed            1           epenthetic      `inisip?kita
## 165   secondary__tertiary            1           epenthetic      `inisip?kita
## 166   secondary__tertiary            1           epenthetic      `inisip?kita
## 167 secondary__unstressed            1           epenthetic        `inisulato
## 168 secondary__unstressed            1           epenthetic        `inisulato
## 169 secondary__unstressed            1           epenthetic        `inisuline
## 170 secondary__unstressed            1           epenthetic        `inisuline
## 171 secondary__unstressed            1           epenthetic        `inisuline
## 172 secondary__unstressed            1           epenthetic         `inivoisi
## 173 secondary__unstressed            1           epenthetic         `inivoisi
## 174    secondary__primary            1           epenthetic         `inivoisi
## 175 secondary__unstressed            1           epenthetic         kangikal?
## 176 secondary__unstressed            1           epenthetic         kangikal?
## 177 secondary__unstressed            1           epenthetic         kangikal?
## 178    secondary__primary            1           epenthetic          lavenit?
## 179    secondary__primary            1           epenthetic          lavenit?
## 181 secondary__unstressed            1           epenthetic         Manisulia
## 182 secondary__unstressed            1           epenthetic         Manisulia
## 183 secondary__unstressed            1           epenthetic        menitelini
## 184 secondary__unstressed            1           epenthetic        menitelini
## 185 secondary__unstressed            1           epenthetic        menitelini
## 187 secondary__unstressed            1           epenthetic     maniusikilipi
## 188 secondary__unstressed            1          notstressed     Metiteleniane
## 189 secondary__unstressed            1          notstressed     Metiteleniane
## 190 secondary__unstressed            1             stressed         melitiane
## 191 secondary__unstressed            1             stressed         melitiane
## 192 secondary__unstressed            1             stressed         melitiane
## 197 secondary__unstressed            1          notstressed          minisit?
## 198    secondary__primary            1           epenthetic          minisit?
## 199    secondary__primary            1           epenthetic          minisit?
## 201   secondary__tertiary            1           epenthetic       monasiteli?
## 202    secondary__primary            1           epenthetic       monasiteli?
## 203 secondary__unstressed            1           epenthetic       monasiteli?
## 205    secondary__primary            1           epenthetic         misiteli?
## 206   secondary__tertiary            1           epenthetic         misiteli?
## 207    secondary__primary            1           epenthetic         misiteli?
## 208   secondary__tertiary            1           epenthetic       nasit?siume
## 209   secondary__tertiary            1           epenthetic       nasit?siume
## 210    secondary__primary            1          notstressed        `opelikato
## 211    secondary__primary            1          notstressed        `opelikato
## 213 secondary__unstressed            1           epenthetic        `ositalesi
## 214 secondary__unstressed            1           epenthetic        `ositalesi
## 215 secondary__unstressed            1           epenthetic        `ositalesi
## 216   secondary__tertiary            1           epenthetic       penikiliasi
## 217   secondary__tertiary            1           epenthetic       penikiliasi
## 218   secondary__tertiary            1           epenthetic       penikiliasi
## 219    secondary__primary            1           epenthetic          pasinipi
## 220 secondary__unstressed            1           epenthetic         penikuini
## 221 secondary__unstressed            1           epenthetic         penikuini
## 222 secondary__unstressed            1           epenthetic         penikuini
## 223 secondary__unstressed            1           epenthetic        Penitekosi
## 224 secondary__unstressed            1           epenthetic        Penitekosi
## 225 secondary__unstressed            1           epenthetic        Penitekosi
## 226    secondary__primary            1           epenthetic        palasitika
## 227    secondary__primary            1           epenthetic        palasitika
## 228    secondary__primary            1           epenthetic        palasitika
## 230 secondary__unstressed            1           epenthetic        poinisetia
## 231   secondary__tertiary            1          notstressed        pomikanite
## 232   secondary__tertiary            1          notstressed        pomikanite
## 233   secondary__tertiary            1          notstressed        pomikanite
## 234    secondary__primary            1          notstressed        paletikasi
## 235    secondary__primary            1          notstressed        paletikasi
## 236    secondary__primary            1          notstressed        palesiteni
## 238    secondary__primary            1          notstressed        palesiteni
## 239    secondary__primary            1           epenthetic        pilinisesi
## 240    secondary__primary            1           epenthetic        pilinisesi
## 241    secondary__primary            1           epenthetic        pilinisesi
## 242    secondary__primary            1          notstressed         palofisai
## 243    secondary__primary            1          notstressed         palofisai
## 244    secondary__primary            1          notstressed         palofisai
## 245    secondary__primary            1          notstressed        Pal?tisani
## 246    secondary__primary            1          notstressed        Pal?tisani
## 247    secondary__primary            1          notstressed        Pal?tisani
## 248    secondary__primary            1           epenthetic        kolonitini
## 249    secondary__primary            1           epenthetic        kolonitini
## 250    secondary__primary            1           epenthetic        kolonitini
## 252    secondary__primary            1          notstressed          l?sisita
## 253 secondary__unstressed            1          notstressed          l?sisita
## 254    secondary__primary            1           epenthetic          l?sisita
## 258 secondary__unstressed            1          notstressed       sakilifisio
## 259 secondary__unstressed            1          notstressed       sakilifisio
## 260 secondary__unstressed            1           epenthetic        sakisefoni
## 261 secondary__unstressed            1           epenthetic        sakisefoni
## 265   secondary__tertiary            1          notstressed         seminalio
## 266   secondary__tertiary            1          notstressed         seminalio
## 267    secondary__primary            1           epenthetic          S?pitema
## 268    secondary__primary            1           epenthetic          S?pitema
## 269    secondary__primary            1           epenthetic          sepitema
## 270 secondary__unstressed            1           epenthetic        silivapiti
## 271    secondary__primary            1           epenthetic        sitenisila
## 272    secondary__primary            1           epenthetic        sitenisila
## 273    secondary__primary            1           epenthetic        sitenisila
## 274    secondary__primary            1           epenthetic          palasit?
## 275    secondary__primary            1           epenthetic          palasit?
## 276    secondary__primary            1           epenthetic          palasit?
## 280   secondary__tertiary            1           epenthetic      talanisimita
## 285    secondary__primary            1          notstressed          iunisoni
## 288   secondary__tertiary            1          notstressed        `univesiti
## 289   secondary__tertiary            1          notstressed        `univesiti
## 290   secondary__tertiary            1          notstressed        `univesiti
## 291 secondary__unstressed            1           epenthetic         Uesiliana
## 293    secondary__primary            1           epenthetic            uasik?
## 294    secondary__primary            1           epenthetic            uasik?
## 295    secondary__primary            1           epenthetic            uasik?
## 296   secondary__tertiary            1           epenthetic         uisitelia

Plot i-deletion as a function of the vowel’s status in the English word, first for all speakers pooled and then separately for each speaker. All three speakers show the same pattern (stressed < unstressed < epenthetic), but with different overall rates.

# Put the levels in the order stressed, notstressed, epenthetic and give them
# the short display names str, unstr, epenthetic.
tongan_deletion$V_status_wrt_English <- factor(
  tongan_deletion$V_status_wrt_English,
  levels = c("stressed", "notstressed", "epenthetic"),
  labels = c("str", "unstr", "epenthetic")
)

# Table of counts (V status by deletion) for the mosaic plot
counts_deletion <- with(
  tongan_deletion,
  table(V_status_wrt_English, deleted_binary, dnn = c("V status", "deletion"))
)

# Label (V status, deletion) and value (str, unstr, epenthetic, not deleted, deleted) placement
mosaic_labeling <- list(
  rot_labels = c(left = 0),
  offset_varnames = c(left = 1.5),
  offset_labels = c(left = 1),
  varnames = c(TRUE, FALSE), # draw the "V status" variable name, but not the "deletion" one
  set_labels = list(deletion = c("not deleted", "deleted")) # more informative than 0 and 1
)

mosaic(
  counts_deletion,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif, # fonts
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")), # colors
  labeling_args = mosaic_labeling
)

# Repeat the V status by deletion mosaic separately for each speaker

# Build the V status x deletion table for the rows belonging to one speaker.
speaker_counts <- function(speaker_id) {
  rows <- tongan_deletion[tongan_deletion$speaker == speaker_id, ]
  table(rows$V_status_wrt_English, rows$deleted_binary, dnn = c("V status", "deletion"))
}

counts_deletion_S1 <- speaker_counts("S1")
counts_deletion_S2 <- speaker_counts("S2")
counts_deletion_S3 <- speaker_counts("S3")

# Draw one mosaic off-screen and return it as a grob. The three panels differ
# only in their table, the last labeling argument, the margins and the caption.
grab_speaker_mosaic <- function(counts, panel_labeling, panel_margins, caption) {
  grid.grabExpr(mosaic(
    counts,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = c(
      list(
        rot_labels = c(left = 0),
        offset_varnames = c(left = 1.5),
        offset_labels = c(left = 1),
        varnames = c(FALSE, FALSE)
      ),
      panel_labeling
    ),
    margins = panel_margins,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = caption
  ))
}

# Plot them side by side. Doesn't look so great here (plot is cramped), but the .png file looks good
# The extra spaces in the captions are a hack to get the subtitles centered under the main plot area
p1 <- grab_speaker_mosaic(
  counts_deletion_S1,
  list(set_labels = list(deletion = c("not deleted", "deleted"))),
  c(2, 0, 0, 4) + 0.1,
  "                    Speaker 1"
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
p2 <- grab_speaker_mosaic(
  counts_deletion_S2,
  list(labels = c(TRUE, FALSE)),
  c(2, 0, 0, -2) + 0.1,
  "Speaker 2          "
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
p3 <- grab_speaker_mosaic(
  counts_deletion_S3,
  list(labels = c(TRUE, FALSE)),
  c(2, 0, 0, -7.8) + 0.1,
  "Speaker 3                              "
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
grid.arrange(p1, p2, p3, ncol = 3)

# Write the three-speaker figure to a PNG file. Width, height and resolution
# are all scaled by myResMultiplier.
png(
  filename = "Vowel_deletion_mosaic_plots2.png",
  width = myResMultiplier * 545,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
) # usu. 600, 350, 72
grid.arrange(p1, p2, p3, ncol = 3)
dev.off()
## png 
##   2

Exploring effect of surrounding consonants on vowel deletion (not in article)

Plot i-deletion as a function of the vowel’s surrounding consonants, separately for each status in the English word. There are many different consonant environments, so we will want to group them:

# Count the tokens in each consonant environment.
table(tongan_deletion[["C_environment"]])
## 
##  f_s  k_f  k_l  k_s  l_f  l_k  l_m  l_s  l_t  l_v  m_k  m_n  m_p  m_s  n_f  n_k  n_s  n_t  n_v ng_k 
##    6    2    1    9    2   12    3    1    8    1    3    6   12    1    3    6   43   40    6    3 
##  p_k  p_l  p_t  s_f  s_k  s_l  s_m  s_n  s_p  s_s  s_t  t_k  t_m  t_n  t_s  t_t  v_l 
##    4    1    6    4    9    5    5    9   12    2   54    2    6    1    3    2    3

Let’s group them:

# Coarse class for each C_V_C environment. The order of this list is also the
# level order of the resulting factor, so the grouping and the display order
# are defined in one place instead of by positional indices into the
# alphabetically sorted levels.
c_enviro_classes <- list(
  sonorant_sonorant = c("l_m", "m_n"),
  sonorant_fric     = c("l_f", "l_s", "l_v", "m_s", "n_f", "n_s", "n_v"),
  sonorant_stop     = c("m_k", "m_p", "n_k", "n_t", "ng_k", "l_k", "l_t"),
  fric_sonorant     = c("s_m", "s_n", "s_l", "v_l"),
  fric_fric         = c("f_s", "s_f", "s_s"),
  fric_stop         = c("s_p", "s_t", "s_k"),
  stop_sonorant     = c("k_l", "p_l", "t_m", "t_n"),
  stop_fric         = c("k_f", "k_s", "t_s"),
  stop_stop         = c("p_k", "p_t", "t_k", "t_t")
)

# Named character vector: environment -> coarse class.
c_enviro_lookup <- setNames(
  rep(names(c_enviro_classes), lengths(c_enviro_classes)),
  unlist(c_enviro_classes, use.names = FALSE)
)

c_enviro_coarse <- unname(c_enviro_lookup[as.character(tongan_deletion$C_environment)])

# Environments missing from the lookup (including NA) keep the "OOPS" marker,
# but now with a warning, and "OOPS" is appended as an extra level so it can
# neither displace nor drop one of the nine real classes.
c_enviro_unmapped <- is.na(c_enviro_coarse)
if (any(c_enviro_unmapped)) {
  warning(
    "C_environment values with no coarse class (coded as \"OOPS\"): ",
    paste(unique(as.character(tongan_deletion$C_environment[c_enviro_unmapped])), collapse = ", "),
    call. = FALSE
  )
  c_enviro_coarse[c_enviro_unmapped] <- "OOPS"
}

tongan_deletion$C_enviro_coarse <- factor(
  c_enviro_coarse,
  levels = c(names(c_enviro_classes), if (any(c_enviro_unmapped)) "OOPS")
)

table(tongan_deletion$C_enviro_coarse)
## 
## sonorant_sonorant     sonorant_fric     sonorant_stop     fric_sonorant         fric_fric 
##                 9                57                84                22                12 
##         fric_stop     stop_sonorant         stop_fric         stop_stop 
##                75                 9                14                14

Plot it:

# Make a table of counts (surrounding Cs by deletion), separately for unstressed and epenthetic vowels
counts_for_status <- function(status) {
  tokens <- tongan_deletion[tongan_deletion$V_status_wrt_English == status, ]
  table(tokens$C_enviro_coarse, tokens$deleted_binary, dnn = c("surrounding Cs", "deletion"))
}

counts_deletion_unstressed <- counts_for_status("unstr")

counts_deletion_epenthetic <- counts_for_status("epenthetic")

# Settings common to both panels
shared_labeling <- list(
  rot_labels = c(left = 0, top = 90),
  offset_varnames = c(left = 1.5),
  offset_labels = c(left = 1, top = -0.5),
  just_labels = "left",
  varnames = c(FALSE, FALSE)
)
tile_fill <- gpar(fill = c("black", "grey"))
caption_font <- gpar(fontfamily = myFontFamily)

# Plot them side by side
p1 <- grid.grabExpr(mosaic(
  counts_deletion_unstressed,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = tile_fill,
  labeling_args = c(
    shared_labeling,
    list(set_labels = list(deletion = c("not deleted", "deleted")))
  ),
  margins = c(2, 0, 0, 4) + 0.1,
  sub_gp = caption_font,
  sub = "                    unstressed"
))
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
p2 <- grid.grabExpr(mosaic(
  counts_deletion_epenthetic,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = tile_fill,
  labeling_args = c(shared_labeling, list(labels = c(TRUE, FALSE))),
  margins = c(2, 0, 0, -2) + 0.1,
  sub_gp = caption_font,
  sub = "epenthetic          "
))
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
grid.arrange(p1, p2, ncol = 2)

# Write the two panels, side by side, to a PNG file.
# Width, height and resolution are all scaled by myResMultiplier.
png(
  filename = "Vowel_deletion_mosaic_plots3.png",
  width = myResMultiplier * 545,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
) #usu. 600, 350, 72
grid.arrange(p1, p2, ncol = 2)
dev.off()
## png 
##   2

A different way of thinking about it: what is the transition type in the English pronunciation? I have coded this by hand in another column. A few notes:

  • in general, both nasals and liquids are coded as “son”
  • except, a nasal+stop sequence is coded as nas_stop, since those sequences tend to be different from other sonorant+stop sequences; namely, you tend to get a long nasal portion followed by a very brief stop portion and then a stop release
  • if the second consonant is an affricate, it’s treated as “stop”, since that’s what the part next to the transition is
  • this is only for the subset of data where the vowel is epenthetic–otherwise, there is no consonant-consonant transition
table(tongan_deletion$English_transition_types)
## 
## fric_fric  fric_son fric_stop  nas_stop  son_fric   son_son  son_stop stop_fric  stop_son stop_stop 
##         4         9        63        69        38         3         1        11         5         6
# Cross-tabulate the hand-coded English C-C transition type against deletion
counts_deletion_byEnglish <- with(
  tongan_deletion,
  table(
    English_transition_types,
    deleted_binary,
    dnn = c("English C-C transition type", "deletion")
  )
)

# Capture the mosaic plot as a grob (note: this overwrites the earlier p1)
p1 <- grid.grabExpr(
  mosaic(
    counts_deletion_byEnglish,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 90),
      offset_varnames = c(left = 1.5),
      offset_labels = c(left = 1, top = -0.5),
      just_labels = "left",
      varnames = c(FALSE, FALSE),
      set_labels = list(deletion = c("not deleted", "deleted"))
    ),
    margins = c(4, 0, 0, 4) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = ""
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
grid.arrange(p1, ncol=1)

# Write the single transition-type mosaic to a PNG file.
# Width, height and resolution are all scaled by myResMultiplier.
png(
  filename = "Vowel_deletion_mosaic_plots4.png",
  width = myResMultiplier * 545,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
) #usu. 600, 350, 72
grid.arrange(p1, ncol = 1)
dev.off()
## png 
##   2

The three categories where there’s enough data are fricative_stop, nasal_stop, sonorant_fricative. Sonorant_fricative shows less deletion than the other two. There doesn’t seem to be any clear pattern here, so we don’t pursue this further.

A reviewer suggests testing the hypothesis that there will be more deletion when a homorganic consonant sequence results, on the theory that V deletion reflects bilingual speakers’ ease of producing various English consonant sequences, and homorganic sequences should be easier. First we code C_environment as homorganic or not:

# Label each token by whether its two surrounding consonants are in the
# hand-listed set of homorganic C_C environments; every other environment
# is labelled "heterorganic"
tongan_deletion$Cs_homorganic_or_not <- ifelse(
  tongan_deletion$C_environment %in% c(
    "l_s", "l_t", "m_p", "n_s", "n_t", "ng_k", "s_l",
    "s_n", "s_s", "s_t", "t_n", "t_s", "t_t"
  ),
  "homorganic",
  "heterorganic"
)

Plot effect of consonant homorganicity–it looks like there’s a trend in the raw data, but let’s see if it holds up in the regression model below:

# Cross-tabulate consonant homorganicity against deletion,
# building one table per vowel status (unstressed vs. epenthetic)
counts_deletion_unstressed_homorg_or_not <- with(
  tongan_deletion[tongan_deletion$V_status_wrt_English == "unstr", ],
  table(Cs_homorganic_or_not, deleted_binary, dnn = c("surrounding Cs homorganic?", "deletion"))
)

counts_deletion_epenthetic_homorg_or_not <- with(
  tongan_deletion[tongan_deletion$V_status_wrt_English == "epenthetic", ],
  table(Cs_homorganic_or_not, deleted_binary, dnn = c("surrounding Cs homorganic?", "deletion"))
)

# Draw the two mosaics one at a time, directly on the current device
# (grid.grabExpr() was not working here, for reasons not yet understood)

# Unstressed vowels
mosaic(
  counts_deletion_unstressed_homorg_or_not,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0, top = 90),
    offset_varnames = c(left = 1.5),
    offset_labels = c(left = 1, top = -0.5),
    just_labels = "left",
    varnames = c(FALSE, FALSE),
    set_labels = list(deletion = c("not deleted", "deleted"))
  ),
  margins = c(2, 0, 0, 4) + 0.1,
  sub_gp = gpar(fontfamily = myFontFamily),
  sub = "                    unstressed"
)

# Epenthetic vowels
mosaic(
  counts_deletion_epenthetic_homorg_or_not,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0, top = 90),
    offset_varnames = c(left = 1.5),
    offset_labels = c(left = 1, top = -0.5),
    just_labels = "left",
    varnames = c(FALSE, FALSE),
    labels = c(TRUE, FALSE)
  ),
  margins = c(2, 0, 0, -2) + 0.1,
  sub_gp = gpar(fontfamily = myFontFamily),
  sub = "epenthetic          "
)

Logistic regression model for i-deletion

Because there are so few “stressed” cases, and none of them ever have deletion (separability issue), let’s just ignore them and only compare unstressed vs. epenthetic. This allows us to use glmer(), which doesn’t handle separability problems well but does allow random effects.

Do the subsetting:

# Keep every row whose vowel status is not "str" (i.e. drop the stressed cases)
tongan_deletion_subset <- subset(tongan_deletion, V_status_wrt_English != "str")

# Make new variables that separate C_environment ("<preceding C>_<following C>")
# into preceding C and following C (these didn't end up getting used, but
# readers may wish to play with them).
# The split is made at the underscore rather than at fixed character positions,
# because a consonant can be written with more than one letter: for "ng_k",
# substr(x, 1, 1) and substr(x, 3, 3) would give "n" and "_" instead of
# "ng" and "k".
tongan_deletion_subset$precedingC <- sub(
  "_.*$", "", as.character(tongan_deletion_subset$C_environment)
)
tongan_deletion_subset$followingC <- sub(
  "^[^_]*_", "", as.character(tongan_deletion_subset$C_environment)
)

Here is the model we present in the paper. Speaker is treated as a fixed effect because it has only three levels.

# Mixed-effects logistic regression (binomial family) of deleted_binary on
# speaker, is_it_footed and V_status_wrt_English, with a random intercept
# for each C_environment, fitted to the subset without stressed vowels.
# NOTE: the order of the fixed-effect terms fixes the order of
# fixef(deletion.glmer), which is indexed by position when the predicted
# probabilities are computed later in this file -- do not reorder the terms
# without updating those indices.
deletion.glmer <- glmer(deleted_binary ~
  + (1|C_environment)
  + speaker
  + is_it_footed
  + V_status_wrt_English
  , data=tongan_deletion_subset,
  family=binomial)
# Print the fitted model (random-effect variance and fixed-effect estimates)
summary(deletion.glmer)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: deleted_binary ~ +(1 | C_environment) + speaker + is_it_footed +      V_status_wrt_English
##    Data: tongan_deletion_subset
## 
##      AIC      BIC   logLik deviance df.resid 
##    264.9    286.8   -126.4    252.9      278 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7363 -0.4417 -0.1572  0.4323  4.7737 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  C_environment (Intercept) 1.847    1.359   
## Number of obs: 284, groups:  C_environment, 37
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -2.9272     0.7153  -4.092 4.27e-05 ***
## speakerS2                       -2.7272     0.5298  -5.148 2.64e-07 ***
## speakerS3                        1.5430     0.3905   3.951 7.77e-05 ***
## is_it_footed                     0.5762     0.5500   1.048  0.29485    
## V_status_wrt_Englishepenthetic   1.4922     0.5343   2.793  0.00522 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) spkrS2 spkrS3 is_t_f
## speakerS2   -0.029                     
## speakerS3   -0.454  0.216              
## is_it_footd -0.624 -0.028  0.138       
## V_stts_wr_E -0.327 -0.078  0.037 -0.181
Anova(deletion.glmer)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: deleted_binary
##                        Chisq Df Pr(>Chisq)    
## speaker              53.4045  2  2.531e-12 ***
## is_it_footed          1.0974  1   0.294845    
## V_status_wrt_English  7.8001  1   0.005224 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Have a look at the random intercept for each consonant environment. Here are the C__C environments, in ascending order of how often you get deletion:

# Pull out the estimated random intercept of every C_environment level and
# show the values from smallest to largest (the row labels are lost here
# because the one-column selection collapses to a plain vector)
temp_frame <- ranef(deletion.glmer)[["C_environment"]]
temp_frame[order(temp_frame[[1]]), ]
##  [1] -1.19386217 -1.14381941 -1.02774011 -0.96136729 -0.84805726 -0.80978299 -0.69109353 -0.63169264
##  [9] -0.53166310 -0.50069246 -0.50069246 -0.50069246 -0.49574718 -0.49574718 -0.41717657 -0.40648106
## [17] -0.40648106 -0.25418530 -0.14986288 -0.14986288 -0.14114100 -0.11401593  0.01077065  0.08048181
## [25]  0.25624130  0.60681900  0.68595447  0.84039865  0.94372646  1.00998767  1.08709948  1.27261532
## [33]  1.28951061  1.34858070  1.35049853  1.85593292  1.95196688
row.names(temp_frame)[order(temp_frame[,1])]
##  [1] "n_v"  "t_m"  "l_m"  "l_k"  "l_v"  "m_n"  "n_k"  "l_t"  "p_t"  "f_s"  "m_k"  "t_s"  "t_k" 
## [14] "t_t"  "s_f"  "l_s"  "p_l"  "p_k"  "l_f"  "v_l"  "t_n"  "n_s"  "n_f"  "s_n"  "s_m"  "k_l" 
## [27] "s_s"  "s_l"  "k_s"  "ng_k" "k_f"  "n_t"  "m_p"  "s_k"  "m_s"  "s_p"  "s_t"
cbind(row.names(temp_frame)[order(temp_frame[,1])], temp_frame[order(temp_frame[,1]),])
##       [,1]   [,2]                
##  [1,] "n_v"  "-1.19386217062271" 
##  [2,] "t_m"  "-1.14381940921684" 
##  [3,] "l_m"  "-1.02774010875997" 
##  [4,] "l_k"  "-0.96136729123742" 
##  [5,] "l_v"  "-0.848057257998045"
##  [6,] "m_n"  "-0.80978299484009" 
##  [7,] "n_k"  "-0.691093532365527"
##  [8,] "l_t"  "-0.63169264394912" 
##  [9,] "p_t"  "-0.531663099671024"
## [10,] "f_s"  "-0.500692456306816"
## [11,] "m_k"  "-0.500692456306816"
## [12,] "t_s"  "-0.500692456306816"
## [13,] "t_k"  "-0.495747178448532"
## [14,] "t_t"  "-0.495747178448532"
## [15,] "s_f"  "-0.417176566849591"
## [16,] "l_s"  "-0.406481060590826"
## [17,] "p_l"  "-0.406481060590826"
## [18,] "p_k"  "-0.254185298821497"
## [19,] "l_f"  "-0.149862879794764"
## [20,] "v_l"  "-0.149862879794764"
## [21,] "t_n"  "-0.14114100286009" 
## [22,] "n_s"  "-0.11401592502542" 
## [23,] "n_f"  "0.0107706475570784"
## [24,] "s_n"  "0.0804818080915829"
## [25,] "s_m"  "0.256241302944647" 
## [26,] "k_l"  "0.606818997368661" 
## [27,] "s_s"  "0.685954469290349" 
## [28,] "s_l"  "0.84039865192298"  
## [29,] "k_s"  "0.943726455073432" 
## [30,] "ng_k" "1.00998766987648"  
## [31,] "k_f"  "1.08709948310984"  
## [32,] "n_t"  "1.27261532172186"  
## [33,] "m_p"  "1.28951061044545"  
## [34,] "s_k"  "1.34858070439601"  
## [35,] "m_s"  "1.35049852978569"  
## [36,] "s_p"  "1.85593291513217"  
## [37,] "s_t"  "1.95196687814657"

We can compare this to a model that adds an interaction between speaker and V status–the interaction does not significantly improve the model:

# Same model as deletion.glmer, except that speaker and V_status_wrt_English
# enter as main effects plus their interaction (speaker * V_status_wrt_English).
# It is fitted to the same data so that the two models can be compared
# with anova() below.
deletion.glmer_interaction <- glmer(deleted_binary ~
  + (1|C_environment)
  + is_it_footed
  + speaker * V_status_wrt_English
  , data=tongan_deletion_subset,
  family=binomial)
# Print the fitted model
summary(deletion.glmer_interaction)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: deleted_binary ~ +(1 | C_environment) + is_it_footed + speaker *      V_status_wrt_English
##    Data: tongan_deletion_subset
## 
##      AIC      BIC   logLik deviance df.resid 
##    265.0    294.2   -124.5    249.0      276 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.2061 -0.3962 -0.1450  0.3885  3.4268 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  C_environment (Intercept) 1.874    1.369   
## Number of obs: 284, groups:  C_environment, 37
## 
## Fixed effects:
##                                          Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                               -2.4379     0.8125  -3.001  0.00269 **
## is_it_footed                               0.4886     0.5595   0.873  0.38250   
## speakerS2                                 -1.5939     1.2239  -1.302  0.19281   
## speakerS3                                  0.5131     0.7973   0.644  0.51987   
## V_status_wrt_Englishepenthetic             0.9629     0.7034   1.369  0.17097   
## speakerS2:V_status_wrt_Englishepenthetic  -1.2207     1.3422  -0.910  0.36309   
## speakerS3:V_status_wrt_Englishepenthetic   1.3427     0.9073   1.480  0.13890   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) is_t_f spkrS2 spkrS3 V_s__E sS2:V_
## is_it_footd -0.599                                   
## speakerS2   -0.313  0.012                            
## speakerS3   -0.580  0.138  0.331                     
## V_stts_wr_E -0.531 -0.068  0.402  0.575              
## spkS2:V___E  0.314 -0.023 -0.905 -0.307 -0.444       
## spkS3:V___E  0.453 -0.086 -0.300 -0.868 -0.645  0.325
Anova(deletion.glmer_interaction)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: deleted_binary
##                                Chisq Df Pr(>Chisq)    
## is_it_footed                  0.7626  1   0.382502    
## speaker                      50.2421  2   1.23e-11 ***
## V_status_wrt_English          7.3364  1   0.006757 ** 
## speaker:V_status_wrt_English  4.3497  2   0.113625    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
anova(deletion.glmer_interaction, deletion.glmer)
## Data: tongan_deletion_subset
## Models:
## deletion.glmer: deleted_binary ~ +(1 | C_environment) + speaker + is_it_footed + 
## deletion.glmer:     V_status_wrt_English
## deletion.glmer_interaction: deleted_binary ~ +(1 | C_environment) + is_it_footed + speaker * 
## deletion.glmer_interaction:     V_status_wrt_English
##                            Df    AIC    BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## deletion.glmer              6 264.88 286.78 -126.44   252.88                         
## deletion.glmer_interaction  8 264.97 294.16 -124.49   248.97 3.9121      2     0.1414

Get some predicted probabilities out of the model, for illustration

# Predicted deletion probabilities for a few illustrative cases, built on the
# logit scale from the fixed effects only and then back-transformed.
# Coefficient positions follow the model summary: 1 = (Intercept),
# 2 = speakerS2, 4 = is_it_footed, 5 = V_status_wrt_Englishepenthetic.
# The printed names are inherited from the first coefficient in each sum,
# so they do not fully describe the combined cases.
with(list(b = fixef(deletion.glmer)), invlogit(c(
  b[1],               # baseline: reference speaker (called S1 below), is_it_footed = 0, non-epenthetic
  b[2] + b[1],        # S2 baseline
  b[4] + b[1],        # S1, footed
  b[5] + b[1],        # S1, epenthetic
  b[4] + b[5] + b[1]  # S1, footed and epenthetic
)))
##                    (Intercept)                      speakerS2                   is_it_footed 
##                    0.050826854                    0.003489842                    0.086988488 
## V_status_wrt_Englishepenthetic                   is_it_footed 
##                    0.192322053                    0.297590461

Above, the variable deleted_binary treats items that showed deletion in just one of the two repetitions as deleted (there were 9 such items). Here, we try two other options. First, treating such items as non-deleted:

# Robustness check 1: same predictors and random intercept as deletion.glmer,
# but the response is (deleted > 0.5). A row whose `deleted` value is exactly
# 0.5 therefore counts as non-deleted; only values above 0.5 count as deleted.
deletion_treatAsNonDeleted.glmer <- glmer((deleted>0.5) ~
  + (1|C_environment)
  + speaker
  + is_it_footed
  + V_status_wrt_English
  , data=tongan_deletion_subset,
  family=binomial)
# Print the fitted model
summary(deletion_treatAsNonDeleted.glmer)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: (deleted > 0.5) ~ +(1 | C_environment) + speaker + is_it_footed +  
##     V_status_wrt_English
##    Data: tongan_deletion_subset
## 
##      AIC      BIC   logLik deviance df.resid 
##    265.8    287.6   -126.9    253.8      278 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9933 -0.3926 -0.1877  0.4975  5.2921 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  C_environment (Intercept) 1.643    1.282   
## Number of obs: 284, groups:  C_environment, 37
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -2.6352     0.6887  -3.826  0.00013 ***
## speakerS2                       -2.9039     0.5947  -4.883 1.05e-06 ***
## speakerS3                        1.3792     0.3696   3.732  0.00019 ***
## is_it_footed                     0.2913     0.5403   0.539  0.58986    
## V_status_wrt_Englishepenthetic   1.2420     0.5243   2.369  0.01784 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) spkrS2 spkrS3 is_t_f
## speakerS2   -0.053                     
## speakerS3   -0.434  0.221              
## is_it_footd -0.609 -0.001  0.107       
## V_stts_wr_E -0.314 -0.062  0.019 -0.189
Anova(deletion_treatAsNonDeleted.glmer)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: (deleted > 0.5)
##                        Chisq Df Pr(>Chisq)    
## speaker              48.1661  2  3.474e-11 ***
## is_it_footed          0.2906  1    0.58986    
## V_status_wrt_English  5.6122  1    0.01784 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

And here, we just exclude such items:

# Copy `deleted` into a new column, blanking out (NA) every value equal to 0.5
# so that those rows drop out of the model fitted on this column
tongan_deletion_subset$deleted_excluding <- replace(
  tongan_deletion_subset$deleted,
  tongan_deletion_subset$deleted == 0.5,
  NA
)

# Robustness check 2: same predictors and random intercept as deletion.glmer,
# but with deleted_excluding as the response. Rows where that column is NA
# (the former 0.5 values) are not used in the fit; the summary reports
# 275 observations instead of 284.
deletion_exclude.glmer <- glmer(deleted_excluding ~
  + (1|C_environment)
  + speaker
  + is_it_footed
  + V_status_wrt_English
  , data=tongan_deletion_subset,
  family=binomial)
# Print the fitted model
summary(deletion_exclude.glmer)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: deleted_excluding ~ +(1 | C_environment) + speaker + is_it_footed +  
##     V_status_wrt_English
##    Data: tongan_deletion_subset
## 
##      AIC      BIC   logLik deviance df.resid 
##    247.5    269.2   -117.8    235.5      269 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8002 -0.3676 -0.1301  0.4251  5.5413 
## 
## Random effects:
##  Groups        Name        Variance Std.Dev.
##  C_environment (Intercept) 2.063    1.436   
## Number of obs: 275, groups:  C_environment, 37
## 
## Fixed effects:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -2.9300     0.7390  -3.965 7.34e-05 ***
## speakerS2                       -3.1427     0.6185  -5.081 3.75e-07 ***
## speakerS3                        1.5287     0.3967   3.853 0.000117 ***
## is_it_footed                     0.4848     0.5664   0.856 0.392057    
## V_status_wrt_Englishepenthetic   1.4231     0.5477   2.598 0.009366 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) spkrS2 spkrS3 is_t_f
## speakerS2   -0.006                     
## speakerS3   -0.450  0.186              
## is_it_footd -0.613 -0.025  0.132       
## V_stts_wr_E -0.317 -0.083  0.044 -0.183
Anova(deletion_exclude.glmer)
## Analysis of Deviance Table (Type II Wald chisquare tests)
## 
## Response: deleted_excluding
##                        Chisq Df Pr(>Chisq)    
## speaker              49.6696  2  1.638e-11 ***
## is_it_footed          0.7326  1   0.392057    
## V_status_wrt_English  6.7518  1   0.009366 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Effect of consonant factors on i-deletion

Testing the consonant factors from above to see if they contribute significantly to the model. First, English C-C transition type (for epenthetic vowels only, since that’s the only case where there is an English transition). We can see that transition type does have a significant effect, but not in a way that’s particularly interpretable: the significant pairwise differences are that there is less deletion in son_fric than fric_stop or nas_stop:

# Plain logistic regression (glm, no random effects) of deleted_binary on
# speaker, is_it_footed and English_transition_types, fitted only to the rows
# whose V_status_wrt_English is "epenthetic". V_status_wrt_English is left out
# of the formula because it is constant within that subset.
# NOTE(review): in the summary, the son_son and son_stop coefficients have
# standard errors in the thousands. Those levels have very few tokens, so this
# looks like (quasi-)complete separation -- treat comparisons involving them
# with caution.
deletion_transitionType.glm <- glm(deleted_binary ~
  #+ (1|C_environment) #no random effect on C_enviro (Hessian is numerically singular)
  + speaker
  + is_it_footed
  + English_transition_types
  #+ V_status_wrt_English
  , data=tongan_deletion_subset[tongan_deletion_subset$V_status_wrt_English=="epenthetic",],
  family=binomial)
# Print the fitted model
summary(deletion_transitionType.glm)
## 
## Call:
## glm(formula = deleted_binary ~ +speaker + is_it_footed + English_transition_types, 
##     family = binomial, data = tongan_deletion_subset[tongan_deletion_subset$V_status_wrt_English == 
##         "epenthetic", ])
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4402  -0.5625  -0.1269   0.5805   2.2526  
## 
## Coefficients:
##                                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                         -2.5044     1.4229  -1.760   0.0784 .  
## speakerS2                           -2.7240     0.5640  -4.830 1.37e-06 ***
## speakerS3                            1.9648     0.4766   4.123 3.74e-05 ***
## is_it_footed                         0.5387     0.7013   0.768   0.4424    
## English_transition_typesfric_son     0.2297     1.6480   0.139   0.8892    
## English_transition_typesfric_stop    2.9260     1.3892   2.106   0.0352 *  
## English_transition_typesnas_stop     2.2350     1.3733   1.628   0.1036    
## English_transition_typesson_fric     0.2654     1.3891   0.191   0.8485    
## English_transition_typesson_son    -15.1410  1098.1188  -0.014   0.9890    
## English_transition_typesson_stop   -14.6003  2399.5451  -0.006   0.9951    
## English_transition_typesstop_fric    2.5774     1.5383   1.675   0.0938 .  
## English_transition_typesstop_son    -0.1792     1.7830  -0.100   0.9200    
## English_transition_typesstop_stop   -0.1275     1.7920  -0.071   0.9433    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 287.41  on 207  degrees of freedom
## Residual deviance: 168.96  on 195  degrees of freedom
##   (1 observation deleted due to missingness)
## AIC: 194.96
## 
## Number of Fisher Scoring iterations: 15
Anova(deletion_transitionType.glm)
## Analysis of Deviance Table (Type II tests)
## 
## Response: deleted_binary
##                          LR Chisq Df Pr(>Chisq)    
## speaker                    93.065  2  < 2.2e-16 ***
## is_it_footed                0.588  1      0.443    
## English_transition_types   39.814  9  8.213e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All pairwise (Tukey) contrasts between the English transition types,
# followed by the table of adjusted p-values
deletion_transitionType.glht_English_transition_types <- glht(
  deletion_transitionType.glm,
  linfct = mcp(English_transition_types = "Tukey")
)
summary(deletion_transitionType.glht_English_transition_types)
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = deleted_binary ~ +speaker + is_it_footed + English_transition_types, 
##     family = binomial, data = tongan_deletion_subset[tongan_deletion_subset$V_status_wrt_English == 
##         "epenthetic", ])
## 
## Linear Hypotheses:
##                              Estimate Std. Error z value Pr(>|z|)    
## fric_son - fric_fric == 0     0.22968    1.64800   0.139   1.0000    
## fric_stop - fric_fric == 0    2.92604    1.38923   2.106   0.4215    
## nas_stop - fric_fric == 0     2.23505    1.37328   1.628   0.7652    
## son_fric - fric_fric == 0     0.26543    1.38908   0.191   1.0000    
## son_son - fric_fric == 0    -15.14099 1098.11881  -0.014   1.0000    
## son_stop - fric_fric == 0   -14.60031 2399.54511  -0.006   1.0000    
## stop_fric - fric_fric == 0    2.57740    1.53832   1.675   0.7336    
## stop_son - fric_fric == 0    -0.17916    1.78298  -0.100   1.0000    
## stop_stop - fric_fric == 0   -0.12750    1.79198  -0.071   1.0000    
## fric_stop - fric_son == 0     2.69636    1.09440   2.464   0.2111    
## nas_stop - fric_son == 0      2.00537    1.06579   1.882   0.5863    
## son_fric - fric_son == 0      0.03575    1.08114   0.033   1.0000    
## son_son - fric_son == 0     -15.37067 1098.11846  -0.014   1.0000    
## son_stop - fric_son == 0    -14.82999 2399.54495  -0.006   1.0000    
## stop_fric - fric_son == 0     2.34772    1.29296   1.816   0.6352    
## stop_son - fric_son == 0     -0.40885    1.59243  -0.257   1.0000    
## stop_stop - fric_son == 0    -0.35718    1.61532  -0.221   1.0000    
## nas_stop - fric_stop == 0    -0.69099    0.49600  -1.393   0.8908    
## son_fric - fric_stop == 0    -2.66061    0.63105  -4.216    <0.01 ***
## son_son - fric_stop == 0    -18.06704 1098.11813  -0.016   1.0000    
## son_stop - fric_stop == 0   -17.52635 2399.54476  -0.007   1.0000    
## stop_fric - fric_stop == 0   -0.34864    0.86741  -0.402   1.0000    
## stop_son - fric_stop == 0    -3.10521    1.31717  -2.357   0.2649    
## stop_stop - fric_stop == 0   -3.05354    1.34056  -2.278   0.3105    
## son_fric - nas_stop == 0     -1.96962    0.58159  -3.387   0.0152 *  
## son_son - nas_stop == 0     -17.37604 1098.11810  -0.016   1.0000    
## son_stop - nas_stop == 0    -16.83536 2399.54475  -0.007   1.0000    
## stop_fric - nas_stop == 0     0.34235    0.85123   0.402   1.0000    
## stop_son - nas_stop == 0     -2.41421    1.29999  -1.857   0.6033    
## stop_stop - nas_stop == 0    -2.36255    1.32624  -1.781   0.6593    
## son_son - son_fric == 0     -15.40643 1098.11810  -0.014   1.0000    
## son_stop - son_fric == 0    -14.86574 2399.54478  -0.006   1.0000    
## stop_fric - son_fric == 0     2.31197    0.93267   2.479   0.2036    
## stop_son - son_fric == 0     -0.44460    1.32180  -0.336   1.0000    
## stop_stop - son_fric == 0    -0.39293    1.34907  -0.291   1.0000    
## son_stop - son_son == 0       0.54069 2638.87821   0.000   1.0000    
## stop_fric - son_son == 0     17.71839 1098.11835   0.016   1.0000    
## stop_son - son_son == 0      14.96183 1098.11873   0.014   1.0000    
## stop_stop - son_son == 0     15.01350 1098.11876   0.014   1.0000    
## stop_fric - son_stop == 0    17.17771 2399.54485   0.007   1.0000    
## stop_son - son_stop == 0     14.42114 2399.54507   0.006   1.0000    
## stop_stop - son_stop == 0    14.47281 2399.54508   0.006   1.0000    
## stop_son - stop_fric == 0    -2.75656    1.47478  -1.869   0.5960    
## stop_stop - stop_fric == 0   -2.70490    1.49282  -1.812   0.6378    
## stop_stop - stop_son == 0     0.05167    1.74736   0.030   1.0000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

How about the consonant manners in the Tongan word? They do matter, but only in that there’s more deletion in fric_stop than in sonorant_fric, and marginally more (p is above but close to 0.05) in fric_stop than in sonorant_stop or stop_stop (not surprising, since after a fricative is a common environment for devoicing/deletion in native words too):

# Plain logistic regression (glm, no random effects) of deleted_binary on
# speaker, is_it_footed, C_enviro_coarse and V_status_wrt_English, fitted to
# the subset without stressed vowels.
# NOTE(review): in the summary, the intercept and every C_enviro_coarse
# coefficient have standard errors of about 1093. The reference level is
# sonorant_sonorant, so this presumably means that level shows no deletion at
# all (separation) -- contrasts against sonorant_sonorant are then
# uninformative; confirm against the raw counts.
deletion_CEnviro.glm <- glm(deleted_binary ~
  #+ (1|C_environment) #no random effect on C_enviro (large eigenvalue ratio)
  + speaker
  + is_it_footed
  + C_enviro_coarse
  + V_status_wrt_English
  , data=tongan_deletion_subset,
  family=binomial)
# Print the fitted model
summary(deletion_CEnviro.glm)
## 
## Call:
## glm(formula = deleted_binary ~ +speaker + is_it_footed + C_enviro_coarse + 
##     V_status_wrt_English, family = binomial, data = tongan_deletion_subset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4162  -0.6638  -0.2040   0.6233   2.5312  
## 
## Coefficients:
##                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -19.3797  1093.5332  -0.018    0.986    
## speakerS2                        -2.7912     0.5361  -5.207 1.92e-07 ***
## speakerS3                         1.4731     0.3730   3.950 7.83e-05 ***
## is_it_footed                      0.6266     0.5097   1.229    0.219    
## C_enviro_coarsesonorant_fric     15.9208  1093.5330   0.015    0.988    
## C_enviro_coarsesonorant_stop     17.0583  1093.5330   0.016    0.988    
## C_enviro_coarsefric_sonorant     16.8936  1093.5332   0.015    0.988    
## C_enviro_coarsefric_fric         16.0567  1093.5334   0.015    0.988    
## C_enviro_coarsefric_stop         18.3821  1093.5331   0.017    0.987    
## C_enviro_coarsestop_sonorant     15.0519  1093.5336   0.014    0.989    
## C_enviro_coarsestop_fric         17.3352  1093.5332   0.016    0.987    
## C_enviro_coarsestop_stop         14.8838  1093.5336   0.014    0.989    
## V_status_wrt_Englishepenthetic    1.7615     0.4504   3.911 9.18e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 378.23  on 283  degrees of freedom
## Residual deviance: 229.12  on 271  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 255.12
## 
## Number of Fisher Scoring iterations: 16
Anova(deletion_CEnviro.glm)
## Analysis of Deviance Table (Type II tests)
## 
## Response: deleted_binary
##                      LR Chisq Df Pr(>Chisq)    
## speaker                95.092  2  < 2.2e-16 ***
## is_it_footed            1.537  1      0.215    
## C_enviro_coarse        45.202  8  3.369e-07 ***
## V_status_wrt_English   17.305  1  3.184e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# All pairwise (Tukey) contrasts among the levels of consonant environment,
# computed from the fitted logistic regression
deletion_CEnviro.glht_C_enviro_coarse <- glht(
  model = deletion_CEnviro.glm,
  linfct = mcp(C_enviro_coarse = "Tukey")
)
summary(deletion_CEnviro.glht_C_enviro_coarse)
## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps

## Warning in RET$pfunction("adjusted", ...): Completion with error > abseps
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glm(formula = deleted_binary ~ +speaker + is_it_footed + C_enviro_coarse + 
##     V_status_wrt_English, family = binomial, data = tongan_deletion_subset)
## 
## Linear Hypotheses:
##                                         Estimate Std. Error z value Pr(>|z|)    
## sonorant_fric - sonorant_sonorant == 0   15.9208  1093.5330   0.015   1.0000    
## sonorant_stop - sonorant_sonorant == 0   17.0583  1093.5330   0.016   1.0000    
## fric_sonorant - sonorant_sonorant == 0   16.8936  1093.5332   0.015   1.0000    
## fric_fric - sonorant_sonorant == 0       16.0567  1093.5334   0.015   1.0000    
## fric_stop - sonorant_sonorant == 0       18.3821  1093.5331   0.017   1.0000    
## stop_sonorant - sonorant_sonorant == 0   15.0519  1093.5336   0.014   1.0000    
## stop_fric - sonorant_sonorant == 0       17.3352  1093.5332   0.016   1.0000    
## stop_stop - sonorant_sonorant == 0       14.8838  1093.5336   0.014   1.0000    
## sonorant_stop - sonorant_fric == 0        1.1375     0.4665   2.439   0.2064    
## fric_sonorant - sonorant_fric == 0        0.9728     0.7812   1.245   0.9243    
## fric_fric - sonorant_fric == 0            0.1359     0.9951   0.137   1.0000    
## fric_stop - sonorant_fric == 0            2.4613     0.5214   4.720   <0.001 ***
## stop_sonorant - sonorant_fric == 0       -0.8689     1.2119  -0.717   0.9977    
## stop_fric - sonorant_fric == 0            1.4144     0.7533   1.878   0.5561    
## stop_stop - sonorant_fric == 0           -1.0370     1.1792  -0.879   0.9908    
## fric_sonorant - sonorant_stop == 0       -0.1647     0.7385  -0.223   1.0000    
## fric_fric - sonorant_stop == 0           -1.0016     0.9724  -1.030   0.9747    
## fric_stop - sonorant_stop == 0            1.3237     0.4575   2.894   0.0663 .  
## stop_sonorant - sonorant_stop == 0       -2.0064     1.1955  -1.678   0.6978    
## stop_fric - sonorant_stop == 0            0.2769     0.7197   0.385   1.0000    
## stop_stop - sonorant_stop == 0           -2.1746     1.1604  -1.874   0.5586    
## fric_fric - fric_sonorant == 0           -0.8369     1.1400  -0.734   0.9973    
## fric_stop - fric_sonorant == 0            1.4885     0.7636   1.949   0.5041    
## stop_sonorant - fric_sonorant == 0       -1.8417     1.3452  -1.369   0.8764    
## stop_fric - fric_sonorant == 0            0.4416     0.9458   0.467   0.9999    
## stop_stop - fric_sonorant == 0           -2.0098     1.3079  -1.537   0.7890    
## fric_stop - fric_fric == 0                2.3254     0.9992   2.327   0.2624    
## stop_sonorant - fric_fric == 0           -1.0048     1.4818  -0.678   0.9985    
## stop_fric - fric_fric == 0                1.2785     1.1397   1.122   0.9579    
## stop_stop - fric_fric == 0               -1.1730     1.4495  -0.809   0.9947    
## stop_sonorant - fric_stop == 0           -3.3302     1.2188  -2.732   0.1021    
## stop_fric - fric_stop == 0               -1.0468     0.7462  -1.403   0.8607    
## stop_stop - fric_stop == 0               -3.4983     1.1837  -2.955   0.0553 .  
## stop_fric - stop_sonorant == 0            2.2833     1.3322   1.714   0.6730    
## stop_stop - stop_sonorant == 0           -0.1681     1.6077  -0.105   1.0000    
## stop_stop - stop_fric == 0               -2.4515     1.3024  -1.882   0.5526    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

And finally, how about homorganicity? Only if we remove the random effect of consonant environment do we get a significant effect of homorganicity: there is more deletion if the consonants are homorganic (the likelihood-ratio chi-square for homorganicity is about one-third that of the vowel-status effect, and its coefficient is about half as large):

# Logistic regression testing homorganicity of the consonants alongside speaker,
# footing and vowel status. Fit with plain glm() (no random effects); the object
# keeps its ".glmer" name even though it is not a mixed model.
deletion_CsHomorganic.glmer <- glm(
  formula = deleted_binary ~
    #+ (1|C_environment) #remove it to give homorganicity its best shot
    + speaker
    + is_it_footed
    + Cs_homorganic_or_not
    + V_status_wrt_English,
  family = binomial,
  data = tongan_deletion_subset
)
summary(deletion_CsHomorganic.glmer)
## 
## Call:
## glm(formula = deleted_binary ~ +speaker + is_it_footed + Cs_homorganic_or_not + 
##     V_status_wrt_English, family = binomial, data = tongan_deletion_subset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8539  -0.6334  -0.3155   0.6287   2.7870  
## 
## Coefficients:
##                                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                     -2.7730     0.5700  -4.865 1.15e-06 ***
## speakerS2                       -2.3584     0.4889  -4.824 1.41e-06 ***
## speakerS3                        1.2352     0.3314   3.727 0.000194 ***
## is_it_footed                     0.3657     0.4606   0.794 0.427243    
## Cs_homorganic_or_nothomorganic   0.9027     0.3247   2.780 0.005430 ** 
## V_status_wrt_Englishepenthetic   1.7902     0.3971   4.508 6.54e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 378.23  on 283  degrees of freedom
## Residual deviance: 266.33  on 278  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 278.33
## 
## Number of Fisher Scoring iterations: 5
# Type II analysis-of-deviance (likelihood-ratio) test for each predictor
Anova(deletion_CsHomorganic.glmer)
## Analysis of Deviance Table (Type II tests)
## 
## Response: deleted_binary
##                      LR Chisq Df Pr(>Chisq)    
## speaker                81.471  2  < 2.2e-16 ***
## is_it_footed            0.638  1   0.424595    
## Cs_homorganic_or_not    7.997  1   0.004686 ** 
## V_status_wrt_English   23.805  1  1.066e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Interestingly, there is no significant interaction of homorganicity and vowel status. This suggests that deletion is about the resulting consonant cluster (and, independently, about the vowel status), not about being faithful to English when markedness allows it:

# Same model as the homorganicity model, but with the interaction of
# homorganicity and vowel status added (the "*" expands to both main effects
# plus their interaction). Fit with plain glm(); no random effects.
deletion_CsHomorganic_interact.glmer <- glm(
  formula = deleted_binary ~
    #+ (1|C_environment) #remove it to give homorganicity its best shot
    + speaker
    + is_it_footed
    + Cs_homorganic_or_not
    * V_status_wrt_English,
  family = binomial,
  data = tongan_deletion_subset
)
summary(deletion_CsHomorganic_interact.glmer)
## 
## Call:
## glm(formula = deleted_binary ~ +speaker + is_it_footed + Cs_homorganic_or_not * 
##     V_status_wrt_English, family = binomial, data = tongan_deletion_subset)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8126  -0.5480  -0.2954   0.6558   2.6751  
## 
## Coefficients:
##                                                               Estimate Std. Error z value Pr(>|z|)
## (Intercept)                                                    -3.5498     0.8620  -4.118 3.82e-05
## speakerS2                                                      -2.3397     0.4868  -4.807 1.53e-06
## speakerS3                                                       1.2080     0.3317   3.642 0.000271
## is_it_footed                                                    0.4396     0.4677   0.940 0.347343
## Cs_homorganic_or_nothomorganic                                  1.9615     0.8472   2.315 0.020591
## V_status_wrt_Englishepenthetic                                  2.6718     0.7976   3.350 0.000808
## Cs_homorganic_or_nothomorganic:V_status_wrt_Englishepenthetic  -1.3034     0.9236  -1.411 0.158179
##                                                                  
## (Intercept)                                                   ***
## speakerS2                                                     ***
## speakerS3                                                     ***
## is_it_footed                                                     
## Cs_homorganic_or_nothomorganic                                *  
## V_status_wrt_Englishepenthetic                                ***
## Cs_homorganic_or_nothomorganic:V_status_wrt_Englishepenthetic    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 378.23  on 283  degrees of freedom
## Residual deviance: 264.08  on 277  degrees of freedom
##   (2 observations deleted due to missingness)
## AIC: 278.08
## 
## Number of Fisher Scoring iterations: 5
# Type II analysis-of-deviance (likelihood-ratio) tests, including the
# homorganicity-by-vowel-status interaction
Anova(deletion_CsHomorganic_interact.glmer)
## Analysis of Deviance Table (Type II tests)
## 
## Response: deleted_binary
##                                           LR Chisq Df Pr(>Chisq)    
## speaker                                     80.076  2  < 2.2e-16 ***
## is_it_footed                                 0.894  1   0.344433    
## Cs_homorganic_or_not                         7.997  1   0.004686 ** 
## V_status_wrt_English                        23.805  1  1.066e-06 ***
## Cs_homorganic_or_not:V_status_wrt_English    2.246  1   0.134003    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Final V length

Read in the file and take a look at it:

# Read the full data set; the first line of the file holds the column names.
# - header is spelled out as TRUE: the shorthand T is an ordinary variable that
#   can be reassigned.
# - stringsAsFactors is set explicitly because later code treats the text
#   columns as factors (relevel(), factor-style summary()); read.table()'s
#   default for this argument changed to FALSE in R 4.0.0.
# NOTE(review): some characters in dictionary_entry print as "?" in the summary
# output -- possibly an encoding mismatch on read or on print; confirm the
# file's encoding.
tongan_superset <- read.table(
  "Tongan_forR_revised_July2016.txt",
  header = TRUE,
  stringsAsFactors = TRUE
)
summary(tongan_superset)
##      good_pattern good_pattern_binary     order     original_order speaker 
##  1         :305   Min.   :0.0000      139    :  2   139    :  5    S1:156  
##  V1_long   : 66   1st Qu.:0.0000      144    :  2   124    :  4    S2:209  
##  V2_long   : 65   Median :0.0000      147    :  2   125    :  4    S3:251  
##  V12_diphth: 59   Mean   :0.4951      178    :  2   128    :  4            
##  V3_long   : 53   3rd Qu.:1.0000      20     :  2   144    :  4            
##  V34_diphth: 29   Max.   :1.0000      (Other):599   (Other):588            
##  (Other)   : 39                       NA's   :  7   NA's   :  7            
##            dictionary_entry            definition   first_V_long    first_V_stressed_in_English
##  sovaleni,_s?valeni:  5     sacrament       :  6   Min.   :0.0000   Min.   :0.0000             
##  ‘?pelikoti        :  4     sovereign_(coin):  5   1st Qu.:0.0000   1st Qu.:0.0000             
##  ‘inisulato        :  4     apricot         :  4   Median :0.0000   Median :1.0000             
##  ‘univesiti        :  4     continent       :  4   Mean   :0.1033   Mean   :0.6136             
##  hemisefia         :  4     hemisphere      :  4   3rd Qu.:0.0000   3rd Qu.:1.0000             
##  hipopotama        :  4     hippopotamus    :  4   Max.   :1.0000   Max.   :1.0000             
##  (Other)           :591     (Other)         :589   NA's   :1                                   
##  first_V_epenthetic_in_English       first_V    first_V_glide_in_English second_V_long  
##  Min.   :0.0000                epenthetic:124   Min.   :0.00000          Min.   :0.000  
##  1st Qu.:0.0000                stressed  :378   1st Qu.:0.00000          1st Qu.:0.000  
##  Median :0.0000                unstressed:114   Median :0.00000          Median :0.000  
##  Mean   :0.2013                                 Mean   :0.04383          Mean   :0.112  
##  3rd Qu.:0.0000                                 3rd Qu.:0.00000          3rd Qu.:0.000  
##  Max.   :1.0000                                 Max.   :1.00000          Max.   :1.000  
##                                                                                         
##  second_V_stressed_in_English second_V_epenthetic_in_English       second_V  
##  Min.   :0.0000               Min.   :0.0000                 epenthetic:181  
##  1st Qu.:0.0000               1st Qu.:0.0000                 stressed  :200  
##  Median :0.0000               Median :0.0000                 unstressed:234  
##  Mean   :0.3263               Mean   :0.2943                 NA's      :  1  
##  3rd Qu.:1.0000               3rd Qu.:1.0000                                 
##  Max.   :1.0000               Max.   :1.0000                                 
##                               NA's   :1                                      
##  second_V_glide_in_English secondV_short_and_third_V_long first_two_Vs_VV  speaker_V1_quality
##  Min.   :0.0000            Min.   :0.00000                Min.   :0.0000   a   :253          
##  1st Qu.:0.0000            1st Qu.:0.00000                1st Qu.:0.0000   e   :137          
##  Median :0.0000            Median :0.00000                Median :0.0000   i   :108          
##  Mean   :0.1153            Mean   :0.08604                Mean   :0.1558   o   : 95          
##  3rd Qu.:0.0000            3rd Qu.:0.00000                3rd Qu.:0.0000   u   : 21          
##  Max.   :1.0000            Max.   :1.00000                Max.   :1.0000   NA's:  2          
##                                                                                              
##  speaker_V2_quality starts_with_possible_cluster  first_two_Cs      CVC_pattern  final_V 
##  a   :137           Min.   :0.0000               pl     : 62   CVCVCVCVCV :137   CV:484  
##  e   :100           1st Qu.:0.0000               kl     : 40   CVCVCVVCV  : 47   V:: 51  
##  i   :251           Median :0.0000               ‘n     : 33   CVCV:CVCV  : 37   VV: 81  
##  o   : 96           Mean   :0.2484               s0     : 22   CVCVCV:CVCV: 32           
##  u   : 29           3rd Qu.:0.0000               kn     : 21   CVCVCVCV:  : 24           
##  NA's:  3           Max.   :1.0000               sk     : 20   CVCVCVCVV  : 19           
##                                                  (Other):418   (Other)    :320           
##      final_V_type    penult_V_type CVC_pattern_coarse is_1st_V_stressed is_2nd_V_stressed
##  epenthetic:411   epenthetic: 36   CVCVCV-F :200      Min.   :0.0000    Min.   :0.0000   
##  stressed  :  5   glide     :  1   CVCV-F   : 65      1st Qu.:0.0000    1st Qu.:0.0000   
##  tense     : 42   stressed  : 37   CVCVCVVCV: 46      Median :1.0000    Median :0.0000   
##  unstressed: 73   unstressed: 48   CVCV:-F  : 43      Mean   :0.6145    Mean   :0.3278   
##  NA's      : 85   NA's      :494   CVVCV-F  : 22      3rd Qu.:1.0000    3rd Qu.:1.0000   
##                                    CV:CV-F  : 21      Max.   :1.0000    Max.   :1.0000   
##                                    (Other)  :219      NA's   :9         NA's   :12
names(tongan_superset)
##  [1] "good_pattern"                   "good_pattern_binary"           
##  [3] "order"                          "original_order"                
##  [5] "speaker"                        "dictionary_entry"              
##  [7] "definition"                     "first_V_long"                  
##  [9] "first_V_stressed_in_English"    "first_V_epenthetic_in_English" 
## [11] "first_V"                        "first_V_glide_in_English"      
## [13] "second_V_long"                  "second_V_stressed_in_English"  
## [15] "second_V_epenthetic_in_English" "second_V"                      
## [17] "second_V_glide_in_English"      "secondV_short_and_third_V_long"
## [19] "first_two_Vs_VV"                "speaker_V1_quality"            
## [21] "speaker_V2_quality"             "starts_with_possible_cluster"  
## [23] "first_two_Cs"                   "CVC_pattern"                   
## [25] "final_V"                        "final_V_type"                  
## [27] "penult_V_type"                  "CVC_pattern_coarse"            
## [29] "is_1st_V_stressed"              "is_2nd_V_stressed"

Some clean-ups needed:

# Keep only the tokens whose English final V is unstressed, including the
# tense-but-unstressed ones (final_V_type "unstressed" or "tense")
tongan_length <- subset(
  tongan_superset,
  final_V_type %in% c("unstressed", "tense")
)

# Remove the glide cases (subset() also drops rows where penult_V_type is NA)
tongan_length <- subset(tongan_length, penult_V_type != "glide")

# List the remaining words and the number of tokens of each; re-factoring
# first removes the words that no longer occur from the table
tongan_length$definition <- factor(tongan_length$definition)
table(tongan_length$definition)
## 
##          alligator              Aspro         astronomer              banjo          barometer 
##                  3                  2                  3                  3                  2 
##          centurion            chutney         Cinderella         commutator           cucumber 
##                  3                  1                  3                  2                  3 
##           customer        denominator           diameter         dictionary             dynamo 
##                  3                  2                  2                  3                  1 
##           etcetera           eternity           February          geography gladiolus_(flower) 
##                  2                  3                  1                  3                  3 
##              Glaxo         helicopter       hippopotamus          inspector          insulator 
##                  1                  3                  4                  3                  4 
##            January   lavender_(scent)           magnesia            Mercury             Mexico 
##                  1                  3                  1                  1                  2 
##           minister  organdie_(fabric)        phonography         phylactery          professor 
##                  3                  2                  3                  2                  2 
##         programmer           register          secretary          September            society 
##                  2                  3                  4                  2                  1 
##            station    sticker-plaster           swastika        transmitter            trustee 
##                  1                  3                  3                  4                  1 
##               unit         university             whisky 
##                  1                  4                  2
# Drop empty factor levels and put the penult levels in a fixed order
tongan_length$final_V <- factor(tongan_length$final_V)
# The inner relevel() moves "unstressed" to the front, the outer one then moves
# "epenthetic" in front of it
tongan_length$penult_V_type <- relevel(
  relevel(factor(tongan_length$penult_V_type), ref = "unstressed"),
  ref = "epenthetic"
)
# now the order should be epenthetic-unstressed-stressed

Plot the dependent variable (whether final V is long or not), by the main independent variable (what kind of English V the penult corresponds to)

# Cross-tabulate penult-V status (rows) against final-V length (columns)
length_counts <- with(
  tongan_length,
  table(penult_V_type, final_V, dnn = c("preceding V status", "final V length"))
)

length_counts
##                   final V length
## preceding V status CV V:
##         epenthetic  9 27
##         unstressed 28 15
##         stressed   35  0
# Mosaic plot of final-V length by penult-V status, drawn on the current device
mosaic(
  length_counts,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0),
    offset_varnames = c(left = 1),
    offset_labels = c(left = 0.1),
    varnames = c(TRUE, TRUE),
    set_labels = list("final V length" = c("short", "long"))
  ),
  margins = c(3, 0, 1, 2) + 0.1
)

# Write the same plot to a PNG file. The argument is spelled out as `filename`:
# png() has no `file` argument, so `file=` only worked through partial
# argument matching.
png(
  filename = "Vowel_length_overall_mosaic_plot2.png",
  width = myResMultiplier * 600,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
)
mosaic(
  length_counts,
  direction = "v",
  pop = FALSE,
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0),
    offset_varnames = c(left = 1),
    offset_labels = c(left = 0.1),
    varnames = c(TRUE, TRUE),
    set_labels = list("final V length" = c("short", "long"))
  ),
  margins = c(3, 0, 1, 2) + 0.1
)
dev.off()
## png 
##   2
# The same cross-tabulation, separately for each of the three speakers

length_counts_S1 <- with(
  tongan_length[tongan_length$speaker == "S1", ],
  table(penult_V_type, final_V, dnn = c("preceding V status", "final V length"))
)
length_counts_S2 <- with(
  tongan_length[tongan_length$speaker == "S2", ],
  table(penult_V_type, final_V, dnn = c("preceding V status", "final V length"))
)
length_counts_S3 <- with(
  tongan_length[tongan_length$speaker == "S3", ],
  table(penult_V_type, final_V, dnn = c("preceding V status", "final V length"))
)

length_counts_S1
##                   final V length
## preceding V status CV V:
##         epenthetic  2  7
##         unstressed  9  4
##         stressed   11  0
length_counts_S2
##                   final V length
## preceding V status CV V:
##         epenthetic  4  6
##         unstressed 10  4
##         stressed   12  0
length_counts_S3
##                   final V length
## preceding V status CV V:
##         epenthetic  3 14
##         unstressed  9  7
##         stressed   12  0
# Speaker 1 panel, captured as a grob so it can be arranged next to the others.
# Only this panel shows the "final V length" variable name and the short/long labels.
p1_length <- grid.grabExpr(
  mosaic(
    length_counts_S1,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 60),
      offset_varnames = c(left = 1),
      offset_labels = c(left = 0.1, top = 0.9),
      varnames = c(FALSE, TRUE),
      set_labels = list("final V length" = c("short", "long"))
    ),
    margins = c(3, 0, 0, 2) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "           Speaker 1"
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
# Speaker 2 panel, captured as a grob. Variable names are switched off and the
# final-V-length labels are blanked.
p2_length <- grid.grabExpr(
  mosaic(
    length_counts_S2,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 60),
      offset_varnames = c(left = 1),
      offset_labels = c(left = 0.1, top = 0.9),
      varnames = c(FALSE, FALSE),
      set_labels = list("final V length" = c("", ""))
    ),
    margins = c(3, 0, 0, -4) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "Speaker 2               "
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
# Speaker 3 panel, captured as a grob. Variable names are switched off and the
# final-V-length labels are blanked.
p3_length <- grid.grabExpr(
  mosaic(
    length_counts_S3,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 60),
      offset_varnames = c(left = 1),
      offset_labels = c(left = 0.1, top = 0.9),
      varnames = c(FALSE, FALSE),
      set_labels = list("final V length" = c("", ""))
    ),
    margins = c(3, 0, 0, -12) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "Speaker 3                                                   "
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
# Show the three per-speaker panels side by side
grid.arrange(p1_length, p2_length, p3_length, ncol = 3)

# Write plot to file for use in paper. The argument is spelled out as
# `filename`: png() has no `file` argument, so `file=` only worked through
# partial argument matching.
png(
  filename = "Vowel_length_individual_speaker_mosaic_plots2.png",
  width = myResMultiplier * 600,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
)
grid.arrange(p1_length, p2_length, p3_length, ncol = 3)
dev.off()
## png 
##   2

Now split up the data according to whether the final English vowel is schwa or a tense V (/i/ or /o/):

# Tokens whose final_V_type is "unstressed", and their cross-tabulation of
# penult-V status by final-V length
tongan_length_schwa <- subset(tongan_length, final_V_type == "unstressed")
length_counts_schwa <- with(
  tongan_length_schwa,
  table(penult_V_type, final_V, dnn = c("preceding V status", "final V length"))
)
# The same for tokens whose final_V_type is "tense"
tongan_length_tense <- subset(tongan_length, final_V_type == "tense")
length_counts_tense <- with(
  tongan_length_tense,
  table(penult_V_type, final_V, dnn = c("preceding V status", "final V length"))
)

length_counts_schwa
##                   final V length
## preceding V status CV V:
##         epenthetic  7 16
##         unstressed 14  8
##         stressed   27  0
length_counts_tense
##                   final V length
## preceding V status CV V:
##         epenthetic  2 11
##         unstressed 14  7
##         stressed    8  0
# Panel for the final-schwa subset, captured as a grob. This panel shows the
# "final V length" variable name and the short/long labels.
pschwa <- grid.grabExpr(
  mosaic(
    length_counts_schwa,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 60),
      offset_varnames = c(left = 1),
      offset_labels = c(left = 0.1, top = 0.9),
      varnames = c(FALSE, TRUE),
      set_labels = list("final V length" = c("short", "long"))
    ),
    margins = c(3, 0, 0, 2) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "          English final schwa"
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
# Panel for the final-tense-V subset, captured as a grob. Variable names are
# switched off and the final-V-length labels are blanked.
ptense <- grid.grabExpr(
  mosaic(
    length_counts_tense,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0, top = 60),
      offset_varnames = c(left = 1),
      offset_labels = c(left = 0.1, top = 0.9),
      varnames = c(FALSE, FALSE),
      set_labels = list("final V length" = c("", ""))
    ),
    margins = c(3, 0, 0, -4) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "English final tense V          "
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
# Show the schwa and tense-V panels side by side
grid.arrange(pschwa, ptense, ncol = 2)

# Write to file. The argument is spelled out as `filename`: png() has no `file`
# argument, so `file=` only worked through partial argument matching.
png(
  filename = "Vowel_length_mosaic_plot_by_V_type2.png",
  width = myResMultiplier * 500,
  height = myResMultiplier * 350,
  res = myResMultiplier * 72
)
grid.arrange(pschwa, ptense, ncol = 2)
dev.off()
## png 
##   2

Logistic regression for final V length

Here we can use bayesglm() without difficulty to handle the separation in the data (tokens with a stressed penult never have a long final V), because there are no random effects. First, we make the model with maximal interactions:

# Make "unstressed" the reference level for both categorical predictors
tongan_length$penult_V_type <- relevel(tongan_length$penult_V_type, "unstressed")
# final_V_type was subset to "unstressed"/"tense" but never re-factored, so it
# still carries the now-empty levels "epenthetic" and "stressed". Re-factor it
# before relevelling so those empty levels do not enter the model as all-zero
# dummy columns (which inflate the parameter count used for AIC).
tongan_length$final_V_type <- relevel(factor(tongan_length$final_V_type), "unstressed")

# Maximal model: speaker, penult-V type and final-V type with all their
# interactions, as a binomial (logistic) model fit with bayesglm()
final_V.bayesGLM <- bayesglm(
  final_V ~ speaker * penult_V_type * final_V_type,
  family = binomial,
  data = tongan_length
)

Then, we use the function step() to choose the best sub-model:

# AIC-based model selection with step() at its default settings, starting from
# the maximal model; the trace of each step is printed
final_V.best <- step(final_V.bayesGLM)
## Start:  AIC=163.93
## final_V ~ speaker * penult_V_type * final_V_type
## 
##                                      Df Deviance    AIC
## - speaker:penult_V_type:final_V_type 22   91.732 119.73
## <none>                                    91.928 163.93
## 
## Step:  AIC=140.68
## final_V ~ speaker + penult_V_type + final_V_type + speaker:penult_V_type + 
##     speaker:final_V_type + penult_V_type:final_V_type
## 
##                              Df Deviance    AIC
## - speaker:penult_V_type      14   91.998 112.00
## - penult_V_type:final_V_type 12   92.415 116.42
## - speaker:final_V_type       12   92.672 116.67
## <none>                            92.681 140.68
## 
## Step:  AIC=132.9
## final_V ~ speaker + penult_V_type + final_V_type + speaker:final_V_type + 
##     penult_V_type:final_V_type
## 
##                              Df Deviance    AIC
## - penult_V_type:final_V_type 12   92.679 108.68
## - speaker:final_V_type       12   92.983 108.98
## <none>                            92.904 132.90
## 
## Step:  AIC=121.55
## final_V ~ speaker + penult_V_type + final_V_type + speaker:final_V_type
## 
##                        Df Deviance    AIC
## - speaker:final_V_type  8   93.687 105.69
## <none>                      93.547 121.55
## - penult_V_type         8  145.031 157.03
## 
## Step:  AIC=110.38
## final_V ~ speaker + penult_V_type + final_V_type
## 
##                 Df Deviance    AIC
## - speaker        4   95.892 103.89
## - final_V_type   3   93.900 103.90
## <none>               94.382 110.38
## - penult_V_type  4  145.686 153.69
## 
## Step:  AIC=108.58
## final_V ~ penult_V_type + final_V_type
## 
##                 Df Deviance    AIC
## - final_V_type   3   96.106 102.11
## <none>               96.578 108.58
## - penult_V_type  4  149.022 153.02
## 
## Step:  AIC=102.79
## final_V ~ penult_V_type
## 
##                 Df Deviance    AIC
## <none>               96.789 102.79
## - penult_V_type  2  150.049 152.05
summary(final_V.best)
## 
## Call:
## bayesglm(formula = final_V ~ penult_V_type, family = binomial, 
##     data = tongan_length)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.6419  -0.9281  -0.1377   0.7756   1.4491  
## 
## Coefficients:
##                         Estimate Std. Error z value Pr(>|z|)    
## (Intercept)              -0.6193     0.3153  -1.964 0.049512 *  
## penult_V_typeepenthetic   1.6665     0.4833   3.448 0.000565 ***
## penult_V_typestressed    -4.0339     1.5842  -2.546 0.010887 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 150.049  on 113  degrees of freedom
## Residual deviance:  96.789  on 111  degrees of freedom
## AIC: 102.79
## 
## Number of Fisher Scoring iterations: 19
# Sequential analysis-of-deviance table for the selected model, with chi-square tests
anova(final_V.best, test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: final_V
## 
## Terms added sequentially (first to last)
## 
## 
##               Df Deviance Resid. Df Resid. Dev Pr(>Chi)    
## NULL                            113    150.049             
## penult_V_type  2    53.26       111     96.789 2.72e-12 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Secondary stress

This actually comes first in the paper.

Take the relevant subset of the data read in above (tongan_superset) and take a look:

# Keep the rows where good_pattern_binary is 1 and the two stress indicators
# (is_1st_V_stressed, is_2nd_V_stressed) sum to 1, i.e. exactly one of the
# first two vowels is marked as stressed
tongan_stress <- subset(
  tongan_superset,
  good_pattern_binary == 1 &
    (is_1st_V_stressed + is_2nd_V_stressed == 1)
)

summary(tongan_stress)
##                  good_pattern good_pattern_binary     order     original_order speaker 
##  1                     :293   Min.   :1           788    :  2   350    :  4    S1: 97  
##  4moras                :  0   1st Qu.:1           971    :  2   10     :  3    S2: 92  
##  antepenult_but_long   :  0   Median :1           m167   :  2   100    :  3    S3:104  
##  antepenult_main_stress:  0   Mean   :1           10     :  1   108    :  3            
##  glide                 :  0   3rd Qu.:1           100    :  1   12     :  3            
##  semi_duplicate        :  0   Max.   :1           (Other):279   (Other):271            
##  (Other)               :  0                       NA's   :  6   NA's   :  6            
##      dictionary_entry      definition   first_V_long first_V_stressed_in_English
##  ‘amipasitoa :  3     alcohol   :  3   Min.   :0     Min.   :0.0000             
##  ‘apenitiki  :  3     ambassador:  3   1st Qu.:0     1st Qu.:0.0000             
##  ‘aposetolo  :  3     apostle   :  3   Median :0     Median :1.0000             
##  ‘asipulini  :  3     appendix  :  3   Mean   :0     Mean   :0.5631             
##  ‘asital?noma:  3     aspirin   :  3   3rd Qu.:0     3rd Qu.:1.0000             
##  ‘Atalanitiki:  3     astronomer:  3   Max.   :0     Max.   :1.0000             
##  (Other)     :275     (Other)   :275                                            
##  first_V_epenthetic_in_English       first_V    first_V_glide_in_English second_V_long     
##  Min.   :0.0000                epenthetic: 66   Min.   :0.00000          Min.   :0.000000  
##  1st Qu.:0.0000                stressed  :165   1st Qu.:0.00000          1st Qu.:0.000000  
##  Median :0.0000                unstressed: 62   Median :0.00000          Median :0.000000  
##  Mean   :0.2253                                 Mean   :0.04778          Mean   :0.003413  
##  3rd Qu.:0.0000                                 3rd Qu.:0.00000          3rd Qu.:0.000000  
##  Max.   :1.0000                                 Max.   :1.00000          Max.   :1.000000  
##                                                                                            
##  second_V_stressed_in_English second_V_epenthetic_in_English       second_V  
##  Min.   :0.0000               Min.   :0.000                  epenthetic:109  
##  1st Qu.:0.0000               1st Qu.:0.000                  stressed  :112  
##  Median :0.0000               Median :0.000                  unstressed: 72  
##  Mean   :0.3823               Mean   :0.372                                  
##  3rd Qu.:1.0000               3rd Qu.:1.000                                  
##  Max.   :1.0000               Max.   :1.000                                  
##                                                                              
##  second_V_glide_in_English secondV_short_and_third_V_long first_two_Vs_VV   speaker_V1_quality
##  Min.   :0.00000           Min.   :0                      Min.   :0.00000   a   :108          
##  1st Qu.:0.00000           1st Qu.:0                      1st Qu.:0.00000   e   : 69          
##  Median :0.00000           Median :0                      Median :0.00000   i   : 63          
##  Mean   :0.03413           Mean   :0                      Mean   :0.06143   o   : 47          
##  3rd Qu.:0.00000           3rd Qu.:0                      3rd Qu.:0.00000   u   :  4          
##  Max.   :1.00000           Max.   :0                      Max.   :1.00000   NA's:  2          
##                                                                                               
##  speaker_V2_quality starts_with_possible_cluster  first_two_Cs        CVC_pattern  final_V 
##  a   : 46           Min.   :0.0000               pl     : 32   CVCVCVCVCV   :132   CV:221  
##  e   : 42           1st Qu.:0.0000               kl     : 25   CVCVCVCV:    : 23   V:: 38  
##  i   :134           Median :0.0000               ‘n     : 15   CVCVCVVCV    : 23   VV: 34  
##  o   : 52           Mean   :0.2765               ‘s     : 15   CVCVCVCVV    : 16           
##  u   : 17           3rd Qu.:1.0000               pn     : 15   CVCVCVCVVCV  : 15           
##  NA's:  2           Max.   :1.0000               kn     : 13   CVCVCVCV:CVCV: 14           
##                                                  (Other):178   (Other)      : 70           
##      final_V_type    penult_V_type   CVC_pattern_coarse is_1st_V_stressed is_2nd_V_stressed
##  epenthetic:183   epenthetic: 27   CVCVCV-F   :189      Min.   :0.0000    Min.   :0.0000   
##  stressed  :  3   glide     :  1   CVCVCVVCV  : 22      1st Qu.:0.0000    1st Qu.:0.0000   
##  tense     : 20   stressed  : 21   CVCV-F     : 19      Median :1.0000    Median :0.0000   
##  unstressed: 52   unstressed: 29   CVCVCVCVVCV: 15      Mean   :0.6416    Mean   :0.3584   
##  NA's      : 35   NA's      :215   CVVCV-F    :  8      3rd Qu.:1.0000    3rd Qu.:1.0000   
##                                    CVCVV-F    :  7      Max.   :1.0000    Max.   :1.0000   
##                                    (Other)    : 33
# List the column names of the stress data frame
colnames(tongan_stress)
##  [1] "good_pattern"                   "good_pattern_binary"           
##  [3] "order"                          "original_order"                
##  [5] "speaker"                        "dictionary_entry"              
##  [7] "definition"                     "first_V_long"                  
##  [9] "first_V_stressed_in_English"    "first_V_epenthetic_in_English" 
## [11] "first_V"                        "first_V_glide_in_English"      
## [13] "second_V_long"                  "second_V_stressed_in_English"  
## [15] "second_V_epenthetic_in_English" "second_V"                      
## [17] "second_V_glide_in_English"      "secondV_short_and_third_V_long"
## [19] "first_two_Vs_VV"                "speaker_V1_quality"            
## [21] "speaker_V2_quality"             "starts_with_possible_cluster"  
## [23] "first_two_Cs"                   "CVC_pattern"                   
## [25] "final_V"                        "final_V_type"                  
## [27] "penult_V_type"                  "CVC_pattern_coarse"            
## [29] "is_1st_V_stressed"              "is_2nd_V_stressed"

Take a look at the actual words, and how many have each pattern:

# How many word types are there? Count the tokens per definition, sorted from
# rarest to most frequent. factor() rebuilds the levels from the values that
# actually occur, so definitions with no tokens are left out of the table.
defs_table <- sort(table(factor(tongan_stress[["definition"]])))
defs_table
## 
##                 asbestos                battalion               blancmange                brigadier 
##                        1                        1                        1                        1 
##                broadcast              cauliflower                  chutney                 civilian 
##                        1                        1                        1                        1 
##             confirmation              denominator                depositor                elocution 
##                        1                        1                        1                        1 
##               epiglottis                 etcetera                   France                    Glaxo 
##                        1                        1                        1                        1 
##                glycerine               indulgence                 linoleum                  Mercury 
##                        1                        1                        1                        1 
##                metronome               nasturtium                nectarine        organdie_(fabric) 
##                        1                        1                        1                        1 
##                penitance                 Pleiades               Protestant                 republic 
##                        1                        1                        1                        1 
##                sacrilege                secretary                 seminary                  spaniel 
##                        1                        1                        1                        1 
##               tabernacle               television                     unit               university 
##                        1                        1                        1                        1 
##                 aluminum                 antelope                  apricot                    Aspro 
##                        2                        2                        2                        2 
##               commutator                consonant    constitution,_charter                  crucify 
##                        2                        2                        2                        2 
##           eucalyptus_oil                evolution fraction_(in_arithmetic)        geranium_(flower) 
##                        2                        2                        2                        2 
##               gramophone               heliograph               immaculate         jonquil_(flower) 
##                        2                        2                        2                        2 
##               lithograph                magnesium                Manchuria               manuscript 
##                        2                        2                        2                        2 
##            Mediterranean                   Mexico                parachute                paragraph 
##                        2                        2                        2                        2 
##                 pendulum                petroleum                  plastic                porcupine 
##                        2                        2                        2                        2 
##                potassium                predicate               programmer                 register 
##                        2                        2                        2                        2 
##                sacrifice          sticker-plaster              transmitter                 trombone 
##                        2                        2                        2                        2 
##                   unison                  alcohol               ambassador                  apostle 
##                        2                        3                        3                        3 
##                 appendix                  aspirin               astronomer                 Atlantic 
##                        3                        3                        3                        3 
##               atmosphere                    banjo                breakfast                centurion 
##                        3                        3                        3                        3 
##                Christian                Christmas               Cinderella                communist 
##                        3                        3                        3                        3 
##               conference                continent                crocodile                  crystal 
##                        3                        3                        3                        3 
##                 cucumber                 customer                democracy               dictionary 
##                        3                        3                        3                        3 
##                 engineer                 estimate       gladiolus_(flower)               hallelujah 
##                        3                        3                        3                        3 
##               helicopter               hemisphere             hippopotamus                inspector 
##                        3                        3                        3                        3 
##                insulator                  insulin                 kangaroo         lavender_(scent) 
##                        3                        3                        3                        3 
##               locomotive                 mandolin      math,_matriculation                 meridian 
##                        3                        3                        3                        3 
##              millionaire                 minister                 monogram        obbligato_(music) 
##                        3                        3                        3                        3 
##                  ostrich                 pancreas                  penguin                Pentecost 
##                        3                        3                        3                        3 
##              phonography                president                 princess                  problem 
##                        3                        3                        3                        3 
##                 prophesy               quarantine                sacrament                Sanhedrin 
##                        3                        3                        3                        3 
##                saxophone              scholarship                  stencil                 swastika 
##                        3                        3                        3                        3 
##                telegraph              trade_union 
##                        3                        3
# Number of distinct word types (defs_table is a one-dimensional table)
length(defs_table)
## [1] 134
# How many tokens have each stress pattern?
# Rows: is_1st_V_stressed; columns: is_2nd_V_stressed
table(
  tongan_stress[["is_1st_V_stressed"]],
  tongan_stress[["is_2nd_V_stressed"]]
)
##    
##       0   1
##   0   0 105
##   1 188   0
# Tabulate the definitions of the "possible cluster" items, i.e. the tokens with
# starts_with_possible_cluster == 1. Every level of definition is listed, so
# definitions without such tokens show up with a count of 0.
with(tongan_stress, table(definition[starts_with_possible_cluster == 1]))
## 
##                   absolution                      admiral                      alcohol 
##                            0                            0                            0 
##                    alligator                     aluminum                   ambassador 
##                            0                            0                            0 
##                      analyse                    antarctic                     antelope 
##                            0                            0                            0 
##                      apostle                     appendix                      apricot 
##                            0                            0                            0 
##                       arctic                      arsenic                     asbestos 
##                            0                            0                            0 
##                      aspirin                        Aspro                   astronomer 
##                            0                            0                            0 
##                     Atlantic                   atmosphere                    Australia 
##                            0                            0                            0 
##                        banjo                    barbarian                     baritone 
##                            0                            0                            0 
##                    barometer                    battalion                      bayonet 
##                            0                            0                            0 
##                   blancmange               blotting-paper                    boomerang 
##                            1                            0                            0 
##                       braces                       Brazil                    breakfast 
##                            0                            0                            3 
##                    brigadier                      Britain                    broadcast 
##                            1                            0                            1 
##                       bronze                      captain carbolic_acid,_carbolic_soap 
##                            0                            0                            0 
##                    carnation                  cauliflower                    centurion 
##                            0                            1                            0 
##                   chlorodyne                    Christian                    Christmas 
##                            0                            3                            3 
##                    chromatic                      chutney                   Cinderella 
##                            0                            0                            3 
##                     civilian        civilization,_culture                       claret 
##                            0                            0                            0 
##                     clarinet             clarkia_(flower)                      climate 
##                            0                            0                            0 
##                      cocaine                    communist                   commutator 
##                            0                            0                            0 
##                     compound                   conference                 confirmation 
##                            0                            0                            0 
##                   conscience                    consonant        constitution,_charter 
##                            0                            0                            0 
##                    continent                    crocodile                      crucify 
##                            0                            3                            2 
##                      crystal                     cucumber                     customer 
##                            3                            0                            0 
##                      cypress                    democracy                  denominator 
##                            0                            0                            0 
##                      deposit                    depositor                     diagonal 
##                            0                            0                            0 
##                      diagram                     diameter                      diamond 
##                            0                            0                            0 
##                   dictionary                      diocese                     disciple 
##                            0                            0                            0 
##                     discount                     doctrine                       dragon 
##                            0                            0                            0 
##                     dynamics                       dynamo                      ebonite 
##                            0                            0                            0 
##                      element                     elephant                    elevation 
##                            0                            0                            0 
##                    elocution                       empire                       enamel 
##                            0                            0                            0 
##                     engineer                    Epicurean                   epiglottis 
##                            0                            0                            0 
##                     estimate                     etcetera                     eternity 
##                            0                            0                            0 
##               eucalyptus_oil                    evolution                     February 
##                            0                            0                            0 
##               firmament,_sky                       florin                     fountain 
##                            0                            0                            0 
##     fraction_(in_arithmetic)                       France                        frost 
##                            2                            1                            0 
##                      Gentile                    geography            geranium_(flower) 
##                            0                            0                            2 
##                        giant           gladiolus_(flower)                        Glaxo 
##                            0                            3                            1 
##                        glory                    glycerine                       gospel 
##                            0                            1                            0 
##                   gramophone                      granite                      grenade 
##                            2                            0                            0 
##   grenadilla_(passion_fruit)                   hallelujah                   helicopter 
##                            0                            0                            0 
##                   heliograph                   hemisphere                 hippopotamus 
##                            0                            0                            0 
##                      history            hyacinth_(flower)           hydrangea_(flower) 
##                            0                            0                            0 
##                     hydrogen                   hypotenuse                   immaculate 
##                            0                            0                            0 
##                      incense                   indulgence                       insect 
##                            0                            0                            0 
##                    inspector                    insulator                      insulin 
##                            0                            0                            0 
##                      invoice                       jacket                      January 
##                            0                            0                            0 
##                    Jerusalem             jonquil_(flower)                     kangaroo 
##                            0                            0                            0 
##             lavender_(scent)                     linoleum                   lithograph 
##                            0                            0                            0 
##                   locomotive                     magnesia                    magnesium 
##                            0                            0                            0 
##                      malaria                    Manchuria                     mandolin 
##                            0                            0                            0 
##                   manuscript                    margarine                     marigold 
##                            0                            0                            0 
##          math,_matriculation                   mayonnaise                Mediterranean 
##                            0                            0                            0 
##                     merchant                      Mercury                     meridian 
##                            0                            0                            0 
##                      Messiah                    metronome                       Mexico 
##                            0                            0                            0 
##                   microphone                  millionaire                     minister 
##                            0                            0                            0 
##                     molasses                     mongoose                     monogram 
##                            0                            0                            0 
##                       muslin                   nasturtium                    nectarine 
##                            0                            0                            0 
##                       Norway            obbligato_(music)            organdie_(fabric) 
##                            0                            0                            0 
##                      ostrich                     pancreas                    parachute 
##                            0                            0                            2 
##                     Paradise                    paragraph                     parallel 
##                            0                            2                            0 
##                    patriarch                     pendulum                      penguin 
##                            0                            0                            0 
##                    penitance                    Pentecost                    petroleum 
##                            0                            0                            0 
##                  phonography                   phylactery                      pioneer 
##                            0                            0                            0 
##                      plastic                     Pleiades                    pneumonia 
##                            2                            1                            0 
##                   poinsettia                  pomegranate                    porcupine 
##                            0                            0                            0 
##                    potassium                    predicate                    president 
##                            0                            2                            3 
##                     princess                      private                      problem 
##                            3                            0                            3 
##                    professor                   programmer                      promise 
##                            0                            2                            0 
##                     prophesy                      prophet                   Protestant 
##                            3                            0                            1 
##                      proverb                   quarantine                       radium 
##                            0                            3                            0 
##                     register                     republic                   rhinoceros 
##                            0                            0                            0 
##                       rosary                    sacrament                    sacrifice 
##                            0                            0                            0 
##                    sacrilege                     sandwich                    Sanhedrin 
##                            0                            0                            0 
##                    saxophone                  scholarship                      science 
##                            0                            3                            0 
##                       scribe                    secretary                     seminary 
##                            0                            1                            0 
##                    September                     seraphim                      servant 
##                            0                            0                            0 
##                    serviette                      sirloin                      society 
##                            0                            0                            0 
##             sovereign_(coin)                      spaniel                      station 
##                            0                            1                            0 
##                      stencil              sticker-plaster                    stockings 
##                            3                            2                            0 
##                     swastika                       Sweden                   tabernacle 
##                            0                            0                            0 
##                    telegraph                   television                  trade_union 
##                            3                            1                            0 
##                  transmitter                      triceps                       trifle 
##                            2                            0                            0 
##                     trombone                     trousers                      trustee 
##                            2                            0                            0 
##                   turpentine                       unison                         unit 
##                            0                            0                            0 
##                   university                      vehicle                       violet 
##                            0                            0                            0 
##                       violin                        waltz                     Wesleyan 
##                            0                            0                            0 
##                       whisky                        widow                  willow_tree 
##                            0                            0                            0 
##                     wisteria                    xylophone                       zodiac 
##                            0                            0                            0
# Cross-tabulate starts_with_possible_cluster (rows) against first_V (columns)
table(
  tongan_stress[["starts_with_possible_cluster"]],
  tongan_stress[["first_V"]]
)
##    
##     epenthetic stressed unstressed
##   0          3      149         60
##   1         63       16          2
#How can there be 3 items that don't start with a possible cluster, and yet the first V is epenthetic? These are jonquil (twice) and chutney, where the English post-alveolar is rendered as [si].

# Table for the paper: first_V (rows) by second_V (columns), with one slice per
# value of is_1st_V_stressed
stressTable <- table(
  tongan_stress[["first_V"]],
  tongan_stress[["second_V"]],
  tongan_stress[["is_1st_V_stressed"]]
)
stressTable
## , ,  = 0
## 
##             
##              epenthetic stressed unstressed
##   epenthetic          0       48          1
##   stressed           16        0         22
##   unstressed          2       16          0
## 
## , ,  = 1
## 
##             
##              epenthetic stressed unstressed
##   epenthetic          0       17          0
##   stressed           78        0         49
##   unstressed         13       31          0

Can we make a nice mosaic plot? A plot of the full three-way table (V1 status by V2 status by which vowel is stressed) is hard to read, so we treat each combination of V1 and V2 statuses as a separate category.

# Label every token "V1 <first_V> V2 <second_V>" and turn the labels into a
# factor. The levels are then reordered by moving one level to the front at a
# time; the vector below lists the levels in their final order, so it is
# reversed before the levels are moved (the last one moved ends up first).
tongan_stress$combined_categories <- Reduce(
  function(categories, front_level) relevel(categories, front_level),
  rev(c(
    "V1 epenthetic V2 stressed",
    "V1 epenthetic V2 unstressed",
    "V1 unstressed V2 stressed",
    "V1 stressed V2 unstressed",
    "V1 unstressed V2 epenthetic",
    "V1 stressed V2 epenthetic"
  )),
  as.factor(paste("V1", tongan_stress$first_V, "V2", tongan_stress$second_V))
)

# check the resulting level order
levels(tongan_stress[["combined_categories"]])
## [1] "V1 epenthetic V2 stressed"   "V1 epenthetic V2 unstressed" "V1 unstressed V2 stressed"  
## [4] "V1 stressed V2 unstressed"   "V1 unstressed V2 epenthetic" "V1 stressed V2 epenthetic"
# Two-way table: combined V1/V2 category (rows) by is_1st_V_stressed (columns)
stressTable2 <- table(
  tongan_stress[["combined_categories"]],
  tongan_stress[["is_1st_V_stressed"]],
  dnn = c("Vs status", "secondary stress")
)

# Mosaic plot of that table.
# NOTE(review): set_labels appears to replace the levels of "secondary stress"
# in order, so level 0 of is_1st_V_stressed would be shown as "V1" and level 1
# as "V2" -- confirm that this is the intended mapping.
mosaic(
  stressTable2,
  direction = "v",
  pop = FALSE,
  # fonts and colors
  gp_varnames = gpSerif,
  gp_labels = gpSerif,
  gp = gpar(fill = c("black", "grey")),
  labeling_args = list(
    rot_labels = c(left = 0, top = 60),
    offset_varnames = c(left = 0.5),
    offset_labels = c(left = 0.5, top = 3),
    varnames = c(FALSE, FALSE),
    set_labels = list("secondary stress" = c("V1", "V2"))
  )
)

# Do the same thing separately for each speaker.
#
# The rows of tongan_stress are selected with tongan_stress's own speaker
# column. Indexing with the speaker column of a different data frame
# (tongan_deletion) would pick rows by position in that other data set, which
# does not line up with tongan_stress.
# NOTE(review): assumes speakers are coded "S1", "S2", "S3" in tongan_stress,
# as the original selection assumed for tongan_deletion -- confirm.
stressTable_S1 <- with(
  tongan_stress[tongan_stress$speaker == "S1", ],
  table(combined_categories, is_1st_V_stressed,
        dnn = c("Vs status", "secondary stress"))
)
stressTable_S2 <- with(
  tongan_stress[tongan_stress$speaker == "S2", ],
  table(combined_categories, is_1st_V_stressed,
        dnn = c("Vs status", "secondary stress"))
)
stressTable_S3 <- with(
  tongan_stress[tongan_stress$speaker == "S3", ],
  table(combined_categories, is_1st_V_stressed,
        dnn = c("Vs status", "secondary stress"))
)

# Plot the three per-speaker mosaics side by side. The result looks cramped
# here, but it is coded this way in case we want to make a .png.
# Each mosaic is captured as a grob with grid.grabExpr() so that grid.arrange()
# can lay the three out in one row. Only the first panel relabels the
# "secondary stress" levels; the other two pass labels=c(TRUE,FALSE) instead.
str1 <- grid.grabExpr(
  mosaic(
    stressTable_S1,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0),
      offset_varnames = c(left = 1.5),
      offset_labels = c(left = 1),
      varnames = c(FALSE, FALSE),
      set_labels = list("secondary stress" = c("V1", "V2"))
    ),
    margins = c(2, 0, 0, 4) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "                    Speaker 1"
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
str2 <- grid.grabExpr(
  mosaic(
    stressTable_S2,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0),
      offset_varnames = c(left = 1.5),
      offset_labels = c(left = 1),
      varnames = c(FALSE, FALSE),
      labels = c(TRUE, FALSE)
    ),
    margins = c(2, 0, 0, -2) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "Speaker 2          "
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
str3 <- grid.grabExpr(
  mosaic(
    stressTable_S3,
    direction = "v",
    pop = FALSE,
    gp_varnames = gpSerif,
    gp_labels = gpSerif,
    gp = gpar(fill = c("black", "grey")),
    labeling_args = list(
      rot_labels = c(left = 0),
      offset_varnames = c(left = 1.5),
      offset_labels = c(left = 1),
      varnames = c(FALSE, FALSE),
      labels = c(TRUE, FALSE)
    ),
    margins = c(2, 0, 0, -7.8) + 0.1,
    sub_gp = gpar(fontfamily = myFontFamily),
    sub = "Speaker 3                              "
  )
)
## Warning in grabDL(warn, wrap, ...): viewport overwritten (grab MAY not be faithful)
grid.arrange(str1, str2, str3, ncol = 3)

Logistic regression for secondary stress

The problem with treating first_V and second_V as separate factors (possibly with an interaction) is that many combinations are missing. For example, there are no tokens where both Vs are epenthetic. Instead, we treat first_V*second_V as though it were a single factor. We make that factor:

# Build the combined V1-by-V2 factor in one step:
#   1. cross first_V with second_V (level names look like "stressed.epenthetic");
#   2. blank out the 'blancmange' token, whose combination has just one token;
#   3. re-create the factor so that levels with no remaining tokens are dropped;
#   4. make V1 epenthetic / V2 corresponding to stressed the reference level.
tongan_stress$first_and_secondV <- with(tongan_stress, {
  v1_by_v2 <- interaction(first_V, second_V)
  v1_by_v2[definition == "blancmange"] <- NA
  relevel(factor(v1_by_v2), "epenthetic.stressed")
})

# check resulting counts
table(tongan_stress[["first_and_secondV"]])
## 
##   epenthetic.stressed   stressed.epenthetic unstressed.epenthetic   unstressed.stressed 
##                    65                    94                    15                    47 
##   stressed.unstressed 
##                    71

There is a binary variable for whether the first two Cs can correspond to a legal initial cluster in English, but no further coding of surrounding consonants. The rationale for this is that while fine details of surrounding consonants could be expected to influence vowel devoicing and deletion, we have no expectation that fine details of consonants will influence stressability, unless speakers have inferred that certain C_C environments attract or repel stress (because they tend to correspond to CVC or CC in English).

#maximal-interactions model
tongan_stress.bayesglm_interactVs <- bayesglm(is_1st_V_stressed ~
  speaker*(speaker_V1_quality
 +speaker_V2_quality
 + starts_with_possible_cluster
 + first_and_secondV)
  , data=tongan_stress,
  family=binomial(link="logit"))

#select the best model
tongan_stress.best_interactVs <- step(tongan_stress.bayesglm_interactVs)
## Start:  AIC=354.68
## is_1st_V_stressed ~ speaker * (speaker_V1_quality + speaker_V2_quality + 
##     starts_with_possible_cluster + first_and_secondV)
## 
##                                        Df Deviance    AIC
## - speaker:speaker_V2_quality            8   269.90 337.90
## - speaker:speaker_V1_quality            8   271.20 339.20
## - speaker:first_and_secondV             8   277.98 345.98
## - speaker:starts_with_possible_cluster  2   268.16 348.16
## <none>                                      270.68 354.68
## 
## Step:  AIC=341.42
## is_1st_V_stressed ~ speaker + speaker_V1_quality + speaker_V2_quality + 
##     starts_with_possible_cluster + first_and_secondV + speaker:speaker_V1_quality + 
##     speaker:starts_with_possible_cluster + speaker:first_and_secondV
## 
##                                        Df Deviance    AIC
## - speaker:speaker_V1_quality            8   274.25 326.25
## - speaker:first_and_secondV             8   282.61 334.61
## - speaker_V2_quality                    4   275.34 335.34
## - speaker:starts_with_possible_cluster  2   271.86 335.86
## <none>                                      273.42 341.42
## 
## Step:  AIC=329.04
## is_1st_V_stressed ~ speaker + speaker_V1_quality + speaker_V2_quality + 
##     starts_with_possible_cluster + first_and_secondV + speaker:starts_with_possible_cluster + 
##     speaker:first_and_secondV
## 
##                                        Df Deviance    AIC
## - speaker:first_and_secondV             8   287.54 323.54
## - speaker_V2_quality                    4   279.65 323.65
## - speaker:starts_with_possible_cluster  2   277.05 325.05
## <none>                                      277.05 329.04
## - speaker_V1_quality                    4   293.35 337.35
## 
## Step:  AIC=324.5
## is_1st_V_stressed ~ speaker + speaker_V1_quality + speaker_V2_quality + 
##     starts_with_possible_cluster + first_and_secondV + speaker:starts_with_possible_cluster
## 
##                                        Df Deviance    AIC
## - speaker_V2_quality                    4   292.82 320.83
## - speaker:starts_with_possible_cluster  2   289.79 321.79
## <none>                                      288.50 324.50
## - first_and_secondV                     4   306.26 334.26
## - speaker_V1_quality                    4   306.43 334.43
## 
## Step:  AIC=321.64
## is_1st_V_stressed ~ speaker + speaker_V1_quality + starts_with_possible_cluster + 
##     first_and_secondV + speaker:starts_with_possible_cluster
## 
##                                        Df Deviance    AIC
## - speaker:starts_with_possible_cluster  2   295.12 319.12
## <none>                                      293.64 321.64
## - speaker_V1_quality                    4   311.17 331.16
## - first_and_secondV                     4   313.46 333.46
## 
## Step:  AIC=319.88
## is_1st_V_stressed ~ speaker + speaker_V1_quality + starts_with_possible_cluster + 
##     first_and_secondV
## 
##                                Df Deviance    AIC
## - starts_with_possible_cluster  1   295.18 317.18
## <none>                              295.88 319.88
## - speaker                       2   302.13 322.13
## - speaker_V1_quality            4   312.95 328.95
## - first_and_secondV             4   315.80 331.80
## 
## Step:  AIC=317.56
## is_1st_V_stressed ~ speaker + speaker_V1_quality + first_and_secondV
## 
##                      Df Deviance    AIC
## <none>                    295.56 317.56
## - speaker             2   302.19 320.19
## - speaker_V1_quality  4   313.24 327.24
## - first_and_secondV   4   342.42 356.42
#coefficient table for the model that step() settled on
summary(tongan_stress.best_interactVs)
## 
## Call:
## bayesglm(formula = is_1st_V_stressed ~ speaker + speaker_V1_quality + 
##     first_and_secondV, family = binomial(link = "logit"), data = tongan_stress)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.2036  -0.8126   0.4618   0.7989   1.7989  
## 
## Coefficients:
##                                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                             -1.0894     0.3668  -2.970 0.002982 ** 
## speakerS2                                0.1508     0.3371   0.447 0.654616    
## speakerS3                                0.8166     0.3428   2.382 0.017224 *  
## speaker_V1_qualitye                      0.8987     0.4319   2.081 0.037435 *  
## speaker_V1_qualityi                     -0.8429     0.3586  -2.351 0.018744 *  
## speaker_V1_qualityo                     -0.4585     0.4093  -1.120 0.262668    
## speaker_V1_qualityu                      0.1222     1.0209   0.120 0.904692    
## first_and_secondVstressed.epenthetic     2.3753     0.4152   5.721 1.06e-08 ***
## first_and_secondVunstressed.epenthetic   2.5968     0.7441   3.490 0.000483 ***
## first_and_secondVunstressed.stressed     1.6044     0.4373   3.669 0.000243 ***
## first_and_secondVstressed.unstressed     1.6278     0.4098   3.972 7.12e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 376.13  on 289  degrees of freedom
## Residual deviance: 295.56  on 279  degrees of freedom
##   (3 observations deleted due to missingness)
## AIC: 317.56
## 
## Number of Fisher Scoring iterations: 8
#Type II likelihood-ratio test for each term of the selected model (Anova() comes from car)
Anova(tongan_stress.best_interactVs)
## Analysis of Deviance Table (Type II tests)
## 
## Response: is_1st_V_stressed
##                    LR Chisq Df Pr(>Chisq)    
## speaker               6.623  2   0.036456 *  
## speaker_V1_quality   17.675  4   0.001429 ** 
## first_and_secondV    46.860  4  1.631e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#sequential analysis of deviance (terms added first to last), each step tested with a chi-squared test
anova(tongan_stress.best_interactVs, test="Chisq")
## Analysis of Deviance Table
## 
## Model: binomial, link: logit
## 
## Response: is_1st_V_stressed
## 
## Terms added sequentially (first to last)
## 
## 
##                    Df Deviance Resid. Df Resid. Dev  Pr(>Chi)    
## NULL                                 289     376.13              
## speaker             2    3.535       287     372.60    0.1707    
## speaker_V1_quality  4   30.176       283     342.42 4.506e-06 ***
## first_and_secondV   4   46.860       279     295.56 1.631e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Which levels of first_and_secondV are pairwise different?

#all pairwise (Tukey) contrasts among the levels of first_and_secondV in the selected model
tongan_stress_V1V2.glht_interactVs <- glht(
  tongan_stress.best_interactVs,
  linfct = mcp(first_and_secondV = "Tukey")
)
#epenth-str has lower rate of initial secondary stress than the other four
summary(tongan_stress_V1V2.glht_interactVs)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: bayesglm(formula = is_1st_V_stressed ~ speaker + speaker_V1_quality + 
##     first_and_secondV, family = binomial(link = "logit"), data = tongan_stress)
## 
## Linear Hypotheses:
##                                                  Estimate Std. Error z value Pr(>|z|)    
## stressed.epenthetic - epenthetic.stressed == 0    2.37529    0.41516   5.721  < 0.001 ***
## unstressed.epenthetic - epenthetic.stressed == 0  2.59677    0.74410   3.490  0.00400 ** 
## unstressed.stressed - epenthetic.stressed == 0    1.60443    0.43725   3.669  0.00204 ** 
## stressed.unstressed - epenthetic.stressed == 0    1.62780    0.40979   3.972  < 0.001 ***
## unstressed.epenthetic - stressed.epenthetic == 0  0.22148    0.76216   0.291  0.99834    
## unstressed.stressed - stressed.epenthetic == 0   -0.77086    0.43446  -1.774  0.37389    
## stressed.unstressed - stressed.epenthetic == 0   -0.74750    0.38719  -1.931  0.28752    
## unstressed.stressed - unstressed.epenthetic == 0 -0.99234    0.77530  -1.280  0.69124    
## stressed.unstressed - unstressed.epenthetic == 0 -0.96898    0.76083  -1.274  0.69527    
## stressed.unstressed - unstressed.stressed == 0    0.02337    0.42511   0.055  1.00000    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)

Surrounding Cs as random effect

If we instead treat surrounding C as a random effect, the way we did for V deletion, we have to use glmer(). Unfortunately, it won’t converge with any interactions we tried, so this model has no interactions at all. Nor would it converge with either speaker_V1_quality or speaker_V2_quality.

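Here’s the fullest model we were able to fit. Note that lme4 still reports a convergence warning for it (max|grad| of about 0.002 against a tolerance of 0.001), so the estimates below should be read with that caveat in mind: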

#mixed-effects logistic regression: random intercept for first_two_Cs, fixed main effects only
#NOTE(review): this fit emits a convergence warning (max|grad| slightly above lme4's tolerance);
#worth re-checking with a different optimizer before relying on the estimates
tongan_stress.glmer_interactVs <- glmer(
  formula = is_1st_V_stressed ~ (1 | first_two_Cs) +
    speaker + starts_with_possible_cluster +
    first_and_secondV,
  data = tongan_stress,
  family = binomial(link = "logit")
)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to
## converge with max|grad| = 0.00198649 (tol = 0.001, component 1)
#fit statistics, random-effect variance and fixed-effect coefficients of the mixed model
summary(tongan_stress.glmer_interactVs)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: is_1st_V_stressed ~ (1 | first_two_Cs) + speaker + starts_with_possible_cluster +  
##     first_and_secondV
##    Data: tongan_stress
## 
##      AIC      BIC   logLik deviance df.resid 
##    314.9    348.0   -148.5    296.9      283 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0352 -0.5166  0.2792  0.4949  2.2877 
## 
## Random effects:
##  Groups       Name        Variance Std.Dev.
##  first_two_Cs (Intercept) 1.754    1.324   
## Number of obs: 292, groups:  first_two_Cs, 51
## 
## Fixed effects:
##                                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                            -1.35282    0.77306  -1.750  0.08013 .  
## speakerS2                               0.06629    0.38198   0.174  0.86222    
## speakerS3                               0.94783    0.39846   2.379  0.01737 *  
## starts_with_possible_cluster            0.25163    0.82744   0.304  0.76105    
## first_and_secondVstressed.epenthetic    3.35499    0.82748   4.054 5.02e-05 ***
## first_and_secondVunstressed.epenthetic  3.26331    1.16557   2.800  0.00511 ** 
## first_and_secondVunstressed.stressed    1.76523    0.83036   2.126  0.03351 *  
## first_and_secondVstressed.unstressed    1.93672    0.67037   2.889  0.00386 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                        (Intr) spkrS2 spkrS3 str___ frst_nd_scndVstrssd.p frst_nd_scndVnstrssd.p
## speakerS2              -0.212                                                                  
## speakerS3              -0.285  0.468                                                           
## strts_wth__            -0.608 -0.063 -0.006                                                    
## frst_nd_scndVstrssd.p  -0.762 -0.027  0.130  0.519                                             
## frst_nd_scndVnstrssd.p -0.563 -0.023  0.057  0.385  0.578                                      
## frst_nd_scndVnstrssd.s -0.777 -0.035  0.025  0.507  0.682                 0.522                
## frst_nd_scndVstrssd.n  -0.786 -0.007  0.079  0.496  0.695                 0.500                
##                        frst_nd_scndVnstrssd.s
## speakerS2                                    
## speakerS3                                    
## strts_wth__                                  
## frst_nd_scndVstrssd.p                        
## frst_nd_scndVnstrssd.p                       
## frst_nd_scndVnstrssd.s                       
## frst_nd_scndVstrssd.n   0.676                
## convergence code: 0
## Model failed to converge with max|grad| = 0.00198649 (tol = 0.001, component 1)

In that model, which levels of first_and_secondV are significantly different?

#all pairwise (Tukey) contrasts among the levels of first_and_secondV in the mixed model
tongan_stress.glht_interactVs <- glht(
  tongan_stress.glmer_interactVs,
  linfct = mcp(first_and_secondV = "Tukey")
)
#epenth-str < {str-unstr, unstr-epenth, str-epenth}
summary(tongan_stress.glht_interactVs)
## 
##   Simultaneous Tests for General Linear Hypotheses
## 
## Multiple Comparisons of Means: Tukey Contrasts
## 
## 
## Fit: glmer(formula = is_1st_V_stressed ~ (1 | first_two_Cs) + speaker + 
##     starts_with_possible_cluster + first_and_secondV, data = tongan_stress, 
##     family = binomial(link = "logit"))
## 
## Linear Hypotheses:
##                                                  Estimate Std. Error z value Pr(>|z|)    
## stressed.epenthetic - epenthetic.stressed == 0    3.35499    0.82748   4.054   <0.001 ***
## unstressed.epenthetic - epenthetic.stressed == 0  3.26331    1.16557   2.800   0.0381 *  
## unstressed.stressed - epenthetic.stressed == 0    1.76523    0.83036   2.126   0.1967    
## stressed.unstressed - epenthetic.stressed == 0    1.93672    0.67037   2.889   0.0294 *  
## unstressed.epenthetic - stressed.epenthetic == 0 -0.09168    0.96385  -0.095   1.0000    
## unstressed.stressed - stressed.epenthetic == 0   -1.58976    0.66144  -2.403   0.1066    
## stressed.unstressed - stressed.epenthetic == 0   -1.41827    0.60231  -2.355   0.1197    
## unstressed.stressed - unstressed.epenthetic == 0 -1.49808    1.01868  -1.471   0.5643    
## stressed.unstressed - unstressed.epenthetic == 0 -1.32660    1.01313  -1.309   0.6698    
## stressed.unstressed - unstressed.stressed == 0    0.17148    0.62126   0.276   0.9986    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- single-step method)