1. Study introduction

The present study examines whether phonation type, vowel quality and duration play a role in perception of checked tones in Shanghai Chinese. A three-alternative forced-choice experiment was conducted in which 40 native listeners chose among three Chinese characters representing an open, a nasal-ending and a glottal-ending syllable according to the word they heard. Results showed main effects of phonation type, vowel quality and duration as well as the interaction between the former two with respect to both decisions and reaction time (RT). Two-way interactions between duration and the other two cues, however, could only be seen in RT. We therefore tentatively conclude that tenser phonation may be the primary perceptual cue to checked tones while more central vowel space and shorter duration are secondary, in line with findings in other tonal languages like Vietnamese and Cantonese. Additionally, we also found that the low vowel pair /ɐ/ and /ɑ/ and high register tones tended to bias perception towards checked tones.

This .Rmd file documents the data processing, statistical analysis and plotting of this study.

2. Load packages

library(lme4)

## Loading required package: Matrix

library(lmerTest)

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(ggplot2)
library(forcats)
library(viridis)

## Loading required package: viridisLite

library(hrbrthemes)
library(tidyr)

## 
## Attaching package: 'tidyr'

## The following objects are masked from 'package:Matrix':
## 
##     expand, pack, unpack

library(ggsignif)
library(emmeans)

## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'

options(width=200)

3. Plotting all responses

3.1 Load data

table.dt <- read.delim("results.tsv",  stringsAsFactors=TRUE)
table.dt$group <- as.factor(table.dt$group)
table.dt <- table.dt[,-4:-5]  #remove district and trial number

3.2 Count responses

participant <- levels(table.dt$participant)
dt.nasalph <- table.dt[table.dt$phonation == 'nasal',]
dt.openph <- table.dt[table.dt$phonation == 'unchecked',]
dt.glottalph <- table.dt[table.dt$phonation == 'checked',]
cnt <- 1
nasal <- vector(length = 40 * 3)
open <- vector(length = 40 * 3)
glottal <- vector(length = 40 * 3)
for (part in participant) {
  dt.part <- dt.openph[dt.openph$participant == part,]
  nasal[cnt] <- nrow(dt.part[dt.part$decision == "s",])
  open[cnt] <- nrow(dt.part[dt.part$decision == "a",])
  glottal[cnt] <- nrow(dt.part[dt.part$decision == "d",])
  cnt <- cnt+1
}
for (part in participant) {
  dt.part <- dt.nasalph[dt.nasalph$participant == part,]
  nasal[cnt] <- nrow(dt.part[dt.part$decision == "s",])
  open[cnt] <- nrow(dt.part[dt.part$decision == "a",])
  glottal[cnt] <- nrow(dt.part[dt.part$decision == "d",])
  cnt <- cnt+1
}
for (part in participant) {
  dt.part <- dt.glottalph[dt.glottalph$participant == part,]
  nasal[cnt] <- nrow(dt.part[dt.part$decision == "s",])
  open[cnt] <- nrow(dt.part[dt.part$decision == "a",])
  glottal[cnt] <- nrow(dt.part[dt.part$decision == "d",])
  cnt <- cnt+1
}

3.3 Transfer count to percentage

# Transfer count to percentage
new_frame <- data.frame(participant = 1:40, open = c(open[1:40]/0.64, open[41:80]/0.32, open[81:120]/0.64), nasal = c(nasal[1:40]/0.64, nasal[41:80]/0.32, nasal[81:120]/0.64), glottal = c(glottal[1:40]/0.64, glottal[41:80]/0.32, glottal[81:120]/0.64), stimuli_type = rep(c("open", "nasal", "glottal"), each = 40))

new_frame_long <- gather(new_frame[,-1], key = "perceived_syllable", value = "percentage", -stimuli_type)
new_frame_long$stimuli_type <- factor(new_frame_long$stimuli_type, levels = c("open", "nasal", "glottal"))
new_frame_long$perceived_syllable <- factor(new_frame_long$perceived_syllable, levels = c("open", "nasal", "glottal"))

3.4 Plot

fig_2 <- ggplot(new_frame_long, aes(x = stimuli_type, 
                           y= percentage, fill = perceived_syllable)) + ylab("Percentage of responses (%)") +
  scale_x_discrete(labels = c("[−tense] [−nasal]","[−tense] [+nasal]","[+tense] [−nasal]")) +
  scale_y_continuous(limits=c(0,100), breaks=seq(0,100,20), expand = c(0, 0)) +
  labs(fill = "Perceived syllable") + 
  geom_boxplot(outlier.size = 0.5, notch = FALSE, linewidth = 0.35, fatten = 2) +
  scale_fill_viridis(discrete = TRUE, alpha=0.5, option="D", labels = c("open", "nasal-ending", "glottal-ending")) +
  theme_ipsum() +
  theme(
      legend.position = "bottom",
      axis.title.x  = element_text(vjust = 0.5, hjust = 0.5, family = "serif", face="bold", size = 12),
      axis.title.y  = element_text(hjust = 0.5, family = "serif", face="bold", size = 12),
      axis.text.x   = element_text(family = "serif", size = 10.5),
      axis.text.y   = element_text(family = "serif", size = 10.5),
      legend.title  = element_text(family = "serif", face="bold",size = 12),
      legend.text = element_text(family = "serif", size = 10.5),
      legend.box.spacing = margin(0.25),
      plot.margin = unit(c(0.25,0.75,0.25,0.75), units = "cm")
    ) + xlab("Phonation type and final nasality as auditory cues in stimuli")
      #+ xlab("Stimuli sorted by two auditory cues: phonation and nasality")
ggsave("Figure_2.png", fig_2, width = 132, height = 72, units = "mm", dpi = 1000)
fig_2

4. Responses

4.1 Adjust data

#remove responses related to "nasal"
table.dt <- table.dt[table.dt$decision != "s" & table.dt$phonation != "nasal", ] 
table.dt <- droplevels(table.dt)

4.2 Contrast coding

# Duration: orthogonal polynomial coding
#contrasts(table.dt$duration) <- contr.poly(4)
contrast.duration <- cbind(c(-1/2,-1/6,1/6,1/2),c(0.5,-0.5,-0.5,0.5),c(-1/6,1/2,-1/2,1/6))
colnames(contrast.duration) <- c("L","Q","C")
contrasts(table.dt$duration) <- contrast.duration
contrasts(table.dt$duration)

##            L    Q          C
## A -0.5000000  0.5 -0.1666667
## B -0.1666667 -0.5  0.5000000
## C  0.1666667 -0.5 -0.5000000
## D  0.5000000  0.5  0.1666667

# Vowel quality
contrast.vowel_quality <- cbind (c(+0.5,-0.5))
colnames(contrast.vowel_quality) <- c("+closed-open")
contrasts(table.dt$vowel_quality) <- contrast.vowel_quality
contrasts(table.dt$vowel_quality)

##        +closed-open
## closed          0.5
## open           -0.5

# Vowel pair
contrast.vowel_pair <- cbind (c(+0.5,-0.5))
colnames(contrast.vowel_pair) <- c("+a-o")
contrasts(table.dt$vowel_pair) <- contrast.vowel_pair
contrasts(table.dt$vowel_pair)

##   +a-o
## a  0.5
## o -0.5

# Phonation type
contrast.phonation <- cbind (c(+0.5,-0.5))
colnames(contrast.phonation) <- c("+checked-unchecked")
contrasts(table.dt$phonation) <- contrast.phonation
contrasts(table.dt$phonation)

##           +checked-unchecked
## checked                  0.5
## unchecked               -0.5

# Tone register
contrast.tone <- cbind (c(-0.5,+0.5))
colnames(contrast.tone) <- c("-high+low")
contrasts(table.dt$tone) <- contrast.tone
contrasts(table.dt$tone)

##      -high+low
## high      -0.5
## low        0.5

What about age? If we centre all our binary predictors, we should also centre Age. Otherwise, the p-values for all predictors not involving Age will be for newborns.

mean (table.dt$age)

## [1] 27.07325

sd (table.dt$age)

## [1] 9.607725

So let’ centre Age:

table.dt$age_centred <- (table.dt$age - 27.0) / 10.0

The p-values for all predictors not involving age_centred will then be for 27-year-olds, and the estimate of the effect of age_centred will be in units of “per decade”.

4.3 Models of decisions (glmer)

mdl.dcsion <- lme4::glmer(decision ~ vowel_pair + tone + vowel_quality*duration*phonation + (1|participant), data=table.dt, family=binomial)
summary(mdl.dcsion)

## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: decision ~ vowel_pair + tone + vowel_quality * duration * phonation +      (1 | participant)
##    Data: table.dt
## 
##      AIC      BIC   logLik deviance df.resid 
##   3595.5   3718.7  -1778.8   3557.5     4814 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.8048 -0.2322  0.2358  0.4226  7.9532 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  participant (Intercept) 0.2105   0.4589  
## Number of obs: 4833, groups:  participant, 40
## 
## Fixed effects:
##                                                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                                                      0.75139    0.08986   8.362  < 2e-16 ***
## vowel_pair+a-o                                                   0.72303    0.08795   8.221  < 2e-16 ***
## tone-high+low                                                   -0.38358    0.08673  -4.423 9.74e-06 ***
## vowel_quality+closed-open                                        2.23494    0.10635  21.014  < 2e-16 ***
## durationL                                                       -1.30395    0.14437  -9.032  < 2e-16 ***
## durationQ                                                        0.19548    0.10473   1.867    0.062 .  
## durationC                                                        0.08093    0.13685   0.591    0.554    
## phonation+checked-unchecked                                      3.36549    0.10822  31.098  < 2e-16 ***
## vowel_quality+closed-open:durationL                              0.22557    0.28816   0.783    0.434    
## vowel_quality+closed-open:durationQ                             -0.13204    0.20946  -0.630    0.528    
## vowel_quality+closed-open:durationC                             -0.26698    0.27370  -0.975    0.329    
## vowel_quality+closed-open:phonation+checked-unchecked           -2.49753    0.21137 -11.816  < 2e-16 ***
## durationL:phonation+checked-unchecked                            0.19035    0.28824   0.660    0.509    
## durationQ:phonation+checked-unchecked                            0.05918    0.20947   0.282    0.778    
## durationC:phonation+checked-unchecked                           -0.11225    0.27369  -0.410    0.682    
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -0.23217    0.57637  -0.403    0.687    
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked  0.58434    0.41886   1.395    0.163    
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -0.67884    0.54736  -1.240    0.215    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 18 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

The model doesn’t converge if Age is included:

mdl.dcsion_age <- lme4::glmer(decision ~ age + vowel_pair + tone +  vowel_quality*duration*phonation + (1|participant), data=table.dt, family=binomial)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.020339 (tol = 0.002, component 1)

summary(mdl.dcsion_age)

## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: decision ~ age + vowel_pair + tone + vowel_quality * duration *      phonation + (1 | participant)
##    Data: table.dt
## 
##      AIC      BIC   logLik deviance df.resid 
##   3597.4   3727.0  -1778.7   3557.4     4813 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.8117 -0.2317  0.2355  0.4229  7.9398 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  participant (Intercept) 0.2097   0.4579  
## Number of obs: 4833, groups:  participant, 40
## 
## Fixed effects:
##                                                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                                                      0.839513   0.254680   3.296  0.00098 ***
## age                                                             -0.003264   0.008812  -0.370  0.71112    
## vowel_pair+a-o                                                   0.722872   0.087948   8.219  < 2e-16 ***
## tone-high+low                                                   -0.383530   0.086726  -4.422 9.77e-06 ***
## vowel_quality+closed-open                                        2.234822   0.106353  21.013  < 2e-16 ***
## durationL                                                       -1.303855   0.144376  -9.031  < 2e-16 ***
## durationQ                                                        0.195414   0.104730   1.866  0.06206 .  
## durationC                                                        0.081060   0.136853   0.592  0.55364    
## phonation+checked-unchecked                                      3.365423   0.108218  31.098  < 2e-16 ***
## vowel_quality+closed-open:durationL                              0.225751   0.288181   0.783  0.43341    
## vowel_quality+closed-open:durationQ                             -0.131836   0.209463  -0.629  0.52909    
## vowel_quality+closed-open:durationC                             -0.266897   0.273720  -0.975  0.32952    
## vowel_quality+closed-open:phonation+checked-unchecked           -2.497985   0.211369 -11.818  < 2e-16 ***
## durationL:phonation+checked-unchecked                            0.190610   0.288246   0.661  0.50844    
## durationQ:phonation+checked-unchecked                            0.059164   0.209478   0.282  0.77761    
## durationC:phonation+checked-unchecked                           -0.112012   0.273709  -0.409  0.68236    
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -0.231539   0.576511  -0.402  0.68796    
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked  0.584303   0.418909   1.395  0.16307    
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -0.678433   0.547404  -1.239  0.21521    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.020339 (tol = 0.002, component 1)

The model doesn’t even converge if age_centred is included:

mdl.dcsion_age <- lme4::glmer(decision ~ age_centred + vowel_pair + tone +  vowel_quality*duration*phonation + (1|participant), data=table.dt, family=binomial)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.00844478 (tol = 0.002, component 1)

summary(mdl.dcsion_age)

## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: decision ~ age_centred + vowel_pair + tone + vowel_quality *      duration * phonation + (1 | participant)
##    Data: table.dt
## 
##      AIC      BIC   logLik deviance df.resid 
##   3597.4   3727.0  -1778.7   3557.4     4813 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.8041 -0.2317  0.2356  0.4228  7.9339 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  participant (Intercept) 0.2097   0.458   
## Number of obs: 4833, groups:  participant, 40
## 
## Fixed effects:
##                                                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                                                      0.75141    0.08974   8.373  < 2e-16 ***
## age_centred                                                     -0.03309    0.08813  -0.375    0.707    
## vowel_pair+a-o                                                   0.72296    0.08795   8.220  < 2e-16 ***
## tone-high+low                                                   -0.38369    0.08672  -4.424 9.68e-06 ***
## vowel_quality+closed-open                                        2.23466    0.10633  21.016  < 2e-16 ***
## durationL                                                       -1.30319    0.14431  -9.030  < 2e-16 ***
## durationQ                                                        0.19541    0.10471   1.866    0.062 .  
## durationC                                                        0.08142    0.13686   0.595    0.552    
## phonation+checked-unchecked                                      3.36496    0.10820  31.100  < 2e-16 ***
## vowel_quality+closed-open:durationL                              0.22548    0.28806   0.783    0.434    
## vowel_quality+closed-open:durationQ                             -0.13356    0.20942  -0.638    0.524    
## vowel_quality+closed-open:durationC                             -0.26668    0.27373  -0.974    0.330    
## vowel_quality+closed-open:phonation+checked-unchecked           -2.49791    0.21133 -11.820  < 2e-16 ***
## durationL:phonation+checked-unchecked                            0.19110    0.28814   0.663    0.507    
## durationQ:phonation+checked-unchecked                            0.05765    0.20944   0.275    0.783    
## durationC:phonation+checked-unchecked                           -0.11229    0.27372  -0.410    0.682    
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -0.23002    0.57620  -0.399    0.690    
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked  0.58341    0.41884   1.393    0.164    
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -0.67705    0.54748  -1.237    0.216    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00844478 (tol = 0.002, component 1)

Sometimes it helps if only bobyqa is used as the optimizer:

ctrl <- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e5))
mdl.dcsion_age <- lme4::glmer(decision ~ age_centred + vowel_pair + tone +  vowel_quality*duration*phonation + (1|participant), data=table.dt, family=binomial, control=ctrl)
summary(mdl.dcsion_age)

## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: decision ~ age_centred + vowel_pair + tone + vowel_quality *      duration * phonation + (1 | participant)
##    Data: table.dt
## Control: ctrl
## 
##      AIC      BIC   logLik deviance df.resid 
##   3597.4   3727.0  -1778.7   3557.4     4813 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.8144 -0.2317  0.2356  0.4228  7.9391 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  participant (Intercept) 0.2098   0.458   
## Number of obs: 4833, groups:  participant, 40
## 
## Fixed effects:
##                                                                 Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                                                      0.75153    0.08975   8.374  < 2e-16 ***
## age_centred                                                     -0.03301    0.08813  -0.375   0.7080    
## vowel_pair+a-o                                                   0.72301    0.08795   8.221  < 2e-16 ***
## tone-high+low                                                   -0.38355    0.08673  -4.422 9.76e-06 ***
## vowel_quality+closed-open                                        2.23495    0.10636  21.014  < 2e-16 ***
## durationL                                                       -1.30384    0.14437  -9.031  < 2e-16 ***
## durationQ                                                        0.19550    0.10473   1.867   0.0619 .  
## durationC                                                        0.08088    0.13685   0.591   0.5545    
## phonation+checked-unchecked                                      3.36554    0.10822  31.099  < 2e-16 ***
## vowel_quality+closed-open:durationL                              0.22535    0.28817   0.782   0.4342    
## vowel_quality+closed-open:durationQ                             -0.13195    0.20946  -0.630   0.5287    
## vowel_quality+closed-open:durationC                             -0.26692    0.27371  -0.975   0.3295    
## vowel_quality+closed-open:phonation+checked-unchecked           -2.49771    0.21137 -11.817  < 2e-16 ***
## durationL:phonation+checked-unchecked                            0.19018    0.28826   0.660   0.5094    
## durationQ:phonation+checked-unchecked                            0.05913    0.20947   0.282   0.7777    
## durationC:phonation+checked-unchecked                           -0.11223    0.27369  -0.410   0.6818    
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -0.23177    0.57640  -0.402   0.6876    
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked  0.58431    0.41886   1.395   0.1630    
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -0.67872    0.54740  -1.240   0.2150    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

No problem, but the centring was crucial:

mdl.dcsion_age <- lme4::glmer(decision ~ age + vowel_pair + tone +  vowel_quality*duration*phonation + (1|participant), data=table.dt, family=binomial, control=ctrl)

## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.0140575 (tol = 0.002, component 1)

summary(mdl.dcsion_age)

## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
##  Family: binomial  ( logit )
## Formula: decision ~ age + vowel_pair + tone + vowel_quality * duration *      phonation + (1 | participant)
##    Data: table.dt
## Control: ctrl
## 
##      AIC      BIC   logLik deviance df.resid 
##   3597.4   3727.0  -1778.7   3557.4     4813 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -7.8104 -0.2318  0.2357  0.4228  7.9453 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  participant (Intercept) 0.2097   0.4579  
## Number of obs: 4833, groups:  participant, 40
## 
## Fixed effects:
##                                                                  Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                                                      0.840520   0.254707   3.300 0.000967 ***
## age                                                             -0.003299   0.008813  -0.374 0.708183    
## vowel_pair+a-o                                                   0.723033   0.087949   8.221  < 2e-16 ***
## tone-high+low                                                   -0.383600   0.086727  -4.423 9.73e-06 ***
## vowel_quality+closed-open                                        2.235045   0.106357  21.015  < 2e-16 ***
## durationL                                                       -1.304330   0.144377  -9.034  < 2e-16 ***
## durationQ                                                        0.195067   0.104733   1.863 0.062531 .  
## durationC                                                        0.081475   0.136854   0.595 0.551616    
## phonation+checked-unchecked                                      3.365542   0.108223  31.098  < 2e-16 ***
## vowel_quality+closed-open:durationL                              0.225817   0.288196   0.784 0.433302    
## vowel_quality+closed-open:durationQ                             -0.132045   0.209467  -0.630 0.528441    
## vowel_quality+closed-open:durationC                             -0.265480   0.273727  -0.970 0.332111    
## vowel_quality+closed-open:phonation+checked-unchecked           -2.497838   0.211379 -11.817  < 2e-16 ***
## durationL:phonation+checked-unchecked                            0.191024   0.288280   0.663 0.507566    
## durationQ:phonation+checked-unchecked                            0.059092   0.209484   0.282 0.777878    
## durationC:phonation+checked-unchecked                           -0.111519   0.273709  -0.407 0.683689    
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -0.234597   0.576385  -0.407 0.683998    
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked  0.580842   0.418922   1.387 0.165590    
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -0.673263   0.547409  -1.230 0.218732    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

## optimizer (bobyqa) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0140575 (tol = 0.002, component 1)

No influence of age on decision making was found, so age was excluded.

4.4 Calculating odds ratio

# odds calculating function
odds_calculating <- function(row, variable_name) {
  estimate <- abs(summary(mdl.dcsion)$coefficients[row,1])
  sd <- abs(summary(mdl.dcsion)$coefficients[row,2])
  exp_estimate <- round(exp(estimate),2)
  exp_interval <- c(round(exp(estimate - 1.96 * sd),2), round(exp(estimate + 1.96 * sd),2))
  z <- round(summary(mdl.dcsion)$coefficients[row,3],3)
  p <- summary(mdl.dcsion)$coefficients[row,4]
  print(c(variable_name, exp_estimate, exp_interval, z, p))
}

# vowel pair
odds_calculating(2, "vowel pair")

## [1] "vowel pair"           "2.06"                 "1.73"                 "2.45"                 "8.221"                "2.01931520142918e-16"

# tone
odds_calculating(3, "tone register")

## [1] "tone register"     "1.47"              "1.24"              "1.74"              "-4.423"            "9.73864491924e-06"

# vowel quality
odds_calculating(4, "vowel quality")

## [1] "vowel quality"        "9.35"                 "7.59"                 "11.51"                "21.014"               "4.86885491720551e-98"

# duration.L
odds_calculating(5, "duration linear")

## [1] "duration linear"      "3.68"                 "2.78"                 "4.89"                 "-9.032"               "1.68454864356566e-19"

#phonation type
odds_calculating(8, "phonation type")

## [1] "phonation type"        "28.95"                 "23.41"                 "35.79"                 "31.098"                "2.52710719713357e-212"

# vowel quality and phonation type
odds_calculating(12, "vowel quality and phonation")

## [1] "vowel quality and phonation" "12.15"                       "8.03"                        "18.39"                       "-11.816"                     "3.22845522404258e-32"

4.5 Plotting the interaction

# Load data and create vectors to accommodate data
cnt <- 1
open.ckd <- vector(length = 40)
closed.ckd <- vector(length = 40)
open.unckd <- vector(length = 40)
closed.unckd <- vector(length = 40)
for (part in participant) {
  dt.part <- table.dt[table.dt$participant == part,]
  open.ckd[cnt] <- nrow(dt.part[dt.part$phonation == 'checked' & dt.part$vowel_quality == "open" & dt.part$decision == "d",])
  closed.ckd[cnt] <- nrow(dt.part[dt.part$phonation == 'checked' & dt.part$vowel_quality == "closed"& dt.part$decision == "d",])
  open.unckd[cnt] <- nrow(dt.part[dt.part$phonation == 'unchecked' & dt.part$vowel_quality == "open"& dt.part$decision == "d",])
  closed.unckd[cnt] <- nrow(dt.part[dt.part$phonation == 'unchecked' & dt.part$vowel_quality == "closed"& dt.part$decision == "d",])
  cnt <- cnt+1
}

# gather data together
DF <- data.frame(responses = (c(c(open.ckd, closed.ckd),c(open.unckd,closed.unckd))/0.32), phonation = rep(c("tense", "modal"), each = 80), vowel_quality = rep(rep(c("unchecked","checked"),each = 40), 2), stringsAsFactors = FALSE)

DF$phonation <- factor(DF$phonation, levels = c("tense","modal"))
DF$responses <- DF$responses * 0.32

# plot
fig_3 <- ggplot(DF, aes(x = phonation,
                           y= responses, fill = vowel_quality)) + ylab("Stimuli perceived as checked tones") +
  scale_y_continuous(limits=c(0,35.5), breaks=seq(0,32,8), expand = c(0, 0)) +
  labs(fill = "Vowel quality") + 
  geom_boxplot(outlier.size = 0.75, notch = FALSE, linewidth = 0.35, fatten = 2) +
  #geom_violin() +
  scale_fill_viridis(discrete = TRUE, alpha=0.5, option="D", labels = c("checked\n/o/ and /ɐ/","unchecked\n/ɔ/ and /ɑ/")) +
  theme_ipsum() +
  theme(
      #legend.position = c(0.95,0.9),
      legend.position = "right",
      axis.title.x  = element_text(vjust = 0.5, hjust = 0.5, family = "serif", face="bold", size = 12),
      axis.title.y  = element_text(hjust = 0.5, family = "serif", face="bold", size = 12),
      axis.text.x   = element_text(family = "serif", size = 10.5),
      axis.text.y   = element_text(family = "serif", size = 10.5),
      legend.title  = element_text(family = "serif", face="bold", size = 12),
      legend.text = element_text(family = "serif", size = 10.5, 
      margin = margin(b = 10)),
      plot.margin = unit(c(0.15,0.75,0.15,0.75), units = "cm")
    ) + xlab("Phonation type") +
   geom_signif(comparisons = list(c("tense", "modal")), map_signif_level = TRUE, textsize = 4, vjust = 0.1)
ggsave("Figure_3.png", fig_3, width = 130, height = 75, units = "mm", dpi = 1000)
fig_3

predicted_values <- predict(mdl.dcsion, newdata = table.dt, type = "response")
#interaction.plot(table.dt$phonation, table.dt$vowel_quality, response = predicted_values, type = "b", col = c("red", "blue"))

ggplot(table.dt, aes(x = table.dt$phonation, y = predicted_values, color = factor( table.dt$vowel_quality))) + ylab("mean percentage of stimuli perceived as checked tones") +
  geom_point(position = position_dodge(width = 0.2)) +
  #geom_violin() +
  geom_line(aes(group = factor(table.dt$vowel_quality)), position = position_dodge(width = 0.2)) +
  labs(color = "Vowel quality") +
  scale_fill_viridis(discrete = TRUE, alpha=0.5, option="D") +
  theme_ipsum()+
  theme(
      #legend.position = c(0.95,0.9),
      legend.position = "right",
      axis.title.x  = element_text(vjust = 0.5, hjust = 0.5, family = "serif", face="bold", size = 12),
      axis.title.y  = element_text(hjust = 0.5, family = "serif", face="bold", size = 12),
      axis.text.x   = element_text(family = "serif", size = 10.5),
      axis.text.y   = element_text(family = "serif", size = 10.5),
      legend.title  = element_text(family = "serif", face="bold", size = 12),
      legend.text = element_text(family = "serif", size = 10.5, margin = margin(b = 10))
    ) + xlab("phonation type") +
   geom_signif(comparisons = list(c("checked", "unchecked")), map_signif_level = TRUE, textsize = 4, vjust = 0.1)

## Warning: Use of `table.dt$phonation` is discouraged.
## ℹ Use `phonation` instead.

## Warning: Use of `table.dt$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.
## Use of `table.dt$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.

## Warning: Use of `table.dt$phonation` is discouraged.
## ℹ Use `phonation` instead.

## Warning: Use of `table.dt$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.

## Warning: Use of `table.dt$phonation` is discouraged.
## ℹ Use `phonation` instead.

## Warning: Use of `table.dt$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.

summary(pairs(emmeans(mdl.dcsion, ~ vowel_quality | phonation), adjust = "bonferroni"))

## NOTE: Results may be misleading due to involvement in interactions

## phonation = checked:
##  contrast      estimate    SE  df z.ratio p.value
##  closed - open    0.986 0.154 Inf   6.396  <.0001
## 
## phonation = unchecked:
##  contrast      estimate    SE  df z.ratio p.value
##  closed - open    3.484 0.146 Inf  23.934  <.0001
## 
## Results are averaged over the levels of: vowel_pair, tone, duration 
## Results are given on the log odds ratio (not the response) scale.

5. Reaction time

5.1 Data preparation

# reload data
table.dt2 <- read.delim("results.tsv",  stringsAsFactors=TRUE)
table.dt2 <- table.dt2[,-4:-5]
table.dt2$group <- as.factor(table.dt2$group)

# remove responses as nasal
table.dt2 <- table.dt2[table.dt2$phonation != "nasal", ]
(nrow(table.dt2))

## [1] 5120

table.dt2 <- droplevels(table.dt2)

#remove responses with RT longer than 5125 ms
table.dt2 <- table.dt2[table.dt2$rt <= 5125, ]
(nrow(table.dt2))

## [1] 4630

# contrast coding
#contrasts(table.dt2$duration) <- contr.poly(4)
#contrasts(table.dt2$duration)
contrasts(table.dt2$duration) <- contrast.duration
contrasts(table.dt2$duration)

##            L    Q          C
## A -0.5000000  0.5 -0.1666667
## B -0.1666667 -0.5  0.5000000
## C  0.1666667 -0.5 -0.5000000
## D  0.5000000  0.5  0.1666667

contrasts(table.dt2$vowel_quality) <- contrast.vowel_quality
contrasts(table.dt2$vowel_pair) <- contrast.vowel_pair
contrasts(table.dt2$phonation) <- contrast.phonation
contrasts(table.dt2$tone) <- contrast.tone

5.2 Statistical model

mdl.rt <- lmerTest::lmer(rt ~ vowel_pair + tone + vowel_quality*duration*phonation + (1|participant), data=table.dt2)
summary(mdl.rt)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: rt ~ vowel_pair + tone + vowel_quality * duration * phonation +      (1 | participant)
##    Data: table.dt2
## 
## REML criterion at convergence: 75577.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0074 -0.6818 -0.2240  0.4248  4.1984 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  participant (Intercept) 225757   475.1   
##  Residual                726436   852.3   
## Number of obs: 4630, groups:  participant, 40
## 
## Fixed effects:
##                                                                 Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)                                                     1991.831     76.228   36.919  26.130  < 2e-16 ***
## vowel_pair+a-o                                                   325.796     25.067 4571.016  12.997  < 2e-16 ***
## tone-high+low                                                     98.582     25.064 4571.000   3.933 8.51e-05 ***
## vowel_quality+closed-open                                         73.274     25.079 4571.085   2.922  0.00350 ** 
## durationL                                                        177.998     33.698 4571.187   5.282 1.34e-07 ***
## durationQ                                                         39.049     25.082 4571.345   1.557  0.11957    
## durationC                                                         24.767     33.598 4570.997   0.737  0.46107    
## phonation+checked-unchecked                                       76.015     25.075 4570.954   3.031  0.00245 ** 
## vowel_quality+closed-open:durationL                              191.588     67.380 4571.057   2.843  0.00448 ** 
## vowel_quality+closed-open:durationQ                               -8.282     50.156 4571.070  -0.165  0.86886    
## vowel_quality+closed-open:durationC                               74.053     67.210 4571.071   1.102  0.27060    
## vowel_quality+closed-open:phonation+checked-unchecked           -426.327     50.208 4571.834  -8.491  < 2e-16 ***
## durationL:phonation+checked-unchecked                            217.453     67.372 4570.957   3.228  0.00126 ** 
## durationQ:phonation+checked-unchecked                            -54.877     50.155 4571.011  -1.094  0.27394    
## durationC:phonation+checked-unchecked                            -75.249     67.205 4571.125  -1.120  0.26291    
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -140.212    134.766 4571.217  -1.040  0.29820    
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked  153.610    100.311 4571.143   1.531  0.12575    
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -146.513    134.402 4571.016  -1.090  0.27572    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 18 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

mdl.rt_age <- lmerTest::lmer(rt ~ age + vowel_pair + tone + vowel_quality*duration*phonation + (1|participant), data=table.dt2)
summary(mdl.rt_age)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: rt ~ age + vowel_pair + tone + vowel_quality * duration * phonation +      (1 | participant)
##    Data: table.dt2
## 
## REML criterion at convergence: 75570.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0142 -0.6818 -0.2239  0.4246  4.1995 
## 
## Random effects:
##  Groups      Name        Variance Std.Dev.
##  participant (Intercept) 230582   480.2   
##  Residual                726425   852.3   
## Number of obs: 4630, groups:  participant, 40
## 
## Fixed effects:
##                                                                 Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)                                                     2107.084    231.183   35.978   9.114 7.01e-11 ***
## age                                                               -4.263      8.069   35.894  -0.528  0.60051    
## vowel_pair+a-o                                                   325.805     25.067 4571.154  12.997  < 2e-16 ***
## tone-high+low                                                     98.585     25.064 4571.139   3.933 8.50e-05 ***
## vowel_quality+closed-open                                         73.297     25.079 4571.215   2.923  0.00349 ** 
## durationL                                                        178.026     33.698 4571.318   5.283 1.33e-07 ***
## durationQ                                                         39.037     25.081 4571.478   1.556  0.11968    
## durationC                                                         24.759     33.598 4571.137   0.737  0.46121    
## phonation+checked-unchecked                                       76.008     25.075 4571.093   3.031  0.00245 ** 
## vowel_quality+closed-open:durationL                              191.610     67.380 4571.196   2.844  0.00448 ** 
## vowel_quality+closed-open:durationQ                               -8.243     50.156 4571.204  -0.164  0.86946    
## vowel_quality+closed-open:durationC                               74.027     67.209 4571.205   1.101  0.27076    
## vowel_quality+closed-open:phonation+checked-unchecked           -426.402     50.208 4571.939  -8.493  < 2e-16 ***
## durationL:phonation+checked-unchecked                            217.455     67.371 4571.097   3.228  0.00126 ** 
## durationQ:phonation+checked-unchecked                            -54.910     50.154 4571.141  -1.095  0.27365    
## durationC:phonation+checked-unchecked                            -75.304     67.205 4571.258  -1.121  0.26255    
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -140.192    134.765 4571.350  -1.040  0.29827    
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked  153.596    100.310 4571.274   1.531  0.12579    
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -146.575    134.401 4571.154  -1.091  0.27551    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

## 
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE)  or
##     vcov(x)        if you need it

No effect of age on reaction time either.

ggplot(table.dt2, aes(x = phonation,
                           y= rt, fill = vowel_quality)) + 
  geom_boxplot(na.rm = TRUE) + 
  scale_y_continuous(limits=c(500,2500), breaks=seq(500,2500,500), expand = c(0, 0)) +
  ylab("Reaction time (ms)") +
  scale_fill_viridis(discrete = TRUE, alpha=0.5, option="D") +
  labs(fill = "Vowel quality") + 
  theme_ipsum() +
  theme(
      legend.position = "right",
      axis.title.x  = element_text(vjust = 0.5, hjust = 0.5, family = "serif", face="bold", size = 12),
      axis.title.y  = element_text(hjust = 0.5, family = "serif", face="bold", size = 12),
      axis.text.x   = element_text(family = "serif", size = 10.5),
      axis.text.y   = element_text(family = "serif", size = 10.5),
      legend.title  = element_text(family = "serif", face="bold", size = 12),
      legend.text = element_text(family = "serif", size = 10.5, margin = margin(b = 10))
    )

predicted_values <- predict(mdl.rt, newdata = table.dt2, type = "response")
ggplot(table.dt2, aes(x = table.dt2$phonation, y = predicted_values, color = factor( table.dt2$vowel_quality))) + ylab("Reaction time (ms)") +
  geom_point(position = position_dodge(width = 0.2)) +
  #geom_violin() +
  geom_line(aes(group = factor(table.dt2$vowel_quality)), position = position_dodge(width = 0.2)) +
  labs(color = "Vowel quality") +
  scale_fill_viridis(discrete = TRUE, alpha=0.5, option="D") +
  theme_ipsum()+
  theme(
      #legend.position = c(0.95,0.9),
      legend.position = "right",
      axis.title.x  = element_text(vjust = 0.5, hjust = 0.5, family = "serif", face="bold", size = 12),
      axis.title.y  = element_text(hjust = 0.5, family = "serif", face="bold", size = 12),
      axis.text.x   = element_text(family = "serif", size = 10.5),
      axis.text.y   = element_text(family = "serif", size = 10.5),
      legend.title  = element_text(family = "serif", face="bold", size = 12),
      legend.text = element_text(family = "serif", size = 10.5, margin = margin(b = 10))
    ) + xlab("phonation type") +
   geom_signif(comparisons = list(c("checked", "unchecked")), map_signif_level = TRUE, textsize = 4, vjust = 0.1)

## Warning: Use of `table.dt2$phonation` is discouraged.
## ℹ Use `phonation` instead.

## Warning: Use of `table.dt2$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.
## Use of `table.dt2$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.

## Warning: Use of `table.dt2$phonation` is discouraged.
## ℹ Use `phonation` instead.

## Warning: Use of `table.dt2$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.

## Warning: Use of `table.dt2$phonation` is discouraged.
## ℹ Use `phonation` instead.

## Warning: Use of `table.dt2$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.

5.3 Calculating intervals

# function
interval_calculating <- function(row, variable_name) {
  estimate <- summary(mdl.rt)$coefficients[row,1]
  sd <- summary(mdl.rt)$coefficients[row,2]
  rounded_estimate <- round(estimate)
  interval <- c(round((estimate - 1.96 * sd),0), round((estimate + 1.96 * sd),0))
  df <- summary(mdl.rt)$coefficients[row,3]
  df <- round(df,2)
  t <- round(summary(mdl.rt)$coefficients[row,4],3)
  p <- summary(mdl.rt)$coefficients[row,5]
  return(c(variable_name, rounded_estimate, interval, df, t, p))
}

# vowel pair
interval_calculating(2, "vowel pair")

## [1] "vowel pair"           "326"                  "277"                  "375"                  "4571.02"              "12.997"               "5.94931701183492e-38"

# tone
interval_calculating(3, "tone register")

## [1] "tone register"        "99"                   "49"                   "148"                  "4571"                 "3.933"                "8.50660397227027e-05"

# vowel quality
interval_calculating(4, "vowel quality")

## [1] "vowel quality"     "73"                "24"                "122"               "4571.08"           "2.922"             "0.003498440996793"

# duration.L
interval_calculating(5, "duration linear")

## [1] "duration linear"      "178"                  "112"                  "244"                  "4571.19"              "5.282"                "1.33634229794574e-07"

#phonation type
interval_calculating(8, "phonation type")

## [1] "phonation type"      "76"                  "27"                  "125"                 "4570.95"             "3.031"               "0.00244733005618166"

# vowel quality and phonation type
interval_calculating(12, "vowel quality and phonation")

## [1] "vowel quality and phonation" "-426"                        "-525"                        "-328"                        "4571.83"                     "-8.491"                     
## [7] "2.73188486142037e-17"

# vowel quality and duration.L
interval_calculating(9, "vowel quality and duration.L")

## [1] "vowel quality and duration.L" "192"                          "60"                           "324"                          "4571.06"                      "2.843"                       
## [7] "0.00448360395428211"

# phonation type and duration.L
interval_calculating(13, "phonation type and duration.L")

## [1] "phonation type and duration.L" "217"                           "85"                            "350"                           "4570.96"                       "3.228"                        
## [7] "0.00125690839011005"

checkedness_experiment

Chengjia Ye

2023-12-01