The present study examines whether phonation type, vowel quality and duration play a role in perception of checked tones in Shanghai Chinese. A three-alternative forced-choice experiment was conducted in which 40 native listeners chose among three Chinese characters representing an open, a nasal-ending and a glottal-ending syllable according to the word they heard. Results showed main effects of phonation type, vowel quality and duration as well as the interaction between the former two with respect to both decisions and reaction time (RT). Two-way interactions between duration and the other two cues, however, could only be seen in RT. We therefore tentatively conclude that tenser phonation may be the primary perceptual cue to checked tones while more central vowel space and shorter duration are secondary, in line with findings in other tonal languages like Vietnamese and Cantonese. Additionally, we also found that the low vowel pair /ɐ/ and /ɑ/ and high register tones tended to bias perception towards checked tones.
This .Rmd file documents the data processing, statistical analysis and plotting of this study.
library(lme4)
## Loading required package: Matrix
library(lmerTest)
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(ggplot2)
library(forcats)
library(viridis)
## Loading required package: viridisLite
library(hrbrthemes)
library(tidyr)
##
## Attaching package: 'tidyr'
## The following objects are masked from 'package:Matrix':
##
## expand, pack, unpack
library(ggsignif)
library(emmeans)
## Welcome to emmeans.
## Caution: You lose important information if you filter this package's results.
## See '? untidy'
options(width=200)
table.dt <- read.delim("results.tsv", stringsAsFactors=TRUE)
table.dt$group <- as.factor(table.dt$group)
table.dt <- table.dt[,-4:-5] #remove district and trial number
participant <- levels(table.dt$participant)
dt.nasalph <- table.dt[table.dt$phonation == 'nasal',]
dt.openph <- table.dt[table.dt$phonation == 'unchecked',]
dt.glottalph <- table.dt[table.dt$phonation == 'checked',]
cnt <- 1
nasal <- vector(length = 40 * 3)
open <- vector(length = 40 * 3)
glottal <- vector(length = 40 * 3)
for (part in participant) {
dt.part <- dt.openph[dt.openph$participant == part,]
nasal[cnt] <- nrow(dt.part[dt.part$decision == "s",])
open[cnt] <- nrow(dt.part[dt.part$decision == "a",])
glottal[cnt] <- nrow(dt.part[dt.part$decision == "d",])
cnt <- cnt+1
}
for (part in participant) {
dt.part <- dt.nasalph[dt.nasalph$participant == part,]
nasal[cnt] <- nrow(dt.part[dt.part$decision == "s",])
open[cnt] <- nrow(dt.part[dt.part$decision == "a",])
glottal[cnt] <- nrow(dt.part[dt.part$decision == "d",])
cnt <- cnt+1
}
for (part in participant) {
dt.part <- dt.glottalph[dt.glottalph$participant == part,]
nasal[cnt] <- nrow(dt.part[dt.part$decision == "s",])
open[cnt] <- nrow(dt.part[dt.part$decision == "a",])
glottal[cnt] <- nrow(dt.part[dt.part$decision == "d",])
cnt <- cnt+1
}
# Transfer count to percentage
new_frame <- data.frame(participant = 1:40, open = c(open[1:40]/0.64, open[41:80]/0.32, open[81:120]/0.64), nasal = c(nasal[1:40]/0.64, nasal[41:80]/0.32, nasal[81:120]/0.64), glottal = c(glottal[1:40]/0.64, glottal[41:80]/0.32, glottal[81:120]/0.64), stimuli_type = rep(c("open", "nasal", "glottal"), each = 40))
new_frame_long <- gather(new_frame[,-1], key = "perceived_syllable", value = "percentage", -stimuli_type)
new_frame_long$stimuli_type <- factor(new_frame_long$stimuli_type, levels = c("open", "nasal", "glottal"))
new_frame_long$perceived_syllable <- factor(new_frame_long$perceived_syllable, levels = c("open", "nasal", "glottal"))
fig_2 <- ggplot(new_frame_long, aes(x = stimuli_type,
y= percentage, fill = perceived_syllable)) + ylab("Percentage of responses (%)") +
scale_x_discrete(labels = c("[−tense] [−nasal]","[−tense] [+nasal]","[+tense] [−nasal]")) +
scale_y_continuous(limits=c(0,100), breaks=seq(0,100,20), expand = c(0, 0)) +
labs(fill = "Perceived syllable") +
geom_boxplot(outlier.size = 0.5, notch = FALSE, linewidth = 0.35, fatten = 2) +
scale_fill_viridis(discrete = TRUE, alpha=0.5, option="D", labels = c("open", "nasal-ending", "glottal-ending")) +
theme_ipsum() +
theme(
legend.position = "bottom",
axis.title.x = element_text(vjust = 0.5, hjust = 0.5, family = "serif", face="bold", size = 12),
axis.title.y = element_text(hjust = 0.5, family = "serif", face="bold", size = 12),
axis.text.x = element_text(family = "serif", size = 10.5),
axis.text.y = element_text(family = "serif", size = 10.5),
legend.title = element_text(family = "serif", face="bold",size = 12),
legend.text = element_text(family = "serif", size = 10.5),
legend.box.spacing = margin(0.25),
plot.margin = unit(c(0.25,0.75,0.25,0.75), units = "cm")
) + xlab("Phonation type and final nasality as auditory cues in stimuli")
#+ xlab("Stimuli sorted by two auditory cues: phonation and nasality")
ggsave("Figure_2.png", fig_2, width = 132, height = 72, units = "mm", dpi = 1000)
fig_2
#remove responses related to "nasal"
table.dt <- table.dt[table.dt$decision != "s" & table.dt$phonation != "nasal", ]
table.dt <- droplevels(table.dt)
# Duration: orthogonal polynomial coding
#contrasts(table.dt$duration) <- contr.poly(4)
contrast.duration <- cbind(c(-1/2,-1/6,1/6,1/2),c(0.5,-0.5,-0.5,0.5),c(-1/6,1/2,-1/2,1/6))
colnames(contrast.duration) <- c("L","Q","C")
contrasts(table.dt$duration) <- contrast.duration
contrasts(table.dt$duration)
## L Q C
## A -0.5000000 0.5 -0.1666667
## B -0.1666667 -0.5 0.5000000
## C 0.1666667 -0.5 -0.5000000
## D 0.5000000 0.5 0.1666667
# Vowel quality
contrast.vowel_quality <- cbind (c(+0.5,-0.5))
colnames(contrast.vowel_quality) <- c("+closed-open")
contrasts(table.dt$vowel_quality) <- contrast.vowel_quality
contrasts(table.dt$vowel_quality)
## +closed-open
## closed 0.5
## open -0.5
# Vowel pair
contrast.vowel_pair <- cbind (c(+0.5,-0.5))
colnames(contrast.vowel_pair) <- c("+a-o")
contrasts(table.dt$vowel_pair) <- contrast.vowel_pair
contrasts(table.dt$vowel_pair)
## +a-o
## a 0.5
## o -0.5
# Phonation type
contrast.phonation <- cbind (c(+0.5,-0.5))
colnames(contrast.phonation) <- c("+checked-unchecked")
contrasts(table.dt$phonation) <- contrast.phonation
contrasts(table.dt$phonation)
## +checked-unchecked
## checked 0.5
## unchecked -0.5
# Tone register
contrast.tone <- cbind (c(-0.5,+0.5))
colnames(contrast.tone) <- c("-high+low")
contrasts(table.dt$tone) <- contrast.tone
contrasts(table.dt$tone)
## -high+low
## high -0.5
## low 0.5
What about age? If we centre all our binary predictors, we should also centre Age. Otherwise, the p-values for all predictors not involving Age will be for newborns.
mean (table.dt$age)
## [1] 27.07325
sd (table.dt$age)
## [1] 9.607725
So let’ centre Age:
table.dt$age_centred <- (table.dt$age - 27.0) / 10.0
The p-values for all predictors not involving age_centred will then be for 27-year-olds, and the estimate of the effect of age_centred will be in units of “per decade”.
mdl.dcsion <- lme4::glmer(decision ~ vowel_pair + tone + vowel_quality*duration*phonation + (1|participant), data=table.dt, family=binomial)
summary(mdl.dcsion)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: decision ~ vowel_pair + tone + vowel_quality * duration * phonation + (1 | participant)
## Data: table.dt
##
## AIC BIC logLik deviance df.resid
## 3595.5 3718.7 -1778.8 3557.5 4814
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.8048 -0.2322 0.2358 0.4226 7.9532
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant (Intercept) 0.2105 0.4589
## Number of obs: 4833, groups: participant, 40
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.75139 0.08986 8.362 < 2e-16 ***
## vowel_pair+a-o 0.72303 0.08795 8.221 < 2e-16 ***
## tone-high+low -0.38358 0.08673 -4.423 9.74e-06 ***
## vowel_quality+closed-open 2.23494 0.10635 21.014 < 2e-16 ***
## durationL -1.30395 0.14437 -9.032 < 2e-16 ***
## durationQ 0.19548 0.10473 1.867 0.062 .
## durationC 0.08093 0.13685 0.591 0.554
## phonation+checked-unchecked 3.36549 0.10822 31.098 < 2e-16 ***
## vowel_quality+closed-open:durationL 0.22557 0.28816 0.783 0.434
## vowel_quality+closed-open:durationQ -0.13204 0.20946 -0.630 0.528
## vowel_quality+closed-open:durationC -0.26698 0.27370 -0.975 0.329
## vowel_quality+closed-open:phonation+checked-unchecked -2.49753 0.21137 -11.816 < 2e-16 ***
## durationL:phonation+checked-unchecked 0.19035 0.28824 0.660 0.509
## durationQ:phonation+checked-unchecked 0.05918 0.20947 0.282 0.778
## durationC:phonation+checked-unchecked -0.11225 0.27369 -0.410 0.682
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -0.23217 0.57637 -0.403 0.687
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked 0.58434 0.41886 1.395 0.163
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -0.67884 0.54736 -1.240 0.215
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 18 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
The model doesn’t converge if Age is included:
mdl.dcsion_age <- lme4::glmer(decision ~ age + vowel_pair + tone + vowel_quality*duration*phonation + (1|participant), data=table.dt, family=binomial)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.020339 (tol = 0.002, component 1)
summary(mdl.dcsion_age)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: decision ~ age + vowel_pair + tone + vowel_quality * duration * phonation + (1 | participant)
## Data: table.dt
##
## AIC BIC logLik deviance df.resid
## 3597.4 3727.0 -1778.7 3557.4 4813
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.8117 -0.2317 0.2355 0.4229 7.9398
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant (Intercept) 0.2097 0.4579
## Number of obs: 4833, groups: participant, 40
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.839513 0.254680 3.296 0.00098 ***
## age -0.003264 0.008812 -0.370 0.71112
## vowel_pair+a-o 0.722872 0.087948 8.219 < 2e-16 ***
## tone-high+low -0.383530 0.086726 -4.422 9.77e-06 ***
## vowel_quality+closed-open 2.234822 0.106353 21.013 < 2e-16 ***
## durationL -1.303855 0.144376 -9.031 < 2e-16 ***
## durationQ 0.195414 0.104730 1.866 0.06206 .
## durationC 0.081060 0.136853 0.592 0.55364
## phonation+checked-unchecked 3.365423 0.108218 31.098 < 2e-16 ***
## vowel_quality+closed-open:durationL 0.225751 0.288181 0.783 0.43341
## vowel_quality+closed-open:durationQ -0.131836 0.209463 -0.629 0.52909
## vowel_quality+closed-open:durationC -0.266897 0.273720 -0.975 0.32952
## vowel_quality+closed-open:phonation+checked-unchecked -2.497985 0.211369 -11.818 < 2e-16 ***
## durationL:phonation+checked-unchecked 0.190610 0.288246 0.661 0.50844
## durationQ:phonation+checked-unchecked 0.059164 0.209478 0.282 0.77761
## durationC:phonation+checked-unchecked -0.112012 0.273709 -0.409 0.68236
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -0.231539 0.576511 -0.402 0.68796
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked 0.584303 0.418909 1.395 0.16307
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -0.678433 0.547404 -1.239 0.21521
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.020339 (tol = 0.002, component 1)
The model doesn’t even converge if age_centred
is
included:
mdl.dcsion_age <- lme4::glmer(decision ~ age_centred + vowel_pair + tone + vowel_quality*duration*phonation + (1|participant), data=table.dt, family=binomial)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.00844478 (tol = 0.002, component 1)
summary(mdl.dcsion_age)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: decision ~ age_centred + vowel_pair + tone + vowel_quality * duration * phonation + (1 | participant)
## Data: table.dt
##
## AIC BIC logLik deviance df.resid
## 3597.4 3727.0 -1778.7 3557.4 4813
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.8041 -0.2317 0.2356 0.4228 7.9339
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant (Intercept) 0.2097 0.458
## Number of obs: 4833, groups: participant, 40
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.75141 0.08974 8.373 < 2e-16 ***
## age_centred -0.03309 0.08813 -0.375 0.707
## vowel_pair+a-o 0.72296 0.08795 8.220 < 2e-16 ***
## tone-high+low -0.38369 0.08672 -4.424 9.68e-06 ***
## vowel_quality+closed-open 2.23466 0.10633 21.016 < 2e-16 ***
## durationL -1.30319 0.14431 -9.030 < 2e-16 ***
## durationQ 0.19541 0.10471 1.866 0.062 .
## durationC 0.08142 0.13686 0.595 0.552
## phonation+checked-unchecked 3.36496 0.10820 31.100 < 2e-16 ***
## vowel_quality+closed-open:durationL 0.22548 0.28806 0.783 0.434
## vowel_quality+closed-open:durationQ -0.13356 0.20942 -0.638 0.524
## vowel_quality+closed-open:durationC -0.26668 0.27373 -0.974 0.330
## vowel_quality+closed-open:phonation+checked-unchecked -2.49791 0.21133 -11.820 < 2e-16 ***
## durationL:phonation+checked-unchecked 0.19110 0.28814 0.663 0.507
## durationQ:phonation+checked-unchecked 0.05765 0.20944 0.275 0.783
## durationC:phonation+checked-unchecked -0.11229 0.27372 -0.410 0.682
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -0.23002 0.57620 -0.399 0.690
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked 0.58341 0.41884 1.393 0.164
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -0.67705 0.54748 -1.237 0.216
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (Nelder_Mead) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.00844478 (tol = 0.002, component 1)
Sometimes it helps if only bobyqa
is used as the
optimizer:
ctrl <- glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 1e5))
mdl.dcsion_age <- lme4::glmer(decision ~ age_centred + vowel_pair + tone + vowel_quality*duration*phonation + (1|participant), data=table.dt, family=binomial, control=ctrl)
summary(mdl.dcsion_age)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: decision ~ age_centred + vowel_pair + tone + vowel_quality * duration * phonation + (1 | participant)
## Data: table.dt
## Control: ctrl
##
## AIC BIC logLik deviance df.resid
## 3597.4 3727.0 -1778.7 3557.4 4813
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.8144 -0.2317 0.2356 0.4228 7.9391
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant (Intercept) 0.2098 0.458
## Number of obs: 4833, groups: participant, 40
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.75153 0.08975 8.374 < 2e-16 ***
## age_centred -0.03301 0.08813 -0.375 0.7080
## vowel_pair+a-o 0.72301 0.08795 8.221 < 2e-16 ***
## tone-high+low -0.38355 0.08673 -4.422 9.76e-06 ***
## vowel_quality+closed-open 2.23495 0.10636 21.014 < 2e-16 ***
## durationL -1.30384 0.14437 -9.031 < 2e-16 ***
## durationQ 0.19550 0.10473 1.867 0.0619 .
## durationC 0.08088 0.13685 0.591 0.5545
## phonation+checked-unchecked 3.36554 0.10822 31.099 < 2e-16 ***
## vowel_quality+closed-open:durationL 0.22535 0.28817 0.782 0.4342
## vowel_quality+closed-open:durationQ -0.13195 0.20946 -0.630 0.5287
## vowel_quality+closed-open:durationC -0.26692 0.27371 -0.975 0.3295
## vowel_quality+closed-open:phonation+checked-unchecked -2.49771 0.21137 -11.817 < 2e-16 ***
## durationL:phonation+checked-unchecked 0.19018 0.28826 0.660 0.5094
## durationQ:phonation+checked-unchecked 0.05913 0.20947 0.282 0.7777
## durationC:phonation+checked-unchecked -0.11223 0.27369 -0.410 0.6818
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -0.23177 0.57640 -0.402 0.6876
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked 0.58431 0.41886 1.395 0.1630
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -0.67872 0.54740 -1.240 0.2150
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
No problem, but the centring was crucial:
mdl.dcsion_age <- lme4::glmer(decision ~ age + vowel_pair + tone + vowel_quality*duration*phonation + (1|participant), data=table.dt, family=binomial, control=ctrl)
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model failed to converge with max|grad| = 0.0140575 (tol = 0.002, component 1)
summary(mdl.dcsion_age)
## Generalized linear mixed model fit by maximum likelihood (Laplace Approximation) ['glmerMod']
## Family: binomial ( logit )
## Formula: decision ~ age + vowel_pair + tone + vowel_quality * duration * phonation + (1 | participant)
## Data: table.dt
## Control: ctrl
##
## AIC BIC logLik deviance df.resid
## 3597.4 3727.0 -1778.7 3557.4 4813
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -7.8104 -0.2318 0.2357 0.4228 7.9453
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant (Intercept) 0.2097 0.4579
## Number of obs: 4833, groups: participant, 40
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 0.840520 0.254707 3.300 0.000967 ***
## age -0.003299 0.008813 -0.374 0.708183
## vowel_pair+a-o 0.723033 0.087949 8.221 < 2e-16 ***
## tone-high+low -0.383600 0.086727 -4.423 9.73e-06 ***
## vowel_quality+closed-open 2.235045 0.106357 21.015 < 2e-16 ***
## durationL -1.304330 0.144377 -9.034 < 2e-16 ***
## durationQ 0.195067 0.104733 1.863 0.062531 .
## durationC 0.081475 0.136854 0.595 0.551616
## phonation+checked-unchecked 3.365542 0.108223 31.098 < 2e-16 ***
## vowel_quality+closed-open:durationL 0.225817 0.288196 0.784 0.433302
## vowel_quality+closed-open:durationQ -0.132045 0.209467 -0.630 0.528441
## vowel_quality+closed-open:durationC -0.265480 0.273727 -0.970 0.332111
## vowel_quality+closed-open:phonation+checked-unchecked -2.497838 0.211379 -11.817 < 2e-16 ***
## durationL:phonation+checked-unchecked 0.191024 0.288280 0.663 0.507566
## durationQ:phonation+checked-unchecked 0.059092 0.209484 0.282 0.777878
## durationC:phonation+checked-unchecked -0.111519 0.273709 -0.407 0.683689
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -0.234597 0.576385 -0.407 0.683998
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked 0.580842 0.418922 1.387 0.165590
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -0.673263 0.547409 -1.230 0.218732
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
## optimizer (bobyqa) convergence code: 0 (OK)
## Model failed to converge with max|grad| = 0.0140575 (tol = 0.002, component 1)
No influence of age on decision making was found, so age was excluded.
# odds calculating function
odds_calculating <- function(row, variable_name) {
estimate <- abs(summary(mdl.dcsion)$coefficients[row,1])
sd <- abs(summary(mdl.dcsion)$coefficients[row,2])
exp_estimate <- round(exp(estimate),2)
exp_interval <- c(round(exp(estimate - 1.96 * sd),2), round(exp(estimate + 1.96 * sd),2))
z <- round(summary(mdl.dcsion)$coefficients[row,3],3)
p <- summary(mdl.dcsion)$coefficients[row,4]
print(c(variable_name, exp_estimate, exp_interval, z, p))
}
# vowel pair
odds_calculating(2, "vowel pair")
## [1] "vowel pair" "2.06" "1.73" "2.45" "8.221" "2.01931520142918e-16"
# tone
odds_calculating(3, "tone register")
## [1] "tone register" "1.47" "1.24" "1.74" "-4.423" "9.73864491924e-06"
# vowel quality
odds_calculating(4, "vowel quality")
## [1] "vowel quality" "9.35" "7.59" "11.51" "21.014" "4.86885491720551e-98"
# duration.L
odds_calculating(5, "duration linear")
## [1] "duration linear" "3.68" "2.78" "4.89" "-9.032" "1.68454864356566e-19"
#phonation type
odds_calculating(8, "phonation type")
## [1] "phonation type" "28.95" "23.41" "35.79" "31.098" "2.52710719713357e-212"
# vowel quality and phonation type
odds_calculating(12, "vowel quality and phonation")
## [1] "vowel quality and phonation" "12.15" "8.03" "18.39" "-11.816" "3.22845522404258e-32"
# Load data and create vectors to accommodate data
cnt <- 1
open.ckd <- vector(length = 40)
closed.ckd <- vector(length = 40)
open.unckd <- vector(length = 40)
closed.unckd <- vector(length = 40)
for (part in participant) {
dt.part <- table.dt[table.dt$participant == part,]
open.ckd[cnt] <- nrow(dt.part[dt.part$phonation == 'checked' & dt.part$vowel_quality == "open" & dt.part$decision == "d",])
closed.ckd[cnt] <- nrow(dt.part[dt.part$phonation == 'checked' & dt.part$vowel_quality == "closed"& dt.part$decision == "d",])
open.unckd[cnt] <- nrow(dt.part[dt.part$phonation == 'unchecked' & dt.part$vowel_quality == "open"& dt.part$decision == "d",])
closed.unckd[cnt] <- nrow(dt.part[dt.part$phonation == 'unchecked' & dt.part$vowel_quality == "closed"& dt.part$decision == "d",])
cnt <- cnt+1
}
# gather data together
DF <- data.frame(responses = (c(c(open.ckd, closed.ckd),c(open.unckd,closed.unckd))/0.32), phonation = rep(c("tense", "modal"), each = 80), vowel_quality = rep(rep(c("unchecked","checked"),each = 40), 2), stringsAsFactors = FALSE)
DF$phonation <- factor(DF$phonation, levels = c("tense","modal"))
DF$responses <- DF$responses * 0.32
# plot
fig_3 <- ggplot(DF, aes(x = phonation,
y= responses, fill = vowel_quality)) + ylab("Stimuli perceived as checked tones") +
scale_y_continuous(limits=c(0,35.5), breaks=seq(0,32,8), expand = c(0, 0)) +
labs(fill = "Vowel quality") +
geom_boxplot(outlier.size = 0.75, notch = FALSE, linewidth = 0.35, fatten = 2) +
#geom_violin() +
scale_fill_viridis(discrete = TRUE, alpha=0.5, option="D", labels = c("checked\n/o/ and /ɐ/","unchecked\n/ɔ/ and /ɑ/")) +
theme_ipsum() +
theme(
#legend.position = c(0.95,0.9),
legend.position = "right",
axis.title.x = element_text(vjust = 0.5, hjust = 0.5, family = "serif", face="bold", size = 12),
axis.title.y = element_text(hjust = 0.5, family = "serif", face="bold", size = 12),
axis.text.x = element_text(family = "serif", size = 10.5),
axis.text.y = element_text(family = "serif", size = 10.5),
legend.title = element_text(family = "serif", face="bold", size = 12),
legend.text = element_text(family = "serif", size = 10.5,
margin = margin(b = 10)),
plot.margin = unit(c(0.15,0.75,0.15,0.75), units = "cm")
) + xlab("Phonation type") +
geom_signif(comparisons = list(c("tense", "modal")), map_signif_level = TRUE, textsize = 4, vjust = 0.1)
ggsave("Figure_3.png", fig_3, width = 130, height = 75, units = "mm", dpi = 1000)
fig_3
predicted_values <- predict(mdl.dcsion, newdata = table.dt, type = "response")
#interaction.plot(table.dt$phonation, table.dt$vowel_quality, response = predicted_values, type = "b", col = c("red", "blue"))
ggplot(table.dt, aes(x = table.dt$phonation, y = predicted_values, color = factor( table.dt$vowel_quality))) + ylab("mean percentage of stimuli perceived as checked tones") +
geom_point(position = position_dodge(width = 0.2)) +
#geom_violin() +
geom_line(aes(group = factor(table.dt$vowel_quality)), position = position_dodge(width = 0.2)) +
labs(color = "Vowel quality") +
scale_fill_viridis(discrete = TRUE, alpha=0.5, option="D") +
theme_ipsum()+
theme(
#legend.position = c(0.95,0.9),
legend.position = "right",
axis.title.x = element_text(vjust = 0.5, hjust = 0.5, family = "serif", face="bold", size = 12),
axis.title.y = element_text(hjust = 0.5, family = "serif", face="bold", size = 12),
axis.text.x = element_text(family = "serif", size = 10.5),
axis.text.y = element_text(family = "serif", size = 10.5),
legend.title = element_text(family = "serif", face="bold", size = 12),
legend.text = element_text(family = "serif", size = 10.5, margin = margin(b = 10))
) + xlab("phonation type") +
geom_signif(comparisons = list(c("checked", "unchecked")), map_signif_level = TRUE, textsize = 4, vjust = 0.1)
## Warning: Use of `table.dt$phonation` is discouraged.
## ℹ Use `phonation` instead.
## Warning: Use of `table.dt$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.
## Use of `table.dt$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.
## Warning: Use of `table.dt$phonation` is discouraged.
## ℹ Use `phonation` instead.
## Warning: Use of `table.dt$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.
## Warning: Use of `table.dt$phonation` is discouraged.
## ℹ Use `phonation` instead.
## Warning: Use of `table.dt$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.
summary(pairs(emmeans(mdl.dcsion, ~ vowel_quality | phonation), adjust = "bonferroni"))
## NOTE: Results may be misleading due to involvement in interactions
## phonation = checked:
## contrast estimate SE df z.ratio p.value
## closed - open 0.986 0.154 Inf 6.396 <.0001
##
## phonation = unchecked:
## contrast estimate SE df z.ratio p.value
## closed - open 3.484 0.146 Inf 23.934 <.0001
##
## Results are averaged over the levels of: vowel_pair, tone, duration
## Results are given on the log odds ratio (not the response) scale.
# reload data
table.dt2 <- read.delim("results.tsv", stringsAsFactors=TRUE)
table.dt2 <- table.dt2[,-4:-5]
table.dt2$group <- as.factor(table.dt2$group)
# remove responses as nasal
table.dt2 <- table.dt2[table.dt2$phonation != "nasal", ]
(nrow(table.dt2))
## [1] 5120
table.dt2 <- droplevels(table.dt2)
#remove responses with RT longer than 5125 ms
table.dt2 <- table.dt2[table.dt2$rt <= 5125, ]
(nrow(table.dt2))
## [1] 4630
# contrast coding
#contrasts(table.dt2$duration) <- contr.poly(4)
#contrasts(table.dt2$duration)
contrasts(table.dt2$duration) <- contrast.duration
contrasts(table.dt2$duration)
## L Q C
## A -0.5000000 0.5 -0.1666667
## B -0.1666667 -0.5 0.5000000
## C 0.1666667 -0.5 -0.5000000
## D 0.5000000 0.5 0.1666667
contrasts(table.dt2$vowel_quality) <- contrast.vowel_quality
contrasts(table.dt2$vowel_pair) <- contrast.vowel_pair
contrasts(table.dt2$phonation) <- contrast.phonation
contrasts(table.dt2$tone) <- contrast.tone
mdl.rt <- lmerTest::lmer(rt ~ vowel_pair + tone + vowel_quality*duration*phonation + (1|participant), data=table.dt2)
summary(mdl.rt)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: rt ~ vowel_pair + tone + vowel_quality * duration * phonation + (1 | participant)
## Data: table.dt2
##
## REML criterion at convergence: 75577.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0074 -0.6818 -0.2240 0.4248 4.1984
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant (Intercept) 225757 475.1
## Residual 726436 852.3
## Number of obs: 4630, groups: participant, 40
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1991.831 76.228 36.919 26.130 < 2e-16 ***
## vowel_pair+a-o 325.796 25.067 4571.016 12.997 < 2e-16 ***
## tone-high+low 98.582 25.064 4571.000 3.933 8.51e-05 ***
## vowel_quality+closed-open 73.274 25.079 4571.085 2.922 0.00350 **
## durationL 177.998 33.698 4571.187 5.282 1.34e-07 ***
## durationQ 39.049 25.082 4571.345 1.557 0.11957
## durationC 24.767 33.598 4570.997 0.737 0.46107
## phonation+checked-unchecked 76.015 25.075 4570.954 3.031 0.00245 **
## vowel_quality+closed-open:durationL 191.588 67.380 4571.057 2.843 0.00448 **
## vowel_quality+closed-open:durationQ -8.282 50.156 4571.070 -0.165 0.86886
## vowel_quality+closed-open:durationC 74.053 67.210 4571.071 1.102 0.27060
## vowel_quality+closed-open:phonation+checked-unchecked -426.327 50.208 4571.834 -8.491 < 2e-16 ***
## durationL:phonation+checked-unchecked 217.453 67.372 4570.957 3.228 0.00126 **
## durationQ:phonation+checked-unchecked -54.877 50.155 4571.011 -1.094 0.27394
## durationC:phonation+checked-unchecked -75.249 67.205 4571.125 -1.120 0.26291
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -140.212 134.766 4571.217 -1.040 0.29820
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked 153.610 100.311 4571.143 1.531 0.12575
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -146.513 134.402 4571.016 -1.090 0.27572
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 18 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
mdl.rt_age <- lmerTest::lmer(rt ~ age + vowel_pair + tone + vowel_quality*duration*phonation + (1|participant), data=table.dt2)
summary(mdl.rt_age)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
## Formula: rt ~ age + vowel_pair + tone + vowel_quality * duration * phonation + (1 | participant)
## Data: table.dt2
##
## REML criterion at convergence: 75570.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.0142 -0.6818 -0.2239 0.4246 4.1995
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant (Intercept) 230582 480.2
## Residual 726425 852.3
## Number of obs: 4630, groups: participant, 40
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2107.084 231.183 35.978 9.114 7.01e-11 ***
## age -4.263 8.069 35.894 -0.528 0.60051
## vowel_pair+a-o 325.805 25.067 4571.154 12.997 < 2e-16 ***
## tone-high+low 98.585 25.064 4571.139 3.933 8.50e-05 ***
## vowel_quality+closed-open 73.297 25.079 4571.215 2.923 0.00349 **
## durationL 178.026 33.698 4571.318 5.283 1.33e-07 ***
## durationQ 39.037 25.081 4571.478 1.556 0.11968
## durationC 24.759 33.598 4571.137 0.737 0.46121
## phonation+checked-unchecked 76.008 25.075 4571.093 3.031 0.00245 **
## vowel_quality+closed-open:durationL 191.610 67.380 4571.196 2.844 0.00448 **
## vowel_quality+closed-open:durationQ -8.243 50.156 4571.204 -0.164 0.86946
## vowel_quality+closed-open:durationC 74.027 67.209 4571.205 1.101 0.27076
## vowel_quality+closed-open:phonation+checked-unchecked -426.402 50.208 4571.939 -8.493 < 2e-16 ***
## durationL:phonation+checked-unchecked 217.455 67.371 4571.097 3.228 0.00126 **
## durationQ:phonation+checked-unchecked -54.910 50.154 4571.141 -1.095 0.27365
## durationC:phonation+checked-unchecked -75.304 67.205 4571.258 -1.121 0.26255
## vowel_quality+closed-open:durationL:phonation+checked-unchecked -140.192 134.765 4571.350 -1.040 0.29827
## vowel_quality+closed-open:durationQ:phonation+checked-unchecked 153.596 100.310 4571.274 1.531 0.12579
## vowel_quality+closed-open:durationC:phonation+checked-unchecked -146.575 134.401 4571.154 -1.091 0.27551
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation matrix not shown by default, as p = 19 > 12.
## Use print(x, correlation=TRUE) or
## vcov(x) if you need it
No effect of age on reaction time either.
ggplot(table.dt2, aes(x = phonation,
y= rt, fill = vowel_quality)) +
geom_boxplot(na.rm = TRUE) +
scale_y_continuous(limits=c(500,2500), breaks=seq(500,2500,500), expand = c(0, 0)) +
ylab("Reaction time (ms)") +
scale_fill_viridis(discrete = TRUE, alpha=0.5, option="D") +
labs(fill = "Vowel quality") +
theme_ipsum() +
theme(
legend.position = "right",
axis.title.x = element_text(vjust = 0.5, hjust = 0.5, family = "serif", face="bold", size = 12),
axis.title.y = element_text(hjust = 0.5, family = "serif", face="bold", size = 12),
axis.text.x = element_text(family = "serif", size = 10.5),
axis.text.y = element_text(family = "serif", size = 10.5),
legend.title = element_text(family = "serif", face="bold", size = 12),
legend.text = element_text(family = "serif", size = 10.5, margin = margin(b = 10))
)
predicted_values <- predict(mdl.rt, newdata = table.dt2, type = "response")
ggplot(table.dt2, aes(x = table.dt2$phonation, y = predicted_values, color = factor( table.dt2$vowel_quality))) + ylab("Reaction time (ms)") +
geom_point(position = position_dodge(width = 0.2)) +
#geom_violin() +
geom_line(aes(group = factor(table.dt2$vowel_quality)), position = position_dodge(width = 0.2)) +
labs(color = "Vowel quality") +
scale_fill_viridis(discrete = TRUE, alpha=0.5, option="D") +
theme_ipsum()+
theme(
#legend.position = c(0.95,0.9),
legend.position = "right",
axis.title.x = element_text(vjust = 0.5, hjust = 0.5, family = "serif", face="bold", size = 12),
axis.title.y = element_text(hjust = 0.5, family = "serif", face="bold", size = 12),
axis.text.x = element_text(family = "serif", size = 10.5),
axis.text.y = element_text(family = "serif", size = 10.5),
legend.title = element_text(family = "serif", face="bold", size = 12),
legend.text = element_text(family = "serif", size = 10.5, margin = margin(b = 10))
) + xlab("phonation type") +
geom_signif(comparisons = list(c("checked", "unchecked")), map_signif_level = TRUE, textsize = 4, vjust = 0.1)
## Warning: Use of `table.dt2$phonation` is discouraged.
## ℹ Use `phonation` instead.
## Warning: Use of `table.dt2$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.
## Use of `table.dt2$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.
## Warning: Use of `table.dt2$phonation` is discouraged.
## ℹ Use `phonation` instead.
## Warning: Use of `table.dt2$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.
## Warning: Use of `table.dt2$phonation` is discouraged.
## ℹ Use `phonation` instead.
## Warning: Use of `table.dt2$vowel_quality` is discouraged.
## ℹ Use `vowel_quality` instead.
# function
interval_calculating <- function(row, variable_name) {
estimate <- summary(mdl.rt)$coefficients[row,1]
sd <- summary(mdl.rt)$coefficients[row,2]
rounded_estimate <- round(estimate)
interval <- c(round((estimate - 1.96 * sd),0), round((estimate + 1.96 * sd),0))
df <- summary(mdl.rt)$coefficients[row,3]
df <- round(df,2)
t <- round(summary(mdl.rt)$coefficients[row,4],3)
p <- summary(mdl.rt)$coefficients[row,5]
return(c(variable_name, rounded_estimate, interval, df, t, p))
}
# vowel pair
interval_calculating(2, "vowel pair")
## [1] "vowel pair" "326" "277" "375" "4571.02" "12.997" "5.94931701183492e-38"
# tone
interval_calculating(3, "tone register")
## [1] "tone register" "99" "49" "148" "4571" "3.933" "8.50660397227027e-05"
# vowel quality
interval_calculating(4, "vowel quality")
## [1] "vowel quality" "73" "24" "122" "4571.08" "2.922" "0.003498440996793"
# duration.L
interval_calculating(5, "duration linear")
## [1] "duration linear" "178" "112" "244" "4571.19" "5.282" "1.33634229794574e-07"
#phonation type
interval_calculating(8, "phonation type")
## [1] "phonation type" "76" "27" "125" "4570.95" "3.031" "0.00244733005618166"
# vowel quality and phonation type
interval_calculating(12, "vowel quality and phonation")
## [1] "vowel quality and phonation" "-426" "-525" "-328" "4571.83" "-8.491"
## [7] "2.73188486142037e-17"
# vowel quality and duration.L
interval_calculating(9, "vowel quality and duration.L")
## [1] "vowel quality and duration.L" "192" "60" "324" "4571.06" "2.843"
## [7] "0.00448360395428211"
# phonation type and duration.L
interval_calculating(13, "phonation type and duration.L")
## [1] "phonation type and duration.L" "217" "85" "350" "4570.96" "3.228"
## [7] "0.00125690839011005"