Analysis
Version 1 (Lab)
The following code chunk reads in the data
# Read in the lab data
raw_results_lab <- read.csv("results_all/csvFiles/20180320/expt_2_results_lab_long_20180320.csv")
# Rename 1st column name (excel issue)
colnames(raw_results_lab)[1] <- "timeReceived"
# Remove data for self-reported non-native English speakers
non_native_subjects <- c(104, 108, 114, 118, 124, 127, 130, 143)
# A single %in% filter replaces eight chained subset() calls; the is.na()
# guard keeps the old behaviour of dropping rows with a missing subject ID.
raw_results_lab_native <- raw_results_lab %>%
  subset(!is.na(subject) & !(subject %in% non_native_subjects)) %>%
  droplevels()
# Extract experiment and filler data.
# Both subsets are taken from the native-speaker data: previously they were
# taken from the unfiltered raw_results_lab, so the exclusion above was
# computed but never applied to the analysis.
experiment_data_lab <- raw_results_lab_native %>%
  subset(itemType == "experimental") %>%
  droplevels()
filler_data_lab <- raw_results_lab_native %>%
  subset(itemType == "filler") %>%
  droplevels()
The experiment results are presented in the following table by condition.
# Coerce the ratings to numeric so that means and SDs can be computed
experiment_data_lab$rating <- as.numeric(experiment_data_lab$rating)
# Reorder the intermSubj (subject) levels to mirror the discussion.
# relevel() moves the named level to the front, so the inner call puts
# "indef" first and the outer call then puts "there" ahead of it.
experiment_data_lab$intermSubj <- relevel(
  relevel(experiment_data_lab$intermSubj, "indef"),
  "there"
)
# Per-condition mean, SD, cell count and standard error of the ratings
descriptive_summary_lab <- dplyr::summarize(
  group_by(experiment_data_lab, intermSubj, dependencyType),
  mean_rating = mean(rating),
  sd_rating = sd(rating),
  n = n(),
  se_rating = sd_rating / sqrt(n)
)
# Persist the summary so other scripts can load it
saveRDS(descriptive_summary_lab, file = "expt2_descriptive_summary.rds")
# Show the table
print(descriptive_summary_lab)
The results are visualized in the plot generated by the following code chunk.
# Condition means (points) with +/- 1 SE error bars, coloured by dependency type
descriptive_plot_lab <- ggplot(
  descriptive_summary_lab,
  aes(
    x = intermSubj,
    y = mean_rating,
    colour = dependencyType,
    group = dependencyType
  )
) +
  theme_minimal() +
  labs(
    x = "Subject",
    y = "Mean rating",
    colour = "Dependency type"
  ) +
  scale_color_discrete("Dependency type", labels = c("Anaphoric", "Movement")) +
  geom_errorbar(
    aes(
      ymin = mean_rating - se_rating,
      ymax = mean_rating + se_rating
    ),
    width = 0.15
  ) +
  geom_point(aes(col = dependencyType), size = 2) +
  # One axis break per point of the rating scale
  scale_y_continuous(breaks = 1:6) +
  theme(
    panel.grid.minor = element_blank(),
    legend.text = element_text(size = 12),
    # Tick labels on both axes
    axis.text = element_text(size = 12),
    # x-axis title (here, "Subject")
    axis.title.x = element_text(
      size = 15,
      margin = margin(0.5, NA, 0.5, NA, "cm")
    ),
    # y-axis title (here, "Mean rating")
    axis.title.y = element_text(
      size = 15,
      margin = margin(NA, 0.5, NA, 0.5, "cm")
    )
  )
# Render the plot
descriptive_plot_lab
The mean rating for the there|movement condition looks like it could be at floor here, so it would be good to look at the filler sentences that were expected to be ungrammatical and see if any of them were rated below this condition. The following code generates a plot in which the average rating for each filler sentence is represented as a thin orange line crossing the plot.
# Lab condition means with +/- 1 SE, overlaid on one faint orange horizontal
# line per row of the filler summary.
# NOTE(review): descriptive_summary_lab_fillers is not created in the code
# shown in this document; presumably it is built elsewhere with mean_rating
# and itemType columns -- confirm before running this chunk on its own.
descriptive_plot_w_fillers <- ggplot(
  descriptive_summary_lab,
  aes(
    x = intermSubj,
    y = mean_rating,
    ymin = 1,
    ymax = 6,
    colour = dependencyType,
    group = dependencyType
  )
) +
  theme_minimal() +
  labs(
    x = "Subject",
    y = "Mean rating",
    colour = "Dependency type",
    linetype = "Item type"
  ) +
  scale_linetype_discrete(labels = "Filler") +
  scale_color_discrete(labels = c("Anaphoric", "Movement")) +
  # Filler means: drawn first so the condition means sit on top
  geom_hline(
    data = descriptive_summary_lab_fillers,
    aes(yintercept = mean_rating, linetype = itemType),
    color = "orange",
    alpha = 0.35,
    show.legend = TRUE
  ) +
  geom_errorbar(
    aes(
      ymin = mean_rating - se_rating,
      ymax = mean_rating + se_rating
    ),
    width = 0.15
  ) +
  geom_point(aes(col = dependencyType), size = 2) +
  # One axis break per point of the rating scale
  scale_y_continuous(breaks = 1:6) +
  theme(
    panel.grid.minor = element_blank(),
    legend.text = element_text(size = 12),
    # Tick labels on both axes
    axis.text = element_text(size = 12),
    # x-axis title (here, "Subject")
    axis.title.x = element_text(
      size = 15,
      margin = margin(0.5, NA, 0.5, NA, "cm")
    ),
    # y-axis title (here, "Mean rating")
    axis.title.y = element_text(
      size = 15,
      margin = margin(NA, 0.5, NA, 0.5, "cm")
    )
  )
print(descriptive_plot_w_fillers)
Version 2 (Mechanical Turk)
Since the concern about a floor effect was not defused by the ungrammatical filler sentences, a replicate version of this experiment was run on Mechanical Turk. The following code chunk reads in the second set of data.
# Load the Mechanical Turk results
raw_results_mturk <- read.csv("results_all/csvFiles/20180306/expt_2_results_mturk_long_20180306.csv")
# The first column name is mangled on export (excel issue), so set it by hand
colnames(raw_results_mturk)[1] <- "timeReceived"
# Split the rows into experimental items and fillers, dropping factor levels
# that no longer occur in each subset
experiment_data_mturk <- droplevels(
  subset(raw_results_mturk, itemType == "experimental")
)
filler_data_mturk <- droplevels(
  subset(raw_results_mturk, itemType == "filler")
)
Participants were excluded whose average rating for ungrammatical filler sentences was greater than or equal to their average rating for grammatical filler sentences. This code chunk identifies participants who meet this criterion.
# Per-participant summary of the filler ratings: overall mean, SD, count and
# SE, plus separate means for fillers expected to be grammatical
# (expectedGramm == "1") and ungrammatical (expectedGramm == "0").
worker_summary <- filler_data_mturk %>%
  # Group filler data by subject
  group_by(subject) %>%
  # Namespace-qualified, like the other summaries in this document, so the
  # grouping is honoured even if another attached package masks summarize()
  dplyr::summarize(
    mean_rating = mean(rating),
    sd_rating = sd(rating),
    n = n(),
    se_rating = sd_rating / sqrt(n),
    mean_gramm = mean(rating[expectedGramm == "1"]),
    mean_ungramm = mean(rating[expectedGramm == "0"])
  )
# Find workers whose mean gramm is less than or equal to their mean ungramm
worker_summary %>%
  subset(mean_ungramm >= mean_gramm) %>%
  print()
The average ratings for each condition are presented in the following table, and in the following plot, these ratings are plotted with the average rating for each filler sentence as above. The pattern of ratings was similar, although the experiment conditions were rated higher on average, and the filler sentences received a broader range of ratings.
# Coerce the ratings to numeric so that they can be averaged
experiment_data_mturk$rating <- as.numeric(experiment_data_mturk$rating)
# Reorder the intermSubj (subject) levels to mirror the discussion.
# relevel() moves the named level to the front, so the inner call puts
# "indef" first and the outer call then puts "there" ahead of it.
experiment_data_mturk$intermSubj <- relevel(
  relevel(experiment_data_mturk$intermSubj, "indef"),
  "there"
)
# Per-condition mean, SD, cell count and standard error (experimental items)
descriptive_summary_mturk <- dplyr::summarize(
  group_by(experiment_data_mturk, intermSubj, dependencyType),
  meanRating = mean(rating),
  sd.rating = sd(rating),
  n = n(),
  se.rating = sd.rating / sqrt(n)
)
# The same statistics for each filler item
descriptive_summary_mturk_fillers <- dplyr::summarize(
  group_by(filler_data_mturk, itemID, expectedGramm, itemType),
  mean_rating = mean(rating),
  sd_rating = sd(rating),
  n = n(),
  se_rating = sd_rating / sqrt(n)
)
# Show the condition table
print(descriptive_summary_mturk)
# MTurk version of the condition plot: condition means with +/- 1 SE, drawn
# over one faint orange horizontal line per filler item mean
descriptive_plot_fillers_mturk <- ggplot(
  descriptive_summary_mturk,
  aes(
    x = intermSubj,
    y = meanRating,
    ymin = 1,
    ymax = 6,
    colour = dependencyType,
    group = dependencyType
  )
) +
  theme_minimal() +
  labs(
    x = "Subject",
    y = "Mean rating",
    colour = "Dependency type",
    linetype = "Item type"
  ) +
  scale_linetype_discrete(labels = "Filler") +
  scale_color_discrete(labels = c("Anaphoric", "Movement")) +
  # Filler means: drawn first so the condition means sit on top
  geom_hline(
    data = descriptive_summary_mturk_fillers,
    aes(yintercept = mean_rating, linetype = itemType),
    color = "orange",
    alpha = 0.35,
    show.legend = TRUE
  ) +
  geom_errorbar(
    aes(
      ymin = meanRating - se.rating,
      ymax = meanRating + se.rating
    ),
    width = 0.10
  ) +
  geom_point(aes(col = dependencyType), size = 3) +
  # One axis break per point of the rating scale
  scale_y_continuous(breaks = 1:6) +
  theme(
    panel.grid.minor = element_blank(),
    legend.text = element_text(size = 12),
    # Tick labels on both axes
    axis.text = element_text(size = 12),
    # x-axis title (here, "Subject")
    axis.title.x = element_text(
      size = 15,
      margin = margin(0.5, NA, 0.5, NA, "cm")
    ),
    # y-axis title (here, "Mean rating")
    axis.title.y = element_text(
      size = 15,
      margin = margin(NA, 0.5, NA, 0.5, "cm")
    )
  )
# Render the plot
descriptive_plot_fillers_mturk
Ordinal regression analysis
The ordinal regression analysis printed below was run on the hummingbird cluster using the ratings data from Version 2 of this experiment. The following code chunk sets the contrasts for the two different factors. For the three-level factor, Helmert contrast coding was used so that the existential (there) conditions would be compared to the combination of the other conditions, and so that the indefinite and definite conditions would only be compared directly to each other.
# The ordinal model needs the rating as a factor
experiment_data_mturk$rating <- as.factor(experiment_data_mturk$rating)
# Helmert-style contrast coding for the three-level subject factor, written
# out as a matrix with one row per factor level (in level order) and one
# column per comparison:
#   definiteness: first level neutralized (0), second vs. third level (1 / -1)
#   height:       first level (2) vs. the other two levels (-1 each)
# NOTE(review): the rows are positional, so this assumes the levels are
# ordered there, indef, def -- confirm with levels() before fitting.
contrasts(experiment_data_mturk$intermSubj) <- matrix(
  c(
     0,  2,
     1, -1,
    -1, -1
  ),
  nrow = 3,
  byrow = TRUE,
  dimnames = list(
    levels(experiment_data_mturk$intermSubj),
    c("definiteness", "height")
  )
)
# Show contrasts for Subject factor
contrasts(experiment_data_mturk$intermSubj)
definiteness height
there 0 2
indef 1 -1
def -1 -1
# Code the two-level dependency type factor as a single -0.5 / +0.5 contrast
# column (first level -0.5, second level +0.5)
contrasts(experiment_data_mturk$dependencyType) <- matrix(c(-0.5, 0.5), ncol = 1)
# Display the resulting coding
contrasts(experiment_data_mturk$dependencyType)
[,1]
coref -0.5
move 0.5
# Write the contrast-coded MTurk data to disk so other scripts can load it
saveRDS(
  experiment_data_mturk,
  file = "expt2_data_mturk.rds"
)
The mixed effects ordinal regression analysis was run on the UCSC hummingbird cluster as follows.
# Full mixed effects ordinal regression, as run on the hummingbird cluster
# Load the data saved from the personal computer
expt2_data <- readRDS(file = "expt2_data_mturk.rds")
# Cumulative link mixed model:
#   fixed effects  - intermSubj, dependencyType and their interaction
#   random effects - intercepts and slopes for the full fixed-effects
#                    structure, by subject and by item set
expt2_clmm_full <- clmm(
  formula = rating ~ intermSubj * dependencyType +
    (1 + intermSubj * dependencyType | subject) +
    (1 + intermSubj * dependencyType | itemSet),
  data = expt2_data
)
# Store the fitted model
saveRDS(expt2_clmm_full, "expt2_clmm_full_20181118.rds")
# Load the model object that was fitted and saved on the cluster
expt2_clmm_cluster <- readRDS("expt2_clmm_full_20181118.rds")
# Print the model summary
summary(expt2_clmm_cluster)
Cumulative Link Mixed Model fitted with the Laplace approximation
formula: rating ~ intermSubj * dependencyType + (1 + intermSubj * dependencyType |
subject) + (1 + intermSubj * dependencyType | itemSet)
data: expt2_data
Random effects:
Groups Name Variance Std.Dev. Corr
subject (Intercept) 4.3385 2.0829
intermSubjindef 0.3675 0.6062 -0.774
intermSubjthere 2.2816 1.5105 -0.849 0.540
dependencyTypemove 6.9260 2.6317 -0.780 0.691 0.929
intermSubjindef:dependencyTypemove 1.3518 1.1627 0.266 -0.134 -0.441 -0.502
intermSubjthere:dependencyTypemove 4.7399 2.1771 0.459 0.007 -0.740 -0.600 0.781
itemSet (Intercept) 0.7054 0.8399
intermSubjindef 0.2425 0.4924 0.286
intermSubjthere 0.2896 0.5381 -0.128 0.914
dependencyTypemove 0.4536 0.6735 -0.935 -0.607 -0.232
intermSubjindef:dependencyTypemove 0.3698 0.6081 -0.300 -1.000 -0.908 0.618
intermSubjthere:dependencyTypemove 0.6563 0.8101 0.146 -0.906 -1.000 0.214 0.900
Number of groups: subject 37, itemSet 24
Coefficients:
Estimate Std. Error z value Pr(>|z|)
intermSubjdefiniteness -0.03465 0.12163 -0.285 0.776
intermSubjheight -0.42393 0.07023 -6.037 1.57e-09 ***
dependencyType1 -4.52988 0.46279 -9.788 < 2e-16 ***
intermSubjdefiniteness:dependencyType1 0.24417 0.23122 1.056 0.291
intermSubjheight:dependencyType1 0.62476 0.15586 4.008 6.11e-05 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Threshold coefficients:
Estimate Std. Error z value
1|2 -4.0539 0.2898 -13.986
2|3 -1.9714 0.2581 -7.639
3|4 -0.6482 0.2511 -2.581
4|5 1.0279 0.2524 4.072
5|6 2.9003 0.2692 10.775
Sprouse, Jon, Matthew W. Wagers, and Colin Phillips. 2013. “Deriving competing predictions from grammatical approaches and reductionist approaches to island effects.” In Experimental Syntax and Island Effects, edited by Jon Sprouse and Norbert Hornstein, 21–41. Cambridge, United Kingdom: Cambridge University Press. https://doi.org/10.1017/CBO9781139035309.003.
