Descriptive stats
# Get summary of experiment data
exp_data %>%
group_by(env,
structure,
length) %>%
summarize(mean_rating = mean(rating),
sd_rating = sd(rating),
n = n(),
se_rating = sd_rating/sqrt(n)) -> summary
print(summary)
# Reorder levels
exp_data$env <- relevel(exp_data$env, "ee")
exp_data$env <- relevel(exp_data$env, "vt")
exp_data$length <- relevel(exp_data$length, "short")
exp_data$structure <- relevel(exp_data$structure, "non-island")
# Get summary of experiment data
exp_data %>%
group_by(env,
structure,
length) %>%
summarize(mean_rating = mean(rating),
sd_rating = sd(rating),
n = n(),
se_rating = sd_rating/sqrt(n)) -> summary
print(summary)
# Make a descriptive plot
summary_forplot <- summary
summary_forplot[7,] <- summary_forplot[5,]
summary_forplot[8,] <- summary_forplot[6,]
summary_forplot$environment <- summary_forplot$env
summary_forplot$environment[5:6] <- c("vt", "vt")
summary_forplot$environment[7:8] <- c("ee", "ee")
summary_forplot$environment <- recode(summary_forplot$environment, vt = "VT", ee = "EE")
summary_forplot %>%
ggplot(aes(x = length,
y = mean_rating,
ymin = 1,
ymax = 6,
color = structure,
group = structure)) +
facet_grid(.~environment) +
geom_errorbar(aes(ymin = mean_rating - se_rating,
ymax = mean_rating + se_rating),
width = 0.15) +
geom_point(aes(col = structure),
size = 2) +
scale_y_continuous(breaks = seq(1:6)) +
scale_color_discrete("Structure",
labels = c("Non-island",
"Island")) +
labs(x = "Length",
y = "Mean rating") -> plot_summary
print(plot_summary)
# Make an alternative plot
summary_forplot %>%
ggplot(aes(x = length,
y = mean_rating,
color = env,
ymin = 1,
ymax = 6)) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = mean_rating - se_rating,
ymax = mean_rating + se_rating),
width = 0.15) +
labs(title = "Main items") -> plot_summary_collapsed
print(plot_summary_collapsed)
Breakdown by verb
exp_data %>%
subset(structure == "island") %>%
group_by(length,
structure,
verb, env) %>%
summarize(mean_rating = mean(rating),
sd_rating = sd(rating),
n = n(),
se_rating = sd_rating/sqrt(n)) -> vbv_summary
print(vbv_summary)
for (vb in 1:length(unique(vbv_summary$verb))) {
which.verb <- unique(vbv_summary$verb)[vb]
vbv_summary %>%
subset(verb == which.verb) %>%
ggplot(aes(x = length,
y = mean_rating,
color = structure,
group = structure,
ymin = 1,
ymax = 6)) +
scale_y_continuous(breaks = seq(1:6)) +
geom_point(data = subset(summary,
structure == "non-island"),
aes(x = length,
y = mean_rating)) +
geom_errorbar(data = subset(summary,
structure == "non-island"),
aes(ymin = mean_rating - se_rating,
ymax = mean_rating + se_rating),
width = 0.15) +
geom_point(aes(col = structure),
size = 2) +
geom_errorbar(aes(ymin = mean_rating - se_rating,
ymax = mean_rating + se_rating),
width = 0.15) +
labs(title = paste0(as.character(which.verb), " (", vbv_summary$env[vbv_summary$verb == which.verb], ")"),
x = "Length",
y = "Mean rating") -> plot
print(plot)
}
DD scores by verb
# Save baseline data
exp_data %>%
subset(structure == "non-island") %>%
droplevels %>%
group_by(length) %>%
summarize(mean_z_rating = mean(z_rating)) -> baseline_z_ratings
# Get DD scores for each verb
exp_data %>%
subset(structure == "island") %>%
droplevels %>%
group_by(verb, env) %>%
summarize(mean_z_rating = mean(z_rating),
isl_sh = mean(z_rating[length == "short"]),
isl_l = mean(z_rating[length == "long"]),
nisl_sh = baseline_z_ratings$mean_z_rating[1],
nisl_l = baseline_z_ratings$mean_z_rating[2],
d1 = nisl_l - isl_l,
d2 = nisl_sh - isl_sh,
dd = d1 - d2) -> vbv_dd
print(vbv_dd)
# Plot verbs by DD score
vbv_dd %>%
subset(select = c(verb, env, dd)) %>%
ggplot(aes(x = reorder(verb, dd),
y = dd,
fill = env)) +
geom_col() +
scale_fill_discrete(name = "Environment",
labels = c("VT", "EE")) +
labs(x = "Verb",
y = "DD score")
Burn-in items (vs. main items)
ratings_data_excl %>%
subset(type == "burn-in") %>%
group_by(env,
structure,
length) %>%
summarize(mean_rating = mean(rating),
sd_rating = sd(rating),
n = n(),
se_rating = sd_rating/sqrt(n)) -> summary_burn_in
print(summary_burn_in)
summary_burn_in %>%
ggplot(aes(x = length,
y = mean_rating,
color = env,
ymin = 1,
ymax = 6)) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = mean_rating - se_rating,
ymax = mean_rating + se_rating),
width = 0.15) +
labs(title = "Burn-in items") -> plot_summary_burn_in
print(plot_summary_burn_in)
# Compare
print(plot_summary_collapsed)
# Considering burn-in and main items together
ratings_data_excl %>%
subset(type == "experimental" | type == "burn-in") %>%
droplevels %>%
group_by(env,
structure,
length) %>%
summarize(mean_rating = mean(rating),
sd_rating = sd(rating),
n = n(),
se_rating = sd_rating/sqrt(n)) -> summary_combined
print(summary_combined)
# Plot
summary_combined %>%
ggplot(aes(x = length,
y = mean_rating,
color = env,
ymin = 1,
ymax = 6)) +
geom_point(size = 2) +
geom_errorbar(aes(ymin = mean_rating - se_rating,
ymax = mean_rating + se_rating),
width = 0.15) +
labs(title = "Combined burn-in and main items") -> plot_summary_combined
print(plot_summary_combined)
Trial order effects
ratings_data_excl %>%
subset(type == "experimental" | type == "burn-in") %>%
droplevels %>%
subset(length == "long") %>%
droplevels -> ratings_data_excl_combined
# Order the data frame by prolific id and trial
ratings_data_excl_combined_ordered <- ratings_data_excl_combined[order(ratings_data_excl_combined$epoch_time,
ratings_data_excl_combined$env,
ratings_data_excl_combined$trial),]
# Add new col for equalized trial
# Get number of observations per participant
obs <- nrow(ratings_data_excl_combined_ordered[ratings_data_excl_combined_ordered$epoch_time == min(ratings_data_excl_combined_ordered$epoch_time),])
ratings_data_excl_combined_ordered$trial_eq <- 1:(obs/3)
# Plot the ratings and draw a line
ratings_data_excl_combined_ordered %>%
ggplot(aes(x = trial_eq,
y = z_rating,
color = env,
group = env)) +
geom_jitter() +
geom_smooth(method = "lm") -> plot_trial_order
print(plot_trial_order)
CLMs
Simple effects analysis
library(ordinal)
Attaching package: ‘ordinal’
The following object is masked from ‘package:dplyr’:
slice
# Make sure nominal variables are coded as factors
exp_data$rating %<>% as.factor
exp_data$env %<>% as.factor
exp_data$structure %<>% as.factor
exp_data$length %<>% as.factor
# Sum code structure and length factors
contrasts(exp_data$structure) <- c(-0.5, 0.5)
contrasts(exp_data$length) <- c(-0.5, 0.5)
# Contrasts for env factor should be dummy-coded correctly by default after reordering the factors
clm(data = exp_data,
formula = rating ~ env * length) -> clm_analysis
summary(clm_analysis)
formula: rating ~ env * length
data: exp_data
Coefficients:
Estimate Std. Error z value Pr(>|z|)
envee -1.3305 0.1696 -7.844 4.37e-15 ***
envvt -1.6752 0.1748 -9.586 < 2e-16 ***
length1 0.2813 0.2370 1.187 0.235
envee:length1 -2.6726 0.3394 -7.874 3.42e-15 ***
envvt:length1 -2.5573 0.3438 -7.437 1.03e-13 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Threshold coefficients:
Estimate Std. Error z value
1|2 -3.8944 0.1971 -19.754
2|3 -2.6209 0.1634 -16.044
3|4 -1.5535 0.1411 -11.013
4|5 -0.5841 0.1270 -4.599
5|6 0.5289 0.1263 4.188
Mixed effects analysis
# Save the ratings data to run on hb.ucsc.edu
saveRDS(exp_data,
file = "ratings_data_for_clmm.rds")
# Run the following clmm on the cluster:
# clmm(data = exp_data,
# formula = rating ~ env * length +
# (1 + env * length | prolific_id) +
# (1 + env * length | item)) -> clmm_analysis
# saveRDS(clmm_analysis, "exp6_clmm.rds")
# Read in RDS from cluster
clmm_analysis <- readRDS(file = "exp6_clmm.rds")
summary(clmm_analysis)
Variance-covariance matrix of the parameters is not defined
Cumulative Link Mixed Model fitted with the Laplace approximation
formula: rating ~ env * length + (1 + env * length | prolific_id) + (1 + env * length | item)
data: ratings_data_excl_baselines
Random effects:
Groups Name Variance Std.Dev. Corr
prolific_id (Intercept) 7.2111 2.6854
envee 0.5457 0.7387 -0.892
envvt 1.5785 1.2564 -0.523 0.840
lengthlong 1.0150 1.0075 -0.856 0.739 0.293
envee:lengthlong 2.0579 1.4345 -0.232 0.445 0.658 -0.077
envvt:lengthlong 1.5228 1.2340 -0.373 0.262 0.116 0.211 0.702
item (Intercept) 0.3197 0.5654
envee 0.7320 0.8555 -0.292
envvt 0.2181 0.4670 0.322 -0.677
lengthlong 3.2911 1.8141 0.224 -0.828 0.577
envee:lengthlong 0.9801 0.9900 -0.238 0.647 -0.465 -0.961
envvt:lengthlong 7.2584 2.6941 -0.418 0.927 -0.497 -0.911 0.808
Number of groups: prolific_id 30, item 24
Coefficients:
Estimate Std. Error z value Pr(>|z|)
envee -2.3324 NA NA NA
envvt -2.8870 NA NA NA
length1 0.5469 NA NA NA
envee:length1 -4.7146 NA NA NA
envvt:length1 -4.3064 NA NA NA
Threshold coefficients:
Estimate Std. Error z value
1|2 -6.5216 NA NA
2|3 -4.5322 NA NA
3|4 -2.8098 NA NA
4|5 -1.1472 NA NA
5|6 0.9454 NA NA
---
title: "RC subextraction in English: Experiment 6 notebook"
author: "Jake W. Vincent (&#106;&#119;&#118;&#105;&#110;&#99;&#101;n&#64;&#117;c&#115;c.e&#100;u)"
output:
  html_notebook: 
    fig_caption: yes
    number_sections: yes
    theme: flatly
    code_folding: show
    css: style.css
    toc: true
  pdf_document: 
    keep_tex: yes
bibliography: ~/Documents/library.bib
---
```{r setup}
library(ggplot2)
library(dplyr)
library(tidyr)
library(magrittr)
```

```{r}
# Data wrangling done in exp6_setup_nb.Rmd
# Read in data
ratings_data_incl <- read.csv(file = "exp6_ratings.csv")

# Remove excluded participants' data
ratings_data_incl %>%
  subset(!(prolific_id %in% excluded)) %>%
  droplevels -> ratings_data_excl

ratings_data_excl$env <- relevel(ratings_data_excl$env, "vt")
ratings_data_excl$env <- relevel(ratings_data_excl$env, "ee")
ratings_data_excl$env <- relevel(ratings_data_excl$env, "complement")
ratings_data_excl$length <- relevel(ratings_data_excl$length, "short")
ratings_data_excl$structure <- relevel(ratings_data_excl$structure, "non-island")

# Add z-scores (by participant) for use later, INCLUDING burn-in trials
ratings_data_excl %>%
  subset(type == "experimental" | type == "filler" | type == "burn-in" | type == "burn-in_filler") %>%
  droplevels %>%
  group_by(prolific_id) %>%
  mutate(z_rating_all = scale(rating)) -> ratings_data_all

# Add z-scores (by participant) for use later, EXCLUDING burn-in trials
ratings_data_all %>%
  subset(type == "experimental" | type == "filler") %>%
  droplevels %>%
  group_by(prolific_id) %>%
  mutate(z_rating = scale(rating)) -> ratings_data

# Get experiment data
ratings_data %>%
  subset(type == "experimental") %>%
  droplevels -> exp_data

# Get filler data
ratings_data %>%
  subset(type == "filler") %>%
  droplevels -> filler_data
```

# Descriptive stats

```{r descriptive_summary}
# Get summary of experiment data
exp_data %>%
  group_by(env,
           structure,
           length) %>%
  summarize(mean_rating = mean(rating),
            sd_rating = sd(rating),
            n = n(),
            se_rating = sd_rating/sqrt(n)) -> summary
print(summary)
```

```{r descriptive_plot}
# Make a descriptive plot
summary_forplot <- summary
summary_forplot[7,] <- summary_forplot[5,]
summary_forplot[8,] <- summary_forplot[6,]
summary_forplot$environment <- summary_forplot$env
summary_forplot$environment[5:6] <- c("vt", "vt")
summary_forplot$environment[7:8] <- c("ee", "ee")
summary_forplot$environment <- recode(summary_forplot$environment, vt = "VT", ee = "EE")

summary_forplot %>%
  ggplot(aes(x = length,
             y = mean_rating,
             ymin = 1,
             ymax = 6,
             color = structure,
             group = structure)) +
  facet_grid(.~environment) +
  geom_errorbar(aes(ymin = mean_rating - se_rating,
                    ymax = mean_rating + se_rating),
                width = 0.15) +
  geom_point(aes(col = structure),
             size = 2) +
  scale_y_continuous(breaks = seq(1:6)) +
  scale_color_discrete("Structure",
                       labels = c("Non-island",
                                  "Island")) +
  labs(x = "Length",
       y = "Mean rating") -> plot_summary
print(plot_summary)

# Make an alternative plot
summary_forplot %>%
  ggplot(aes(x = length,
             y = mean_rating,
             color = env,
             ymin = 1,
             ymax = 6)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = mean_rating - se_rating,
                    ymax = mean_rating + se_rating),
                width = 0.15) + 
  labs(title = "Main items") -> plot_summary_collapsed
print(plot_summary_collapsed)
```

## Breakdown by verb

```{r verb_breakdown}
exp_data %>%
  subset(structure == "island") %>%
  group_by(length,
           structure,
           verb, env) %>%
  summarize(mean_rating = mean(rating),
            sd_rating = sd(rating),
            n = n(),
            se_rating = sd_rating/sqrt(n)) -> vbv_summary
print(vbv_summary)

for (vb in 1:length(unique(vbv_summary$verb))) {
  which.verb <- unique(vbv_summary$verb)[vb]
  vbv_summary %>%
    subset(verb == which.verb) %>%
    ggplot(aes(x = length,
               y = mean_rating,
               color = structure,
               group = structure,
               ymin = 1,
               ymax = 6)) +
    scale_y_continuous(breaks = seq(1:6)) +
    geom_point(data = subset(summary,
                             structure == "non-island"),
               aes(x = length,
                   y = mean_rating)) +
    geom_errorbar(data = subset(summary,
                                structure == "non-island"),
                  aes(ymin = mean_rating - se_rating,
                      ymax = mean_rating + se_rating),
                  width = 0.15) +
    geom_point(aes(col = structure),
               size = 2) +
    geom_errorbar(aes(ymin = mean_rating - se_rating,
                      ymax = mean_rating + se_rating),
                  width = 0.15) +
    labs(title = paste0(as.character(which.verb), " (", vbv_summary$env[vbv_summary$verb == which.verb], ")"),
         x = "Length",
         y = "Mean rating") -> plot
  print(plot)
}
```

### DD scores by verb

```{r vbv_dd_scores}
# Save baseline data
exp_data %>%
  subset(structure == "non-island") %>%
  droplevels %>%
  group_by(length) %>%
  summarize(mean_z_rating = mean(z_rating)) -> baseline_z_ratings

# Get DD scores for each verb
exp_data %>%
  subset(structure == "island") %>%
  droplevels %>%
  group_by(verb, env) %>%
  summarize(mean_z_rating = mean(z_rating),
            isl_sh = mean(z_rating[length == "short"]),
            isl_l = mean(z_rating[length == "long"]),
            nisl_sh = baseline_z_ratings$mean_z_rating[1],
            nisl_l = baseline_z_ratings$mean_z_rating[2],
            d1 = nisl_l - isl_l,
            d2 = nisl_sh - isl_sh,
            dd = d1 - d2) -> vbv_dd
print(vbv_dd)

# Plot verbs by DD score
vbv_dd %>%
  subset(select = c(verb, env, dd)) %>%
  ggplot(aes(x = reorder(verb, dd),
             y = dd,
             fill = env)) +
  geom_col() + 
  scale_fill_discrete(name = "Environment",
                      labels = c("VT", "EE")) +
  labs(x = "Verb",
       y = "DD score")
```

## Burn-in items (vs. main items)
```{r burn-in}
ratings_data_excl %>%
  subset(type == "burn-in") %>%
  group_by(env, 
           structure,
           length) %>%
  summarize(mean_rating = mean(rating),
            sd_rating = sd(rating),
            n = n(),
            se_rating = sd_rating/sqrt(n)) -> summary_burn_in

print(summary_burn_in)
```

```{r plot_burn_in_summary}
summary_burn_in %>%
  ggplot(aes(x = length,
             y = mean_rating,
             color = env,
             ymin = 1,
             ymax = 6)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = mean_rating - se_rating,
                    ymax = mean_rating + se_rating),
                width = 0.15) + 
  labs(title = "Burn-in items") -> plot_summary_burn_in
print(plot_summary_burn_in)
# Compare
print(plot_summary_collapsed)
```

```{r}
# Considering burn-in and main items together
ratings_data_excl %>%
  subset(type == "experimental" | type == "burn-in") %>%
  droplevels %>%
  group_by(env,
           structure,
           length) %>%
  summarize(mean_rating = mean(rating),
            sd_rating = sd(rating),
            n = n(),
            se_rating = sd_rating/sqrt(n)) -> summary_combined
print(summary_combined)
# Plot
summary_combined %>%
  ggplot(aes(x = length,
             y = mean_rating,
             color = env,
             ymin = 1,
             ymax = 6)) +
  geom_point(size = 2) +
  geom_errorbar(aes(ymin = mean_rating - se_rating,
                    ymax = mean_rating + se_rating),
                width = 0.15) + 
  labs(title = "Combined burn-in and main items") -> plot_summary_combined
print(plot_summary_combined)
```

## Trial order effects
```{r}
ratings_data_excl %>%
  subset(type == "experimental" | type == "burn-in") %>%
  droplevels %>%
  subset(length == "long") %>%
  droplevels -> ratings_data_excl_combined

# Order the data frame by prolific id and trial
ratings_data_excl_combined_ordered <- ratings_data_excl_combined[order(ratings_data_excl_combined$epoch_time,
                                                                       ratings_data_excl_combined$env,
                                                                       ratings_data_excl_combined$trial),]

# Add new col for equalized trial
# Get number of observations per participant
obs <- nrow(ratings_data_excl_combined_ordered[ratings_data_excl_combined_ordered$epoch_time ==  min(ratings_data_excl_combined_ordered$epoch_time),])
ratings_data_excl_combined_ordered$trial_eq <- 1:(obs/3)

# Plot the ratings and draw a line
ratings_data_excl_combined_ordered %>%
  ggplot(aes(x = trial_eq,
             y = z_rating,
             color = env,
             group = env)) +
  geom_jitter() + 
  geom_smooth(method = "lm") -> plot_trial_order
print(plot_trial_order)
```

# CLMs

## Simple effects analysis

```{r}
library(ordinal)

# Make sure nominal variables are coded as factors
exp_data$rating %<>% as.factor
exp_data$env %<>% as.factor
exp_data$structure %<>% as.factor
exp_data$length %<>% as.factor

# Sum code structure and length factors
contrasts(exp_data$structure) <- c(-0.5, 0.5)
contrasts(exp_data$length) <- c(-0.5, 0.5)
# Contrasts for env factor should be dummy-coded correctly by default after reordering the factors

clm(data = exp_data,
    formula = rating ~ env * length) -> clm_analysis
summary(clm_analysis)
```

## Mixed effects analysis
```{r}
# Save the ratings data to run on hb.ucsc.edu
saveRDS(exp_data,
        file = "ratings_data_for_clmm.rds")

# Run the following clmm on the cluster:
# clmm(data = exp_data,
#      formula = rating ~ env * length +
#      (1 + env * length | prolific_id) +
#      (1 + env * length | item)) -> clmm_analysis
# saveRDS(clmm_analysis, "exp6_clmm.rds")
 
# Read in RDS from cluster
clmm_analysis <- readRDS(file = "exp6_clmm.rds")
summary(clmm_analysis)
```

