# ============================================================ # Merge: Shaded Lights Experiment — Combined 2-Light Dataset # ============================================================ # Sources DOfile_realexp_1.R → session "2L(1)" # DOfile_realexp.R → session "2L(2)" # Stacks them vertically with continuous subject numbering. # Output: Data_2L # ============================================================ # # ------------------------------------------------------------ # DATA_2L — VARIABLE CODEBOOK # ------------------------------------------------------------ # # UNIT OF OBSERVATION # One row per subject × treatment × guess (trial). # Each subject makes 12 guesses per treatment across 5 treatments # → 60 rows per subject. Sessions are stacked vertically. # # ── IDENTIFIERS ───────────────────────────────────────────── # subject_id : Integer. Re-numbered continuously across both # sessions: subjects from 2L(1) keep their original # IDs; subjects from 2L(2) are offset by the number # of subjects in 2L(1), so IDs never repeat. # prolific_id : Character. Anonymised Prolific ID (24-char hex). # session : Factor. Which data-collection session the subject # belongs to: "2L(1)" (pilot 1) or "2L(2)" (pilot 2). # # ── TREATMENT ─────────────────────────────────────────────── # treatment : Character. The logical rule governing the machine # in this treatment. Five levels: # AND – sound iff red ON AND blue ON # OR – sound iff red ON OR blue ON # INHIBIT – sound iff red ON AND blue OFF # EITHER – sound iff exactly one light ON (XOR) # JOINT – sound iff both lights share state # (both OFF or both ON) # round_order : Integer (1–5). Position in which the subject # encountered this treatment (randomised per subject). # # ── TRIAL-LEVEL VARIABLES ─────────────────────────────────── # Guess_Number : Integer (1–12). Index of the trial within the # treatment. # Light_Config : Character. Observable light state shown on this # trial, formatted as "(red, blue)" with 1 = ON, # 0 = OFF. Values: "(0,0)", "(0,1)", "(1,0)", "(1,1)". # Already accounts for the original_color swap. # red_light : Integer (0/1). State of the red light on this trial. # blue_light : Integer (0/1). State of the blue light on this trial. # Guess : Integer (0/1). Subject's prediction: 1 = predicts # sound, 0 = predicts no sound. # Machine_CorrectP: Integer (0/1). Optimal prediction given the true # rule and the current Light_Config. Compare to Guess # to measure trial-level accuracy. # # ── TREATMENT-LEVEL VARIABLES ─────────────────────────────── # time_machine : Numeric. Seconds spent on the prediction page for # this treatment. # Notes : Character. Free-text notes written by the subject # while observing the data for this treatment. # certainty : Numeric. Self-reported confidence in predictions # for this treatment (scale collected after guesses). # difficulty : Numeric. Self-reported difficulty of this treatment. # Collected after the 5th treatment; filled to all # rows within the subject. # difficulty_certainty : Numeric. Combined difficulty/certainty rating. # Same collection timing as difficulty. # predicted_correct_self : Numeric. Subject's own estimate of how many # of their guesses were correct. # max_correct : Integer (0–12). Number of guesses matching # Machine_CorrectP in this treatment. # trueextracted : Integer (0/1). 1 if the subject demonstrably learned # the true rule: binom.test(max_correct, 12, 0.5) # p-value ≤ 0.01 AND max_correct > 6. # # ── SUBJECT-LEVEL VARIABLES ───────────────────────────────── # STRATEGY : Character. Free-text description of strategy used, # collected once at the end of the experiment. # COMMENTS : Character. Free-text final comments. # num_wrong : Integer. Comprehension-check errors (assessed once # at the start). # final_payment : Numeric. Total payment in GBP (fee + bonus). # payment_machine : Factor. Treatment randomly selected for payment; # same levels as treatment. # education : Character. Highest education level (Prolific). # age : Integer. Age in years (Prolific). # time_total_min : Numeric. Total experiment duration in minutes. # original_color : Integer. 1 = standard light-color assignment, # 0 = red/blue labels were swapped in the display. # All variables above already correct for this swap. # # ── RULE-VALUE VARIABLES (one per deterministic rule) ─────── # val_ : Numeric. Proportion of trials in this treatment's # dataset that would predict correctly. # Rules: always_0, AND, INHIBIT, red, blue_alone, # blue, EITHER, OR, NOR, JOINT, not_blue, # not_blue_alone, not_red, not_INHIBIT, NAND, always_1. # # ── PER-CONFIG ACCURACY OF THE TRUE RULE ──────────────────── # ACC_00 : Numeric. Accuracy of the true rule on Light_Config # (0,0) within this treatment's dataset. # ACC_01 : Numeric. Accuracy on config (0,1). # ACC_10 : Numeric. Accuracy on config (1,0). # ACC_11 : Numeric. Accuracy on config (1,1). # # ── LIGHT-CONFIG FREQUENCIES ──────────────────────────────── # Frequency_00 : Numeric. Proportion of trials in the treatment # dataset where Light_Config = (0,0). # Frequency_01 : Numeric. Proportion for config (0,1). # Frequency_10 : Numeric. Proportion for config (1,0). # Frequency_11 : Numeric. Proportion for config (1,1). # # ── STRUCTURAL (MIXTURE-MODEL) POSTERIORS ─────────────────── # post_ : Numeric. Bayesian posterior probability that this # subject × treatment observation is generated by # . Prior = population shares estimated via # stratEst finite mixture model on this treatment's # subjects; likelihood = how well the subject's 12 # guesses match given the estimated error rate. # Rules: all 16 deterministic rules listed above, # plus random (p = 0.5 fixed) and always_p (p # estimated). Posteriors sum to 1 per subject × # treatment. Constant across the 12 trial rows. # Rule_used : Character. Name of the rule with the highest # posterior for this subject × treatment. # posterior : Numeric (2 d.p.). Posterior probability of Rule_used. # # ── ANALYSIS VARIABLES ────────────────────────────────────── # correlationhigh : Integer (0/1). 1 for session × treatment # combinations with higher signal noise: # 2L(2): AND, EITHER # 2L(1): OR, INHIBIT, JOINT # ------------------------------------------------------------ library(tidyverse) # ---- 1. Merge the datasets generated in DOfile_realexp and DOfile_realexp_1 ------ # Each script defines df_final (including the trueextracted column # computed in their step 17). Sourcing into separate environments # prevents name collisions between the two pipelines. script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path) env_2L1 <- new.env(parent = globalenv()) source(file.path(script_dir, "Structural_Exp_1.R"), local = env_2L1) env_2L2 <- new.env(parent = globalenv()) source(file.path(script_dir, "Structural_Exp.R"), local = env_2L2) df_2L1 <- env_2L1$df_final_post # pilot session 1 df_2L2 <- env_2L2$df_final_post # pilot session 2 # ---- 2. Add session identifier df_2L1 <- df_2L1 %>% mutate(session = "2L(1)") df_2L2 <- df_2L2 %>% mutate(session = "2L(2)") # ---- 3. Continue subject numbering across sessions # 2L(1) subjects keep their original IDs (1 … N₁). # 2L(2) subject IDs are offset by the number of subjects in 2L(1) # so that numbering is continuous and never restarts from 1. n_subjects_2L1 <- n_distinct(df_2L1$subject_id) df_2L2 <- df_2L2 %>% mutate(subject_id = subject_id + n_subjects_2L1) # ---- 4. Stack vertically → Data_2L - Data_2L <- bind_rows(df_2L1, df_2L2) %>% mutate(session = factor(session, levels = c("2L(1)", "2L(2)"))) %>% arrange(session, subject_id, round_order, Guess_Number) # ---- 5. Correlationhigh variable # correlationhigh = 1 for the harder session × treatment combinations Data_2L <- Data_2L |> mutate(correlationhigh = as.integer( (treatment == "AND" & session == "2L(2)") | (treatment == "EITHER" & session == "2L(2)") | (treatment == "OR" & session == "2L(1)") | (treatment == "INHIBIT" & session == "2L(1)") | (treatment == "JOINT" & session == "2L(1)") )) # ---- 6. Subject_acc: guess-weighted accuracy by light-config frequency # For each guess the contribution is: # ACC_lightconfig if Guess == Machine_CorrectP (subject chose optimally) # 1 - ACC_lightconfig otherwise # Subject_acc is the mean over the 12 guesses within a (subject, treatment). subject_stats <- Data_2L |> mutate( acc_val = case_when( Light_Config == "(0,0)" ~ ACC_00, Light_Config == "(0,1)" ~ ACC_01, Light_Config == "(1,0)" ~ ACC_10, Light_Config == "(1,1)" ~ ACC_11 ), guess_acc = if_else(Guess == Machine_CorrectP, acc_val, 1 - acc_val) ) |> group_by(subject_id, treatment, correlationhigh) |> summarise( Subject_acc = mean(guess_acc, na.rm = TRUE), max_correct = first(max_correct), .groups = "drop" ) # ---- 2. CDF plots by statistical properties (correlationhigh): Aggregate ---- corr_labels <- c("0" = "Low correlation", "1" = "High correlation") # CDF of max_correct p_max <- subject_stats |> mutate(correlationhigh = factor(correlationhigh)) |> ggplot(aes(x = max_correct, colour = correlationhigh)) + stat_ecdf(linewidth = 0.8) + scale_colour_manual(values = c("0" = "#2166ac", "1" = "#d6604d"), labels = corr_labels) + labs( x = "Number of correct guesses (max_correct)", y = "Cumulative proportion", colour = NULL ) + theme_bw() + theme(legend.position = "bottom") print(p_max) # ---- 3. CDF plots by statistical properties (correlationhigh): By treatment ---- p_max_bytreat <- subject_stats |> mutate(correlationhigh = factor(correlationhigh)) |> ggplot(aes(x = max_correct, colour = correlationhigh)) + stat_ecdf(linewidth = 0.8) + scale_colour_manual(values = c("0" = "#2166ac", "1" = "#d6604d"), labels = corr_labels) + facet_wrap(~ treatment, nrow = 1) + labs( x = "Number of correct guesses (max_correct)", y = "Cumulative proportion", colour = NULL ) + theme_bw() + theme(legend.position = "bottom") print(p_max_bytreat) # ---- 4. Bar plots: fraction of subjects per max_correct bin by correlationhigh ---- # Fractions within each correlationhigh group bar_data <- subject_stats |> mutate(correlationhigh = factor(correlationhigh)) |> group_by(correlationhigh, max_correct) |> summarise(n = n(), .groups = "drop") |> group_by(correlationhigh) |> mutate(frac = n / sum(n)) |> ungroup() # Group means (for vertical reference lines) means_data <- subject_stats |> mutate(correlationhigh = factor(correlationhigh)) |> group_by(correlationhigh) |> summarise(mean_mc = mean(max_correct, na.rm = TRUE), .groups = "drop") # Combined bar plot with mean lines p_bar <- ggplot(bar_data, aes(x = max_correct, y = frac, fill = correlationhigh)) + geom_bar(stat = "identity", position = "dodge", width = 0.7, alpha = 0.85) + geom_vline(data = means_data, aes(xintercept = mean_mc, colour = correlationhigh), linewidth = 0.9, linetype = "dashed") + scale_fill_manual(values = c("0" = "#2166ac", "1" = "#d6604d"), labels = corr_labels) + scale_colour_manual(values = c("0" = "#2166ac", "1" = "#d6604d"), labels = corr_labels, guide = "none") + scale_x_continuous(breaks = 0:12) + labs( x = "Number of correct guesses (max_correct)", y = "Fraction of subjects", fill = NULL ) + theme_bw() + theme(legend.position = "bottom") print(p_bar) # 10 panels: one per treatment × correlationhigh bar_data_treat <- subject_stats |> mutate(correlationhigh = factor(correlationhigh)) |> group_by(treatment, correlationhigh, max_correct) |> summarise(n = n(), .groups = "drop") |> group_by(treatment, correlationhigh) |> mutate(frac = n / sum(n)) |> ungroup() means_data_treat <- subject_stats |> mutate(correlationhigh = factor(correlationhigh)) |> group_by(treatment, correlationhigh) |> summarise(mean_mc = mean(max_correct, na.rm = TRUE), .groups = "drop") p_bar_treat <- ggplot(bar_data_treat, aes(x = max_correct, y = frac, fill = correlationhigh)) + geom_bar(stat = "identity", width = 0.8, alpha = 0.85) + geom_vline(data = means_data_treat, aes(xintercept = mean_mc, colour = correlationhigh), linewidth = 0.9, linetype = "dashed") + scale_fill_manual(values = c("0" = "#2166ac", "1" = "#d6604d"), labels = corr_labels) + scale_colour_manual(values = c("0" = "#2166ac", "1" = "#d6604d"), labels = corr_labels, guide = "none") + scale_x_continuous(breaks = seq(0, 12, 3)) + facet_grid(correlationhigh ~ treatment, labeller = labeller(correlationhigh = corr_labels)) + labs( x = "Number of correct guesses (max_correct)", y = "Fraction of subjects", fill = NULL ) + theme_bw() + theme(legend.position = "none") print(p_bar_treat) # ---- 5. Bar plots: fraction of subjects by trueextractedbycorrelationhigh ---- te_stats <- Data_2L |> distinct(subject_id, treatment, correlationhigh, trueextracted) # Aggregate bar_data_te <- te_stats |> mutate(correlationhigh = factor(correlationhigh)) |> group_by(correlationhigh, trueextracted) |> summarise(n = n(), .groups = "drop") |> group_by(correlationhigh) |> mutate(frac = n / sum(n)) |> ungroup() means_data_te <- te_stats |> mutate(correlationhigh = factor(correlationhigh)) |> group_by(correlationhigh) |> summarise(mean_te = mean(trueextracted, na.rm = TRUE), .groups = "drop") p_bar_te <- ggplot(bar_data_te, aes(x = trueextracted, y = frac, fill = correlationhigh)) + geom_bar(stat = "identity", position = "dodge", width = 0.5, alpha = 0.85) + geom_vline(data = means_data_te, aes(xintercept = mean_te, colour = correlationhigh), linewidth = 0.9, linetype = "dashed") + scale_fill_manual(values = c("0" = "#2166ac", "1" = "#d6604d"), labels = corr_labels) + scale_colour_manual(values = c("0" = "#2166ac", "1" = "#d6604d"), labels = corr_labels, guide = "none") + scale_x_continuous(breaks = 0:1, labels = c("Not extracted", "Extracted")) + labs( x = "True rule extracted (trueextracted)", y = "Fraction of subjects", fill = NULL ) + theme_bw() + theme(legend.position = "bottom") print(p_bar_te) # 10 panels: one per treatment × correlationhigh bar_data_te_treat <- te_stats |> mutate(correlationhigh = factor(correlationhigh)) |> group_by(treatment, correlationhigh, trueextracted) |> summarise(n = n(), .groups = "drop") |> group_by(treatment, correlationhigh) |> mutate(frac = n / sum(n)) |> ungroup() means_data_te_treat <- te_stats |> mutate(correlationhigh = factor(correlationhigh)) |> group_by(treatment, correlationhigh) |> summarise(mean_te = mean(trueextracted, na.rm = TRUE), .groups = "drop") p_bar_te_treat <- ggplot(bar_data_te_treat, aes(x = trueextracted, y = frac, fill = correlationhigh)) + geom_bar(stat = "identity", width = 0.5, alpha = 0.85) + geom_vline(data = means_data_te_treat, aes(xintercept = mean_te, colour = correlationhigh), linewidth = 0.9, linetype = "dashed") + scale_fill_manual(values = c("0" = "#2166ac", "1" = "#d6604d"), labels = corr_labels) + scale_colour_manual(values = c("0" = "#2166ac", "1" = "#d6604d"), labels = corr_labels, guide = "none") + scale_x_continuous(breaks = 0:1, labels = c("Not extracted", "Extracted")) + facet_grid(correlationhigh ~ treatment, labeller = labeller(correlationhigh = corr_labels)) + labs( x = "True rule extracted (trueextracted)", y = "Fraction of subjects", fill = NULL ) + theme_bw() + theme(legend.position = "none") print(p_bar_te_treat) # ---- 6. Mixed-effects regression: max_correct ~ correlationhigh + (1 | subject_id) ---- # Variance decomposition: subject-specific intercept vs. correlationhigh effect vs. residual. # Data is at subject × treatment level (one obs per subject per treatment). library(lme4) lmm_maxcorrect <- lmer( trueextracted ~ correlationhigh + (1 | subject_id), data = te_stats, REML = TRUE ) summary(lmm_maxcorrect) # Variance decomposition vc <- as.data.frame(VarCorr(lmm_maxcorrect)) total_var <- sum(vc$vcov) cat("\n--- Variance decomposition ---\n") cat(sprintf("Subject intercept variance : %.3f (%.1f%%)\n", vc$vcov[vc$grp == "subject_id"], 100 * vc$vcov[vc$grp == "subject_id"] / total_var)) cat(sprintf("Residual variance : %.3f (%.1f%%)\n", vc$vcov[vc$grp == "Residual"], 100 * vc$vcov[vc$grp == "Residual"] / total_var)) cat(sprintf("Fixed effect (correlationhigh): %.3f (SE = %.3f)\n", fixef(lmm_maxcorrect)["correlationhigh"], sqrt(diag(vcov(lmm_maxcorrect)))["correlationhigh"])) # ---- 7. Suboptimal extraction and randomizing ---- # 9a. All 16 deterministic rules mapped to Light_Config values rule_table <- expand.grid(p00 = 0:1, p01 = 0:1, p10 = 0:1, p11 = 0:1) |> as_tibble() |> mutate(rule_id = row_number()) |> pivot_longer(-rule_id, names_to = "config_key", values_to = "rule_pred") |> mutate(Light_Config = case_when( config_key == "p00" ~ "(0,0)", config_key == "p01" ~ "(0,1)", config_key == "p10" ~ "(1,0)", config_key == "p11" ~ "(1,1)" )) |> select(-config_key) # 9b. Find which rule_id corresponds to the true rule for each treatment true_rule_ids <- Data_2L |> distinct(treatment, Light_Config, Machine_CorrectP) |> left_join(rule_table, by = "Light_Config", relationship = "many-to-many") |> group_by(treatment, rule_id) |> summarise(is_true = all(Machine_CorrectP == rule_pred), .groups = "drop") |> filter(is_true) |> select(treatment, true_rule_id = rule_id) # 9c. Count matches against all 15 non-true rules per subject × treatment # suboptimalextracted = 1 if any non-true rule passes binom.test (p <= 0.01, count > 6) suboptimalextracted_df <- Data_2L |> select(subject_id, treatment, correlationhigh, Light_Config, Guess) |> left_join(rule_table, by = "Light_Config", relationship = "many-to-many") |> left_join(true_rule_ids, by = "treatment") |> filter(rule_id != true_rule_id) |> group_by(subject_id, treatment, correlationhigh, rule_id) |> summarise(n_match = sum(Guess == rule_pred), .groups = "drop") |> rowwise() |> mutate(passes = binom.test(n_match, 12, 0.5)$p.value <= 0.01 & n_match > 6) |> ungroup() |> group_by(subject_id, treatment, correlationhigh) |> summarise(suboptimalextracted = as.integer(any(passes)), .groups = "drop") # 9d. Combine with trueextracted and define randomizing extraction_stats <- Data_2L |> distinct(subject_id, treatment, correlationhigh, trueextracted) |> left_join(suboptimalextracted_df, by = c("subject_id", "treatment", "correlationhigh")) |> mutate(randomizing = as.integer(trueextracted == 0 & suboptimalextracted == 0)) # 9e. Frequency table by correlationhigh tbl_corr <- extraction_stats |> group_by(correlationhigh) |> summarise( N = n(), true_ext = sum(trueextracted, na.rm = TRUE), subopt = sum(suboptimalextracted, na.rm = TRUE), rand = sum(randomizing, na.rm = TRUE), .groups = "drop" ) |> mutate( true_ext_pct = round(true_ext / N * 100, 1), subopt_pct = round(subopt / N * 100, 1), rand_pct = round(rand / N * 100, 1) ) print(tbl_corr) # 9f. Frequency table by treatment tbl_treat <- extraction_stats |> group_by(treatment) |> summarise( N = n(), true_ext = sum(trueextracted, na.rm = TRUE), subopt = sum(suboptimalextracted, na.rm = TRUE), rand = sum(randomizing, na.rm = TRUE), .groups = "drop" ) |> mutate( true_ext_pct = round(true_ext / N * 100, 1), subopt_pct = round(subopt / N * 100, 1), rand_pct = round(rand / N * 100, 1) ) print(tbl_treat) # 9g. Frequency table by treatment × correlationhigh tbl_treat_corr <- extraction_stats |> group_by(treatment, correlationhigh) |> summarise( N = n(), true_ext = sum(trueextracted, na.rm = TRUE), subopt = sum(suboptimalextracted, na.rm = TRUE), rand = sum(randomizing, na.rm = TRUE), .groups = "drop" ) |> mutate( true_ext_pct = round(true_ext / N * 100, 1), subopt_pct = round(subopt / N * 100, 1), rand_pct = round(rand / N * 100, 1), correlationhigh = factor(correlationhigh, levels = c(0, 1), labels = c("Low", "High")) ) |> arrange(treatment, correlationhigh) print(tbl_treat_corr) # 9g (wide). Same table pivoted: one row per treatment, % columns split by Low/High tbl_treat_corr_wide <- tbl_treat_corr |> select(treatment, correlationhigh, N, true_ext_pct, subopt_pct, rand_pct) |> pivot_wider( names_from = correlationhigh, values_from = c(N, true_ext_pct, subopt_pct, rand_pct), names_glue = "{.value}_{correlationhigh}" ) |> select(treatment, N_Low, N_High, true_ext_pct_Low, true_ext_pct_High, subopt_pct_Low, subopt_pct_High, rand_pct_Low, rand_pct_High) print(tbl_treat_corr_wide) # ---- 10. Rule_used: posterior table and bar chart by treatment and correlation high ---- rule_obs <- Data_2L |> distinct(subject_id, treatment, correlationhigh, Rule_used) |> mutate(correlationhigh = factor(correlationhigh, levels = c(0, 1), labels = c("Low", "High"))) # --- 10a. Proportion table (treatment × correlationhigh × Rule_used) --- tbl_rule <- rule_obs |> group_by(treatment, correlationhigh, Rule_used) |> summarise(n = n(), .groups = "drop") |> group_by(treatment, correlationhigh) |> mutate(pct = round(n / sum(n) * 100, 1)) |> ungroup() |> arrange(treatment, correlationhigh, desc(pct)) print(tbl_rule) # Wide version: one row per treatment × Rule_used, columns = Low / High % tbl_rule_wide <- tbl_rule |> select(treatment, correlationhigh, Rule_used, pct) |> pivot_wider(names_from = correlationhigh, values_from = pct, values_fill = 0) |> arrange(treatment, desc(Low + High)) print(tbl_rule_wide) # --- 10b. Bar chart: proportion of Rule_used by treatment × correlationhigh --- rule_plot_data <- tbl_rule |> mutate(treatment = factor(treatment, levels = c("AND", "OR", "INHIBIT", "EITHER", "JOINT"))) p_rule <- ggplot(rule_plot_data, aes(x = reorder(Rule_used, -pct), y = pct, fill = correlationhigh)) + geom_bar(stat = "identity", position = "dodge", alpha = 0.85) + scale_fill_manual(values = c("Low" = "#2166ac", "High" = "#d6604d")) + facet_wrap(~ treatment, nrow = 1) + labs( x = "Rule used", y = "% of subjects", fill = NULL ) + theme_bw() + theme( axis.text.x = element_text(angle = 45, hjust = 1, size = 7), legend.position = "bottom" ) print(p_rule) # ---- 11. Prior shares from structural estimation -------------------------------- # env_2L1$ESTIMATION_RESULTS (from Structural_Exp_1.R, session "2L(1)") and # env_2L2$ESTIMATION_RESULTS (from Structural_Exp.R, session "2L(2)") each # contain one MLE population share per strategy per treatment — these are the # priors (mixing weights) before subject-level data updates the posteriors. # The same treatment can be Low correlation in one session and High in the other. prior_long_2L <- bind_rows( env_2L1$ESTIMATION_RESULTS |> mutate(session = "2L(1)"), env_2L2$ESTIMATION_RESULTS |> mutate(session = "2L(2)") ) |> select(-loglike, -num_par) |> pivot_longer(c(-treatment, -session), names_to = "Rule", values_to = "share") |> filter(!is.na(share), share > 0) |> mutate( correlationhigh = factor( as.integer( (treatment == "AND" & session == "2L(2)") | (treatment == "EITHER" & session == "2L(2)") | (treatment == "OR" & session == "2L(1)") | (treatment == "INHIBIT" & session == "2L(1)") | (treatment == "JOINT" & session == "2L(1)") ), levels = c(0, 1), labels = c("Low", "High") ), treatment = factor(treatment, levels = c("AND", "OR", "INHIBIT", "EITHER", "JOINT")), pct = round(share * 100, 1) ) # --- 11a. Proportion table (treatment × session × Rule, sorted by share) --- tbl_prior_2L <- prior_long_2L |> arrange(treatment, session, desc(pct)) print(tbl_prior_2L) # Wide: one row per treatment × session × Rule, columns = Low / High % tbl_prior_2L_wide <- tbl_prior_2L |> select(treatment, session, correlationhigh, Rule, pct) |> pivot_wider(names_from = correlationhigh, values_from = pct, values_fill = 0) |> arrange(treatment, session, desc(Low + High)) print(tbl_prior_2L_wide) # --- 11b. Bar chart: rules with prior share ≥ 5% in at least one treatment × session --- rules_above5_2L <- prior_long_2L |> filter(pct >= 5) |> distinct(Rule) p_prior_2L_top <- prior_long_2L |> semi_join(rules_above5_2L, by = "Rule") |> ggplot(aes(x = reorder(Rule, -pct), y = pct, fill = correlationhigh)) + geom_bar(stat = "identity", position = "dodge", alpha = 0.85) + scale_fill_manual(values = c("Low" = "#2166ac", "High" = "#d6604d")) + facet_wrap(~ treatment, nrow = 1, scales = "free_x") + labs( x = "Rule (prior)", y = "Prior share (%)", fill = NULL ) + theme_bw() + theme( axis.text.x = element_text(angle = 45, hjust = 1, size = 7), legend.position = "bottom" ) print(p_prior_2L_top)