# ============================================================ # Data Cleaning: Shaded Lights Experiment (2 Lights) # ============================================================ # Output: long-format dataset, one row per subject × treatment × guess # ============================================================ # # ------------------------------------------------------------ # VARIABLE CODEBOOK # ------------------------------------------------------------ # # UNIT OF OBSERVATION # Each row is one prediction (guess) made by one subject in one treatment. # A subject makes 12 guesses per treatment and completes 5 treatments, # giving 60 rows per subject (80 subjects × 60 = 4800 rows total). # # SUBJECT IDENTIFIERS # subject_id : Anonymous sequential integer ID (1–80), replacing # the Prolific ID. Consistent across all rows of a subject. # # TREATMENT # treatment : The rule governing which light configuration causes # sound. Factor with 5 levels: # AND – sound iff red ON and blue ON # OR – sound iff red ON or blue ON (at least one) # INHIBIT – sound iff red ON and blue OFF # EITHER – sound iff exactly one light ON (XOR) # JOINT – sound iff both lights OFF or both lights ON (same state) # round_order : Position in which the subject encountered this # treatment (1 = first, 5 = last). Treatments are # randomized across subjects, so the same treatment # may appear in different positions for different subjects. # Useful for studying order/learning effects. # # TRIAL-LEVEL VARIABLES (vary within subject × treatment) # Guess_Number : Index of the prediction within the treatment (1–12). # Light_Config : Observable light state shown to the subject for this # prediction, formatted as "(red, blue)" where 1 = ON # and 0 = OFF. Accounts for original_color swap. # Possible values: (0,0), (0,1), (1,0), (1,1). # red_light : State of the red light (1 = ON, 0 = OFF). # blue_light : State of the blue light (1 = ON, 0 = OFF). 
# Guess : The subject's prediction on this trial (1 = predicts # sound, 0 = predicts no sound). # Machine_CorrectP : The correct prediction a fully-informed subject should # make given the true rule for this treatment and the # current light configuration (1 = predict sound, # 0 = predict no sound). Derived deterministically from # the treatment rule and Light_Config. Comparing Guess # to Machine_CorrectP measures rule-learning accuracy. # # TREATMENT-LEVEL VARIABLES (same value for all 12 guesses within a treatment) # time_machine : Time (in seconds) the subject spent on the prediction # page for this treatment. # Notes : Free-text notes the subject wrote during this treatment. # certainty : Subject's self-reported confidence in their predictions # for this treatment (numeric scale). # difficulty : Subject's self-reported difficulty of this treatment # (numeric scale). Collected only after the final (5th) # treatment and filled to all rounds within each subject. # difficulty_certainty: Additional difficulty/certainty rating (numeric scale). # Same collection timing as difficulty. # predicted_correct_self : Subject's prediction of how many of their own # guesses were correct (numeric). # # SUBJECT-LEVEL VARIABLES (same value for all rows of a subject) # STRATEGY : Free-text description of the strategy the subject # used across treatments. Collected once at the end. # COMMENTS : Free-text final comments. Collected once at the end. # num_wrong : Number of errors the subject made in the comprehension # check, administered once at the start of the experiment. # final_payment : Total payment in GBP (participation fee + bonus). # payment_machine : The treatment (machine) randomly selected for payment. # Factor with the same levels as treatment. # education : Highest education level completed (character), from # Prolific demographics (Demografic.csv). # age : Subject's age in years (integer), from Prolific # demographics (Demografic.csv). 
#   time_total_min : Total time (in minutes) from experiment start to end of
#                    the last Combined_2lights task page. Computed as
#                    (page_load_ts of round 5 + time_on_page of round 5) minus
#                    participant.time_started_utc.
#
# TECHNICAL
#   original_color : Indicates whether light color labels were swapped in
#                    the display (1 = standard, 0 = swapped). All subjects
#                    in this dataset have original_color = 1.
#                    red_light, blue_light, Light_Config, and Machine_CorrectP
#                    already account for this swap.
# ------------------------------------------------------------

#install.packages("tidyverse")
library(tidyverse)

# ---- File paths -------------------------------------------
# Paths are relative to the location of this script.
# Resolution order for the script directory:
#   1. the document open in RStudio (original behaviour),
#   2. the --file= argument when the script is run through Rscript,
#   3. the current working directory as a last resort.
# Steps 2 and 3 keep the script runnable outside RStudio and when the
# RStudio document context has no path (e.g. unsaved file), in which case
# the original code produced paths rooted at "".
script_dir <- local({
  rstudio_path <- tryCatch(
    rstudioapi::getActiveDocumentContext()$path,
    error = function(e) ""
  )
  if (length(rstudio_path) == 1L && !is.na(rstudio_path) &&
      nzchar(rstudio_path)) {
    return(dirname(rstudio_path))
  }

  file_arg <- grep("^--file=", commandArgs(trailingOnly = FALSE), value = TRUE)
  if (length(file_arg) > 0L) {
    return(dirname(normalizePath(sub("^--file=", "", file_arg[1]))))
  }

  getwd()
})

path_main  <- file.path(script_dir, "MYDATA.csv")
path_pilot <- file.path(script_dir, "Data_Pilot2L(2)_24.csv")

# ---- 1. Read raw data ------------------------------------
# MYDATA.csv is in super-wide format: one row per participant,
# with all 5 rounds spread across columns (Combined_2lights.N.player.*).
# Payment info is embedded as Pay.1.player.* columns.
# Data_Pilot2L(2)_24.csv has the same structure; rows are appended below.
# bind_rows fills NA for any columns that differ between the two files.
raw <- bind_rows(
  read_csv(path_main,  show_col_types = FALSE),
  read_csv(path_pilot, show_col_types = FALSE)
)

# ---- 2. Drop rows with no Prolific ID or no guesses -------
# Rows with no Combined_2lights.1.player.ID_subject are incomplete
# (e.g. Redirect-only rows, bots, or participants who did not start).
# Also drop subjects who have a Prolific ID but submitted no guesses at all,
# and any test/dummy entries whose ID is not a valid 24-character Prolific hex ID.
# All per-round guess columns: Combined_2lights.<round>.player.guess<k>.
guess_cols <- names(raw)[
  grepl("^Combined_2lights\\.\\d+\\.player\\.guess\\d+$", names(raw))
]

# Keep a row only if its round-1 Prolific ID is present, non-empty and a
# 24-character lowercase hex string, and at least one guess was submitted.
raw <- raw %>%
  filter(
    !(is.na(`Combined_2lights.1.player.ID_subject`) |
        `Combined_2lights.1.player.ID_subject` == ""),
    str_detect(`Combined_2lights.1.player.ID_subject`, "^[0-9a-f]{24}$"),
    if_any(all_of(guess_cols), function(g) !(is.na(g) | g == ""))
  )

# ---- 3. Create subject ID mapping -------------------------
# Prolific ID is only in Combined_2lights.1 (round 1 column).
# Build the mapping via participant.code (consistent across all rounds),
# ordering by Prolific ID so the numbering is deterministic.
id_map <- raw %>%
  distinct(
    participant.code,
    prolific_id = `Combined_2lights.1.player.ID_subject`
  ) %>%
  arrange(prolific_id) %>%
  mutate(subject_id = row_number()) %>%
  select(participant.code, subject_id, prolific_id)

raw <- raw %>%
  left_join(id_map, by = "participant.code")

# ---- 4. Correct-prediction lookup table -------------------
# For each treatment × observable light configuration (red, blue),
# define the correct prediction (Machine_CorrectP) a subject should make
# if they knew the true rule perfectly.
# Machine_CorrectP = 1 means "predict sound occurs"; 0 means "predict no sound".
# Note: original_color (see Step 12) can swap which physical light
#       acts as "red" vs "blue" in these rules.
#
# Summary of correct predictions per task:
#   AND:     predict 1 only when red=1 AND blue=1   (both on)
#   OR:      predict 1 when red=1 OR blue=1          (at least one on)
#   INHIBIT: predict 1 only when red=1 AND blue=0   (red on, blue off)
#   EITHER:  predict 1 when red≠blue                 (exactly one on)
#   JOINT:   predict 1 when red=blue (both off or both on) (same state)

# Truth table stored column-wise: each treatment contributes four rows, one
# per light state in the order (red, blue) = (0,0), (0,1), (1,0), (1,1).
correct_rules <- tibble(
  treatment = rep(c("AND", "OR", "INHIBIT", "EITHER", "JOINT"), each = 4),
  red       = rep(c(0, 0, 1, 1), times = 5),
  blue      = rep(c(0, 1, 0, 1), times = 5),
  Machine_CorrectP = c(
    0, 0, 0, 1,  # AND: sound iff BOTH red ON and blue ON
    0, 1, 1, 1,  # OR: sound iff AT LEAST ONE light ON
    0, 0, 1, 0,  # INHIBIT: sound iff RED ON and BLUE OFF (blue inhibits)
    0, 1, 1, 0,  # EITHER: sound iff EXACTLY ONE light ON (XOR)
    1, 0, 0, 1   # JOINT: sound iff both OFF or both ON (XNOR / joint state)
  )
)

# ---- 5. Subject-level text variables ----------------------
# Strategy and final comments may be filled in any round; take the first
# non-empty value across all 5 rounds.
# One row per participant: first non-empty strategy / comment text, scanning
# the round-1 to round-5 columns in that order.
subject_text <- local({
  # Returns, per participant, the first non-blank value of
  # Combined_2lights.<round>.player.<field> over rounds 1..5.
  first_nonblank <- function(field) {
    per_round <- lapply(1:5, function(round_no) {
      column <- paste0("Combined_2lights.", round_no, ".player.", field)
      na_if(as.character(raw[[column]]), "")
    })
    do.call(coalesce, per_round)
  }

  tibble(
    participant.code = raw$participant.code,
    STRATEGY         = first_nonblank("prediction_strategy"),
    COMMENTS         = first_nonblank("final_comments")
  )
})

# ---- 6. Comprehension check errors ------------------------
# num_wrong is only populated in round 1 (the first treatment encountered)
cq_errors <- raw %>%
  mutate(num_wrong = as.integer(`Combined_2lights.1.player.num_wrong`)) %>%
  select(participant.code, num_wrong)

# ---- 7. Payment info from embedded Pay columns ------------
# Pay info is embedded directly in MYDATA.csv as Pay.1.player.* columns
pay_clean <- raw %>%
  mutate(
    # total payment = participation fee + bonus
    final_payment = as.numeric(session.config.participation_fee) +
      as.numeric(`Pay.1.player.bonus_total`),
    # first capture group = value of the "machine" key in payment_details
    payment_machine = str_match(
      `Pay.1.player.payment_details`,
      '"machine"\\s*:\\s*"([^"]+)"'
    )[, 2]
  ) %>%
  select(participant.code, final_payment, payment_machine)

# ---- 7b. Demographics from Prolific export --------------------
path_demo       <- file.path(script_dir, "Demografic.csv")
path_demo_pilot <- file.path(script_dir, "Demographic_2L(2)_24.csv")

# Stack the main and pilot exports, keep the three columns used downstream,
# and retain the first row seen for each Prolific ID.
demographics <- c(path_demo, path_demo_pilot) %>%
  lapply(read_csv, show_col_types = FALSE) %>%
  bind_rows() %>%
  select(
    prolific_id = `Participant id`,
    education   = `Highest education level completed`,
    age         = Age
  ) %>%
  distinct(prolific_id, .keep_all = TRUE)

# ---- 7c. Total time in experiment -------------------------
# Approximate total time from experiment start to end of the last task round.
# End time = page_load_ts of round 5 + time_on_page of round 5.
time_total <- raw %>%
  mutate(
    last_load_sec = as.numeric(as.POSIXct(
      `Combined_2lights.5.player.page_load_ts`, origin = "1970-01-01"
    )),
    last_page_sec = as.numeric(`Combined_2lights.5.player.time_on_page`),
    start_sec = as.numeric(as.POSIXct(
      participant.time_started_utc, origin = "1970-01-01"
    )),
    time_total_sec = (last_load_sec + last_page_sec) - start_sec,
    time_total_min = time_total_sec / 60
  ) %>%
  select(participant.code, time_total_min)

# ---- 8. Reshape wide → long (one row per participant × round) ----
# MYDATA.csv has 5 rounds spread across column groups Combined_2lights.N.player.*.
# Extract each round into a standardised set of columns and stack vertically,
# yielding the same structure as the original Combined_2lights.csv long format.
# Extract round `n` from the super-wide data.
#
# df : wide data frame holding participant.code, subject_id, prolific_id and
#      the Combined_2lights.<n>.* columns.
# n  : round number used in the column prefix.
#
# Returns one row per participant who has a non-empty machine name in that
# round, with the round prefix stripped from the column names
# (player.machine_name, subsession.round_number, ..., player.guess1-12,
# player.row1-12).
select_round <- function(df, n) {
  # Standardised (un-prefixed) column names, in output order.
  fields <- c(
    "player.machine_name",
    "subsession.round_number",
    "player.original_color",
    "player.time_on_page",
    "player.notes",
    "player.certainty",
    "player.difficulty",
    "player.difficulty_certainty",
    "player.predicted_correct_self",
    paste0("player.guess", 1:12),
    paste0("player.row", 1:12)
  )
  # Named vector: names are the new column names, values the prefixed sources.
  round_cols <- setNames(paste0("Combined_2lights.", n, ".", fields), fields)

  round_df <- select(
    df,
    participant.code, subject_id, prolific_id,
    all_of(round_cols)
  )
  filter(round_df, !(is.na(player.machine_name) | player.machine_name == ""))
}

# Stack rounds 1..5 on top of each other.
raw_long <- 1:5 %>%
  lapply(function(round_no) select_round(raw, round_no)) %>%
  bind_rows()

# ---- 9. Pivot to long format ------------------------------
# Columns player.guess1–12 and player.row1–12 are in wide format.
# Pivot to one row per (subject, treatment, guess number).
df_long <- raw_long %>%
  select(
    participant.code,
    subject_id,
    prolific_id,
    treatment              = player.machine_name,
    round_order            = subsession.round_number,   # position in sequence (1–5)
    original_color         = player.original_color,
    time_machine           = player.time_on_page,
    Notes                  = player.notes,
    certainty              = player.certainty,
    difficulty             = player.difficulty,
    difficulty_certainty   = player.difficulty_certainty,
    predicted_correct_self = player.predicted_correct_self,
    matches("^player\\.(guess|row)\\d+$")
  ) %>%
  pivot_longer(
    cols          = matches("^player\\.(guess|row)\\d+$"),
    names_to      = c(".value", "Guess_Number"),
    names_pattern = "player\\.(guess|row)(\\d+)"
  ) %>%
  rename(Guess = guess, config_raw = row) %>%
  # Drop empty guess slots (some rows may have fewer than 12 guesses)
  filter(!is.na(Guess), Guess != "") %>%
  mutate(
    Guess_Number = as.integer(Guess_Number),
    Guess        = as.integer(Guess)
  )

# ---- 10. Parse light configuration -------------------------
# config_raw is stored as a numpy-style array string: "[r b 0 s]"
# Layout of the 4 values:
#   [0] red_light  – first observable light  (1 = ON, 0 = OFF)
#   [1] blue_light – second observable light (1 = ON, 0 = OFF)
#   [2] (always 0) – unused slot (no third light in 2-light experiment)
#   [3] pre-drawn sound outcome (not observed by subject; used internally for payment)
#
# Light_Config summarises the observable state shown to the subject.
df_long <- df_long %>%
  mutate(
    # every run of digits in the string, in order of appearance
    config_nums  = str_extract_all(config_raw, "\\d+"),
    red_light    = map_int(config_nums, ~ as.integer(.x[1])),
    blue_light   = map_int(config_nums, ~ as.integer(.x[2])),
    Light_Config = paste0("(", red_light, ",", blue_light, ")")
  ) %>%
  select(-config_nums, -config_raw)

# ---- 11. Fill difficulty and difficulty_certainty across rounds ----
# These are only collected in the final (5th) round; propagate to all rounds
# within each subject so no rows are left NA.
df_long <- df_long %>%
  group_by(participant.code) %>%
  fill(difficulty, difficulty_certainty, .direction = "downup") %>%
  ungroup()

# ---- 12. Account for original_color (light color swap) -----
# original_color = 1: position 0 = RED light, position 1 = BLUE light (standard)
# original_color = 0: colors are SWAPPED in the display — swap red_light and
#                     blue_light so that all downstream variables (Light_Config,
#                     red_light, blue_light, Machine_CorrectP) reflect what the
#                     subject actually saw as "red" and "blue".
# In this dataset all rows have original_color = 1 (no swap occurred),
# but the logic handles the general case.
#
# mutate() evaluates its arguments in order, so red_light must not be
# overwritten before blue_light is computed from it. The pre-swap values are
# therefore snapshotted first; without the snapshot both lights would end up
# equal to the old blue_light whenever a swap applies.
df_long <- df_long %>%
  mutate(
    original_color = as.integer(original_color),
    red_position   = red_light,   # value parsed from position 0
    blue_position  = blue_light,  # value parsed from position 1
    red_light      = if_else(original_color == 1L, red_position,  blue_position),
    blue_light     = if_else(original_color == 1L, blue_position, red_position),
    Light_Config   = paste0("(", red_light, ",", blue_light, ")")
  ) %>%
  select(-red_position, -blue_position)

# ---- 13. Join correct predictions -------------------------
# Adds Machine_CorrectP; rows whose (treatment, red_light, blue_light) has no
# entry in correct_rules get NA.
df_long <- df_long %>%
  left_join(
    correct_rules %>% rename(red_light = red, blue_light = blue),
    by = c("treatment", "red_light", "blue_light")
  )

# ---- 14. Merge subject- and session-level variables -------
df_final <- df_long %>%
  left_join(subject_text, by = "participant.code") %>%
  left_join(cq_errors,    by = "participant.code") %>%
  left_join(pay_clean,    by = "participant.code") %>%
  left_join(time_total,   by = "participant.code") %>%
  select(-participant.code) %>%   # remove internal oTree code; prolific_id is kept
  left_join(demographics, by = "prolific_id")

# ---- 15. Final type coercion and variable ordering --------
df_final <- df_final %>%
  mutate(
    subject_id             = as.integer(subject_id),
    treatment              = factor(treatment,
                                    levels = c("AND", "OR", "INHIBIT", "EITHER", "JOINT")),
    round_order            = as.integer(round_order),
    Guess_Number           = as.integer(Guess_Number),
    Guess                  = as.integer(Guess),
    Machine_CorrectP       = as.integer(Machine_CorrectP),
    red_light              = as.integer(red_light),
    blue_light             = as.integer(blue_light),
    original_color         = as.integer(original_color),
    time_machine           = as.numeric(time_machine),
    certainty              = as.numeric(certainty),
    difficulty             = as.numeric(difficulty),
    difficulty_certainty   = as.numeric(difficulty_certainty),
    predicted_correct_self = as.numeric(predicted_correct_self),
    num_wrong              = as.integer(num_wrong),
    final_payment          = as.numeric(final_payment),
    payment_machine        = factor(payment_machine,
                                    levels = c("AND", "OR", "INHIBIT", "EITHER", "JOINT")),
    education              = as.character(education),
    age                    = as.integer(age),
    time_total_min         = as.numeric(time_total_min)
  ) %>%
  select(
    # Subject identifier
    subject_id,
    prolific_id,
    # Treatment info
    treatment,
    round_order,
    # Trial-level
    Guess_Number,
    Light_Config,
    red_light,
    blue_light,
    Guess,                  # subject's prediction (0/1)
    Machine_CorrectP,       # optimal deterministic prediction given the rule
    # Treatment-level assessments
    time_machine,
    Notes,
    certainty,
    difficulty,
    difficulty_certainty,
    predicted_correct_self,
    # Subject-level text
    STRATEGY,
    COMMENTS,
    # Subject-level scalars
    num_wrong,              # comprehension check errors (assessed once at start)
    final_payment,          # participation fee + bonus
    payment_machine,        # treatment randomly selected for payment
    education,              # highest education level completed (Prolific)
    age,                    # age in years (Prolific)
    time_total_min,         # total experiment time in minutes (start → end of last task page)
    # Keep original_color for reference
    original_color
  ) %>%
  arrange(subject_id, round_order, Guess_Number)

# ---- 16. Inspect result -----------------------------------
glimpse(df_final)
cat("\nRows:      ", nrow(df_final), "\n")
cat("Subjects:  ", n_distinct(df_final$subject_id), "\n")
cat("Treatments:", paste(levels(df_final$treatment), collapse = ", "), "\n")

# ---- 17. Learning indicator (binomial test per subject × treatment) --------
# For each subject × treatment, count how many of the 12 guesses were correct
# (Guess == Machine_CorrectP). Then test whether this count is significantly
# above chance (p = 0.5, n = 12) using a one-sample binomial test.
#
#   max_correct   : number of correct guesses out of 12
#   trueextracted : 1 if binom.test p-value <= 0.01 AND max_correct > 6,
#                   indicating the subject learned the true rule
#
# NOTE(review): the number of trials is fixed at 12 even though Step 9 drops
# empty guess slots, so a treatment with fewer recorded guesses is still
# tested against n = 12 — confirm this is the intended definition.
learning <- df_final %>%
  group_by(subject_id, treatment) %>%
  summarise(
    max_correct = sum(Guess == Machine_CorrectP),
    .groups = "drop"
  ) %>%
  rowwise() %>%
  mutate(
    trueextracted = as.integer(
      binom.test(max_correct, 12, 0.5)$p.value <= 0.01 & max_correct > 6
    )
  ) %>%
  ungroup()

# Join back into df_final
df_final <- df_final %>%
  left_join(learning, by = c("subject_id", "treatment"))

cat("\nLearning summary (trueextracted) by treatment:\n")
print(
  df_final %>%
    distinct(subject_id, treatment, trueextracted) %>%
    count(treatment, trueextracted)
)

# ---- 18. Payment table ------------------------------------
payment_table <- df_final %>%
  distinct(subject_id, prolific_id, final_payment) %>%
  arrange(subject_id)

cat("\nPayment table:\n")
print(payment_table, n = Inf)

# ---- 19. Comments table -----------------------------------
comments_table <- df_final %>%
  distinct(subject_id, COMMENTS) %>%
  arrange(subject_id)

cat("\nComments table:\n")
for (i in seq_len(nrow(comments_table))) {
  cat(sprintf("\nSubject %d:\n", comments_table$subject_id[i]))
  # scalar condition, so plain if/else rather than vectorised ifelse()
  text <- if (is.na(comments_table$COMMENTS[i])) {
    "(none)"
  } else {
    comments_table$COMMENTS[i]
  }
  cat(strwrap(text, width = 80, indent = 2, exdent = 2), sep = "\n")
}

# ---- 20. Average correct guesses per treatment ------------
avg_correct <- df_final %>%
  distinct(subject_id, treatment, max_correct) %>%
  group_by(treatment) %>%
  summarise(avg_correct = mean(max_correct, na.rm = TRUE), .groups = "drop") %>%
  arrange(treatment)

cat("\nAverage correct guesses per treatment (out of 12):\n")
print(avg_correct)

# ---- 21. Per-subject × treatment summary ------------------
subject_treatment_summary <- df_final %>%
  distinct(subject_id, treatment, round_order, time_machine, max_correct) %>%
  arrange(subject_id, round_order)

cat("\nSubject × treatment summary (machine, time, correct guesses):\n")
for (sid in unique(subject_treatment_summary$subject_id)) {
  cat(sprintf("\nSubject %d:\n", sid))
  sub <- subject_treatment_summary %>% filter(subject_id == sid)
  for (j in seq_len(nrow(sub))) {
    cat(sprintf("  Round %d | %-7s | Time: %5.0f s | Correct guesses: %d/12\n",
                sub$round_order[j],
                as.character(sub$treatment[j]),
                sub$time_machine[j],
                sub$max_correct[j]))
  }
}

# ---- 22. Rule values from experimental design --------------------------------
# Frequency vectors from Combined_2lights/__init__.py, commented case_list (lines 1061-1065).
# case1 rows in order: (R,B,S) = (0,0,0),(0,0,1),(0,1,0),(0,1,1),(1,1,0),(1,1,1),(1,0,0),(1,0,1)
# Each freq_<treatment> column gives, per (red, blue, sound) row, how often
# that observation occurs in the treatment's dataset.
case1_obs <- tibble(
  red   = c(0, 0, 0, 0, 1, 1, 1, 1),
  blue  = c(0, 0, 1, 1, 1, 1, 0, 0),
  sound = c(0, 1, 0, 1, 0, 1, 0, 1),
  freq_AND     = c(6, 2, 5, 1, 1,  7, 6,  2),  # freq18
  freq_OR      = c(7, 1, 1, 7, 1, 11, 1,  2),  # freq3
  freq_EITHER  = c(6, 1, 2, 6, 5,  1, 1,  7),  # freq20
  freq_JOINT   = c(0, 1, 7, 1, 1,  9, 9,  1),  # freq8
  freq_INHIBIT = c(5, 1, 6, 0, 3,  2, 0, 14)   # freq12
)

# Frequency-weighted share of observations whose sound equals the true rule's
# prediction, one value per treatment (named by treatment).
# vapply() is used instead of sapply() so the result is guaranteed to be a
# numeric vector with exactly one value per treatment.
rule_values <- vapply(
  c("AND", "OR", "EITHER", "JOINT", "INHIBIT"),
  function(rule) {
    freqs <- case1_obs[[paste0("freq_", rule)]]
    # left_join keeps case1_obs row order, so pred lines up with freqs/sound
    pred  <- left_join(case1_obs,
                       filter(correct_rules, treatment == rule),
                       by = c("red", "blue"))$Machine_CorrectP
    sum(freqs * (case1_obs$sound == pred)) / sum(freqs)
  },
  numeric(1)
)

cat("\nRule values (proportion of trials correctly predicted by the true rule):\n")
print(round(rule_values, 4))

# ---- 23. Value of all 16 rules for each treatment dataset -------------------
# 16 deterministic rules from Structural_Exp.R (lines 134-151).
# Action vector (a1,a2,a3,a4): prediction for states (0,0),(0,1),(1,0),(1,1).
strategies_16 <- list(
  always_0       = c(0, 0, 0, 0),
  AND            = c(0, 0, 0, 1),
  INHIBIT        = c(0, 0, 1, 0),
  red            = c(0, 0, 1, 1),
  blue_alone     = c(0, 1, 0, 0),
  blue           = c(0, 1, 0, 1),
  EITHER         = c(0, 1, 1, 0),
  OR             = c(0, 1, 1, 1),
  NOR            = c(1, 0, 0, 0),
  JOINT          = c(1, 0, 0, 1),
  not_blue       = c(1, 0, 1, 0),
  not_blue_alone = c(1, 0, 1, 1),
  not_red        = c(1, 1, 0, 0),
  not_INHIBIT    = c(1, 1, 0, 1),
  NAND           = c(1, 1, 1, 0),
  always_1       = c(1, 1, 1, 1)
)

# Map each case1 row to which action index applies (a1=1,a2=2,a3=3,a4=4):
#   rows 0-1: (R=0,B=0)→state(0,0)→a1;  rows 2-3: (R=0,B=1)→state(0,1)→a2
#   rows 4-5: (R=1,B=1)→state(1,1)→a4;  rows 6-7: (R=1,B=0)→state(1,0)→a3
state_idx <- c(1L, 1L, 2L, 2L, 4L, 4L, 3L, 3L)

# Matrix with one row per treatment dataset and one column per strategy; each
# cell is the frequency-weighted share of observations the strategy predicts
# correctly. The named FUN.VALUE fixes both the shape (5 values per strategy)
# and the row names that Step 24 turns into the `treatment` column.
rule_val_matrix <- vapply(
  strategies_16,
  function(actions) {
    vapply(c("AND", "OR", "EITHER", "JOINT", "INHIBIT"), function(treat) {
      freqs <- case1_obs[[paste0("freq_", treat)]]
      pred  <- actions[state_idx]
      sum(freqs * (case1_obs$sound == pred)) / sum(freqs)
    }, numeric(1))
  },
  FUN.VALUE = setNames(numeric(5), c("AND", "OR", "EITHER", "JOINT", "INHIBIT"))
)

cat("\nValue of all 16 rules for each treatment dataset (proportion correctly predicted):\n")
print(round(rule_val_matrix, 4))

# ---- 24. Merge rule values into df_final ------------------------------------
rule_val_df <- as_tibble(rule_val_matrix, rownames = "treatment") %>%
  rename_with(~ paste0("val_", .), -treatment)

# The join key is converted to a factor with the same levels as
# df_final$treatment. Joining a factor key to a character key would coerce
# treatment to character and drop its level ordering.
df_final <- df_final %>%
  left_join(
    rule_val_df %>%
      mutate(treatment = factor(
        treatment,
        levels = c("AND", "OR", "INHIBIT", "EITHER", "JOINT")
      )),
    by = "treatment"
  )

# ---- 25. Per-light-config accuracy of the true rule -------------------------
# For each treatment × light config, proportion of observed trials in that
# treatment's dataset where the true rule correctly predicts the sound.
# case1_obs row pairs per config: (0,0)→rows 1-2, (0,1)→rows 3-4, (1,1)→rows 5-6, (1,0)→rows 7-8
# row_s0 / row_s1 index the case1_obs rows with sound = 0 / sound = 1.
config_lookup <- tibble(
  config = c("(0,0)", "(0,1)", "(1,1)", "(1,0)"),
  red    = c(0L, 0L, 1L, 1L),
  blue   = c(0L, 1L, 1L, 0L),
  row_s0 = c(1L, 3L, 5L, 7L),
  row_s1 = c(2L, 4L, 6L, 8L)
)

# One row per treatment, one ACC_<red><blue> column per light configuration.
acc_config_df <- lapply(c("AND", "OR", "EITHER", "JOINT", "INHIBIT"), function(treat) {
  freqs <- case1_obs[[paste0("freq_", treat)]]
  config_lookup %>%
    left_join(filter(correct_rules, treatment == treat), by = c("red", "blue")) %>%
    mutate(
      f0        = freqs[row_s0],
      f1        = freqs[row_s1],
      # the rule is right on the sound = 0 rows when it predicts 0, else on sound = 1
      acc       = ifelse(Machine_CorrectP == 0L, f0, f1) / (f0 + f1),
      treatment = treat,
      varname   = paste0("ACC_", gsub("[(), ]", "", config))
    ) %>%
    select(treatment, varname, acc)
}) %>%
  bind_rows() %>%
  pivot_wider(names_from = varname, values_from = acc)

# The join key is converted to a factor with the same levels as
# df_final$treatment. Joining a factor key to a character key would coerce
# treatment to character and drop its level ordering.
df_final <- df_final %>%
  left_join(
    acc_config_df %>%
      mutate(treatment = factor(
        treatment,
        levels = c("AND", "OR", "INHIBIT", "EITHER", "JOINT")
      )),
    by = "treatment"
  )

# ---- 26. Frequency of each light configuration in the treatment dataset ------
# One row per treatment, one Frequency_<red><blue> column per configuration:
# share of the treatment's observations showing that configuration.
freq_config_df <- lapply(c("AND", "OR", "EITHER", "JOINT", "INHIBIT"), function(treat) {
  freqs <- case1_obs[[paste0("freq_", treat)]]
  N     <- sum(freqs)
  config_lookup %>%
    mutate(
      freq_config = (freqs[row_s0] + freqs[row_s1]) / N,
      treatment   = treat,
      varname     = paste0("Frequency_", gsub("[(), ]", "", config))
    ) %>%
    select(treatment, varname, freq_config)
}) %>%
  bind_rows() %>%
  pivot_wider(names_from = varname, values_from = freq_config)

# Same factor conversion as for the Step 25 join, for the same reason.
df_final <- df_final %>%
  left_join(
    freq_config_df %>%
      mutate(treatment = factor(
        treatment,
        levels = c("AND", "OR", "INHIBIT", "EITHER", "JOINT")
      )),
    by = "treatment"
  )