# ============================================================================= # Structural_modelselection.R — Model selection via AIC / BIC # for the Shaded Lights Experiment (3 Lights) # # Strategy: # Baseline model : 1D + 2D rules + stochastic (always included) # Full model : 1D + 2D + ALL 218 3D rules + stochastic # # Information criteria (lower = better): # AIC = -2 * LL + 2 * k # BIC = -2 * LL + k * ln(n) [more conservative than AIC] # where k = model$num.par and n = number of choices in the treatment. # # Three model-selection steps per treatment: # 1) Baseline vs Full : head-to-head with all 218 three-variable rules # 2) Marginal 3D contribution : each of the 218 rules added alone to baseline # → pre-screening filter (delta_BIC < -BIC_SCREEN_THRESHOLD) # 3) Greedy forward selection : runs only on the rules that passed screening, # keeping computation tractable # # Final selected model = lowest BIC among {Full, Greedy}. # # Outputs: # ModelSelection_BaselineVsFull.csv — step-1 comparison table # ModelSelection_Marginal3D.csv — step-2 per-rule deltas (all 218 rules) # ModelSelection_Greedy.csv — step-3 greedy summary # ModelSelection_FinalShares.csv — final strategy shares per treatment # ============================================================================= # ============================================================ # 0) Packages # ============================================================ library(dplyr) library(tidyr) library(stringr) library(stratEst) # ============================================================ # 1) Load data # ============================================================ script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path) source(file.path(script_dir, "DOfile_realexp_3lights (2).R")) df_strat <- df_final %>% rename( id = subject_id, period = Guess_Number, input = Light_Config ) %>% mutate( choice = as.character(Guess), treatment = as.character(treatment), game = 1L ) %>% select(id, period, choice, input, treatment, game) %>% arrange(id, treatment, period) cat("Dataset ready:", nrow(df_strat), "rows |", n_distinct(df_strat$id), "subjects |", n_distinct(df_strat$treatment), "treatments\n") # ============================================================ # 2) Build all 256 deterministic strategies # ============================================================ choice_labels <- c("0", "1") input_vals_8 <- c("(0,0,0)", "(0,0,1)", "(0,1,0)", "(0,1,1)", "(1,0,0)", "(1,0,1)", "(1,1,0)", "(1,1,1)") inputs_strat_8 <- c(NA, input_vals_8) tr_8state <- as.numeric(rep(1:8, times = 8)) make_det_strategy_8 <- function(actions) { stopifnot(length(actions) == 8, all(actions %in% c(0L, 1L))) pc <- as.double(unlist(lapply(actions, function(a) c(1 - a, a)))) stratEst.strategy( choices = choice_labels, inputs = inputs_strat_8, num.states = 8, tr.inputs = tr_8state, prob.choices = pc ) } all_det <- setNames( lapply(0:255, function(j) { actions <- as.integer(intToBits(j))[1:8] make_det_strategy_8(actions) }), sapply(0:255, function(j) paste0("s_", paste(as.integer(intToBits(j))[1:8], collapse = "")) ) ) # Apply meaningful aliases meaningful_aliases <- c( # 1D always_0 = 0L, always_1 = 255L, green = 170L, not_green = 85L, blue = 204L, not_blue = 51L, red = 240L, not_red = 15L, # 2D (red, blue) NOR_rb = 3L, INHIBIT_br = 12L, INHIBIT_rb = 48L, EITHER_rb = 60L, NAND_rb = 63L, AND_rb = 192L, OR_rb = 252L, JOINT_rb = 195L, not_INHIBIT_rb = 207L, not_INHIBIT_br = 243L, # 2D (red, green) NOR_rg = 5L, INHIBIT_gr = 10L, AND_rg = 160L, OR_rg = 250L, INHIBIT_rg = 80L, EITHER_rg = 90L, JOINT_rg = 165L, NAND_rg = 95L, not_INHIBIT_rg = 175L, not_INHIBIT_gr = 245L, # 2D (blue, green) NOR_bg = 17L, INHIBIT_gb = 34L, AND_bg = 136L, OR_bg = 238L, INHIBIT_bg = 68L, EITHER_bg = 102L, JOINT_bg = 153L, NAND_bg = 119L, not_INHIBIT_bg = 187L, not_INHIBIT_gb = 221L, # 3D NOR3 = 1L, green_alone = 2L, blue_alone = 4L, blue_green_only = 8L, red_alone = 16L, red_green_only = 32L, red_blue_only = 64L, AND3 = 128L, minority = 23L, majority = 232L, ODD_parity = 150L, EVEN_parity = 105L, NAND3 = 127L, OR3 = 254L ) for (nice_name in names(meaningful_aliases)) { j <- meaningful_aliases[[nice_name]] old <- paste0("s_", paste(as.integer(intToBits(j))[1:8], collapse = "")) idx <- match(old, names(all_det)) if (!is.na(idx)) names(all_det)[idx] <- nice_name } random <- stratEst.strategy( choices = choice_labels, inputs = inputs_strat_8, num.states = 1, tr.inputs = rep(1, 8), prob.choices = c(0.5, 0.5) ) always_p <- stratEst.strategy( choices = choice_labels, inputs = inputs_strat_8, num.states = 1, tr.inputs = rep(1, 8), prob.choices = c(NA_real_, NA_real_) ) # ============================================================ # 3) Define strategy groups # ============================================================ # Screening threshold: a 3D rule must reduce BIC by more than this # (i.e. delta_BIC < -BIC_SCREEN_THRESHOLD) to enter the greedy step. # Raise to be more conservative; lower to admit more candidates. BIC_SCREEN_THRESHOLD <- 6 names_1d <- c( "always_0", "always_1", "green", "not_green", "blue", "not_blue", "red", "not_red" ) names_2d <- c( "NOR_rb", "INHIBIT_br", "INHIBIT_rb", "EITHER_rb", "NAND_rb", "AND_rb", "OR_rb", "JOINT_rb", "not_INHIBIT_rb", "not_INHIBIT_br", "NOR_rg", "INHIBIT_gr", "AND_rg", "OR_rg", "INHIBIT_rg", "EITHER_rg", "JOINT_rg", "NAND_rg", "not_INHIBIT_rg", "not_INHIBIT_gr", "NOR_bg", "INHIBIT_gb", "AND_bg", "OR_bg", "INHIBIT_bg", "EITHER_bg", "JOINT_bg", "NAND_bg", "not_INHIBIT_bg", "not_INHIBIT_gb" ) # All 218 three-variable rules: everything in all_det not classified as 1D or 2D names_3d_all <- setdiff(names(all_det), c(names_1d, names_2d)) strats_baseline <- c( all_det[names_1d], all_det[names_2d], list(random = random, always_p = always_p) ) strats_full <- c( all_det[names_1d], all_det[names_2d], all_det[names_3d_all], list(random = random, always_p = always_p) ) cat(sprintf("Baseline strategies : %d (1D + 2D + stochastic)\n", length(strats_baseline))) cat(sprintf("Full strategies : %d (1D + 2D + all 218 3D + stochastic)\n", length(strats_full))) cat(sprintf("3D candidate pool : %d rules\n", length(names_3d_all))) cat(sprintf("BIC screening threshold: %.1f\n", BIC_SCREEN_THRESHOLD)) # ============================================================ # 4) Helper: fit model and return LL / k / AIC / BIC # ============================================================ fit_model <- function(data, strategies, label = "") { n <- nrow(data) tryCatch({ mdl <- stratEst.model( data = data, strategies = strategies, inner.runs = 10, outer.runs = 1, inner.max = 10 ) k <- mdl$num.par LL <- mdl$loglike AIC <- -2 * LL + 2 * k BIC <- -2 * LL + k * log(n) list( model = mdl, LL = LL, k = k, n = n, AIC = AIC, BIC = BIC, label = label, error = NULL ) }, error = function(e) { cat(" ERROR [", label, "]:", conditionMessage(e), "\n") list( model = NULL, LL = NA_real_, k = NA_integer_, n = n, AIC = NA_real_, BIC = NA_real_, label = label, error = conditionMessage(e) ) }) } # ============================================================ # 5) Step 1 — Baseline vs Full # ============================================================ all_treatments <- sort(unique(df_strat$treatment)) comparison_rows <- vector("list", length(all_treatments)) names(comparison_rows) <- all_treatments res_baseline_all <- vector("list", length(all_treatments)) res_full_all <- vector("list", length(all_treatments)) names(res_baseline_all) <- all_treatments names(res_full_all) <- all_treatments for (TREAT in all_treatments) { cat("\n========== Step 1 | Treatment:", TREAT, "==========\n") df_t <- df_strat %>% filter(treatment == TREAT) %>% mutate(treatment = as.factor(treatment)) cat(" Rows:", nrow(df_t), "| Subjects:", n_distinct(df_t$id), "\n") cat(" Fitting baseline...\n") rb <- fit_model(df_t, strats_baseline, label = "baseline") res_baseline_all[[TREAT]] <- rb cat(sprintf(" LL=%8.3f k=%d AIC=%9.3f BIC=%9.3f\n", rb$LL, rb$k, rb$AIC, rb$BIC)) cat(" Fitting full...\n") rf <- fit_model(df_t, strats_full, label = "full") res_full_all[[TREAT]] <- rf cat(sprintf(" LL=%8.3f k=%d AIC=%9.3f BIC=%9.3f\n", rf$LL, rf$k, rf$AIC, rf$BIC)) # Lower is better; delta < 0 means full improves over baseline dAIC <- rf$AIC - rb$AIC dBIC <- rf$BIC - rb$BIC winner_AIC <- if (!is.na(dAIC)) ifelse(dAIC < 0, "full", "baseline") else NA_character_ winner_BIC <- if (!is.na(dBIC)) ifelse(dBIC < 0, "full", "baseline") else NA_character_ cat(sprintf(" delta_AIC = %+.3f (%s wins) | delta_BIC = %+.3f (%s wins)\n", dAIC, winner_AIC, dBIC, winner_BIC)) comparison_rows[[TREAT]] <- data.frame( treatment = TREAT, n_obs = rb$n, LL_baseline = rb$LL, k_baseline = rb$k, AIC_baseline = rb$AIC, BIC_baseline = rb$BIC, LL_full = rf$LL, k_full = rf$k, AIC_full = rf$AIC, BIC_full = rf$BIC, delta_AIC = dAIC, delta_BIC = dBIC, winner_AIC = winner_AIC, winner_BIC = winner_BIC, stringsAsFactors = FALSE ) } COMPARISON <- bind_rows(comparison_rows) cat("\n=== Step 1 — Baseline vs Full ===\n") print(COMPARISON %>% mutate(across(where(is.numeric), ~round(., 3)))) write.csv(COMPARISON, file.path(script_dir, "ModelSelection_BaselineVsFull.csv"), row.names = FALSE) cat("Saved ModelSelection_BaselineVsFull.csv\n") # ============================================================ # 6) Step 2 — Marginal contribution of all 218 three-variable rules # (baseline + one rule at a time; report delta AIC/BIC) # This is the pre-screening step: rules with delta_BIC < -BIC_SCREEN_THRESHOLD # are passed to the greedy step; the rest are discarded. # ============================================================ cat("\n\n===== Step 2 — Marginal 3D contribution (all 218 rules) =====\n") cat(sprintf("Screening threshold: delta_BIC < -%.1f\n", BIC_SCREEN_THRESHOLD)) marginal_rows <- list() for (TREAT in all_treatments) { cat("\n Treatment:", TREAT, "(", length(names_3d_all), "rules to test)\n") df_t <- df_strat %>% filter(treatment == TREAT) %>% mutate(treatment = as.factor(treatment)) bic_base <- comparison_rows[[TREAT]]$BIC_baseline aic_base <- comparison_rows[[TREAT]]$AIC_baseline for (rule3 in names_3d_all) { strats_candidate <- c(strats_baseline, all_det[rule3]) res_c <- fit_model(df_t, strats_candidate, label = paste0("baseline+", rule3)) marginal_rows[[paste(TREAT, rule3, sep = "_")]] <- data.frame( treatment = TREAT, rule_3d = rule3, LL = res_c$LL, k = res_c$k, AIC = res_c$AIC, BIC = res_c$BIC, delta_AIC = res_c$AIC - aic_base, delta_BIC = res_c$BIC - bic_base, improves_AIC = !is.na(res_c$AIC) & res_c$AIC < aic_base, improves_BIC = !is.na(res_c$BIC) & res_c$BIC < bic_base, stringsAsFactors = FALSE ) cat(sprintf(" %-20s delta_AIC=%+7.3f delta_BIC=%+7.3f\n", rule3, res_c$AIC - aic_base, res_c$BIC - bic_base)) } } MARGINAL_3D <- bind_rows(marginal_rows) cat("\n=== Step 2 — Marginal 3D Summary (rules that improve BIC) ===\n") print( MARGINAL_3D %>% filter(improves_BIC) %>% arrange(treatment, delta_BIC) %>% mutate(across(where(is.numeric), ~round(., 3))) ) write.csv(MARGINAL_3D, file.path(script_dir, "ModelSelection_Marginal3D.csv"), row.names = FALSE) cat("Saved ModelSelection_Marginal3D.csv\n") # ============================================================ # 6.5) Screening — keep only rules with delta_BIC < -BIC_SCREEN_THRESHOLD # per treatment; these form the candidate pool for Step 3. # ============================================================ screened_3d <- lapply(all_treatments, function(TREAT) { rules <- MARGINAL_3D %>% filter(treatment == TREAT, delta_BIC < -BIC_SCREEN_THRESHOLD) %>% arrange(delta_BIC) %>% pull(rule_3d) rules }) names(screened_3d) <- all_treatments cat("\n=== Screening results (rules passing delta_BIC < -", BIC_SCREEN_THRESHOLD, ") ===\n", sep = "") for (TREAT in all_treatments) { n_pass <- length(screened_3d[[TREAT]]) cat(sprintf(" %s: %d / %d rules pass\n", TREAT, n_pass, length(names_3d_all))) if (n_pass > 0) cat(" ", paste(screened_3d[[TREAT]], collapse = ", "), "\n") } # ============================================================ # 7) Step 3 — Greedy forward selection (BIC-based) # Operates only on the rules that passed the Step 2 screen. # Worst-case models per treatment = sum(1:n_screened), which # is small (typically < 50 rules pass) and tractable. # ============================================================ cat("\n\n===== Step 3 — Greedy forward selection (BIC, screened candidates) =====\n") greedy_rows <- list() greedy_models <- vector("list", length(all_treatments)) names(greedy_models) <- all_treatments for (TREAT in all_treatments) { n_cand <- length(screened_3d[[TREAT]]) cat(sprintf("\n Treatment: %s | %d screened candidate(s)\n", TREAT, n_cand)) df_t <- df_strat %>% filter(treatment == TREAT) %>% mutate(treatment = as.factor(treatment)) current_strats <- strats_baseline current_BIC <- comparison_rows[[TREAT]]$BIC_baseline remaining_3d <- screened_3d[[TREAT]] # only screened rules added_rules <- character(0) step_log <- list() if (length(remaining_3d) == 0) { cat(" No rules passed screening — keeping baseline.\n") greedy_models[[TREAT]] <- res_baseline_all[[TREAT]]$model } repeat { if (length(remaining_3d) == 0) break step_res <- lapply(remaining_3d, function(rule3) { res <- fit_model(df_t, c(current_strats, all_det[rule3]), label = rule3) data.frame(rule = rule3, BIC = res$BIC, model_obj = I(list(res$model)), stringsAsFactors = FALSE) }) step_df <- bind_rows(step_res) best_idx <- which.min(step_df$BIC) best_BIC <- step_df$BIC[best_idx] best_rule <- step_df$rule[best_idx] if (!is.na(best_BIC) && best_BIC < current_BIC) { cat(sprintf(" + '%s': BIC %.3f -> %.3f (delta = %+.3f)\n", best_rule, current_BIC, best_BIC, best_BIC - current_BIC)) step_log[[length(step_log) + 1]] <- data.frame( treatment = TREAT, step = length(added_rules) + 1L, rule_added = best_rule, BIC_before = current_BIC, BIC_after = best_BIC, delta_BIC = best_BIC - current_BIC, stringsAsFactors = FALSE ) added_rules <- c(added_rules, best_rule) current_strats <- c(current_strats, all_det[best_rule]) current_BIC <- best_BIC remaining_3d <- setdiff(remaining_3d, best_rule) # Save the best model object for this step greedy_models[[TREAT]] <- step_df$model_obj[[best_idx]] } else { cat(" No further BIC improvement. Stopping.\n") break } } # If no 3D rule was ever added, keep the baseline model if (length(added_rules) == 0) { greedy_models[[TREAT]] <- res_baseline_all[[TREAT]]$model } greedy_rows[[TREAT]] <- data.frame( treatment = TREAT, added_3d_rules = if (length(added_rules) > 0) paste(added_rules, collapse = ", ") else "(none)", n_rules_added = length(added_rules), final_BIC = current_BIC, baseline_BIC = comparison_rows[[TREAT]]$BIC_baseline, BIC_improvement = comparison_rows[[TREAT]]$BIC_baseline - current_BIC, stringsAsFactors = FALSE ) cat(sprintf(" Final greedy model: %d 3D rule(s) added (%s) | BIC = %.3f\n", length(added_rules), if (length(added_rules) > 0) paste(added_rules, collapse = ", ") else "none", current_BIC)) if (length(step_log) > 0) { step_df_out <- bind_rows(step_log) cat(" Greedy steps:\n") print(step_df_out %>% mutate(across(where(is.numeric), ~round(., 3)))) } } GREEDY_SUMMARY <- bind_rows(greedy_rows) cat("\n=== Step 3 — Greedy Forward Selection Summary ===\n") print(GREEDY_SUMMARY %>% mutate(across(where(is.numeric), ~round(., 3)))) write.csv(GREEDY_SUMMARY, file.path(script_dir, "ModelSelection_Greedy.csv"), row.names = FALSE) cat("Saved ModelSelection_Greedy.csv\n") # ============================================================ # 8) Final model selection — lowest BIC among {Full, Greedy} # (Greedy is always >= Baseline, so Baseline need not be # compared separately; Full may outperform Greedy if adding # many 3D rules jointly outweighs the BIC penalty.) # ============================================================ cat("\n\n===== Final model selection (best BIC) =====\n") final_rows <- list() final_models <- vector("list", length(all_treatments)) names(final_models) <- all_treatments for (TREAT in all_treatments) { bic_full <- comparison_rows[[TREAT]]$BIC_full bic_greedy <- greedy_rows[[TREAT]]$final_BIC if (!is.na(bic_full) && !is.na(bic_greedy)) { if (bic_full < bic_greedy) { winner <- "full" final_BIC <- bic_full final_models[[TREAT]] <- res_full_all[[TREAT]]$model } else { winner <- "greedy" final_BIC <- bic_greedy final_models[[TREAT]] <- greedy_models[[TREAT]] } } else if (!is.na(bic_full)) { winner <- "full" final_BIC <- bic_full final_models[[TREAT]] <- res_full_all[[TREAT]]$model } else { winner <- "greedy" final_BIC <- bic_greedy final_models[[TREAT]] <- greedy_models[[TREAT]] } cat(sprintf(" %s -> winner: %s | BIC = %.3f\n", TREAT, winner, final_BIC)) final_rows[[TREAT]] <- data.frame( treatment = TREAT, BIC_baseline = comparison_rows[[TREAT]]$BIC_baseline, BIC_full = bic_full, BIC_greedy = bic_greedy, selected_model = winner, final_BIC = final_BIC, stringsAsFactors = FALSE ) } FINAL_SELECTION <- bind_rows(final_rows) cat("\n=== Final Model Selection ===\n") print(FINAL_SELECTION %>% mutate(across(where(is.numeric), ~round(., 3)))) # ============================================================ # 8.5) Step 4 — Prune non-significant shares and re-estimate # For each strategy in the winning model, we test whether # its population share is significantly greater than zero # via a likelihood ratio test (LRT): # - Fit model WITHOUT strategy s → LL_drop # - LRT = 2 * (LL_win - LL_drop) ~ chi-sq(1) under H0 # - p-value = 1 - pchisq(LRT, df = 1) # Strategies with p > LRT_ALPHA (share not significantly # different from 0) are dropped. The model is then # re-estimated one final time with the pruned strategy set. # final_models is updated so sections 9-12 use pruned models. # ============================================================ LRT_ALPHA <- 0.05 # significance level for the LRT (one df) cat(sprintf("\n\n===== Step 4 — Prune non-significant shares (LRT, alpha = %.2f) =====\n", LRT_ALPHA)) # Full pool of named strategy objects for look-up by name strat_pool <- c(all_det, list(random = random, always_p = always_p)) prune_rows <- list() pruned_models <- vector("list", length(all_treatments)) names(pruned_models) <- all_treatments for (TREAT in all_treatments) { cat(sprintf("\n Treatment: %s\n", TREAT)) win_model <- final_models[[TREAT]] if (is.null(win_model)) { pruned_models[[TREAT]] <- NULL next } df_t <- df_strat %>% filter(treatment == TREAT) %>% mutate(treatment = as.factor(treatment)) win_strat_names <- colnames(win_model$post.assignment) LL_win <- win_model$loglike shares_vec <- setNames(as.numeric(win_model$shares), win_strat_names) cat(sprintf(" Winning model strategies (%d): %s\n", length(win_strat_names), paste(win_strat_names, collapse = ", "))) cat(sprintf(" Shares: %s\n", paste(sprintf("%s=%.4f", win_strat_names, shares_vec), collapse = ", "))) cat(sprintf(" LL of winning model: %.3f\n", LL_win)) # Only test strategies present in the pool (lookup by name) testable <- win_strat_names[win_strat_names %in% names(strat_pool)] # Strategies with share already == 0 are trivially non-significant; skip LRT zero_share <- testable[shares_vec[testable] == 0] nonzero_strats <- testable[shares_vec[testable] > 0] cat(sprintf(" Strategies with share = 0 (auto-drop): %s\n", if (length(zero_share) > 0) paste(zero_share, collapse = ", ") else "(none)")) cat(sprintf(" Strategies with share > 0 (to test): %s\n", if (length(nonzero_strats) > 0) paste(nonzero_strats, collapse = ", ") else "(none)")) # LRT for each nonzero-share strategy lrt_rows <- lapply(nonzero_strats, function(s) { remaining <- setdiff(testable, c(s, zero_share)) # drop s and already-zero ones if (length(remaining) == 0) { # Cannot drop the only strategy; keep it return(data.frame(strategy = s, LL_drop = NA_real_, LRT = NA_real_, p_value = NA_real_, keep = TRUE, stringsAsFactors = FALSE)) } res_drop <- fit_model(df_t, strat_pool[remaining], label = paste0("drop_", s)) LRT_stat <- 2 * max(0, LL_win - res_drop$LL) # LRT >= 0 by definition p_val <- 1 - pchisq(LRT_stat, df = 1) data.frame( strategy = s, LL_drop = res_drop$LL, LRT = LRT_stat, p_value = p_val, keep = p_val < LRT_ALPHA, stringsAsFactors = FALSE ) }) if (length(lrt_rows) > 0) { lrt_df <- bind_rows(lrt_rows) cat(" LRT results:\n") print(lrt_df %>% mutate(across(where(is.numeric), ~round(., 4)))) } else { lrt_df <- data.frame(strategy = character(0), LL_drop = numeric(0), LRT = numeric(0), p_value = numeric(0), keep = logical(0)) } sig_strats <- lrt_df$strategy[lrt_df$keep] insig_strats <- lrt_df$strategy[!lrt_df$keep] # Strategies to retain in final pruned model kept_names <- c(sig_strats) # significant nonzero-share strategies only dropped_names <- c(zero_share, insig_strats) cat(sprintf(" Keep (%d): %s\n", length(kept_names), if (length(kept_names) > 0) paste(kept_names, collapse = ", ") else "(none)")) cat(sprintf(" Drop (%d): %s\n", length(dropped_names), if (length(dropped_names) > 0) paste(dropped_names, collapse = ", ") else "(none)")) if (length(dropped_names) == 0) { cat(" No strategies dropped — keeping winning model as-is.\n") pruned_models[[TREAT]] <- win_model } else if (length(kept_names) == 0) { cat(" WARNING: all strategies would be dropped — keeping winning model.\n") pruned_models[[TREAT]] <- win_model kept_names <- win_strat_names dropped_names <- character(0) } else { kept_in_pool <- kept_names[kept_names %in% names(strat_pool)] pruned_strats <- strat_pool[kept_in_pool] cat(sprintf(" Re-estimating pruned model with %d strategies...\n", length(pruned_strats))) res_pruned <- fit_model(df_t, pruned_strats, label = paste0("pruned_", TREAT)) cat(sprintf(" Pruned model: LL=%.3f k=%d BIC=%.3f (winning BIC was %.3f)\n", res_pruned$LL, res_pruned$k, res_pruned$BIC, FINAL_SELECTION$final_BIC[FINAL_SELECTION$treatment == TREAT])) pruned_models[[TREAT]] <- res_pruned$model } prune_rows[[TREAT]] <- data.frame( treatment = TREAT, strategies_kept = paste(kept_names, collapse = ", "), strategies_dropped = if (length(dropped_names) > 0) paste(dropped_names, collapse = ", ") else "(none)", n_kept = length(kept_names), n_dropped = length(dropped_names), stringsAsFactors = FALSE ) } PRUNE_SUMMARY <- bind_rows(prune_rows) cat("\n=== Step 4 — Pruning Summary ===\n") print(PRUNE_SUMMARY) write.csv(PRUNE_SUMMARY, file.path(script_dir, "ModelSelection_PrunedShares.csv"), row.names = FALSE) cat("Saved ModelSelection_PrunedShares.csv\n") # Update final_models with pruned models; sections 9-12 read final_models final_models <- pruned_models cat("\nfinal_models updated — downstream output (sections 9-12) uses pruned models.\n") # ============================================================ # 9) Extract and save strategy shares of the final model # ============================================================ final_shares_rows <- list() for (TREAT in all_treatments) { mdl <- final_models[[TREAT]] if (is.null(mdl)) next shares_vec <- as.numeric(mdl$shares) #names(shares_vec) <- rownames(mdl$shares) # strategy names names(shares_vec) <- colnames(mdl$post.assignment) # Keep only non-zero shares shares_nz <- shares_vec[shares_vec > 0] shares_nz <- sort(shares_nz, decreasing = TRUE) final_shares_rows[[TREAT]] <- data.frame( treatment = TREAT, selected_model = FINAL_SELECTION$selected_model[FINAL_SELECTION$treatment == TREAT], final_BIC = FINAL_SELECTION$final_BIC[FINAL_SELECTION$treatment == TREAT], strategy = names(shares_nz), share = round(shares_nz, 4), row.names = NULL, stringsAsFactors = FALSE ) } FINAL_SHARES <- bind_rows(final_shares_rows) cat("\n=== Final Model — Strategy Shares ===\n") print(FINAL_SHARES) write.csv(FINAL_SHARES, file.path(script_dir, "ModelSelection_FinalShares.csv"), row.names = FALSE) cat("Saved ModelSelection_FinalShares.csv\n") # ============================================================ # 10) Bar plots — top 10 strategies per treatment (final model) # ============================================================ for (TREAT in sort(unique(FINAL_SHARES$treatment))) { df_plot <- FINAL_SHARES %>% filter(treatment == TREAT) %>% arrange(desc(share)) %>% slice_head(n = 10) if (nrow(df_plot) == 0) next sel <- FINAL_SELECTION$selected_model[FINAL_SELECTION$treatment == TREAT] old_mar <- par(mar = c(10, 4, 3, 1)) barplot( setNames(df_plot$share, df_plot$strategy), main = paste0("Final model shares — ", TREAT, " (", sel, ")"), ylab = "share", ylim = c(0, 1), las = 2, cex.names = 0.75 ) par(old_mar) } cat("\nDone.\n") # ============================================================ # 11) ESTIMATION_RESULTS — wide-format shares of the final model # Same interface as Structural_3lights.R / Structural_allrules.R: # one row per treatment, columns: treatment | loglike | num_par | # | | … # Strategies absent from a treatment's winning model get NA. # Analysis_3lights.R reads this via env$ESTIMATION_RESULTS. # ============================================================ results_ms_list <- vector("list", length(all_treatments)) names(results_ms_list) <- all_treatments for (TREAT in all_treatments) { mdl <- final_models[[TREAT]] if (is.null(mdl)) { results_ms_list[[TREAT]] <- data.frame( treatment = TREAT, loglike = NA_real_, num_par = NA_integer_, stringsAsFactors = FALSE ) next } shares_vec <- as.numeric(mdl$shares) names(shares_vec) <- colnames(mdl$post.assignment) results_ms_list[[TREAT]] <- data.frame( treatment = TREAT, loglike = mdl$loglike, num_par = mdl$num.par, t(shares_vec), check.names = FALSE, stringsAsFactors = FALSE ) } ESTIMATION_RESULTS <- bind_rows(results_ms_list) %>% arrange(treatment) ms_strat_cols <- setdiff(names(ESTIMATION_RESULTS), c("treatment", "loglike", "num_par")) ms_nonzero_cols <- ms_strat_cols[ colSums(ESTIMATION_RESULTS[, ms_strat_cols, drop = FALSE], na.rm = TRUE) > 0 ] ESTIMATION_RESULTS <- ESTIMATION_RESULTS[, c("treatment", "loglike", "num_par", ms_nonzero_cols) ] cat("\n=== ESTIMATION_RESULTS (model-selected, for Analysis_3lights.R) ===\n") print(ESTIMATION_RESULTS %>% mutate(across(where(is.numeric), ~round(., 4)))) # ============================================================ # 12) df_final_post — df_final augmented with posteriors from # the winning model per treatment, plus Rule_used & posterior. # Same interface as Structural_3lights.R. # Analysis_3lights.R reads this via env$df_final_post. # ============================================================ post_summary_list <- vector("list", length(all_treatments)) names(post_summary_list) <- all_treatments for (TREAT in all_treatments) { mdl <- final_models[[TREAT]] if (is.null(mdl) || is.null(mdl$post.assignment)) next post_mat <- mdl$post.assignment strat_names <- colnames(post_mat) post_df <- as.data.frame(post_mat, row.names = NULL) colnames(post_df) <- paste0("post_", strat_names) post_df$subject_id <- as.integer(sub("^id", "", rownames(post_mat))) post_df$treatment <- TREAT # Assign Rule_used / posterior within this treatment's model post_mx <- as.matrix(post_df[, paste0("post_", strat_names), drop = FALSE]) best_idx <- max.col(post_mx, ties.method = "first") post_df$Rule_used <- strat_names[best_idx] post_df$posterior <- round(post_mx[cbind(seq_len(nrow(post_mx)), best_idx)], 2) post_summary_list[[TREAT]] <- post_df } # bind_rows fills missing post_ columns with NA across treatments df_posteriors_ms <- bind_rows(post_summary_list) %>% mutate(treatment = as.character(treatment)) post_join_cols <- c( grep("^post_", names(df_posteriors_ms), value = TRUE), "Rule_used", "posterior" ) df_final_post <- df_final %>% mutate(treatment = as.character(treatment)) %>% left_join( df_posteriors_ms %>% select(subject_id, treatment, all_of(post_join_cols)), by = c("subject_id", "treatment") ) cat("\n=== df_final_post (model-selected, for Analysis_3lights.R) ===\n") cat("Rows:", nrow(df_final_post), "| Cols:", ncol(df_final_post), "\n") cat("Rule_used values:", paste(sort(unique(df_final_post$Rule_used)), collapse = ", "), "\n") cat("Missing Rule_used:", sum(is.na(df_final_post$Rule_used)), "\n")