# ============================================================================= # Structural_modelselection_2lights1.R — Model selection via BIC # for the Shaded Lights Experiment (2 Lights, Dataset 1) # # Strategy: # Baseline model : 7 core rules (red, blue_alone, AND, OR, EITHER, INHIBIT, JOINT) # + stochastic strategies (random, always_p) # Full model : all 16 deterministic rules + stochastic # # Information criteria (lower = better): # AIC = -2 * LL + 2 * k # BIC = -2 * LL + k * ln(n) # where k = model$num.par and n = number of choices in the treatment. # # Three model-selection steps per treatment: # 1) Baseline vs Full : head-to-head comparison # 2) Marginal contribution : each of the 9 non-baseline rules added alone to baseline # → pre-screening filter (delta_BIC < -BIC_SCREEN_THRESHOLD) # 3) Greedy forward selection : runs only on rules that passed screening # # Final selected model = lowest BIC among {Full, Greedy}. # Followed by LRT pruning of non-significant shares. # # Outputs: # ModelSelection_2lights1_BaselineVsFull.csv # ModelSelection_2lights1_MarginalOther.csv # ModelSelection_2lights1_Greedy.csv # ModelSelection_2lights1_PrunedShares.csv # ModelSelection_2lights1_FinalShares.csv # ============================================================================= # ============================================================ # 0) Packages # ============================================================ library(dplyr) library(tidyr) library(stringr) library(stratEst) # ============================================================ # 1) Load data # ============================================================ script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path) source(file.path(script_dir, "DOfile_realexp_1.R")) df_strat <- df_final %>% rename( id = subject_id, period = Guess_Number, input = Light_Config ) %>% mutate( choice = as.character(Guess), treatment = as.character(treatment), game = 1L ) %>% select(id, period, choice, input, treatment, game) %>% arrange(id, treatment, period) cat("Dataset ready:", nrow(df_strat), "rows |", n_distinct(df_strat$id), "subjects |", n_distinct(df_strat$treatment), "treatments\n") cat("Choice values:", paste(sort(unique(df_strat$choice)), collapse = ", "), "\n") cat("Input values: ", paste(sort(unique(df_strat$input)), collapse = ", "), "\n") # ============================================================ # 2) Strategy building blocks # ============================================================ choice_labels <- c("0", "1") input_vals <- c("(0,0)", "(0,1)", "(1,0)", "(1,1)") inputs_strat <- c(NA, input_vals) tr_4state <- rep(c(1, 2, 3, 4), times = 4) make_det_strategy <- function(a1, a2, a3, a4) { pc <- as.double(c(1-a1, a1, 1-a2, a2, 1-a3, a3, 1-a4, a4)) stratEst.strategy( choices = choice_labels, inputs = inputs_strat, num.states = 4, tr.inputs = tr_4state, prob.choices = pc ) } # ============================================================ # 3) Build all 16 deterministic strategies + stochastic # ============================================================ det_strategies <- list( always_0 = make_det_strategy(0, 0, 0, 0), # never predict sound AND = make_det_strategy(0, 0, 0, 1), # sound iff both lights on INHIBIT = make_det_strategy(0, 0, 1, 0), # sound iff red on, blue off red = make_det_strategy(0, 0, 1, 1), # sound whenever red on blue_alone = make_det_strategy(0, 1, 0, 0), # sound iff blue on, red off blue = make_det_strategy(0, 1, 0, 1), # sound whenever blue on EITHER = make_det_strategy(0, 1, 1, 0), # sound iff exactly one on (XOR) OR = make_det_strategy(0, 1, 1, 1), # sound iff at least one on NOR = make_det_strategy(1, 0, 0, 0), # sound iff both off JOINT = make_det_strategy(1, 0, 0, 1), # sound iff same state not_blue = make_det_strategy(1, 0, 1, 0), # sound whenever blue off not_blue_alone = make_det_strategy(1, 0, 1, 1), # sound unless only blue on not_red = make_det_strategy(1, 1, 0, 0), # sound whenever red off not_INHIBIT = make_det_strategy(1, 1, 0, 1), # sound unless only red on NAND = make_det_strategy(1, 1, 1, 0), # sound unless both on always_1 = make_det_strategy(1, 1, 1, 1) # always predict sound ) random <- stratEst.strategy( choices = choice_labels, inputs = inputs_strat, num.states = 1, tr.inputs = c(1, 1, 1, 1), prob.choices = c(0.5, 0.5) ) always_p <- stratEst.strategy( choices = choice_labels, inputs = inputs_strat, num.states = 1, tr.inputs = c(1, 1, 1, 1), prob.choices = c(NA_real_, NA_real_) ) strategies_all <- c(det_strategies, list(random = random, always_p = always_p)) strat_pool <- strategies_all # used in LRT pruning step cat("Total strategies:", length(strategies_all), "\n") # ============================================================ # 4) Define strategy groups and BIC threshold # ============================================================ # Screening threshold: a non-baseline rule must reduce BIC by more than this # (i.e. delta_BIC < -BIC_SCREEN_THRESHOLD) to enter the greedy step. BIC_SCREEN_THRESHOLD <- 5 names_baseline <- c("red", "blue_alone", "AND", "OR", "EITHER", "INHIBIT", "JOINT") names_other <- setdiff(names(det_strategies), names_baseline) strats_baseline <- c( det_strategies[names_baseline], list(random = random, always_p = always_p) ) strats_full <- c( det_strategies, list(random = random, always_p = always_p) ) cat(sprintf("Baseline strategies : %d (7 core rules + stochastic)\n", length(strats_baseline))) cat(sprintf("Full strategies : %d (all 16 det + stochastic)\n", length(strats_full))) cat(sprintf("Candidate pool : %d rules\n", length(names_other))) cat(sprintf("BIC screening threshold: %.1f\n", BIC_SCREEN_THRESHOLD)) cat("Baseline rules:", paste(names_baseline, collapse = ", "), "\n") cat("Candidate pool:", paste(names_other, collapse = ", "), "\n") # ============================================================ # 5) Helper: fit model and return LL / k / AIC / BIC # ============================================================ fit_model <- function(data, strategies, label = "") { n <- nrow(data) tryCatch({ mdl <- stratEst.model( data = data, strategies = strategies, inner.runs = 10, outer.runs = 1, inner.max = 10 ) k <- mdl$num.par LL <- mdl$loglike AIC <- -2 * LL + 2 * k BIC <- -2 * LL + k * log(n) list( model = mdl, LL = LL, k = k, n = n, AIC = AIC, BIC = BIC, label = label, error = NULL ) }, error = function(e) { cat(" ERROR [", label, "]:", conditionMessage(e), "\n") list( model = NULL, LL = NA_real_, k = NA_integer_, n = n, AIC = NA_real_, BIC = NA_real_, label = label, error = conditionMessage(e) ) }) } # ============================================================ # 6) Step 1 — Baseline vs Full # ============================================================ all_treatments <- sort(unique(df_strat$treatment)) comparison_rows <- vector("list", length(all_treatments)) names(comparison_rows) <- all_treatments res_baseline_all <- vector("list", length(all_treatments)) res_full_all <- vector("list", length(all_treatments)) names(res_baseline_all) <- all_treatments names(res_full_all) <- all_treatments for (TREAT in all_treatments) { cat("\n========== Step 1 | Treatment:", TREAT, "==========\n") df_t <- df_strat %>% filter(treatment == TREAT) %>% mutate(treatment = as.factor(treatment)) cat(" Rows:", nrow(df_t), "| Subjects:", n_distinct(df_t$id), "\n") cat(" Fitting baseline...\n") rb <- fit_model(df_t, strats_baseline, label = "baseline") res_baseline_all[[TREAT]] <- rb cat(sprintf(" LL=%8.3f k=%d AIC=%9.3f BIC=%9.3f\n", rb$LL, rb$k, rb$AIC, rb$BIC)) cat(" Fitting full...\n") rf <- fit_model(df_t, strats_full, label = "full") res_full_all[[TREAT]] <- rf cat(sprintf(" LL=%8.3f k=%d AIC=%9.3f BIC=%9.3f\n", rf$LL, rf$k, rf$AIC, rf$BIC)) dAIC <- rf$AIC - rb$AIC dBIC <- rf$BIC - rb$BIC winner_AIC <- if (!is.na(dAIC)) ifelse(dAIC < 0, "full", "baseline") else NA_character_ winner_BIC <- if (!is.na(dBIC)) ifelse(dBIC < 0, "full", "baseline") else NA_character_ cat(sprintf(" delta_AIC = %+.3f (%s wins) | delta_BIC = %+.3f (%s wins)\n", dAIC, winner_AIC, dBIC, winner_BIC)) comparison_rows[[TREAT]] <- data.frame( treatment = TREAT, n_obs = rb$n, LL_baseline = rb$LL, k_baseline = rb$k, AIC_baseline = rb$AIC, BIC_baseline = rb$BIC, LL_full = rf$LL, k_full = rf$k, AIC_full = rf$AIC, BIC_full = rf$BIC, delta_AIC = dAIC, delta_BIC = dBIC, winner_AIC = winner_AIC, winner_BIC = winner_BIC, stringsAsFactors = FALSE ) } COMPARISON <- bind_rows(comparison_rows) cat("\n=== Step 1 — Baseline vs Full ===\n") print(COMPARISON %>% mutate(across(where(is.numeric), ~round(., 3)))) write.csv(COMPARISON, file.path(script_dir, "ModelSelection_2lights1_BaselineVsFull.csv"), row.names = FALSE) cat("Saved ModelSelection_2lights1_BaselineVsFull.csv\n") # ============================================================ # 7) Step 2 — Marginal contribution of each non-baseline rule # (baseline + one rule at a time; report delta AIC/BIC) # Pre-screening: rules with delta_BIC < -BIC_SCREEN_THRESHOLD # are passed to the greedy step. # ============================================================ cat("\n\n===== Step 2 — Marginal contribution of non-baseline rules =====\n") cat(sprintf("Screening threshold: delta_BIC < -%.1f\n", BIC_SCREEN_THRESHOLD)) marginal_rows <- list() for (TREAT in all_treatments) { cat("\n Treatment:", TREAT, "(", length(names_other), "rules to test)\n") df_t <- df_strat %>% filter(treatment == TREAT) %>% mutate(treatment = as.factor(treatment)) bic_base <- comparison_rows[[TREAT]]$BIC_baseline aic_base <- comparison_rows[[TREAT]]$AIC_baseline for (rule in names_other) { strats_candidate <- c(strats_baseline, det_strategies[rule]) res_c <- fit_model(df_t, strats_candidate, label = paste0("baseline+", rule)) marginal_rows[[paste(TREAT, rule, sep = "_")]] <- data.frame( treatment = TREAT, rule = rule, LL = res_c$LL, k = res_c$k, AIC = res_c$AIC, BIC = res_c$BIC, delta_AIC = res_c$AIC - aic_base, delta_BIC = res_c$BIC - bic_base, improves_AIC = !is.na(res_c$AIC) & res_c$AIC < aic_base, improves_BIC = !is.na(res_c$BIC) & res_c$BIC < bic_base, stringsAsFactors = FALSE ) cat(sprintf(" %-20s delta_AIC=%+7.3f delta_BIC=%+7.3f\n", rule, res_c$AIC - aic_base, res_c$BIC - bic_base)) } } MARGINAL_OTHER <- bind_rows(marginal_rows) cat("\n=== Step 2 — Marginal Summary (rules that improve BIC) ===\n") print( MARGINAL_OTHER %>% filter(improves_BIC) %>% arrange(treatment, delta_BIC) %>% mutate(across(where(is.numeric), ~round(., 3))) ) write.csv(MARGINAL_OTHER, file.path(script_dir, "ModelSelection_2lights1_MarginalOther.csv"), row.names = FALSE) cat("Saved ModelSelection_2lights1_MarginalOther.csv\n") # ============================================================ # 7.5) Screening — keep only rules with delta_BIC < -BIC_SCREEN_THRESHOLD # ============================================================ screened_rules <- lapply(all_treatments, function(TREAT) { MARGINAL_OTHER %>% filter(treatment == TREAT, delta_BIC < -BIC_SCREEN_THRESHOLD) %>% arrange(delta_BIC) %>% pull(rule) }) names(screened_rules) <- all_treatments cat("\n=== Screening results (rules passing delta_BIC < -", BIC_SCREEN_THRESHOLD, ") ===\n", sep = "") for (TREAT in all_treatments) { n_pass <- length(screened_rules[[TREAT]]) cat(sprintf(" %s: %d / %d rules pass\n", TREAT, n_pass, length(names_other))) if (n_pass > 0) cat(" ", paste(screened_rules[[TREAT]], collapse = ", "), "\n") } # ============================================================ # 8) Step 3 — Greedy forward selection (BIC-based) # Operates only on the rules that passed the Step 2 screen. # ============================================================ cat("\n\n===== Step 3 — Greedy forward selection (BIC, screened candidates) =====\n") greedy_rows <- list() greedy_models <- vector("list", length(all_treatments)) names(greedy_models) <- all_treatments for (TREAT in all_treatments) { n_cand <- length(screened_rules[[TREAT]]) cat(sprintf("\n Treatment: %s | %d screened candidate(s)\n", TREAT, n_cand)) df_t <- df_strat %>% filter(treatment == TREAT) %>% mutate(treatment = as.factor(treatment)) current_strats <- strats_baseline current_BIC <- comparison_rows[[TREAT]]$BIC_baseline remaining <- screened_rules[[TREAT]] added_rules <- character(0) step_log <- list() if (length(remaining) == 0) { cat(" No rules passed screening — keeping baseline.\n") greedy_models[[TREAT]] <- res_baseline_all[[TREAT]]$model } repeat { if (length(remaining) == 0) break step_res <- lapply(remaining, function(rule) { res <- fit_model(df_t, c(current_strats, det_strategies[rule]), label = rule) data.frame(rule = rule, BIC = res$BIC, model_obj = I(list(res$model)), stringsAsFactors = FALSE) }) step_df <- bind_rows(step_res) best_idx <- which.min(step_df$BIC) best_BIC <- step_df$BIC[best_idx] best_rule <- step_df$rule[best_idx] if (!is.na(best_BIC) && best_BIC < current_BIC) { cat(sprintf(" + '%s': BIC %.3f -> %.3f (delta = %+.3f)\n", best_rule, current_BIC, best_BIC, best_BIC - current_BIC)) step_log[[length(step_log) + 1]] <- data.frame( treatment = TREAT, step = length(added_rules) + 1L, rule_added = best_rule, BIC_before = current_BIC, BIC_after = best_BIC, delta_BIC = best_BIC - current_BIC, stringsAsFactors = FALSE ) added_rules <- c(added_rules, best_rule) current_strats <- c(current_strats, det_strategies[best_rule]) current_BIC <- best_BIC remaining <- setdiff(remaining, best_rule) greedy_models[[TREAT]] <- step_df$model_obj[[best_idx]] } else { cat(" No further BIC improvement. Stopping.\n") break } } if (length(added_rules) == 0) { greedy_models[[TREAT]] <- res_baseline_all[[TREAT]]$model } greedy_rows[[TREAT]] <- data.frame( treatment = TREAT, added_rules = if (length(added_rules) > 0) paste(added_rules, collapse = ", ") else "(none)", n_rules_added = length(added_rules), final_BIC = current_BIC, baseline_BIC = comparison_rows[[TREAT]]$BIC_baseline, BIC_improvement = comparison_rows[[TREAT]]$BIC_baseline - current_BIC, stringsAsFactors = FALSE ) cat(sprintf(" Final greedy model: %d rule(s) added (%s) | BIC = %.3f\n", length(added_rules), if (length(added_rules) > 0) paste(added_rules, collapse = ", ") else "none", current_BIC)) if (length(step_log) > 0) { step_df_out <- bind_rows(step_log) cat(" Greedy steps:\n") print(step_df_out %>% mutate(across(where(is.numeric), ~round(., 3)))) } } GREEDY_SUMMARY <- bind_rows(greedy_rows) cat("\n=== Step 3 — Greedy Forward Selection Summary ===\n") print(GREEDY_SUMMARY %>% mutate(across(where(is.numeric), ~round(., 3)))) write.csv(GREEDY_SUMMARY, file.path(script_dir, "ModelSelection_2lights1_Greedy.csv"), row.names = FALSE) cat("Saved ModelSelection_2lights1_Greedy.csv\n") # ============================================================ # 9) Final model selection — lowest BIC among {Full, Greedy} # ============================================================ cat("\n\n===== Final model selection (best BIC) =====\n") final_rows <- list() final_models <- vector("list", length(all_treatments)) names(final_models) <- all_treatments for (TREAT in all_treatments) { bic_full <- comparison_rows[[TREAT]]$BIC_full bic_greedy <- greedy_rows[[TREAT]]$final_BIC if (!is.na(bic_full) && !is.na(bic_greedy)) { if (bic_full < bic_greedy) { winner <- "full" final_BIC <- bic_full final_models[[TREAT]] <- res_full_all[[TREAT]]$model } else { winner <- "greedy" final_BIC <- bic_greedy final_models[[TREAT]] <- greedy_models[[TREAT]] } } else if (!is.na(bic_full)) { winner <- "full" final_BIC <- bic_full final_models[[TREAT]] <- res_full_all[[TREAT]]$model } else { winner <- "greedy" final_BIC <- bic_greedy final_models[[TREAT]] <- greedy_models[[TREAT]] } cat(sprintf(" %s -> winner: %s | BIC = %.3f\n", TREAT, winner, final_BIC)) final_rows[[TREAT]] <- data.frame( treatment = TREAT, BIC_baseline = comparison_rows[[TREAT]]$BIC_baseline, BIC_full = bic_full, BIC_greedy = bic_greedy, selected_model = winner, final_BIC = final_BIC, stringsAsFactors = FALSE ) } FINAL_SELECTION <- bind_rows(final_rows) cat("\n=== Final Model Selection ===\n") print(FINAL_SELECTION %>% mutate(across(where(is.numeric), ~round(., 3)))) # ============================================================ # 9.5) Step 4 — Prune non-significant shares (LRT) # For each strategy in the winning model, test whether its # population share is significantly > 0 via LRT: # LL_drop = LL without strategy s # LRT = 2 * (LL_win - LL_drop) ~ chi-sq(1) under H0 # Strategies with p > LRT_ALPHA are dropped; model is # re-estimated with the remaining strategies. # ============================================================ LRT_ALPHA <- 0.05 cat(sprintf("\n\n===== Step 4 — Prune non-significant shares (LRT, alpha = %.2f) =====\n", LRT_ALPHA)) prune_rows <- list() pruned_models <- vector("list", length(all_treatments)) names(pruned_models) <- all_treatments for (TREAT in all_treatments) { cat(sprintf("\n Treatment: %s\n", TREAT)) win_model <- final_models[[TREAT]] if (is.null(win_model)) { pruned_models[[TREAT]] <- NULL next } df_t <- df_strat %>% filter(treatment == TREAT) %>% mutate(treatment = as.factor(treatment)) win_strat_names <- colnames(win_model$post.assignment) LL_win <- win_model$loglike shares_vec <- setNames(as.numeric(win_model$shares), win_strat_names) cat(sprintf(" Winning model strategies (%d): %s\n", length(win_strat_names), paste(win_strat_names, collapse = ", "))) cat(sprintf(" Shares: %s\n", paste(sprintf("%s=%.4f", win_strat_names, shares_vec), collapse = ", "))) cat(sprintf(" LL of winning model: %.3f\n", LL_win)) testable <- win_strat_names[win_strat_names %in% names(strat_pool)] zero_share <- testable[shares_vec[testable] == 0] nonzero_strats <- testable[shares_vec[testable] > 0] cat(sprintf(" Strategies with share = 0 (auto-drop): %s\n", if (length(zero_share) > 0) paste(zero_share, collapse = ", ") else "(none)")) cat(sprintf(" Strategies with share > 0 (to test): %s\n", if (length(nonzero_strats) > 0) paste(nonzero_strats, collapse = ", ") else "(none)")) lrt_rows <- lapply(nonzero_strats, function(s) { remaining <- setdiff(testable, c(s, zero_share)) if (length(remaining) == 0) { return(data.frame(strategy = s, LL_drop = NA_real_, LRT = NA_real_, p_value = NA_real_, keep = TRUE, stringsAsFactors = FALSE)) } res_drop <- fit_model(df_t, strat_pool[remaining], label = paste0("drop_", s)) LRT_stat <- 2 * max(0, LL_win - res_drop$LL) p_val <- 1 - pchisq(LRT_stat, df = 1) data.frame( strategy = s, LL_drop = res_drop$LL, LRT = LRT_stat, p_value = p_val, keep = p_val < LRT_ALPHA, stringsAsFactors = FALSE ) }) if (length(lrt_rows) > 0) { lrt_df <- bind_rows(lrt_rows) cat(" LRT results:\n") print(lrt_df %>% mutate(across(where(is.numeric), ~round(., 4)))) } else { lrt_df <- data.frame(strategy = character(0), LL_drop = numeric(0), LRT = numeric(0), p_value = numeric(0), keep = logical(0)) } sig_strats <- lrt_df$strategy[lrt_df$keep] insig_strats <- lrt_df$strategy[!lrt_df$keep] kept_names <- sig_strats dropped_names <- c(zero_share, insig_strats) cat(sprintf(" Keep (%d): %s\n", length(kept_names), if (length(kept_names) > 0) paste(kept_names, collapse = ", ") else "(none)")) cat(sprintf(" Drop (%d): %s\n", length(dropped_names), if (length(dropped_names) > 0) paste(dropped_names, collapse = ", ") else "(none)")) if (length(dropped_names) == 0) { cat(" No strategies dropped — keeping winning model as-is.\n") pruned_models[[TREAT]] <- win_model } else if (length(kept_names) == 0) { cat(" WARNING: all strategies would be dropped — keeping winning model.\n") pruned_models[[TREAT]] <- win_model kept_names <- win_strat_names dropped_names <- character(0) } else { kept_in_pool <- kept_names[kept_names %in% names(strat_pool)] pruned_strats <- strat_pool[kept_in_pool] cat(sprintf(" Re-estimating pruned model with %d strategies...\n", length(pruned_strats))) res_pruned <- fit_model(df_t, pruned_strats, label = paste0("pruned_", TREAT)) cat(sprintf(" Pruned model: LL=%.3f k=%d BIC=%.3f (winning BIC was %.3f)\n", res_pruned$LL, res_pruned$k, res_pruned$BIC, FINAL_SELECTION$final_BIC[FINAL_SELECTION$treatment == TREAT])) pruned_models[[TREAT]] <- res_pruned$model } prune_rows[[TREAT]] <- data.frame( treatment = TREAT, strategies_kept = paste(kept_names, collapse = ", "), strategies_dropped = if (length(dropped_names) > 0) paste(dropped_names, collapse = ", ") else "(none)", n_kept = length(kept_names), n_dropped = length(dropped_names), stringsAsFactors = FALSE ) } PRUNE_SUMMARY <- bind_rows(prune_rows) cat("\n=== Step 4 — Pruning Summary ===\n") print(PRUNE_SUMMARY) write.csv(PRUNE_SUMMARY, file.path(script_dir, "ModelSelection_2lights1_PrunedShares.csv"), row.names = FALSE) cat("Saved ModelSelection_2lights1_PrunedShares.csv\n") # Update final_models with pruned models; sections 10-13 use final_models final_models <- pruned_models cat("\nfinal_models updated — downstream output uses pruned models.\n") # ============================================================ # 10) Extract and save strategy shares of the final model # ============================================================ final_shares_rows <- list() for (TREAT in all_treatments) { mdl <- final_models[[TREAT]] if (is.null(mdl)) next shares_vec <- as.numeric(mdl$shares) names(shares_vec) <- colnames(mdl$post.assignment) shares_nz <- shares_vec[shares_vec > 0] shares_nz <- sort(shares_nz, decreasing = TRUE) final_shares_rows[[TREAT]] <- data.frame( treatment = TREAT, selected_model = FINAL_SELECTION$selected_model[FINAL_SELECTION$treatment == TREAT], final_BIC = FINAL_SELECTION$final_BIC[FINAL_SELECTION$treatment == TREAT], strategy = names(shares_nz), share = round(shares_nz, 4), row.names = NULL, stringsAsFactors = FALSE ) } FINAL_SHARES <- bind_rows(final_shares_rows) cat("\n=== Final Model — Strategy Shares ===\n") print(FINAL_SHARES) write.csv(FINAL_SHARES, file.path(script_dir, "ModelSelection_2lights1_FinalShares.csv"), row.names = FALSE) cat("Saved ModelSelection_2lights1_FinalShares.csv\n") # ============================================================ # 11) Bar plots — top strategies per treatment (final model) # ============================================================ for (TREAT in sort(unique(FINAL_SHARES$treatment))) { df_plot <- FINAL_SHARES %>% filter(treatment == TREAT) %>% arrange(desc(share)) %>% slice_head(n = 10) if (nrow(df_plot) == 0) next sel <- FINAL_SELECTION$selected_model[FINAL_SELECTION$treatment == TREAT] old_mar <- par(mar = c(10, 4, 3, 1)) barplot( setNames(df_plot$share, df_plot$strategy), main = paste0("Final model shares — ", TREAT, " (", sel, ")"), ylab = "share", ylim = c(0, 1), las = 2, cex.names = 0.75 ) par(old_mar) } cat("\nDone.\n") # ============================================================ # 12) ESTIMATION_RESULTS — wide-format shares of the final model # One row per treatment, columns: treatment | loglike | num_par | # | | … # ============================================================ results_ms_list <- vector("list", length(all_treatments)) names(results_ms_list) <- all_treatments for (TREAT in all_treatments) { mdl <- final_models[[TREAT]] if (is.null(mdl)) { results_ms_list[[TREAT]] <- data.frame( treatment = TREAT, loglike = NA_real_, num_par = NA_integer_, stringsAsFactors = FALSE ) next } shares_vec <- as.numeric(mdl$shares) names(shares_vec) <- colnames(mdl$post.assignment) results_ms_list[[TREAT]] <- data.frame( treatment = TREAT, loglike = mdl$loglike, num_par = mdl$num.par, t(shares_vec), check.names = FALSE, stringsAsFactors = FALSE ) } ESTIMATION_RESULTS <- bind_rows(results_ms_list) %>% arrange(treatment) ms_strat_cols <- setdiff(names(ESTIMATION_RESULTS), c("treatment", "loglike", "num_par")) ms_nonzero_cols <- ms_strat_cols[ colSums(ESTIMATION_RESULTS[, ms_strat_cols, drop = FALSE], na.rm = TRUE) > 0 ] ESTIMATION_RESULTS <- ESTIMATION_RESULTS[, c("treatment", "loglike", "num_par", ms_nonzero_cols) ] cat("\n=== ESTIMATION_RESULTS (model-selected) ===\n") print(ESTIMATION_RESULTS %>% mutate(across(where(is.numeric), ~round(., 4)))) # ============================================================ # 13) df_final_post — df_final augmented with posteriors from # the winning model per treatment, plus Rule_used & posterior. # ============================================================ post_summary_list <- vector("list", length(all_treatments)) names(post_summary_list) <- all_treatments for (TREAT in all_treatments) { mdl <- final_models[[TREAT]] if (is.null(mdl) || is.null(mdl$post.assignment)) next post_mat <- mdl$post.assignment strat_names <- colnames(post_mat) post_df <- as.data.frame(post_mat, row.names = NULL) colnames(post_df) <- paste0("post_", strat_names) post_df$subject_id <- as.integer(sub("^id", "", rownames(post_mat))) post_df$treatment <- TREAT post_mx <- as.matrix(post_df[, paste0("post_", strat_names), drop = FALSE]) best_idx <- max.col(post_mx, ties.method = "first") post_df$Rule_used <- strat_names[best_idx] post_df$posterior <- round(post_mx[cbind(seq_len(nrow(post_mx)), best_idx)], 2) post_summary_list[[TREAT]] <- post_df } df_posteriors_ms <- bind_rows(post_summary_list) %>% mutate(treatment = as.character(treatment)) post_join_cols <- c( grep("^post_", names(df_posteriors_ms), value = TRUE), "Rule_used", "posterior" ) df_final_post <- df_final %>% mutate(treatment = as.character(treatment)) %>% left_join( df_posteriors_ms %>% select(subject_id, treatment, all_of(post_join_cols)), by = c("subject_id", "treatment") ) cat("\n=== df_final_post (model-selected) ===\n") cat("Rows:", nrow(df_final_post), "| Cols:", ncol(df_final_post), "\n") cat("Rule_used values:", paste(sort(unique(df_final_post$Rule_used)), collapse = ", "), "\n") cat("Missing Rule_used:", sum(is.na(df_final_post$Rule_used)), "\n")