# =============================================================================
# Structural_modelselection.R — Model selection via AIC / BIC
#                               for the Shaded Lights Experiment (3 Lights)
#
# Strategy:
#   Baseline model  : 1D + 2D rules + stochastic  (always included)
#   Full model      : 1D + 2D + ALL 218 3D rules + stochastic
#
#   Information criteria (lower = better):
#     AIC = -2 * LL + 2 * k
#     BIC = -2 * LL + k * ln(n)     [more conservative than AIC]
#   where k = model$num.par and n = number of choices in the treatment.
#
# Three model-selection steps per treatment:
#   1) Baseline vs Full          : head-to-head with all 218 three-variable rules
#   2) Marginal 3D contribution  : each of the 218 rules added alone to baseline
#                                  → pre-screening filter (delta_BIC < -BIC_SCREEN_THRESHOLD)
#   3) Greedy forward selection  : runs only on the rules that passed screening,
#                                  keeping computation tractable
#
# Final selected model = lowest BIC among {Full, Greedy}.
#
# Outputs:
#   ModelSelection_BaselineVsFull.csv  — step-1 comparison table
#   ModelSelection_Marginal3D.csv      — step-2 per-rule deltas (all 218 rules)
#   ModelSelection_Greedy.csv          — step-3 greedy summary
#   ModelSelection_FinalShares.csv     — final strategy shares per treatment
# =============================================================================


# ============================================================
# 0) Packages
# ============================================================

library(dplyr)
library(tidyr)
library(stringr)
library(stratEst)


# ============================================================
# 1) Load data
# ============================================================

script_dir <- dirname(rstudioapi::getActiveDocumentContext()$path)
source(file.path(script_dir, "DOfile_realexp_3lights (2).R"))

df_strat <- df_final %>%
  rename(
    id     = subject_id,
    period = Guess_Number,
    input  = Light_Config
  ) %>%
  mutate(
    choice    = as.character(Guess),
    treatment = as.character(treatment),
    game      = 1L
  ) %>%
  select(id, period, choice, input, treatment, game) %>%
  arrange(id, treatment, period)

cat("Dataset ready:", nrow(df_strat), "rows |",
    n_distinct(df_strat$id), "subjects |",
    n_distinct(df_strat$treatment), "treatments\n")


# ============================================================
# 2) Build all 256 deterministic strategies
# ============================================================

choice_labels  <- c("0", "1")
input_vals_8   <- c("(0,0,0)", "(0,0,1)", "(0,1,0)", "(0,1,1)",
                    "(1,0,0)", "(1,0,1)", "(1,1,0)", "(1,1,1)")
inputs_strat_8 <- c(NA, input_vals_8)
tr_8state      <- as.numeric(rep(1:8, times = 8))

make_det_strategy_8 <- function(actions) {
  stopifnot(length(actions) == 8, all(actions %in% c(0L, 1L)))
  pc <- as.double(unlist(lapply(actions, function(a) c(1 - a, a))))
  stratEst.strategy(
    choices      = choice_labels,
    inputs       = inputs_strat_8,
    num.states   = 8,
    tr.inputs    = tr_8state,
    prob.choices = pc
  )
}

all_det <- setNames(
  lapply(0:255, function(j) {
    actions <- as.integer(intToBits(j))[1:8]
    make_det_strategy_8(actions)
  }),
  sapply(0:255, function(j)
    paste0("s_", paste(as.integer(intToBits(j))[1:8], collapse = ""))
  )
)

# Apply meaningful aliases
meaningful_aliases <- c(
  # 1D
  always_0       =   0L, always_1       = 255L,
  green          = 170L, not_green      =  85L,
  blue           = 204L, not_blue       =  51L,
  red            = 240L, not_red        =  15L,
  # 2D (red, blue)
  NOR_rb         =   3L, INHIBIT_br     =  12L,
  INHIBIT_rb     =  48L, EITHER_rb      =  60L,
  NAND_rb        =  63L, AND_rb         = 192L,
  OR_rb          = 252L, JOINT_rb       = 195L,
  not_INHIBIT_rb = 207L, not_INHIBIT_br = 243L,
  # 2D (red, green)
  NOR_rg         =   5L, INHIBIT_gr     =  10L,
  AND_rg         = 160L, OR_rg          = 250L,
  INHIBIT_rg     =  80L, EITHER_rg      =  90L,
  JOINT_rg       = 165L, NAND_rg        =  95L,
  not_INHIBIT_rg = 175L, not_INHIBIT_gr = 245L,
  # 2D (blue, green)
  NOR_bg         =  17L, INHIBIT_gb     =  34L,
  AND_bg         = 136L, OR_bg          = 238L,
  INHIBIT_bg     =  68L, EITHER_bg      = 102L,
  JOINT_bg       = 153L, NAND_bg        = 119L,
  not_INHIBIT_bg = 187L, not_INHIBIT_gb = 221L,
  # 3D
  NOR3           =   1L, green_alone    =   2L,
  blue_alone     =   4L, blue_green_only =  8L,
  red_alone      =  16L, red_green_only  =  32L,
  red_blue_only  =  64L, AND3           = 128L,
  minority       =  23L, majority       = 232L,
  ODD_parity     = 150L, EVEN_parity    = 105L,
  NAND3          = 127L, OR3            = 254L
)

for (nice_name in names(meaningful_aliases)) {
  j   <- meaningful_aliases[[nice_name]]
  old <- paste0("s_", paste(as.integer(intToBits(j))[1:8], collapse = ""))
  idx <- match(old, names(all_det))
  if (!is.na(idx)) names(all_det)[idx] <- nice_name
}

random <- stratEst.strategy(
  choices = choice_labels, inputs = inputs_strat_8,
  num.states = 1, tr.inputs = rep(1, 8),
  prob.choices = c(0.5, 0.5)
)

always_p <- stratEst.strategy(
  choices = choice_labels, inputs = inputs_strat_8,
  num.states = 1, tr.inputs = rep(1, 8),
  prob.choices = c(NA_real_, NA_real_)
)


# ============================================================
# 3) Define strategy groups
# ============================================================

# Screening threshold: a 3D rule must reduce BIC by more than this
# (i.e. delta_BIC < -BIC_SCREEN_THRESHOLD) to enter the greedy step.
# Raise to be more conservative; lower to admit more candidates.
BIC_SCREEN_THRESHOLD <- 6

names_1d <- c(
  "always_0", "always_1",
  "green", "not_green", "blue", "not_blue", "red", "not_red"
)

names_2d <- c(
  "NOR_rb", "INHIBIT_br", "INHIBIT_rb", "EITHER_rb",
  "NAND_rb", "AND_rb", "OR_rb", "JOINT_rb",
  "not_INHIBIT_rb", "not_INHIBIT_br",
  "NOR_rg", "INHIBIT_gr", "AND_rg", "OR_rg",
  "INHIBIT_rg", "EITHER_rg", "JOINT_rg", "NAND_rg",
  "not_INHIBIT_rg", "not_INHIBIT_gr",
  "NOR_bg", "INHIBIT_gb", "AND_bg", "OR_bg",
  "INHIBIT_bg", "EITHER_bg", "JOINT_bg", "NAND_bg",
  "not_INHIBIT_bg", "not_INHIBIT_gb"
)

# All 218 three-variable rules: everything in all_det not classified as 1D or 2D
names_3d_all <- setdiff(names(all_det), c(names_1d, names_2d))

strats_baseline <- c(
  all_det[names_1d],
  all_det[names_2d],
  list(random = random, always_p = always_p)
)

strats_full <- c(
  all_det[names_1d],
  all_det[names_2d],
  all_det[names_3d_all],
  list(random = random, always_p = always_p)
)

cat(sprintf("Baseline strategies : %d (1D + 2D + stochastic)\n", length(strats_baseline)))
cat(sprintf("Full strategies     : %d (1D + 2D + all 218 3D + stochastic)\n", length(strats_full)))
cat(sprintf("3D candidate pool   : %d rules\n", length(names_3d_all)))
cat(sprintf("BIC screening threshold: %.1f\n", BIC_SCREEN_THRESHOLD))


# ============================================================
# 4) Helper: fit model and return LL / k / AIC / BIC
# ============================================================

fit_model <- function(data, strategies, label = "") {
  n <- nrow(data)
  tryCatch({
    mdl <- stratEst.model(
      data       = data,
      strategies = strategies,
      inner.runs = 10,
      outer.runs = 1,
      inner.max  = 10
    )
    k   <- mdl$num.par
    LL  <- mdl$loglike
    AIC <- -2 * LL + 2 * k
    BIC <- -2 * LL + k * log(n)
    list(
      model = mdl, LL = LL, k = k, n = n,
      AIC = AIC, BIC = BIC, label = label, error = NULL
    )
  }, error = function(e) {
    cat("  ERROR [", label, "]:", conditionMessage(e), "\n")
    list(
      model = NULL, LL = NA_real_, k = NA_integer_, n = n,
      AIC = NA_real_, BIC = NA_real_, label = label, error = conditionMessage(e)
    )
  })
}


# ============================================================
# 5) Step 1 — Baseline vs Full
# ============================================================

all_treatments <- sort(unique(df_strat$treatment))

comparison_rows  <- vector("list", length(all_treatments))
names(comparison_rows) <- all_treatments

res_baseline_all <- vector("list", length(all_treatments))
res_full_all     <- vector("list", length(all_treatments))
names(res_baseline_all) <- all_treatments
names(res_full_all)     <- all_treatments

for (TREAT in all_treatments) {
  cat("\n========== Step 1 | Treatment:", TREAT, "==========\n")

  df_t <- df_strat %>%
    filter(treatment == TREAT) %>%
    mutate(treatment = as.factor(treatment))
  cat("  Rows:", nrow(df_t), "| Subjects:", n_distinct(df_t$id), "\n")

  cat("  Fitting baseline...\n")
  rb <- fit_model(df_t, strats_baseline, label = "baseline")
  res_baseline_all[[TREAT]] <- rb
  cat(sprintf("    LL=%8.3f  k=%d  AIC=%9.3f  BIC=%9.3f\n",
              rb$LL, rb$k, rb$AIC, rb$BIC))

  cat("  Fitting full...\n")
  rf <- fit_model(df_t, strats_full, label = "full")
  res_full_all[[TREAT]] <- rf
  cat(sprintf("    LL=%8.3f  k=%d  AIC=%9.3f  BIC=%9.3f\n",
              rf$LL, rf$k, rf$AIC, rf$BIC))

  # Lower is better; delta < 0 means full improves over baseline
  dAIC <- rf$AIC - rb$AIC
  dBIC <- rf$BIC - rb$BIC

  winner_AIC <- if (!is.na(dAIC)) ifelse(dAIC < 0, "full", "baseline") else NA_character_
  winner_BIC <- if (!is.na(dBIC)) ifelse(dBIC < 0, "full", "baseline") else NA_character_

  cat(sprintf("  delta_AIC = %+.3f (%s wins) | delta_BIC = %+.3f (%s wins)\n",
              dAIC, winner_AIC, dBIC, winner_BIC))

  comparison_rows[[TREAT]] <- data.frame(
    treatment    = TREAT,
    n_obs        = rb$n,
    LL_baseline  = rb$LL,  k_baseline  = rb$k,
    AIC_baseline = rb$AIC, BIC_baseline = rb$BIC,
    LL_full      = rf$LL,  k_full      = rf$k,
    AIC_full     = rf$AIC, BIC_full    = rf$BIC,
    delta_AIC    = dAIC,
    delta_BIC    = dBIC,
    winner_AIC   = winner_AIC,
    winner_BIC   = winner_BIC,
    stringsAsFactors = FALSE
  )
}

COMPARISON <- bind_rows(comparison_rows)
cat("\n=== Step 1 — Baseline vs Full ===\n")
print(COMPARISON %>% mutate(across(where(is.numeric), ~round(., 3))))

write.csv(COMPARISON,
          file.path(script_dir, "ModelSelection_BaselineVsFull.csv"),
          row.names = FALSE)
cat("Saved ModelSelection_BaselineVsFull.csv\n")


# ============================================================
# 6) Step 2 — Marginal contribution of all 218 three-variable rules
#    (baseline + one rule at a time; report delta AIC/BIC)
#    This is the pre-screening step: rules with delta_BIC < -BIC_SCREEN_THRESHOLD
#    are passed to the greedy step; the rest are discarded.
# ============================================================

cat("\n\n===== Step 2 — Marginal 3D contribution (all 218 rules) =====\n")
cat(sprintf("Screening threshold: delta_BIC < -%.1f\n", BIC_SCREEN_THRESHOLD))

marginal_rows <- list()

for (TREAT in all_treatments) {
  cat("\n  Treatment:", TREAT, "(", length(names_3d_all), "rules to test)\n")

  df_t <- df_strat %>%
    filter(treatment == TREAT) %>%
    mutate(treatment = as.factor(treatment))

  bic_base <- comparison_rows[[TREAT]]$BIC_baseline
  aic_base <- comparison_rows[[TREAT]]$AIC_baseline

  for (rule3 in names_3d_all) {
    strats_candidate <- c(strats_baseline, all_det[rule3])
    res_c <- fit_model(df_t, strats_candidate, label = paste0("baseline+", rule3))

    marginal_rows[[paste(TREAT, rule3, sep = "_")]] <- data.frame(
      treatment  = TREAT,
      rule_3d    = rule3,
      LL         = res_c$LL,
      k          = res_c$k,
      AIC        = res_c$AIC,
      BIC        = res_c$BIC,
      delta_AIC  = res_c$AIC - aic_base,
      delta_BIC  = res_c$BIC - bic_base,
      improves_AIC = !is.na(res_c$AIC) & res_c$AIC < aic_base,
      improves_BIC = !is.na(res_c$BIC) & res_c$BIC < bic_base,
      stringsAsFactors = FALSE
    )
    cat(sprintf("    %-20s  delta_AIC=%+7.3f  delta_BIC=%+7.3f\n",
                rule3,
                res_c$AIC - aic_base,
                res_c$BIC - bic_base))
  }
}

MARGINAL_3D <- bind_rows(marginal_rows)
cat("\n=== Step 2 — Marginal 3D Summary (rules that improve BIC) ===\n")
print(
  MARGINAL_3D %>%
    filter(improves_BIC) %>%
    arrange(treatment, delta_BIC) %>%
    mutate(across(where(is.numeric), ~round(., 3)))
)

write.csv(MARGINAL_3D,
          file.path(script_dir, "ModelSelection_Marginal3D.csv"),
          row.names = FALSE)
cat("Saved ModelSelection_Marginal3D.csv\n")


# ============================================================
# 6.5) Screening — keep only rules with delta_BIC < -BIC_SCREEN_THRESHOLD
#      per treatment; these form the candidate pool for Step 3.
# ============================================================

screened_3d <- lapply(all_treatments, function(TREAT) {
  rules <- MARGINAL_3D %>%
    filter(treatment == TREAT, delta_BIC < -BIC_SCREEN_THRESHOLD) %>%
    arrange(delta_BIC) %>%
    pull(rule_3d)
  rules
})
names(screened_3d) <- all_treatments

cat("\n=== Screening results (rules passing delta_BIC < -", BIC_SCREEN_THRESHOLD, ") ===\n",
    sep = "")
for (TREAT in all_treatments) {
  n_pass <- length(screened_3d[[TREAT]])
  cat(sprintf("  %s: %d / %d rules pass\n", TREAT, n_pass, length(names_3d_all)))
  if (n_pass > 0)
    cat("    ", paste(screened_3d[[TREAT]], collapse = ", "), "\n")
}


# ============================================================
# 7) Step 3 — Greedy forward selection (BIC-based)
#    Operates only on the rules that passed the Step 2 screen.
#    Worst-case models per treatment = sum(1:n_screened), which
#    is small (typically < 50 rules pass) and tractable.
# ============================================================

cat("\n\n===== Step 3 — Greedy forward selection (BIC, screened candidates) =====\n")

greedy_rows    <- list()
greedy_models  <- vector("list", length(all_treatments))
names(greedy_models) <- all_treatments

for (TREAT in all_treatments) {
  n_cand <- length(screened_3d[[TREAT]])
  cat(sprintf("\n  Treatment: %s | %d screened candidate(s)\n", TREAT, n_cand))

  df_t <- df_strat %>%
    filter(treatment == TREAT) %>%
    mutate(treatment = as.factor(treatment))

  current_strats <- strats_baseline
  current_BIC    <- comparison_rows[[TREAT]]$BIC_baseline
  remaining_3d   <- screened_3d[[TREAT]]   # only screened rules
  added_rules    <- character(0)
  step_log       <- list()

  if (length(remaining_3d) == 0) {
    cat("    No rules passed screening — keeping baseline.\n")
    greedy_models[[TREAT]] <- res_baseline_all[[TREAT]]$model
  }

  repeat {
    if (length(remaining_3d) == 0) break

    step_res <- lapply(remaining_3d, function(rule3) {
      res <- fit_model(df_t, c(current_strats, all_det[rule3]), label = rule3)
      data.frame(rule = rule3, BIC = res$BIC, model_obj = I(list(res$model)),
                 stringsAsFactors = FALSE)
    })
    step_df <- bind_rows(step_res)

    best_idx <- which.min(step_df$BIC)
    best_BIC <- step_df$BIC[best_idx]
    best_rule <- step_df$rule[best_idx]

    if (!is.na(best_BIC) && best_BIC < current_BIC) {
      cat(sprintf("    + '%s': BIC %.3f -> %.3f (delta = %+.3f)\n",
                  best_rule, current_BIC, best_BIC, best_BIC - current_BIC))

      step_log[[length(step_log) + 1]] <- data.frame(
        treatment  = TREAT,
        step       = length(added_rules) + 1L,
        rule_added = best_rule,
        BIC_before = current_BIC,
        BIC_after  = best_BIC,
        delta_BIC  = best_BIC - current_BIC,
        stringsAsFactors = FALSE
      )

      added_rules    <- c(added_rules, best_rule)
      current_strats <- c(current_strats, all_det[best_rule])
      current_BIC    <- best_BIC
      remaining_3d   <- setdiff(remaining_3d, best_rule)

      # Save the best model object for this step
      greedy_models[[TREAT]] <- step_df$model_obj[[best_idx]]

    } else {
      cat("    No further BIC improvement. Stopping.\n")
      break
    }
  }

  # If no 3D rule was ever added, keep the baseline model
  if (length(added_rules) == 0) {
    greedy_models[[TREAT]] <- res_baseline_all[[TREAT]]$model
  }

  greedy_rows[[TREAT]] <- data.frame(
    treatment     = TREAT,
    added_3d_rules = if (length(added_rules) > 0)
                       paste(added_rules, collapse = ", ") else "(none)",
    n_rules_added  = length(added_rules),
    final_BIC      = current_BIC,
    baseline_BIC   = comparison_rows[[TREAT]]$BIC_baseline,
    BIC_improvement = comparison_rows[[TREAT]]$BIC_baseline - current_BIC,
    stringsAsFactors = FALSE
  )

  cat(sprintf("  Final greedy model: %d 3D rule(s) added (%s) | BIC = %.3f\n",
              length(added_rules),
              if (length(added_rules) > 0) paste(added_rules, collapse = ", ") else "none",
              current_BIC))

  if (length(step_log) > 0) {
    step_df_out <- bind_rows(step_log)
    cat("  Greedy steps:\n")
    print(step_df_out %>% mutate(across(where(is.numeric), ~round(., 3))))
  }
}

GREEDY_SUMMARY <- bind_rows(greedy_rows)
cat("\n=== Step 3 — Greedy Forward Selection Summary ===\n")
print(GREEDY_SUMMARY %>% mutate(across(where(is.numeric), ~round(., 3))))

write.csv(GREEDY_SUMMARY,
          file.path(script_dir, "ModelSelection_Greedy.csv"),
          row.names = FALSE)
cat("Saved ModelSelection_Greedy.csv\n")


# ============================================================
# 8) Final model selection — lowest BIC among {Full, Greedy}
#    (Greedy is always >= Baseline, so Baseline need not be
#    compared separately; Full may outperform Greedy if adding
#    many 3D rules jointly outweighs the BIC penalty.)
# ============================================================

cat("\n\n===== Final model selection (best BIC) =====\n")

final_rows   <- list()
final_models <- vector("list", length(all_treatments))
names(final_models) <- all_treatments

for (TREAT in all_treatments) {
  bic_full   <- comparison_rows[[TREAT]]$BIC_full
  bic_greedy <- greedy_rows[[TREAT]]$final_BIC

  if (!is.na(bic_full) && !is.na(bic_greedy)) {
    if (bic_full < bic_greedy) {
      winner     <- "full"
      final_BIC  <- bic_full
      final_models[[TREAT]] <- res_full_all[[TREAT]]$model
    } else {
      winner     <- "greedy"
      final_BIC  <- bic_greedy
      final_models[[TREAT]] <- greedy_models[[TREAT]]
    }
  } else if (!is.na(bic_full)) {
    winner    <- "full"
    final_BIC <- bic_full
    final_models[[TREAT]] <- res_full_all[[TREAT]]$model
  } else {
    winner    <- "greedy"
    final_BIC <- bic_greedy
    final_models[[TREAT]] <- greedy_models[[TREAT]]
  }

  cat(sprintf("  %s -> winner: %s | BIC = %.3f\n", TREAT, winner, final_BIC))

  final_rows[[TREAT]] <- data.frame(
    treatment      = TREAT,
    BIC_baseline   = comparison_rows[[TREAT]]$BIC_baseline,
    BIC_full       = bic_full,
    BIC_greedy     = bic_greedy,
    selected_model = winner,
    final_BIC      = final_BIC,
    stringsAsFactors = FALSE
  )
}

FINAL_SELECTION <- bind_rows(final_rows)
cat("\n=== Final Model Selection ===\n")
print(FINAL_SELECTION %>% mutate(across(where(is.numeric), ~round(., 3))))


# ============================================================
# 8.5) Step 4 — Prune non-significant shares and re-estimate
#      For each strategy in the winning model, we test whether
#      its population share is significantly greater than zero
#      via a likelihood ratio test (LRT):
#        - Fit model WITHOUT strategy s  →  LL_drop
#        - LRT = 2 * (LL_win - LL_drop) ~ chi-sq(1) under H0
#        - p-value = 1 - pchisq(LRT, df = 1)
#      Strategies with p > LRT_ALPHA (share not significantly
#      different from 0) are dropped.  The model is then
#      re-estimated one final time with the pruned strategy set.
#      final_models is updated so sections 9-12 use pruned models.
# ============================================================

LRT_ALPHA <- 0.05   # significance level for the LRT (one df)

cat(sprintf("\n\n===== Step 4 — Prune non-significant shares (LRT, alpha = %.2f) =====\n",
            LRT_ALPHA))

# Full pool of named strategy objects for look-up by name
strat_pool <- c(all_det, list(random = random, always_p = always_p))

prune_rows    <- list()
pruned_models <- vector("list", length(all_treatments))
names(pruned_models) <- all_treatments

for (TREAT in all_treatments) {
  cat(sprintf("\n  Treatment: %s\n", TREAT))

  win_model <- final_models[[TREAT]]
  if (is.null(win_model)) {
    pruned_models[[TREAT]] <- NULL
    next
  }

  df_t <- df_strat %>%
    filter(treatment == TREAT) %>%
    mutate(treatment = as.factor(treatment))

  win_strat_names <- colnames(win_model$post.assignment)
  LL_win          <- win_model$loglike

  shares_vec <- setNames(as.numeric(win_model$shares), win_strat_names)

  cat(sprintf("    Winning model strategies (%d): %s\n",
              length(win_strat_names), paste(win_strat_names, collapse = ", ")))
  cat(sprintf("    Shares: %s\n",
              paste(sprintf("%s=%.4f", win_strat_names, shares_vec), collapse = ", ")))
  cat(sprintf("    LL of winning model: %.3f\n", LL_win))

  # Only test strategies present in the pool (lookup by name)
  testable <- win_strat_names[win_strat_names %in% names(strat_pool)]
  # Strategies with share already == 0 are trivially non-significant; skip LRT
  zero_share     <- testable[shares_vec[testable] == 0]
  nonzero_strats <- testable[shares_vec[testable] > 0]

  cat(sprintf("    Strategies with share = 0 (auto-drop): %s\n",
              if (length(zero_share) > 0) paste(zero_share, collapse = ", ") else "(none)"))
  cat(sprintf("    Strategies with share > 0 (to test): %s\n",
              if (length(nonzero_strats) > 0) paste(nonzero_strats, collapse = ", ") else "(none)"))

  # LRT for each nonzero-share strategy
  lrt_rows <- lapply(nonzero_strats, function(s) {
    remaining <- setdiff(testable, c(s, zero_share))  # drop s and already-zero ones
    if (length(remaining) == 0) {
      # Cannot drop the only strategy; keep it
      return(data.frame(strategy = s, LL_drop = NA_real_, LRT = NA_real_,
                        p_value = NA_real_, keep = TRUE, stringsAsFactors = FALSE))
    }
    res_drop <- fit_model(df_t, strat_pool[remaining], label = paste0("drop_", s))
    LRT_stat <- 2 * max(0, LL_win - res_drop$LL)   # LRT >= 0 by definition
    p_val    <- 1 - pchisq(LRT_stat, df = 1)
    data.frame(
      strategy = s,
      LL_drop  = res_drop$LL,
      LRT      = LRT_stat,
      p_value  = p_val,
      keep     = p_val < LRT_ALPHA,
      stringsAsFactors = FALSE
    )
  })

  if (length(lrt_rows) > 0) {
    lrt_df <- bind_rows(lrt_rows)
    cat("    LRT results:\n")
    print(lrt_df %>% mutate(across(where(is.numeric), ~round(., 4))))
  } else {
    lrt_df <- data.frame(strategy = character(0), LL_drop = numeric(0),
                         LRT = numeric(0), p_value = numeric(0), keep = logical(0))
  }

  sig_strats   <- lrt_df$strategy[lrt_df$keep]
  insig_strats <- lrt_df$strategy[!lrt_df$keep]

  # Strategies to retain in final pruned model
  kept_names    <- c(sig_strats)   # significant nonzero-share strategies only
  dropped_names <- c(zero_share, insig_strats)

  cat(sprintf("    Keep  (%d): %s\n", length(kept_names),
              if (length(kept_names) > 0) paste(kept_names, collapse = ", ") else "(none)"))
  cat(sprintf("    Drop  (%d): %s\n", length(dropped_names),
              if (length(dropped_names) > 0) paste(dropped_names, collapse = ", ") else "(none)"))

  if (length(dropped_names) == 0) {
    cat("    No strategies dropped — keeping winning model as-is.\n")
    pruned_models[[TREAT]] <- win_model
  } else if (length(kept_names) == 0) {
    cat("    WARNING: all strategies would be dropped — keeping winning model.\n")
    pruned_models[[TREAT]] <- win_model
    kept_names    <- win_strat_names
    dropped_names <- character(0)
  } else {
    kept_in_pool  <- kept_names[kept_names %in% names(strat_pool)]
    pruned_strats <- strat_pool[kept_in_pool]
    cat(sprintf("    Re-estimating pruned model with %d strategies...\n",
                length(pruned_strats)))
    res_pruned <- fit_model(df_t, pruned_strats, label = paste0("pruned_", TREAT))
    cat(sprintf("    Pruned model: LL=%.3f  k=%d  BIC=%.3f  (winning BIC was %.3f)\n",
                res_pruned$LL, res_pruned$k, res_pruned$BIC,
                FINAL_SELECTION$final_BIC[FINAL_SELECTION$treatment == TREAT]))
    pruned_models[[TREAT]] <- res_pruned$model
  }

  prune_rows[[TREAT]] <- data.frame(
    treatment          = TREAT,
    strategies_kept    = paste(kept_names, collapse = ", "),
    strategies_dropped = if (length(dropped_names) > 0) paste(dropped_names, collapse = ", ") else "(none)",
    n_kept             = length(kept_names),
    n_dropped          = length(dropped_names),
    stringsAsFactors   = FALSE
  )
}

PRUNE_SUMMARY <- bind_rows(prune_rows)
cat("\n=== Step 4 — Pruning Summary ===\n")
print(PRUNE_SUMMARY)

write.csv(PRUNE_SUMMARY,
          file.path(script_dir, "ModelSelection_PrunedShares.csv"),
          row.names = FALSE)
cat("Saved ModelSelection_PrunedShares.csv\n")

# Update final_models with pruned models; sections 9-12 read final_models
final_models <- pruned_models
cat("\nfinal_models updated — downstream output (sections 9-12) uses pruned models.\n")


# ============================================================
# 9) Extract and save strategy shares of the final model
# ============================================================

final_shares_rows <- list()

for (TREAT in all_treatments) {
  mdl <- final_models[[TREAT]]
  if (is.null(mdl)) next

  shares_vec        <- as.numeric(mdl$shares)
  #names(shares_vec) <- rownames(mdl$shares)    # strategy names
  names(shares_vec) <- colnames(mdl$post.assignment)
  
  # Keep only non-zero shares
  shares_nz <- shares_vec[shares_vec > 0]
  shares_nz <- sort(shares_nz, decreasing = TRUE)

  final_shares_rows[[TREAT]] <- data.frame(
    treatment      = TREAT,
    selected_model = FINAL_SELECTION$selected_model[FINAL_SELECTION$treatment == TREAT],
    final_BIC      = FINAL_SELECTION$final_BIC[FINAL_SELECTION$treatment == TREAT],
    strategy       = names(shares_nz),
    share          = round(shares_nz, 4),
    row.names      = NULL,
    stringsAsFactors = FALSE
  )
}

FINAL_SHARES <- bind_rows(final_shares_rows)
cat("\n=== Final Model — Strategy Shares ===\n")
print(FINAL_SHARES)

write.csv(FINAL_SHARES,
          file.path(script_dir, "ModelSelection_FinalShares.csv"),
          row.names = FALSE)
cat("Saved ModelSelection_FinalShares.csv\n")


# ============================================================
# 10) Bar plots — top 10 strategies per treatment (final model)
# ============================================================

for (TREAT in sort(unique(FINAL_SHARES$treatment))) {
  df_plot <- FINAL_SHARES %>%
    filter(treatment == TREAT) %>%
    arrange(desc(share)) %>%
    slice_head(n = 10)

  if (nrow(df_plot) == 0) next

  sel <- FINAL_SELECTION$selected_model[FINAL_SELECTION$treatment == TREAT]
  old_mar <- par(mar = c(10, 4, 3, 1))
  barplot(
    setNames(df_plot$share, df_plot$strategy),
    main      = paste0("Final model shares — ", TREAT, " (", sel, ")"),
    ylab      = "share",
    ylim      = c(0, 1),
    las       = 2,
    cex.names = 0.75
  )
  par(old_mar)
}

cat("\nDone.\n")


# ============================================================
# 11) ESTIMATION_RESULTS — wide-format shares of the final model
#     Same interface as Structural_3lights.R / Structural_allrules.R:
#     one row per treatment, columns: treatment | loglike | num_par |
#     <strategy_1> | <strategy_2> | …
#     Strategies absent from a treatment's winning model get NA.
#     Analysis_3lights.R reads this via env$ESTIMATION_RESULTS.
# ============================================================

results_ms_list <- vector("list", length(all_treatments))
names(results_ms_list) <- all_treatments

for (TREAT in all_treatments) {
  mdl <- final_models[[TREAT]]
  if (is.null(mdl)) {
    results_ms_list[[TREAT]] <- data.frame(
      treatment = TREAT, loglike = NA_real_, num_par = NA_integer_,
      stringsAsFactors = FALSE
    )
    next
  }
  shares_vec        <- as.numeric(mdl$shares)
  names(shares_vec) <- colnames(mdl$post.assignment)
  results_ms_list[[TREAT]] <- data.frame(
    treatment = TREAT,
    loglike   = mdl$loglike,
    num_par   = mdl$num.par,
    t(shares_vec),
    check.names      = FALSE,
    stringsAsFactors = FALSE
  )
}

ESTIMATION_RESULTS <- bind_rows(results_ms_list) %>% arrange(treatment)

ms_strat_cols   <- setdiff(names(ESTIMATION_RESULTS), c("treatment", "loglike", "num_par"))
ms_nonzero_cols <- ms_strat_cols[
  colSums(ESTIMATION_RESULTS[, ms_strat_cols, drop = FALSE], na.rm = TRUE) > 0
]
ESTIMATION_RESULTS <- ESTIMATION_RESULTS[,
  c("treatment", "loglike", "num_par", ms_nonzero_cols)
]

cat("\n=== ESTIMATION_RESULTS (model-selected, for Analysis_3lights.R) ===\n")
print(ESTIMATION_RESULTS %>% mutate(across(where(is.numeric), ~round(., 4))))


# ============================================================
# 12) df_final_post — df_final augmented with posteriors from
#     the winning model per treatment, plus Rule_used & posterior.
#     Same interface as Structural_3lights.R.
#     Analysis_3lights.R reads this via env$df_final_post.
# ============================================================

post_summary_list <- vector("list", length(all_treatments))
names(post_summary_list) <- all_treatments

for (TREAT in all_treatments) {
  mdl <- final_models[[TREAT]]
  if (is.null(mdl) || is.null(mdl$post.assignment)) next

  post_mat    <- mdl$post.assignment
  strat_names <- colnames(post_mat)

  post_df             <- as.data.frame(post_mat, row.names = NULL)
  colnames(post_df)   <- paste0("post_", strat_names)
  post_df$subject_id  <- as.integer(sub("^id", "", rownames(post_mat)))
  post_df$treatment   <- TREAT

  # Assign Rule_used / posterior within this treatment's model
  post_mx <- as.matrix(post_df[, paste0("post_", strat_names), drop = FALSE])
  best_idx <- max.col(post_mx, ties.method = "first")
  post_df$Rule_used <- strat_names[best_idx]
  post_df$posterior <- round(post_mx[cbind(seq_len(nrow(post_mx)), best_idx)], 2)

  post_summary_list[[TREAT]] <- post_df
}

# bind_rows fills missing post_ columns with NA across treatments
df_posteriors_ms <- bind_rows(post_summary_list) %>%
  mutate(treatment = as.character(treatment))

post_join_cols <- c(
  grep("^post_", names(df_posteriors_ms), value = TRUE),
  "Rule_used", "posterior"
)

df_final_post <- df_final %>%
  mutate(treatment = as.character(treatment)) %>%
  left_join(
    df_posteriors_ms %>% select(subject_id, treatment, all_of(post_join_cols)),
    by = c("subject_id", "treatment")
  )

cat("\n=== df_final_post (model-selected, for Analysis_3lights.R) ===\n")
cat("Rows:", nrow(df_final_post), "| Cols:", ncol(df_final_post), "\n")
cat("Rule_used values:", paste(sort(unique(df_final_post$Rule_used)), collapse = ", "), "\n")
cat("Missing Rule_used:", sum(is.na(df_final_post$Rule_used)), "\n")