import random
from collections import Counter, defaultdict

"""
Simulate many participants to verify that random assignment produces a reasonable
spread across:
- Cohorts: Experienced (smart) vs Standard (non-smart)
- Tool counts: 1, 2, 3, 4

This script mirrors presurvey.Info.before_next_page logic:
- is_smart_group if newsvendor_knowledge > 3
- tool_count randomly chosen from {1,2,3,4}

We generate a population with a configurable knowledge distribution and show
frequencies and percentages per cohort and tool count.
"""

# Simulation size: how many synthetic participants a default run draws.
# Adjust this and the weights below to reflect your expected population.
N = 10_000

# Assumed share of participants at each knowledge level 1..5.
# The shares are expected to sum to 1.0.
knowledge_weights = {
    1: 0.18,
    2: 0.22,
    3: 0.28,
    4: 0.20,
    5: 0.12,
}

# Levels and their probabilities as parallel lists (dict insertion order).
knowledge_levels = list(knowledge_weights)
knowledge_probs = list(knowledge_weights.values())

# Cumulative distribution over the levels, accumulated by hand so the script
# needs nothing beyond the standard library. cdf[i] is the probability of
# drawing knowledge_levels[i] or any earlier level.
acc = 0.0
cdf = []
for p in knowledge_probs:
    acc = acc + p
    cdf.append(acc)


def sample_knowledge():
    """Draw one knowledge level according to the configured weights.

    Takes a single uniform draw and returns the first level whose cumulative
    cutoff in ``cdf`` is at or above it. If the draw exceeds every cutoff
    (the cumulative total can land slightly below 1.0 through float
    rounding), the last level is returned.
    """
    draw = random.random()
    matching_levels = (
        level
        for level, upper_bound in zip(knowledge_levels, cdf)
        if draw <= upper_bound
    )
    return next(matching_levels, knowledge_levels[-1])


def simulate(n=N):
    """Simulate ``n`` participants and tally their random assignments.

    Each participant gets a sampled knowledge level and a tool count picked
    at random from 1-4. A knowledge level above 3 places the participant in
    the 'Experienced' cohort, anything else in 'Standard' (mirrors presurvey).

    Returns:
        A ``(by_cohort_and_tools, totals_by_cohort)`` pair: a defaultdict
        mapping cohort name to a Counter of tool counts, and a Counter of
        participants per cohort.
    """
    tool_options = (1, 2, 3, 4)
    cohort_totals = Counter()
    tool_tallies = defaultdict(Counter)

    for _ in range(n):
        # Knowledge is drawn before the tool count; the order of the two
        # draws determines the result under a fixed seed.
        level = sample_knowledge()
        if level > 3:
            cohort = 'Experienced'
        else:
            cohort = 'Standard'
        tools = random.choice(tool_options)

        cohort_totals[cohort] += 1
        tool_tallies[cohort][tools] += 1

    return tool_tallies, cohort_totals


def show_results(by_cohort_and_tools, totals_by_cohort):
    """Print the tool-count breakdown for each cohort, then the grand total.

    Args:
        by_cohort_and_tools: Mapping of cohort name to a mapping of tool
            count (1-4) to the number of participants given that count.
        totals_by_cohort: Mapping of cohort name to its participant count.

    Cohorts are reported in a fixed order (Experienced, then Standard). A
    cohort with zero participants gets a single placeholder line instead of
    a table. Nothing is returned; all output goes to stdout.
    """
    print('=== Tool Count Distribution by Cohort ===')

    for cohort in ('Experienced', 'Standard'):
        total = totals_by_cohort[cohort]
        if total == 0:
            print(f"{cohort}: (no participants)")
        else:
            print(f"\n{cohort} (n={total})")
            tallies = by_cohort_and_tools[cohort]
            for tools in (1, 2, 3, 4):
                count = tallies.get(tools, 0)
                # Defensive guard: total is already known to be non-zero here.
                if total:
                    pct = (count / total) * 100
                else:
                    pct = 0
                print(f"  {tools} tools: {count:5d}  ({pct:5.1f}%)")

    # Overall sanity check: should equal the number of simulated participants.
    grand_total = sum(totals_by_cohort.values())
    print(f"\nGrand total: {grand_total}")


if __name__ == '__main__':
    # Fixed seed so that repeated runs print the same table.
    random.seed(42)
    # Run the default-size simulation and print its two tallies.
    show_results(*simulate())