"""Simulate many participants to verify that random assignment produces a
reasonable spread across:

- Cohorts: Experienced (smart) vs Standard (non-smart)
- Tool counts: 1, 2, 3, 4

This script mirrors presurvey.Info.before_next_page logic:

- is_smart_group if newsvendor_knowledge > 3
- tool_count randomly chosen from {1,2,3,4}

We generate a population with a configurable knowledge distribution and show
frequencies and percentages per cohort and tool count.
"""

import math
import random
from bisect import bisect_left
from collections import Counter, defaultdict
from itertools import accumulate

# Adjust these to reflect your expected participant population
N = 10000

# Example distribution for knowledge 1..5 (sum to 1.0)
knowledge_weights = {
    1: 0.18,
    2: 0.22,
    3: 0.28,
    4: 0.20,
    5: 0.12,
}

# Shared by simulate() and show_results() so the two cannot drift apart.
TOOL_COUNTS = (1, 2, 3, 4)
SMART_KNOWLEDGE_THRESHOLD = 3  # is_smart if knowledge > 3 (mirrors presurvey)
COHORT_EXPERIENCED = 'Experienced'
COHORT_STANDARD = 'Standard'
COHORTS = (COHORT_EXPERIENCED, COHORT_STANDARD)


def build_cdf(weights):
    """Return ``(levels, cumulative_probabilities)`` for a weight mapping.

    Args:
        weights: Mapping of knowledge level -> probability. Iteration order
            of the mapping defines the order of the returned levels.

    Returns:
        A tuple ``(levels, cdf)`` where ``cdf[i]`` is the running sum of the
        probabilities of ``levels[0..i]``.

    Raises:
        ValueError: If the mapping is empty, contains a negative weight, or
            the weights do not sum to 1.0. Without this check a mis-scaled
            distribution would silently skew the sampled population.
    """
    levels = list(weights)
    if not levels:
        raise ValueError("knowledge weights must not be empty")

    probs = [weights[level] for level in levels]
    if any(p < 0 for p in probs):
        raise ValueError("knowledge weights must be non-negative")

    total = sum(probs)
    if not math.isclose(total, 1.0, rel_tol=1e-9):
        raise ValueError(f"knowledge weights must sum to 1.0, got {total!r}")

    # Cumulative distribution for sampling without numpy.
    return levels, list(accumulate(probs))


knowledge_levels, cdf = build_cdf(knowledge_weights)
knowledge_probs = [knowledge_weights[k] for k in knowledge_levels]


def level_for_draw(r):
    """Map a uniform draw ``r`` to a knowledge level via the module CDF.

    Returns the first level whose cumulative cutoff satisfies ``r <= cutoff``.
    If floating-point rounding leaves the final cutoff slightly below ``r``,
    the last level is returned.
    """
    # bisect_left finds the first index with cdf[index] >= r, i.e. r <= cutoff.
    index = bisect_left(cdf, r)
    return knowledge_levels[min(index, len(knowledge_levels) - 1)]


def sample_knowledge(rng=None):
    """Draw one knowledge level according to ``knowledge_weights``.

    Args:
        rng: Optional object exposing ``random()`` (e.g. ``random.Random``).
            Defaults to the module-level ``random`` generator.

    Returns:
        One of ``knowledge_levels``.
    """
    source = random if rng is None else rng
    return level_for_draw(source.random())


def simulate(n=N, rng=None):
    """Simulate ``n`` participants and tally cohort / tool-count assignment.

    Each participant gets a sampled knowledge level, is placed in the
    'Experienced' cohort when that level is greater than 3 (otherwise
    'Standard'), and receives a tool count chosen uniformly from 1..4.

    Args:
        n: Number of participants to simulate.
        rng: Optional object exposing ``random()`` and ``choice()`` (e.g.
            ``random.Random``). Defaults to the module-level ``random``
            generator, so ``random.seed(...)`` still controls the output.

    Returns:
        ``(by_cohort_and_tools, totals_by_cohort)`` where the first is a
        ``defaultdict(Counter)`` keyed by cohort then tool count, and the
        second is a ``Counter`` of participants per cohort.
    """
    source = random if rng is None else rng
    by_cohort_and_tools = defaultdict(Counter)
    totals_by_cohort = Counter()

    for _ in range(n):
        knowledge = sample_knowledge(source)
        is_smart = knowledge > SMART_KNOWLEDGE_THRESHOLD  # mirrors presurvey
        cohort = COHORT_EXPERIENCED if is_smart else COHORT_STANDARD
        tool_count = source.choice(TOOL_COUNTS)
        by_cohort_and_tools[cohort][tool_count] += 1
        totals_by_cohort[cohort] += 1

    return by_cohort_and_tools, totals_by_cohort


def show_results(by_cohort_and_tools, totals_by_cohort):
    """Print per-cohort tool-count frequencies and percentages.

    Args:
        by_cohort_and_tools: Mapping cohort -> mapping tool count -> count.
        totals_by_cohort: Mapping cohort -> number of participants.
    """
    print('=== Tool Count Distribution by Cohort ===')
    for cohort in COHORTS:
        total = totals_by_cohort[cohort]
        if total == 0:
            print(f"{cohort}: (no participants)")
            continue

        print(f"\n{cohort} (n={total})")
        # .get avoids inserting a new key when given a defaultdict.
        row = by_cohort_and_tools.get(cohort, {})
        for tools in TOOL_COUNTS:
            count = row.get(tools, 0)
            pct = (count / total) * 100  # total is non-zero here
            print(f" {tools} tools: {count:5d} ({pct:5.1f}%)")

    # Overall sanity check
    grand_total = sum(totals_by_cohort.values())
    print(f"\nGrand total: {grand_total}")


def main():
    """Run the simulation with a fixed seed and print the summary."""
    random.seed(42)
    by_cohort_and_tools, totals_by_cohort = simulate()
    show_results(by_cohort_and_tools, totals_by_cohort)


if __name__ == '__main__':
    main()