"""
Generate oTree per-app CSV exports for Combined_2lights (session: 2Lights_T1),
80 participants × 5 rounds.
Output format exactly matches what oTree 6.x produces when you download
per-app CSV files from the admin panel after real participants complete
the experiment.
Column order:
participant (standard) → session → participant (PARTICIPANT_FIELDS)
→ player → subsession.round_number → group.id_in_subsession
"""
import csv
import json
import random
import string
import datetime
import numpy as np
import os
random.seed(42)
np.random.seed(42)
# ── Constants ─────────────────────────────────────────────────────────────────
N_PLAYERS = 80
N_ROUNDS = 5
N_GUESSES = 12
GUESSES_PER_CONFIG = 3
SESSION_CODE = 'n6m3k8j1'
SESSION_START = datetime.datetime(2025, 6, 20, 9, 0, 0)
PARTICIPATION_FEE = 3.1 # 3.10 → serialises as 3.1
RW_CURRENCY_PER_POINT = 3.1
# ── Case definition ───────────────────────────────────────────────────────────
# Columns: Red, Blue, Others, Sound
case1 = [
[0, 0, 0, 0],
[0, 0, 0, 1],
[0, 1, 0, 0],
[0, 1, 0, 1],
[1, 1, 0, 0],
[1, 1, 0, 1],
[1, 0, 0, 0],
[1, 0, 0, 1],
]
# Five machines used in Combined_2lights (Instructions.before_next_page)
MACHINE_FREQS = {
'AND': [6, 2, 5, 1, 1, 7, 6, 2], # freq18 – AND EASY
'OR': [7, 1, 1, 7, 1, 11, 1, 2], # freq3 – OR DIFFICULT
'EITHER': [6, 1, 2, 6, 5, 1, 1, 7], # freq20 – EITHER EASY
'JOINT': [0, 1, 7, 1, 1, 9, 9, 1], # freq8 – JOINT DIFFICULT
'INHIBIT': [5, 1, 6, 0, 3, 2, 0, 14], # freq12 – INHIBIT DIFFICULT
}
MACHINE_NAMES = ['AND', 'OR', 'EITHER', 'JOINT', 'INHIBIT']
# Researcher-defined correct predictions (set in Instructions.before_next_page)
CORRECT_PREDICTIONS = {
'1L': {'(0, 0)': 0, '(0, 1)': 0, '(1, 0)': 0, '(1, 1)': 0},
'2L': {'(0, 0)': 0, '(0, 1)': 0, '(1, 0)': 0, '(1, 1)': 1},
}
# Comprehension answers (all correct on first attempt for bots)
CQ_CORRECT = {'cq1': 1, 'cq2': 2, 'cq3': 3, 'cq4': 2, 'cq5': 3}
# ── Free-text pools ───────────────────────────────────────────────────────────
NOTES_POOL = [
"The red light seems to be the main predictor of the sound.",
"Both lights appear to need to be on for the ding to occur.",
"Blue light on its own seems to prevent the sound.",
"Sound seems independent of the lights I can observe.",
"When only red is on there is almost always a ding.",
"It looks like either light is sufficient to trigger the sound.",
"Sound only when both are off – very counterintuitive.",
"Red alone seems sufficient. Blue adds nothing.",
"Pattern unclear after observing. Need more trials.",
"Sound correlates strongly with both lights being on.",
"The sound occurred when red was on regardless of blue.",
"Both lights off seemed to suppress the sound most of the time.",
"I noticed the blue light was on most of the time when sound occurred.",
"I cannot find a clear pattern from the observations.",
"Red light on with blue off seems most predictive of a ding.",
"The rule seems to depend on some combination of both lights.",
"Most sounds occurred when red was off and blue was on.",
"I think the unobservable lights might be driving the sound here.",
]
EXAMPLE_NOTES_POOL = [
"I think the red light predicts the sound most of the time.",
"Both lights together seem to produce the ding.",
"The pattern here is not immediately obvious to me.",
"It seems like when both lights are on the ding is more likely.",
"I noticed that the blue light was often on when there was no sound.",
]
STRATEGY_POOL = [
"I looked for which single light most reliably predicted the ding across all trials.",
"I tracked whether the combination of lights mattered more than individual ones.",
"I focused on identifying which configuration always produced the sound.",
"I tried to learn the underlying rule by looking at the observation table carefully.",
"I noticed which light configurations were most associated with the sound and used that as my rule.",
"I paid close attention to how often each configuration appeared and whether the sound was consistent.",
"My main strategy was to find which single light, if any, was a sufficient predictor of the ding.",
"I looked at patterns across the whole table rather than individual rows.",
]
COMMENT_POOL = [
"The experiment was challenging but interesting.",
"Some machines were much harder than others.",
"I found the observation tables very helpful for spotting patterns.",
"The task was mentally demanding but rewarding.",
"I struggled with some of the more complex patterns.",
"Interesting study. I would be curious to learn the actual rules.",
"The tasks varied a lot in difficulty. Some were clear, others not at all.",
"I enjoyed the task overall. It reminded me of logical puzzles.",
"The time pressure on the observation pages was noticeable.",
]
# ── HTML-table helpers (copied from Combined_2lights/__init__.py) ──────────────
def html_table_freqs_original(case, freq):
"""Generate observation HTML table and return (html_str, shuffled_matrix)."""
h = np.tile(case[0], (freq[0], 1))
for i in range(len(freq) - 1):
a = np.tile(case[i + 1], (freq[i + 1], 1))
h = np.vstack((h, a))
np.random.shuffle(h)
red, blue, sound = [], [], []
other = ['?'] * sum(freq)
for i in range(sum(freq)):
red.append('
' if h[i][0] == 1 else '')
blue.append('' if h[i][1] == 1 else '')
sound.append(
'♪ DING ♪' if h[i][3] == 1
else '-'
)
html_mat = np.column_stack((red, blue, other, sound))
table = (
''
''
'| Red Light | '
'Blue Light | '
'Other Lights | '
'Sound |
'
)
for i in range(sum(freq)):
table += (
''
'| ' + html_mat[i][0] + ' | '
'' + html_mat[i][1] + ' | '
'' + html_mat[i][2] + ' | '
'' + html_mat[i][3] + ' |
'
)
table += '
'
return table, h
def _draw_observation_sound(case_def, freq, config_tuple):
"""Replicate Pay._draw_observation_sound: given a light config, sample
a random observation from the case/freq distribution and return its sound."""
config_cols = len(case_def[0]) - 2 # exclude Others and Sound
matching_rows, matching_weights = [], []
for i, row in enumerate(case_def):
if tuple(int(x) for x in row[:config_cols]) == config_tuple:
matching_rows.append(row)
matching_weights.append(freq[i] if freq[i] > 0 else 1)
if not matching_rows:
return None
drawn_row = random.choices(matching_rows, weights=matching_weights, k=1)[0]
return int(drawn_row[-1])
def sample_balanced_guess_rows(case_def, freq, guesses_per_config=3):
"""Sample 12 guess rows: 3 per light config, no two consecutive with the same config."""
n_cols = len(case_def[0])
config_cols = n_cols - 2 # exclude Others and Sound
config_groups = {}
for i, row in enumerate(case_def):
config = tuple(row[:config_cols])
config_groups.setdefault(config, {'rows': [], 'weights': []})
config_groups[config]['rows'].append(list(row))
config_groups[config]['weights'].append(freq[i])
config_pools = {}
for config, group in config_groups.items():
w = group['weights']
if sum(w) == 0:
w = [1] * len(w)
config_pools[config] = random.choices(group['rows'], weights=w, k=guesses_per_config)
result = []
prev = None
total = sum(len(v) for v in config_pools.values())
for _ in range(total):
avail = {k: v for k, v in config_pools.items() if len(v) > 0 and k != prev}
if not avail:
avail = {k: v for k, v in config_pools.items() if len(v) > 0}
max_cnt = max(len(v) for v in avail.values())
top = [k for k, v in avail.items() if len(v) == max_cnt]
chosen = random.choice(top)
result.append(config_pools[chosen].pop())
prev = chosen
return result
# ── Helpers ───────────────────────────────────────────────────────────────────
def rand_code():
return ''.join(random.choices(string.ascii_lowercase + string.digits, k=8))
def rand_prolific_id():
return ''.join(random.choices('0123456789abcdef', k=24))
def sanitize(value):
"""Replicate oTree's per-app CSV cell serialisation."""
if value is None:
return ''
if isinstance(value, str):
# replace newlines (e.g. inside player.case numpy repr)
return value.replace('\n', ' ').replace('\r', ' ')
return value
# ── Column headers ────────────────────────────────────────────────────────────
PARTICIPANT_STD_COLS = [
'participant.code', 'participant.label', 'participant.id_in_session',
'participant.time_started_utc', 'participant.visited',
'participant.mturk_worker_id', 'participant.mturk_assignment_id',
]
SESSION_COLS = [
'session.code', 'session.label', 'session.is_demo',
'session.config.participation_fee', 'session.config.real_world_currency_per_point',
]
PARTICIPANT_FIELD_COLS = [
'participant.notes', 'participant.cases_ordered', 'participant.realized_cases',
'participant.light_list', 'participant.guesses', 'participant.order_names',
'participant.sound', 'participant.current_case', 'participant.original_color',
'participant.guess_values', 'participant.guess_configs', 'participant.correct_predictions',
]
# Player custom fields for Combined_2lights (in model definition order)
PLAYER_CUSTOM_COLS = [
'ID_subject', 'notes', 'Example_notes',
'case', 'machine_name', 'case_order',
'error', 'original_color', 'table',
'explanation', 'bonus_message_effect', 'advice_text',
'predicted_correct_other',
'predicted_correct_self', 'certainty',
'difficulty', 'difficulty_certainty',
'prediction_strategy', 'final_comments',
'page_load_ts', 'time_on_page',
'num_wrong',
'cq1', 'cq2', 'cq3', 'cq4', 'cq5',
'n_lights',
'guess1', 'guess2', 'guess3', 'guess4', 'guess5', 'guess6',
'guess7', 'guess8', 'guess9', 'guess10', 'guess11', 'guess12',
'guess_example',
'row1', 'row2', 'row3', 'row4', 'row5', 'row6',
'row7', 'row8', 'row9', 'row10', 'row11', 'row12',
'guess_configs_json', 'order_names_json',
]
PLAYER_BUILTIN_COLS = ['player.id_in_group', 'player.role', 'player.payoff']
PLAYER_HEADER = PLAYER_BUILTIN_COLS + [f'player.{c}' for c in PLAYER_CUSTOM_COLS]
TAIL_COLS = ['subsession.round_number', 'group.id_in_subsession']
MAIN_HEADER = (
PARTICIPANT_STD_COLS + SESSION_COLS + PARTICIPANT_FIELD_COLS
+ PLAYER_HEADER + TAIL_COLS
)
# Pay player custom fields (in model definition order)
PAY_CUSTOM_COLS = ['payment_details', 'bonus_total', 'n_machines', 'n_correct_drawn']
PAY_PLAYER_HEADER = PLAYER_BUILTIN_COLS + [f'player.{c}' for c in PAY_CUSTOM_COLS]
PAY_HEADER = (
PARTICIPANT_STD_COLS + SESSION_COLS + PARTICIPANT_FIELD_COLS
+ PAY_PLAYER_HEADER + TAIL_COLS
)
# ── Data generation ───────────────────────────────────────────────────────────
main_rows = []
pay_rows = []
for pid in range(1, N_PLAYERS + 1):
p_code = rand_code()
prolific = rand_prolific_id()
t_start = SESSION_START + datetime.timedelta(
minutes=random.randint(pid * 2 - 2, pid * 2 + 30))
t_start_s = str(t_start) # e.g. '2025-06-20 09:05:12'
# Shuffle machine order for this participant
order = MACHINE_NAMES.copy()
random.shuffle(order)
# original_color = 1 always (RANDOMIZE_COLORS = 0, 2-light case)
original_colors = [1] * N_ROUNDS
# ── Participant-level accumulators ─────────────────────────────────────
p_notes = [] # one note string per round
p_guesses = [] # correctness: 1/0, 5×12 = 60 values
p_sound = [] # actual sound outcomes, 60 values
p_guess_values = [] # player's guess (0/1), 60 values
p_guess_configs = [] # config string e.g. "(0, 1)", 60 values
p_realized = [] # shuffled matrix per round (list of lists)
p_current_case = None # overwritten each round → last round's matrix
# Build cases_ordered = [[case1, freq1], [case1, freq2], ...]
cases_ordered = [[case1, MACHINE_FREQS[m]] for m in order]
# Comprehension (round 1 only): all correct on first attempt
cqs = CQ_CORRECT.copy()
num_wrong_r1 = random.randint(0, 5) # total wrong attempts before passing
guess_ex_val = random.randint(0, 1)
example_notes_val = random.choice(EXAMPLE_NOTES_POOL)
round_records = []
for r_idx in range(N_ROUNDS):
machine = order[r_idx]
freq = MACHINE_FREQS[machine]
orig_c = 1 # always 1
# Generate observation table
table_html, matrix = html_table_freqs_original(case1, freq)
# player.case: str(numpy matrix) with \n sanitised later
case_str = str(matrix)
# participant.realized_cases uses the matrix converted to a Python list
matrix_list = matrix.tolist()
p_realized.append(matrix_list)
p_current_case = matrix_list
# Guess rows (sampled balanced)
guess_rows = sample_balanced_guess_rows(case1, freq, GUESSES_PER_CONFIG)
row_strs = [str(np.array(guess_rows[i])) for i in range(N_GUESSES)]
# Bot guesses (~65% accuracy)
player_guesses = []
round_configs = []
for row in guess_rows:
sound_val = row[-1]
config = tuple(row[:2]) # (Red, Blue)
guess = sound_val if random.random() < 0.65 else (1 - sound_val)
player_guesses.append(int(guess))
correct = 1 if guess == sound_val else 0
p_guesses.append(correct)
p_sound.append(sound_val)
p_guess_values.append(int(guess))
round_configs.append(str(config))
p_guess_configs.append(str(config))
# Real participants write notes without machine-name prefix;
# participant.notes is populated from player.notes in Machine.before_next_page,
# so both must hold identical strings.
p_notes.append(random.choice(NOTES_POOL))
# guess_configs_json: JSON of this round's 12 config strings
guess_configs_json_val = json.dumps(round_configs)
# order_names_json: JSON of the full order (set every round by Load)
order_names_json_val = json.dumps(order)
# player.case_order: str(cases_ordered) in round 1 only
case_order_val = str(cases_ordered) if r_idx == 0 else None
# Timing
page_load_ts = t_start.timestamp() + r_idx * 400 + random.uniform(10, 60)
time_on_page = round(random.uniform(30.0, 170.0), 4)
# Post_Guesses_Confidence (every round)
predicted_correct_self = random.randint(5, 12)
certainty = random.choice(range(0, 101, 5))
# PredictionStrategy + FinalComments (last round only)
is_last = (r_idx == N_ROUNDS - 1)
difficulty = random.randint(1, 5) if is_last else None
difficulty_certainty = random.choice(range(0, 101, 5)) if is_last else None
prediction_strategy = random.choice(STRATEGY_POOL) if is_last else None
final_comments = random.choice(COMMENT_POOL) if is_last else None
round_records.append(dict(
machine=machine, table_html=table_html,
case_str=case_str, case_order_val=case_order_val,
row_strs=row_strs, player_guesses=player_guesses,
guess_configs_json=guess_configs_json_val,
order_names_json=order_names_json_val,
predicted_correct_self=predicted_correct_self, certainty=certainty,
difficulty=difficulty, difficulty_certainty=difficulty_certainty,
prediction_strategy=prediction_strategy, final_comments=final_comments,
page_load_ts=round(page_load_ts, 4), time_on_page=time_on_page,
))
# ── Participant-level fields ───────────────────────────────────────────
# Serialised as JSON (matching oTree's export of PARTICIPANT_FIELDS)
pf_notes = json.dumps(p_notes)
pf_cases_ordered = json.dumps(cases_ordered)
pf_realized_cases = json.dumps(p_realized)
pf_light_list = '' # never set → None → ''
pf_guesses = json.dumps(p_guesses)
pf_order_names = json.dumps(order)
pf_sound = json.dumps(p_sound)
pf_current_case = json.dumps(p_current_case)
pf_original_color = json.dumps([1] * N_ROUNDS)
pf_guess_values = json.dumps(p_guess_values)
pf_guess_configs = json.dumps(p_guess_configs)
pf_correct_preds = json.dumps(CORRECT_PREDICTIONS)
# ── Simulate Pay (FeedbackDrawSingle logic) ────────────────────────────
# 1) Pick one random machine
chosen_m_idx = random.randint(0, N_ROUNDS - 1)
machine_name = order[chosen_m_idx]
freq_pay = MACHINE_FREQS[machine_name]
# 2) Pick one random prediction from that machine
start = chosen_m_idx * N_GUESSES
m_gv = p_guess_values[start:start + N_GUESSES]
m_gc = p_guess_configs[start:start + N_GUESSES]
draw_idx = random.randint(0, N_GUESSES - 1)
drawn_guess = m_gv[draw_idx]
drawn_config = m_gc[draw_idx]
# 3) Draw a random observation with the same light config from the case data
config_tuple = tuple(int(x) for x in drawn_config.strip('()').split(','))
drawn_sound = _draw_observation_sound(case1, freq_pay, config_tuple)
is_correct = (drawn_guess == drawn_sound) if drawn_sound is not None else False
n_correct_pay = 1 if is_correct else 0
pay_details_list = [{
'machine': machine_name,
'draw_index': draw_idx + 1,
'config': drawn_config,
'guess': drawn_guess,
'drawn_sound': drawn_sound,
'is_correct': is_correct,
}]
bonus_total = n_correct_pay * 1.60 # £1.60 if correct, £0 otherwise
# ── Build Combined_2lights rows ────────────────────────────────────────
for r_idx, rd in enumerate(round_records):
r = r_idx + 1
is_r1 = r == 1
participant_std = [
p_code, '', pid, t_start_s, True, '', '',
]
session_vals = [
SESSION_CODE, '', False,
PARTICIPATION_FEE, RW_CURRENCY_PER_POINT,
]
participant_fields = [
pf_notes, pf_cases_ordered, pf_realized_cases,
pf_light_list, pf_guesses, pf_order_names,
pf_sound, pf_current_case, pf_original_color,
pf_guess_values, pf_guess_configs, pf_correct_preds,
]
player_vals = {
'id_in_group': 1,
'role': '',
'payoff': 0.0,
# Custom fields:
'ID_subject': prolific if is_r1 else '',
'notes': p_notes[r_idx],
'Example_notes': example_notes_val if is_r1 else '',
'case': sanitize(rd['case_str']),
'machine_name': rd['machine'],
'case_order': str(cases_ordered) if is_r1 else '',
'error': 0,
'original_color': 1,
'table': rd['table_html'],
'explanation': '', # Explicit page not in page_sequence
'bonus_message_effect': '', # page commented out
'advice_text': '', # page commented out
'predicted_correct_other': '', # page commented out
'predicted_correct_self': rd['predicted_correct_self'],
'certainty': rd['certainty'],
'difficulty': rd['difficulty'] if rd['difficulty'] is not None else '',
'difficulty_certainty': rd['difficulty_certainty'] if rd['difficulty_certainty'] is not None else '',
'prediction_strategy': rd['prediction_strategy'] if rd['prediction_strategy'] is not None else '',
'final_comments': rd['final_comments'] if rd['final_comments'] is not None else '',
'page_load_ts': rd['page_load_ts'],
'time_on_page': rd['time_on_page'],
'num_wrong': num_wrong_r1 if is_r1 else 0,
'cq1': cqs['cq1'] if is_r1 else '',
'cq2': cqs['cq2'] if is_r1 else '',
'cq3': cqs['cq3'] if is_r1 else '',
'cq4': cqs['cq4'] if is_r1 else '',
'cq5': cqs['cq5'] if is_r1 else '',
'n_lights': 4,
'guess1': rd['player_guesses'][0],
'guess2': rd['player_guesses'][1],
'guess3': rd['player_guesses'][2],
'guess4': rd['player_guesses'][3],
'guess5': rd['player_guesses'][4],
'guess6': rd['player_guesses'][5],
'guess7': rd['player_guesses'][6],
'guess8': rd['player_guesses'][7],
'guess9': rd['player_guesses'][8],
'guess10': rd['player_guesses'][9],
'guess11': rd['player_guesses'][10],
'guess12': rd['player_guesses'][11],
'guess_example': guess_ex_val if is_r1 else '',
'row1': rd['row_strs'][0],
'row2': rd['row_strs'][1],
'row3': rd['row_strs'][2],
'row4': rd['row_strs'][3],
'row5': rd['row_strs'][4],
'row6': rd['row_strs'][5],
'row7': rd['row_strs'][6],
'row8': rd['row_strs'][7],
'row9': rd['row_strs'][8],
'row10': rd['row_strs'][9],
'row11': rd['row_strs'][10],
'row12': rd['row_strs'][11],
'guess_configs_json': rd['guess_configs_json'],
'order_names_json': rd['order_names_json'],
}
player_row = (
[player_vals['id_in_group'], player_vals['role'], player_vals['payoff']]
+ [player_vals[c] for c in PLAYER_CUSTOM_COLS]
)
row = (
participant_std
+ session_vals
+ participant_fields
+ player_row
+ [r, 1] # subsession.round_number, group.id_in_subsession
)
main_rows.append(row)
# ── Build Pay row ──────────────────────────────────────────────────────
pay_player_vals = {
'id_in_group': 1,
'role': '',
'payoff': bonus_total,
'payment_details': json.dumps(pay_details_list),
'bonus_total': bonus_total,
'n_machines': N_ROUNDS,
'n_correct_drawn': n_correct_pay,
}
pay_player_row = (
[pay_player_vals['id_in_group'], pay_player_vals['role'], pay_player_vals['payoff']]
+ [pay_player_vals[c] for c in PAY_CUSTOM_COLS]
)
pay_row = (
[p_code, '', pid, t_start_s, True, '', '']
+ [SESSION_CODE, '', False, PARTICIPATION_FEE, RW_CURRENCY_PER_POINT]
+ [pf_notes, pf_cases_ordered, pf_realized_cases,
pf_light_list, pf_guesses, pf_order_names,
pf_sound, pf_current_case, pf_original_color,
pf_guess_values, pf_guess_configs, pf_correct_preds]
+ pay_player_row
+ [1, 1] # subsession.round_number=1, group.id_in_subsession=1
)
pay_rows.append(pay_row)
# ── Write output files ────────────────────────────────────────────────────────
out_dir = os.path.join(os.path.dirname(__file__), 'bot_export_2lights')
os.makedirs(out_dir, exist_ok=True)
out_main = os.path.join(out_dir, 'Combined_2lights.csv')
with open(out_main, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(MAIN_HEADER)
writer.writerows(main_rows)
print(f"Combined_2lights.csv : {len(main_rows)} rows × {len(MAIN_HEADER)} columns")
out_pay = os.path.join(out_dir, 'Pay.csv')
with open(out_pay, 'w', newline='', encoding='utf-8') as f:
writer = csv.writer(f)
writer.writerow(PAY_HEADER)
writer.writerows(pay_rows)
print(f"Pay.csv : {len(pay_rows)} rows × {len(PAY_HEADER)} columns")
print("Done.")