n               = n(),
.groups = "drop"
)
# Dataset generation for the analysis on Value (breaking in favor of low complexity) -- not used ----
# Keep only rows where the true model was NOT extracted
M <- M %>% ungroup() %>% filter(!is.na(trueextracted) & trueextracted == 0 )
# # # Ensure numeric bestmachine, then set performanceN := 1{bestmachine == N}, else 0
#  M <- M %>%
#    mutate(bestmachine = as.numeric(as.character(bestmachine))) %>%
#    mutate(across(
#      matches("^performance\\d+$") & !all_of(paste0("performance", 401:408)),
#      ~ {
#        col_n <- as.integer(sub("^performance", "", cur_column()))
#       as.integer(!is.na(bestmachine) & bestmachine == col_n)
#      }
#    ))
# ---- Tie-breaker: if a row has multiple performanceN == 1,
#      keep 1 only for the N(s) with the *lowest* complexityN (ties kept)
perf_cols <- grep("^performance\\d+$", names(M), value = TRUE)
Ns        <- as.integer(sub("^performance", "", perf_cols))
# matrix of performanceN
perf_mat <- as.matrix(M[, perf_cols, drop = FALSE])
# aligned complexityN matrix (NA where complexityN column is absent)
comp_mat <- matrix(NA_real_, nrow(M), length(Ns))
colnames(comp_mat) <- perf_cols
for (j in seq_along(Ns)) {
cx <- paste0("complexity", Ns[j])
if (cx %in% names(M)) {
comp_mat[, j] <- suppressWarnings(as.numeric(as.character(M[[cx]])))
}
}
active      <- perf_mat == 1L & !is.na(perf_mat)
more_than_1 <- rowSums(active, na.rm = TRUE) > 1
# Only active cells should compete; set others (and NA complexities) to +Inf
comp_masked <- comp_mat
comp_masked[!active] <- Inf
comp_masked[is.na(comp_masked)] <- Inf
# Row-wise *minimum* complexity among active Ns
min_comp <- apply(comp_masked, 1, min)
rows_to_update <- which(more_than_1 & is.finite(min_comp))
if (length(rows_to_update)) {
# Select cells whose complexity equals the row minimum (ties allowed)
selected <- active & sweep(comp_masked, 1, min_comp, FUN = "==")
for (i in rows_to_update) {
perf_mat[i, ] <- as.integer(selected[i, ])
}
}
# Write back the possibly updated one-hots
M[, perf_cols] <- perf_mat
#
# 2) Keep meta + families to summarise
#
wanted <- c("ID","task","name","math","stringnum","trueextracted","modelpartitions")
M <- M %>%
select(
any_of(wanted),
matches("^performanceL\\d+$|^performance\\d+$|^complexity\\d+$")
)
#
# 3) Build the (wide) per-n summary table
#
DF <- M %>% ungroup()
# Column families
perf_cols   <- grep("^performance\\d+$",  names(DF), value = TRUE)
perfL_cols  <- grep("^performanceL\\d+$", names(DF), value = TRUE)
compl_cols  <- grep("^complexity\\d+$",   names(DF), value = TRUE)
# Ns shared by performanceN and performanceLN
perf_n   <- as.integer(str_remove(perf_cols,  "^performance"))
perfL_n  <- as.integer(str_remove(perfL_cols, "^performanceL"))
common_n <- sort(intersect(perf_n, perfL_n))
# Coerce numerics safely
DFnum <- DF %>%
mutate(across(all_of(c(perf_cols, perfL_cols, compl_cols)),
~ suppressWarnings(as.numeric(as.character(.x)))))
# Helper: collapse complexity<n> to a single value for n
get_complexity_value <- function(n) {
cx <- paste0("complexity", n)
if (!cx %in% names(DFnum)) return(NA_real_)
vals <- DFnum[[cx]]
u <- unique(vals[!is.na(vals)])
if (length(u) == 1L) u else mean(vals, na.rm = TRUE)
}
# Per-n tables: columns = observed levels of performanceL<n> (ascending), values = mean(performance<n>)
tables <- lapply(common_n, function(n) {
ycol <- paste0("performance",  n)  # indicator 1{bestmachine == n} after tie-break
gcol <- paste0("performanceL", n)
tmp <- DFnum %>% transmute(level = .data[[gcol]], value = .data[[ycol]])
levs <- sort(unique(tmp$level[!is.na(tmp$level)]))
tmp %>%
group_by(level) %>%
summarise(avg = mean(value, na.rm = TRUE), .groups = "drop") %>%
mutate(level = factor(level, levels = levs)) %>%
arrange(level) %>%
pivot_wider(names_from = level, values_from = avg) %>%
mutate(
row = paste0("performance", n),
n   = n,
complexity = get_complexity_value(n),
.before = 1
) %>%
relocate(n, complexity, .after = row)
})
names(tables) <- paste0("n_", common_n)
# Combine; order rows by n and level-columns numerically
combined <- bind_rows(tables) %>% arrange(n)
lvl_cols <- setdiff(names(combined), c("row","n","complexity"))
ord <- order(suppressWarnings(as.numeric(lvl_cols)), na.last = TRUE)
combined <- combined %>% select(row, n, complexity, all_of(lvl_cols[ord]))
combined_df <- as.data.frame(combined)
rownames(combined_df) <- combined_df$row
combined_df$row <- NULL
#
# 4) Append complexity-pooled rows (from ORIGINAL DFnum)
#    and ONE additional row pooling ALL n together
#
# Map n -> its collapsed complexity (using the same helper)
comp_map <- tibble(
n = common_n,
complexity = sapply(common_n, get_complexity_value)
)
# Build long data over ALL n from DFnum (use n_i to avoid tibble scoping clash)
long_all <- bind_rows(lapply(common_n, function(n_i) {
ycol <- paste0("performance",  n_i)
gcol <- paste0("performanceL", n_i)
lvl <- DFnum[[gcol]]
val <- DFnum[[ycol]]
tibble(
n          = n_i,
complexity = comp_map$complexity[match(n_i, comp_map$n)],
level      = lvl,
value      = val
)
}))
# (a) Average within complexity × level (pooled over n with that complexity)
agg_complex <- long_all %>%
filter(!is.na(level)) %>%
group_by(complexity, level) %>%
summarise(avg = mean(value, na.rm = TRUE), .groups = "drop")
# Make one row per complexity; columns = ascending levels
levs_all <- sort(unique(agg_complex$level))
rows_complex <- agg_complex %>%
mutate(level = factor(level, levels = levs_all)) %>%
arrange(complexity, level) %>%
pivot_wider(names_from = level, values_from = avg) %>%
mutate(
row = paste0("complexity_", formatC(complexity, format = "fg", digits = 6)),
n   = NA_integer_,
.before = 1
) %>%
relocate(n, complexity, .after = row)
# (b) ONE additional row pooling ALL n together (overall average by level)
row_all <- long_all %>%
filter(!is.na(level)) %>%
group_by(level) %>%
summarise(avg = mean(value, na.rm = TRUE), .groups = "drop") %>%
mutate(level = factor(level, levels = levs_all)) %>%
arrange(level) %>%
pivot_wider(names_from = level, values_from = avg) %>%
mutate(
row = "All",
n   = NA_integer_,
complexity = NA_real_,
.before = 1
) %>%
relocate(n, complexity, .after = row)
# Align columns with combined_df and bind the new rows
align_to <- names(combined_df)
for (nm in c("rows_complex", "row_all")) {
dfx <- get(nm)
miss <- setdiff(align_to, names(dfx))
if (length(miss)) dfx[miss] <- NA
dfx <- dfx[, align_to, drop = FALSE]
assign(nm, dfx)
}
combined_df2 <- bind_rows(combined_df, rows_complex, row_all)
#
# 5) Replace zeros with NA in *numeric level columns only*
#num_cols <- names(which(sapply(combined_df2, is.numeric)))
#num_cols <- setdiff(num_cols, c("n","complexity"))
#combined_df2[num_cols] <- lapply(combined_df2[num_cols], function(v) {
#  v[v == 0] <- NA
#  v
#})
#
# 6) Row names:
#    - for pooled-by-complexity rows (n == NA & complexity not NA)
#      use the complexity number
#    - for the overall pooled row (n == NA & complexity NA) use "All"
#    - keep per-n rows as-is (their current rownames)
#
rn <- rownames(combined_df2)
pool_comp <- is.na(combined_df2$n) & !is.na(combined_df2$complexity)
rn[pool_comp] <- formatC(combined_df2$complexity[pool_comp], format = "fg", digits = 6)
pool_all <- is.na(combined_df2$n) & is.na(combined_df2$complexity)
rn[pool_all] <- "All"
rownames(combined_df2) <- make.unique(rn, sep = "_")
# ---- Final dataset
combined_df2
df_string <- df %>%
group_by(stringnum, modelpartitions) %>%
summarise(
mean_max1       = mean(max_value1, na.rm = TRUE),
mean_max2       = mean(max_value2, na.rm = TRUE),
mean_max3       = mean(max_value3, na.rm = TRUE),
mean_max4       = mean(max_value4, na.rm = TRUE),
mean_wrong      = mean(wrong, na.rm = TRUE),
mean_extracted  = mean(trueextracted, na.rm = TRUE),
n               = n(),
.groups = "drop"
)
# Table Disagreement ------
table_math <- M %>%
# keep only relevant variables
select(math, random, trueextracted, wrong) %>%
# reshape to long format
pivot_longer(
cols = c(random, trueextracted, wrong),
names_to = "category",
values_to = "value"
) %>%
# count subjects per math × category
group_by(math, category) %>%
summarise(
n = sum(value == 1, na.rm = TRUE),
total = n(),
.groups = "drop"
) %>%
# compute row-wise proportions
mutate(proportion = n / total) %>%
# reshape back to wide table
select(math, category, proportion) %>%
pivot_wider(
names_from = category,
values_from = proportion
) %>%
arrange(math)
# Dataset Generation -----------------------------------------------------
PATH_TO_DATA <- '~/Desktop/On the complexity of forming mental models/Replication Package/My analysis'
#UPLOADING MY VERSION
A1<-read.csv(file.path(PATH_TO_DATA,"extraction_parsed_additional_myversion.csv"))
PATH_TO_DATA <- '~/Desktop/On the complexity of forming mental models/Replication Package'
string<-read.csv(file.path(PATH_TO_DATA,"/Design/extractstrings.csv"))
A1<-A1%>%inner_join(string)
#Generating advicecode and maxadvicematch
A1_old<-read.csv(file.path(PATH_TO_DATA,"/Data/extraction_parsed_additional.csv"))
A1$advicecode <- A1_old$advicecode
A1$maxadvicematch <- A1_old$maxadvicematch
#Keep the correct variable
#A1<-A1%>%select(ID,task,name,math,stringnum,guesses,errors,outputs,correct,maxcorrect,advicecode,maxadvicematch,guessTime,adviceTime,times,states,a,ax,ay,b,bx,by,X,Xx,Xy,Y,Yx,Yy,maxcorrect_LD,maxcorrectS_LD,complexityBM_LD,complexityBMS_LD,modelpartitions,perfect_mixed)
safe_binom_p <- function(x, n, p = 0.5) {
if (n == 0) return(0)
binom.test(x, n, p)$p.value
}
M<-A1%>%
filter(!is.na(times))%>%
select(-times)%>%
arrange(ID,name,stringnum)%>%
group_by(ID,name,stringnum)%>%
mutate(
cumMistakes=cumsum(guesses!=outputs),
articulated=1*(maxadvicematch==6),
firstMistake=sum(cumMistakes==0)+1,
trueextracted=as.integer(binom.test(mean(correct),12,1/2)$p.value<=0.01 & correct>6),
#  trueextracted=as.integer(correct==12),
#We also want that the true model was not extracted
wrongD=as.integer(binom.test(mean(maxcorrect),12,1/2)$p.value<=0.01 & correct>6 & trueextracted==0),
#The stochastic ones that condition on the input
#Since we have multiple observations for each group mean gives back a scalar
wrongSI= as.integer((binom.test(mean(ax),mean(a),1/2)$p.value<=0.01 & (binom.test(mean(by),mean(b),1/2)$p.value>0.10 | binom.test(mean(bx),mean(b),1/2)$p.value>0.10)) |
(binom.test(mean(bx),mean(b),1/2)$p.value<=0.01 & (binom.test(mean(ay),mean(a),1/2)$p.value>0.10 | binom.test(mean(ax),mean(a),1/2)$p.value>0.10))|(binom.test(mean(ay),mean(a),1/2)$p.value<=0.01 & (binom.test(mean(by),mean(b),1/2)$p.value>0.10 | binom.test(mean(bx),mean(b),1/2)$p.value>0.10)) |
(binom.test(mean(by),mean(b),1/2)$p.value<=0.01 & (binom.test(mean(ay),mean(a),1/2)$p.value>0.10 | binom.test(mean(ax),mean(a),1/2)$p.value>0.10))),
performance401 = as.integer(
(binom.test(mean(ax), mean(a), 1/2)$p.value <= 0.01) &
(binom.test(mean(by), mean(b), 1/2)$p.value > 0.10 |
binom.test(mean(bx), mean(b), 1/2)$p.value > 0.10)
),
performance403 = as.integer(
(binom.test(mean(bx), mean(b), 1/2)$p.value <= 0.01) &
(binom.test(mean(ay), mean(a), 1/2)$p.value > 0.10 |
binom.test(mean(ax), mean(a), 1/2)$p.value > 0.10)
),
performance402 = as.integer(
(binom.test(mean(ay), mean(a), 1/2)$p.value <= 0.01) &
(binom.test(mean(by), mean(b), 1/2)$p.value > 0.10 |
binom.test(mean(bx), mean(b), 1/2)$p.value > 0.10)
),
performance404 = as.integer(
(binom.test(mean(by), mean(b), 1/2)$p.value <= 0.01) &
(binom.test(mean(ay), mean(a), 1/2)$p.value > 0.10 |
binom.test(mean(ax), mean(a), 1/2)$p.value > 0.10)
),
#The stochastic ones that condition on the output
wrongSO = as.integer((safe_binom_p(mean(Xx),mean(X),1/2)<=0.01 & (safe_binom_p(mean(Yy),mean(Y),1/2)>0.10 | safe_binom_p(mean(Yx),mean(Y),1/2)>0.10)) |
(safe_binom_p(mean(Yx),mean(Y),1/2)<=0.01 & (safe_binom_p(mean(Xy),mean(X),1/2)>0.10 | safe_binom_p(mean(Xx),mean(X),1/2)>0.10))|(safe_binom_p(mean(Xy),mean(X),1/2)<=0.01 & (safe_binom_p(mean(Yy),mean(Y),1/2)>0.10 | safe_binom_p(mean(Yx),mean(Y),1/2)>0.10)) |
(safe_binom_p(mean(Yy),mean(Y),1/2)<=0.01 & (safe_binom_p(mean(Xy),mean(X),1/2)>0.10 | safe_binom_p(mean(Xx),mean(X),1/2)>0.10))),
performance405 = as.integer(
(safe_binom_p(mean(Xx),mean(X),1/2) <= 0.01) &
(safe_binom_p(mean(Yy),mean(Y),1/2) > 0.10 | safe_binom_p(mean(Yx),mean(Y),1/2) > 0.10)
),
performance407 = as.integer(
(safe_binom_p(mean(Yx),mean(Y),1/2) <= 0.01) &
(safe_binom_p(mean(Xy),mean(X),1/2) > 0.10 | safe_binom_p(mean(Xx),mean(X),1/2) > 0.10)
),
performance406 = as.integer(
(safe_binom_p(mean(Xy),mean(X),1/2) <= 0.01) &
(safe_binom_p(mean(Yy),mean(Y),1/2) > 0.10 | safe_binom_p(mean(Yx),mean(Y),1/2) > 0.10)
),
performance408 = as.integer(
(safe_binom_p(mean(Yy),mean(Y),1/2) <= 0.01) &
(safe_binom_p(mean(Xy),mean(X),1/2) > 0.10 | safe_binom_p(mean(Xx),mean(X),1/2) > 0.10)
),
complexity401=2,
complexity402=2,
complexity403=2,
complexity404=2,
complexity405=2,
complexity406=2,
complexity407=2,
complexity408=2,
#Wrong SI
wrongSI = as.integer(
(replace_na(wrongSI == 1, FALSE) ) &
wrongD==0 &
trueextracted == 0
),
#Wrong stochastic overall
wrongSO = as.integer(
( replace_na(wrongSO == 1, FALSE)) &
wrongD==0 &
trueextracted == 0
),
#Wrong stochastic overall
wrongS = as.integer(
(replace_na(wrongSI == 1, FALSE) |
replace_na(wrongSO == 1, FALSE)) &
wrongD==0 &
trueextracted == 0
),
#Wrong stochastic overall
wrong = as.integer(
(replace_na(wrongD == 1, FALSE) |
replace_na(wrongS == 1, FALSE)) &
trueextracted == 0
),
#random models
random = as.integer((trueextracted == 0) & (wrong == 0)),
complexityW = complexityBM_LD,               # start as copy, IN MATLAB WE ALREADY SET IT TO THE SIMPLEST!!
complexityWM = complexityBM_LD,               # start as copy
complexityW = if_else(maxcorrectS_LD > maxcorrect_LD, 2, complexityW, # Put the complexity equal to 2 if the best model is stochastic
missing = complexityW)   ,
complexityWM = if_else(complexityBM_LD !=1, 2, complexityWM, # Put the complexity equal to 2 if the best model is stochastic
missing = complexityWM)   ,
perfect=as.integer(correct==12),
)%>%
summarise(
math = dplyr::first(math),
across(where(is.numeric), ~ mean(.x, na.rm = TRUE)),
.groups = "drop"
)%>%
mutate(
machine=ifelse(name=='auto2','A2',
ifelse(name=='auto3','A3',
ifelse(name=='hybrid2','H2',
ifelse(name=='hybrid3','H3',
ifelse(name=='instruct2','I2',
ifelse(name=='instruct3','I3',
ifelse(name=='switch2','S2','S3')
)
)
)
)
)
),
order=ifelse(name=='auto2',1,
ifelse(name=='auto3',5,
ifelse(name=='hybrid2',3,
ifelse(name=='hybrid3',7,
ifelse(name=='instruct2',2,
ifelse(name=='instruct3',6,
ifelse(name=='switch2',4,8)
)
)
)
)
)
)
)%>%
#varibales at the ID level
group_by(machine, order, stringnum)%>%
mutate(
extractRate = mean(trueextracted, na.rm = TRUE),
wrongDRate  = mean(wrongD,        na.rm = TRUE),
wrongSRate  = mean(wrongS,        na.rm = TRUE),
wrongRate   = mean(wrong,         na.rm = TRUE),
randomRate  = mean(random,        na.rm = TRUE),
max_valueD  = mean(maxcorrect_LD, na.rm = TRUE),
max_value1  = mean(maxcorrect1_LD, na.rm = TRUE),
max_value2  = mean(maxcorrect2_LD, na.rm = TRUE),
max_value3  = mean(maxcorrect3_LD, na.rm = TRUE),
max_value4  = mean(maxcorrect4_LD, na.rm = TRUE),
max_valueS  = mean(maxcorrectS_LD, na.rm = TRUE),
max_valueW = if_else(
is.na(maxcorrect_LD) & is.na(maxcorrectS_LD),
NA_real_,
pmax(maxcorrect_LD, maxcorrectS_LD, na.rm = TRUE)
),
max_valueM = if_else(
is.na(maxcorrect3_LD) & is.na(maxcorrect2_LD),
NA_real_,
pmax(maxcorrect2_LD, maxcorrect3_LD, na.rm = TRUE)
),
complexityWD = mean(complexityBM_LD,  na.rm = TRUE),
complexityWS = mean(complexityBMS_LD, na.rm = TRUE),
)
#Sanity check
cols <- c("wrongD","wrongS","trueextracted","random")
bad_rows <- M %>%
mutate(
.row = row_number(),
sum1 = rowSums(across(all_of(cols), ~ as.integer(coalesce(.x, 0) == 1L)))
) %>%
filter(sum1 != 1) %>%
select(.row, all_of(cols), sum1)
count(bad_rows)
#Keeping machines
machines_keep <- c("H2", "S2", "H3", "I3", "S3")
# Replacing NA to 12 for performanceLn that achieve 12
# 1) Identify performanceL<number> columns
perfL_cols <- grep("^performanceL\\d+$", names(M), value = TRUE)
# 2) Replace 12 with NA in those columns
for (nm in perfL_cols) {
v <- M[[nm]]
if (is.factor(v)) {
# Convert factor -> numeric safely
v <- as.character(v)
v <- suppressWarnings(as.numeric(v))
}
v[v == 12] <- NA
M[[nm]] <- v
}
# (Optional) check if any 12s remain
sapply(M[perfL_cols], function(x) sum(x == 12, na.rm = TRUE))
# --- Filter + order ---
M_filtered <- M %>%
dplyr::filter(machine %in% machines_keep) %>%
dplyr::mutate(order = match(machine, machines_keep))
# first step: add misalignment to M
M <- M %>%
mutate(
nonc = case_when(
( performance1 >= 11 | performance1002 >= 11 ) & trueextracted==0 ~ 1,
TRUE ~ 0
)
)
M <- M %>%
mutate(
misalignment = case_when(
wrong==1 & nonc==0 ~ 1,
TRUE ~ 0
)
)
# Table Disagreement ------
table_math <- M %>%
# keep only relevant variables
select(math, random, trueextracted, wrong) %>%
# reshape to long format
pivot_longer(
cols = c(random, trueextracted, wrong),
names_to = "category",
values_to = "value"
) %>%
# count subjects per math × category
group_by(math, category) %>%
summarise(
n = sum(value == 1, na.rm = TRUE),
total = n(),
.groups = "drop"
) %>%
# compute row-wise proportions
mutate(proportion = n / total) %>%
# reshape back to wide table
select(math, category, proportion) %>%
pivot_wider(
names_from = category,
values_from = proportion
) %>%
arrange(math)
table_math
# optional: format nicely
table_math_latex <- table_math %>%
mutate(across(-math, ~ round(.x, 3)))
# print LaTeX table
kable(
table_math_latex,
format = "latex",
booktabs = TRUE,
caption = "Proportion of subjects by math level and model classification",
col.names = c("Math level", "Random", "True extracted", "Wrong")
)
# TAB -------
df_string <- df %>%
group_by(stringnum, modelpartitions) %>%
summarise(
mean_max1       = mean(max_value1, na.rm = TRUE),
mean_max2       = mean(max_value2, na.rm = TRUE),
mean_max3       = mean(max_value3, na.rm = TRUE),
mean_max4       = mean(max_value4, na.rm = TRUE),
mean_wrong      = mean(wrong, na.rm = TRUE),
mean_extracted  = mean(trueextracted, na.rm = TRUE),
n               = n(),
.groups = "drop"
)
