n = n(), .groups = "drop" ) # Dataset generation for the analysis on Value (breaking in favor of low complexity) -- not used ---- # Keep only rows where the true model was NOT extracted M <- M %>% ungroup() %>% filter(!is.na(trueextracted) & trueextracted == 0 ) # # # Ensure numeric bestmachine, then set performanceN := 1{bestmachine == N}, else 0 # M <- M %>% # mutate(bestmachine = as.numeric(as.character(bestmachine))) %>% # mutate(across( # matches("^performance\\d+$") & !all_of(paste0("performance", 401:408)), # ~ { # col_n <- as.integer(sub("^performance", "", cur_column())) # as.integer(!is.na(bestmachine) & bestmachine == col_n) # } # )) # ---- Tie-breaker: if a row has multiple performanceN == 1, # keep 1 only for the N(s) with the *lowest* complexityN (ties kept) perf_cols <- grep("^performance\\d+$", names(M), value = TRUE) Ns <- as.integer(sub("^performance", "", perf_cols)) # matrix of performanceN perf_mat <- as.matrix(M[, perf_cols, drop = FALSE]) # aligned complexityN matrix (NA where complexityN column is absent) comp_mat <- matrix(NA_real_, nrow(M), length(Ns)) colnames(comp_mat) <- perf_cols for (j in seq_along(Ns)) { cx <- paste0("complexity", Ns[j]) if (cx %in% names(M)) { comp_mat[, j] <- suppressWarnings(as.numeric(as.character(M[[cx]]))) } } active <- perf_mat == 1L & !is.na(perf_mat) more_than_1 <- rowSums(active, na.rm = TRUE) > 1 # Only active cells should compete; set others (and NA complexities) to +Inf comp_masked <- comp_mat comp_masked[!active] <- Inf comp_masked[is.na(comp_masked)] <- Inf # Row-wise *minimum* complexity among active Ns min_comp <- apply(comp_masked, 1, min) rows_to_update <- which(more_than_1 & is.finite(min_comp)) if (length(rows_to_update)) { # Select cells whose complexity equals the row minimum (ties allowed) selected <- active & sweep(comp_masked, 1, min_comp, FUN = "==") for (i in rows_to_update) { perf_mat[i, ] <- as.integer(selected[i, ]) } } # Write back the possibly updated one-hots M[, perf_cols] <- perf_mat # # 2) Keep meta + families to summarise # wanted <- c("ID","task","name","math","stringnum","trueextracted","modelpartitions") M <- M %>% select( any_of(wanted), matches("^performanceL\\d+$|^performance\\d+$|^complexity\\d+$") ) # # 3) Build the (wide) per-n summary table # DF <- M %>% ungroup() # Column families perf_cols <- grep("^performance\\d+$", names(DF), value = TRUE) perfL_cols <- grep("^performanceL\\d+$", names(DF), value = TRUE) compl_cols <- grep("^complexity\\d+$", names(DF), value = TRUE) # Ns shared by performanceN and performanceLN perf_n <- as.integer(str_remove(perf_cols, "^performance")) perfL_n <- as.integer(str_remove(perfL_cols, "^performanceL")) common_n <- sort(intersect(perf_n, perfL_n)) # Coerce numerics safely DFnum <- DF %>% mutate(across(all_of(c(perf_cols, perfL_cols, compl_cols)), ~ suppressWarnings(as.numeric(as.character(.x))))) # Helper: collapse complexity to a single value for n get_complexity_value <- function(n) { cx <- paste0("complexity", n) if (!cx %in% names(DFnum)) return(NA_real_) vals <- DFnum[[cx]] u <- unique(vals[!is.na(vals)]) if (length(u) == 1L) u else mean(vals, na.rm = TRUE) } # Per-n tables: columns = observed levels of performanceL (ascending), values = mean(performance) tables <- lapply(common_n, function(n) { ycol <- paste0("performance", n) # indicator 1{bestmachine == n} after tie-break gcol <- paste0("performanceL", n) tmp <- DFnum %>% transmute(level = .data[[gcol]], value = .data[[ycol]]) levs <- sort(unique(tmp$level[!is.na(tmp$level)])) tmp %>% group_by(level) %>% summarise(avg = mean(value, na.rm = TRUE), .groups = "drop") %>% mutate(level = factor(level, levels = levs)) %>% arrange(level) %>% pivot_wider(names_from = level, values_from = avg) %>% mutate( row = paste0("performance", n), n = n, complexity = get_complexity_value(n), .before = 1 ) %>% relocate(n, complexity, .after = row) }) names(tables) <- paste0("n_", common_n) # Combine; order rows by n and level-columns numerically combined <- bind_rows(tables) %>% arrange(n) lvl_cols <- setdiff(names(combined), c("row","n","complexity")) ord <- order(suppressWarnings(as.numeric(lvl_cols)), na.last = TRUE) combined <- combined %>% select(row, n, complexity, all_of(lvl_cols[ord])) combined_df <- as.data.frame(combined) rownames(combined_df) <- combined_df$row combined_df$row <- NULL # # 4) Append complexity-pooled rows (from ORIGINAL DFnum) # and ONE additional row pooling ALL n together # # Map n -> its collapsed complexity (using the same helper) comp_map <- tibble( n = common_n, complexity = sapply(common_n, get_complexity_value) ) # Build long data over ALL n from DFnum (use n_i to avoid tibble scoping clash) long_all <- bind_rows(lapply(common_n, function(n_i) { ycol <- paste0("performance", n_i) gcol <- paste0("performanceL", n_i) lvl <- DFnum[[gcol]] val <- DFnum[[ycol]] tibble( n = n_i, complexity = comp_map$complexity[match(n_i, comp_map$n)], level = lvl, value = val ) })) # (a) Average within complexity × level (pooled over n with that complexity) agg_complex <- long_all %>% filter(!is.na(level)) %>% group_by(complexity, level) %>% summarise(avg = mean(value, na.rm = TRUE), .groups = "drop") # Make one row per complexity; columns = ascending levels levs_all <- sort(unique(agg_complex$level)) rows_complex <- agg_complex %>% mutate(level = factor(level, levels = levs_all)) %>% arrange(complexity, level) %>% pivot_wider(names_from = level, values_from = avg) %>% mutate( row = paste0("complexity_", formatC(complexity, format = "fg", digits = 6)), n = NA_integer_, .before = 1 ) %>% relocate(n, complexity, .after = row) # (b) ONE additional row pooling ALL n together (overall average by level) row_all <- long_all %>% filter(!is.na(level)) %>% group_by(level) %>% summarise(avg = mean(value, na.rm = TRUE), .groups = "drop") %>% mutate(level = factor(level, levels = levs_all)) %>% arrange(level) %>% pivot_wider(names_from = level, values_from = avg) %>% mutate( row = "All", n = NA_integer_, complexity = NA_real_, .before = 1 ) %>% relocate(n, complexity, .after = row) # Align columns with combined_df and bind the new rows align_to <- names(combined_df) for (nm in c("rows_complex", "row_all")) { dfx <- get(nm) miss <- setdiff(align_to, names(dfx)) if (length(miss)) dfx[miss] <- NA dfx <- dfx[, align_to, drop = FALSE] assign(nm, dfx) } combined_df2 <- bind_rows(combined_df, rows_complex, row_all) # # 5) Replace zeros with NA in *numeric level columns only* #num_cols <- names(which(sapply(combined_df2, is.numeric))) #num_cols <- setdiff(num_cols, c("n","complexity")) #combined_df2[num_cols] <- lapply(combined_df2[num_cols], function(v) { # v[v == 0] <- NA # v #}) # # 6) Row names: # - for pooled-by-complexity rows (n == NA & complexity not NA) # use the complexity number # - for the overall pooled row (n == NA & complexity NA) use "All" # - keep per-n rows as-is (their current rownames) # rn <- rownames(combined_df2) pool_comp <- is.na(combined_df2$n) & !is.na(combined_df2$complexity) rn[pool_comp] <- formatC(combined_df2$complexity[pool_comp], format = "fg", digits = 6) pool_all <- is.na(combined_df2$n) & is.na(combined_df2$complexity) rn[pool_all] <- "All" rownames(combined_df2) <- make.unique(rn, sep = "_") # ---- Final dataset combined_df2 df_string <- df %>% group_by(stringnum, modelpartitions) %>% summarise( mean_max1 = mean(max_value1, na.rm = TRUE), mean_max2 = mean(max_value2, na.rm = TRUE), mean_max3 = mean(max_value3, na.rm = TRUE), mean_max4 = mean(max_value4, na.rm = TRUE), mean_wrong = mean(wrong, na.rm = TRUE), mean_extracted = mean(trueextracted, na.rm = TRUE), n = n(), .groups = "drop" ) # Table Disagreement ------ table_math <- M %>% # keep only relevant variables select(math, random, trueextracted, wrong) %>% # reshape to long format pivot_longer( cols = c(random, trueextracted, wrong), names_to = "category", values_to = "value" ) %>% # count subjects per math × category group_by(math, category) %>% summarise( n = sum(value == 1, na.rm = TRUE), total = n(), .groups = "drop" ) %>% # compute row-wise proportions mutate(proportion = n / total) %>% # reshape back to wide table select(math, category, proportion) %>% pivot_wider( names_from = category, values_from = proportion ) %>% arrange(math) # Dataset Generation ----------------------------------------------------- PATH_TO_DATA <- '~/Desktop/On the complexity of forming mental models/Replication Package/My analysis' #UPLOADING MY VERSION A1<-read.csv(file.path(PATH_TO_DATA,"extraction_parsed_additional_myversion.csv")) PATH_TO_DATA <- '~/Desktop/On the complexity of forming mental models/Replication Package' string<-read.csv(file.path(PATH_TO_DATA,"/Design/extractstrings.csv")) A1<-A1%>%inner_join(string) #Generating advicecode and maxadvicematch A1_old<-read.csv(file.path(PATH_TO_DATA,"/Data/extraction_parsed_additional.csv")) A1$advicecode <- A1_old$advicecode A1$maxadvicematch <- A1_old$maxadvicematch #Keep the correct variable #A1<-A1%>%select(ID,task,name,math,stringnum,guesses,errors,outputs,correct,maxcorrect,advicecode,maxadvicematch,guessTime,adviceTime,times,states,a,ax,ay,b,bx,by,X,Xx,Xy,Y,Yx,Yy,maxcorrect_LD,maxcorrectS_LD,complexityBM_LD,complexityBMS_LD,modelpartitions,perfect_mixed) safe_binom_p <- function(x, n, p = 0.5) { if (n == 0) return(0) binom.test(x, n, p)$p.value } M<-A1%>% filter(!is.na(times))%>% select(-times)%>% arrange(ID,name,stringnum)%>% group_by(ID,name,stringnum)%>% mutate( cumMistakes=cumsum(guesses!=outputs), articulated=1*(maxadvicematch==6), firstMistake=sum(cumMistakes==0)+1, trueextracted=as.integer(binom.test(mean(correct),12,1/2)$p.value<=0.01 & correct>6), # trueextracted=as.integer(correct==12), #We also want that the true model was not extracted wrongD=as.integer(binom.test(mean(maxcorrect),12,1/2)$p.value<=0.01 & correct>6 & trueextracted==0), #The stochastic ones that condition on the input #Since we have multiple observations for each group mean gives back a scalar wrongSI= as.integer((binom.test(mean(ax),mean(a),1/2)$p.value<=0.01 & (binom.test(mean(by),mean(b),1/2)$p.value>0.10 | binom.test(mean(bx),mean(b),1/2)$p.value>0.10)) | (binom.test(mean(bx),mean(b),1/2)$p.value<=0.01 & (binom.test(mean(ay),mean(a),1/2)$p.value>0.10 | binom.test(mean(ax),mean(a),1/2)$p.value>0.10))|(binom.test(mean(ay),mean(a),1/2)$p.value<=0.01 & (binom.test(mean(by),mean(b),1/2)$p.value>0.10 | binom.test(mean(bx),mean(b),1/2)$p.value>0.10)) | (binom.test(mean(by),mean(b),1/2)$p.value<=0.01 & (binom.test(mean(ay),mean(a),1/2)$p.value>0.10 | binom.test(mean(ax),mean(a),1/2)$p.value>0.10))), performance401 = as.integer( (binom.test(mean(ax), mean(a), 1/2)$p.value <= 0.01) & (binom.test(mean(by), mean(b), 1/2)$p.value > 0.10 | binom.test(mean(bx), mean(b), 1/2)$p.value > 0.10) ), performance403 = as.integer( (binom.test(mean(bx), mean(b), 1/2)$p.value <= 0.01) & (binom.test(mean(ay), mean(a), 1/2)$p.value > 0.10 | binom.test(mean(ax), mean(a), 1/2)$p.value > 0.10) ), performance402 = as.integer( (binom.test(mean(ay), mean(a), 1/2)$p.value <= 0.01) & (binom.test(mean(by), mean(b), 1/2)$p.value > 0.10 | binom.test(mean(bx), mean(b), 1/2)$p.value > 0.10) ), performance404 = as.integer( (binom.test(mean(by), mean(b), 1/2)$p.value <= 0.01) & (binom.test(mean(ay), mean(a), 1/2)$p.value > 0.10 | binom.test(mean(ax), mean(a), 1/2)$p.value > 0.10) ), #The stochastic ones that condition on the output wrongSO = as.integer((safe_binom_p(mean(Xx),mean(X),1/2)<=0.01 & (safe_binom_p(mean(Yy),mean(Y),1/2)>0.10 | safe_binom_p(mean(Yx),mean(Y),1/2)>0.10)) | (safe_binom_p(mean(Yx),mean(Y),1/2)<=0.01 & (safe_binom_p(mean(Xy),mean(X),1/2)>0.10 | safe_binom_p(mean(Xx),mean(X),1/2)>0.10))|(safe_binom_p(mean(Xy),mean(X),1/2)<=0.01 & (safe_binom_p(mean(Yy),mean(Y),1/2)>0.10 | safe_binom_p(mean(Yx),mean(Y),1/2)>0.10)) | (safe_binom_p(mean(Yy),mean(Y),1/2)<=0.01 & (safe_binom_p(mean(Xy),mean(X),1/2)>0.10 | safe_binom_p(mean(Xx),mean(X),1/2)>0.10))), performance405 = as.integer( (safe_binom_p(mean(Xx),mean(X),1/2) <= 0.01) & (safe_binom_p(mean(Yy),mean(Y),1/2) > 0.10 | safe_binom_p(mean(Yx),mean(Y),1/2) > 0.10) ), performance407 = as.integer( (safe_binom_p(mean(Yx),mean(Y),1/2) <= 0.01) & (safe_binom_p(mean(Xy),mean(X),1/2) > 0.10 | safe_binom_p(mean(Xx),mean(X),1/2) > 0.10) ), performance406 = as.integer( (safe_binom_p(mean(Xy),mean(X),1/2) <= 0.01) & (safe_binom_p(mean(Yy),mean(Y),1/2) > 0.10 | safe_binom_p(mean(Yx),mean(Y),1/2) > 0.10) ), performance408 = as.integer( (safe_binom_p(mean(Yy),mean(Y),1/2) <= 0.01) & (safe_binom_p(mean(Xy),mean(X),1/2) > 0.10 | safe_binom_p(mean(Xx),mean(X),1/2) > 0.10) ), complexity401=2, complexity402=2, complexity403=2, complexity404=2, complexity405=2, complexity406=2, complexity407=2, complexity408=2, #Wrong SI wrongSI = as.integer( (replace_na(wrongSI == 1, FALSE) ) & wrongD==0 & trueextracted == 0 ), #Wrong stochastic overall wrongSO = as.integer( ( replace_na(wrongSO == 1, FALSE)) & wrongD==0 & trueextracted == 0 ), #Wrong stochastic overall wrongS = as.integer( (replace_na(wrongSI == 1, FALSE) | replace_na(wrongSO == 1, FALSE)) & wrongD==0 & trueextracted == 0 ), #Wrong stochastic overall wrong = as.integer( (replace_na(wrongD == 1, FALSE) | replace_na(wrongS == 1, FALSE)) & trueextracted == 0 ), #random models random = as.integer((trueextracted == 0) & (wrong == 0)), complexityW = complexityBM_LD, # start as copy, IN MATLAB WE ALREADY SET IT TO THE SIMPLEST!! complexityWM = complexityBM_LD, # start as copy complexityW = if_else(maxcorrectS_LD > maxcorrect_LD, 2, complexityW, # Put the complexity equal to 2 if the best model is stochastic missing = complexityW) , complexityWM = if_else(complexityBM_LD !=1, 2, complexityWM, # Put the complexity equal to 2 if the best model is stochastic missing = complexityWM) , perfect=as.integer(correct==12), )%>% summarise( math = dplyr::first(math), across(where(is.numeric), ~ mean(.x, na.rm = TRUE)), .groups = "drop" )%>% mutate( machine=ifelse(name=='auto2','A2', ifelse(name=='auto3','A3', ifelse(name=='hybrid2','H2', ifelse(name=='hybrid3','H3', ifelse(name=='instruct2','I2', ifelse(name=='instruct3','I3', ifelse(name=='switch2','S2','S3') ) ) ) ) ) ), order=ifelse(name=='auto2',1, ifelse(name=='auto3',5, ifelse(name=='hybrid2',3, ifelse(name=='hybrid3',7, ifelse(name=='instruct2',2, ifelse(name=='instruct3',6, ifelse(name=='switch2',4,8) ) ) ) ) ) ) )%>% #varibales at the ID level group_by(machine, order, stringnum)%>% mutate( extractRate = mean(trueextracted, na.rm = TRUE), wrongDRate = mean(wrongD, na.rm = TRUE), wrongSRate = mean(wrongS, na.rm = TRUE), wrongRate = mean(wrong, na.rm = TRUE), randomRate = mean(random, na.rm = TRUE), max_valueD = mean(maxcorrect_LD, na.rm = TRUE), max_value1 = mean(maxcorrect1_LD, na.rm = TRUE), max_value2 = mean(maxcorrect2_LD, na.rm = TRUE), max_value3 = mean(maxcorrect3_LD, na.rm = TRUE), max_value4 = mean(maxcorrect4_LD, na.rm = TRUE), max_valueS = mean(maxcorrectS_LD, na.rm = TRUE), max_valueW = if_else( is.na(maxcorrect_LD) & is.na(maxcorrectS_LD), NA_real_, pmax(maxcorrect_LD, maxcorrectS_LD, na.rm = TRUE) ), max_valueM = if_else( is.na(maxcorrect3_LD) & is.na(maxcorrect2_LD), NA_real_, pmax(maxcorrect2_LD, maxcorrect3_LD, na.rm = TRUE) ), complexityWD = mean(complexityBM_LD, na.rm = TRUE), complexityWS = mean(complexityBMS_LD, na.rm = TRUE), ) #Sanity check cols <- c("wrongD","wrongS","trueextracted","random") bad_rows <- M %>% mutate( .row = row_number(), sum1 = rowSums(across(all_of(cols), ~ as.integer(coalesce(.x, 0) == 1L))) ) %>% filter(sum1 != 1) %>% select(.row, all_of(cols), sum1) count(bad_rows) #Keeping machines machines_keep <- c("H2", "S2", "H3", "I3", "S3") # Replacing NA to 12 for performanceLn that achieve 12 # 1) Identify performanceL columns perfL_cols <- grep("^performanceL\\d+$", names(M), value = TRUE) # 2) Replace 12 with NA in those columns for (nm in perfL_cols) { v <- M[[nm]] if (is.factor(v)) { # Convert factor -> numeric safely v <- as.character(v) v <- suppressWarnings(as.numeric(v)) } v[v == 12] <- NA M[[nm]] <- v } # (Optional) check if any 12s remain sapply(M[perfL_cols], function(x) sum(x == 12, na.rm = TRUE)) # --- Filter + order --- M_filtered <- M %>% dplyr::filter(machine %in% machines_keep) %>% dplyr::mutate(order = match(machine, machines_keep)) # first step: add misalignment to M M <- M %>% mutate( nonc = case_when( ( performance1 >= 11 | performance1002 >= 11 ) & trueextracted==0 ~ 1, TRUE ~ 0 ) ) M <- M %>% mutate( misalignment = case_when( wrong==1 & nonc==0 ~ 1, TRUE ~ 0 ) ) # Table Disagreement ------ table_math <- M %>% # keep only relevant variables select(math, random, trueextracted, wrong) %>% # reshape to long format pivot_longer( cols = c(random, trueextracted, wrong), names_to = "category", values_to = "value" ) %>% # count subjects per math × category group_by(math, category) %>% summarise( n = sum(value == 1, na.rm = TRUE), total = n(), .groups = "drop" ) %>% # compute row-wise proportions mutate(proportion = n / total) %>% # reshape back to wide table select(math, category, proportion) %>% pivot_wider( names_from = category, values_from = proportion ) %>% arrange(math) table_math # optional: format nicely table_math_latex <- table_math %>% mutate(across(-math, ~ round(.x, 3))) # print LaTeX table kable( table_math_latex, format = "latex", booktabs = TRUE, caption = "Proportion of subjects by math level and model classification", col.names = c("Math level", "Random", "True extracted", "Wrong") ) # TAB ------- df_string <- df %>% group_by(stringnum, modelpartitions) %>% summarise( mean_max1 = mean(max_value1, na.rm = TRUE), mean_max2 = mean(max_value2, na.rm = TRUE), mean_max3 = mean(max_value3, na.rm = TRUE), mean_max4 = mean(max_value4, na.rm = TRUE), mean_wrong = mean(wrong, na.rm = TRUE), mean_extracted = mean(trueextracted, na.rm = TRUE), n = n(), .groups = "drop" )