"""Build the long-format stage 1 vs. stage 3 analysis table.

Reads the two wide experiment exports, keeps participants whose current page
is "ThankYouPage", aggregates each participant's stage 2 answers (distance to
the rounded AI prediction, confidence, surprise, SHAP values per feature
group) and writes one row per participant and stage 1/3 apartment to
"Data/ExperimentResults/Stage1_3.csv".
"""
import pandas as pd
import numpy as np

# Positional location of the survey blocks inside the wide export.
# NOTE(review): these offsets depend on the exact column order of the export;
# confirm them whenever the export layout changes.
AI_SURVEY_FIRST_COL = 91
AI_SURVEY_N_COLS = 22
END_SURVEY_FIRST_COL = 152
END_SURVEY_N_COLS = 10

# read in file
r1 = pd.read_csv(
    "C:/Users/morit/OneDrive/Dokumente/Goethe Projects/XAI BauerZahnHinz/"
    "ISR revision/Results/all_apps_wide_2022-03-30.csv"
)
r1["run_id"] = 1
r2 = pd.read_csv("C:/Users/morit/Downloads/all_apps_wide_2022-04-13(7).csv")
r2["run_id"] = 2
# DataFrame.append was removed in pandas 2.0; concat keeps the original row
# labels and column order exactly like append did.
r = pd.concat([r1, r2])

# only consider completes
r_done = r[r['participant._current_page_name'] == "ThankYouPage"]

# read in predictions (for ai and xai treatment) shown in stage 2
apartments_w_pred_prices = pd.read_csv(
    "Frontend/xai_experiment/immobilien_pred_prices.csv", index_col=0
)
# predictions rounded to the nearest multiple of 50
pred_prices = (apartments_w_pred_prices["predicted"] / 50).round() * 50

# read in shap values (for xai treatment) shown in stage 2
apartments_w_shap = pd.read_csv(
    "Frontend/xai_experiment/immobilien_shap.csv", index_col=0
)

# Survey columns are the same for every participant, so resolve them once.
# Element-wise indexing (not slicing) keeps the IndexError when the export
# has fewer columns than expected.
ai_survey_columns = [
    r_done.columns[AI_SURVEY_FIRST_COL + k] for k in range(AI_SURVEY_N_COLS)
]
end_survey_columns = [
    r_done.columns[END_SURVEY_FIRST_COL + k] for k in range(END_SURVEY_N_COLS)
]

# build data structure for analysis stage 1 vs 3
# Rows are collected in a list and turned into a frame once at the end;
# appending to a DataFrame per row copied the whole frame on every iteration.
rows_stage1_3 = []
for p, participant in r_done.iterrows():
    print(f"Preprocessing participant {p}...")

    # calculate stage 2 aggregated values for focal participant
    diff_pred_AI_vs_human, avg_conf_s2, avg_surprise_s2 = [], [], []
    avg_shap_frankfurt, avg_shap_koeln = [], []
    avg_shap_balcony, avg_shap_nobalcony = [], []
    avg_shap_low_green, avg_shap_mid_green, avg_shap_high_green = [], [], []

    # the sample order is stored as a bracketed, ", "-separated string
    stage2_order = participant["participant.immoSampleOrderS2"][1:-1].split(", ")
    for j, apartment_s2 in enumerate(stage2_order):
        apt = int(apartment_s2)

        price_entered = participant[f"xai_experiment.1.player.treatmentPrice{j + 1}"]
        price_ai_pred = pred_prices[apt]
        diff_pred_AI_vs_human.append(abs(price_entered - price_ai_pred))
        avg_conf_s2.append(
            participant[f"xai_experiment.1.player.treatmentConfPrice{j + 1}"]
        )
        avg_surprise_s2.append(
            participant[f"xai_experiment.1.player.treatmentSurprise{j + 1}"]
        )

        # balcony shaps
        shap_balcony = apartments_w_shap["balcony"][apt]
        if apartments_w_pred_prices["balcony"][apt] == 1:
            avg_shap_balcony.append(shap_balcony)
        else:
            avg_shap_nobalcony.append(shap_balcony)

        # city shaps
        shap_city = apartments_w_shap["Stadt"][apt]
        if apartments_w_pred_prices["Frankfurt"][apt] == 1:
            avg_shap_frankfurt.append(shap_city)
        else:
            avg_shap_koeln.append(shap_city)

        # green shaps (level 1 -> low, level 2 -> mid, anything else -> high)
        green_level = apartments_w_pred_prices["Anteil Gruenenwaehler"][apt]
        shap_green = apartments_w_shap["Anteil Gruenenwaehler"][apt]
        if green_level == 1:
            avg_shap_low_green.append(shap_green)
        elif green_level == 2:
            avg_shap_mid_green.append(shap_green)
        else:
            avg_shap_high_green.append(shap_green)

    # Collapse the per-apartment lists into participant-level means. A group
    # the participant never saw is an empty list, for which np.average
    # returns NaN (with a RuntimeWarning).
    diff_pred_AI_vs_human = np.average(diff_pred_AI_vs_human)
    avg_conf_s2 = np.average(avg_conf_s2)
    avg_surprise_s2 = np.average(avg_surprise_s2)
    avg_shap_frankfurt = np.average(avg_shap_frankfurt)
    avg_shap_koeln = np.average(avg_shap_koeln)
    avg_shap_balcony = np.average(avg_shap_balcony)
    avg_shap_nobalcony = np.average(avg_shap_nobalcony)
    avg_shap_low_green = np.average(avg_shap_low_green)
    avg_shap_mid_green = np.average(avg_shap_mid_green)
    avg_shap_high_green = np.average(avg_shap_high_green)

    # iterate through stage 1 and 3 apartments and build row to append to dataframe
    stage1_order = participant["participant.immoSampleOrderS1"][1:-1].split(", ")
    for i, apartment in enumerate(stage1_order):
        apt_id = int(apartment)
        focal_row = {
            "ParticipantID": f"{participant.run_id}_{participant['participant.id_in_session']}",
            "Treatment": participant["participant.treatment"],
            "ApartmentID": apartment,
            "PriorBeliefCity": participant[f"xai_experiment.1.player.priorLocation{i + 1}"],
            "PriorBeliefBalcony": participant[f"xai_experiment.1.player.priorBalcony{i + 1}"],
            "PriorBeliefGreen": participant[f"xai_experiment.1.player.priorGreen{i + 1}"],
            "PriorConfCity": participant[f"xai_experiment.1.player.priorConfLocation{i + 1}"],
            "PriorConfBalcony": participant[f"xai_experiment.1.player.priorConfBalcony{i + 1}"],
            "PriorConfGreen": participant[f"xai_experiment.1.player.priorConfGreen{i + 1}"],
            "PriorPrice": participant[f"xai_experiment.1.player.priorPrice{i + 1}"],
            "PriorConfPrice": participant[f"xai_experiment.1.player.priorConfPrice{i + 1}"],
            "PriorImpCity": participant["xai_experiment.1.player.priorImpLocation"],
            "PriorImpBalcony": participant["xai_experiment.1.player.priorImpBalcony"],
            "PriorImpGreen": participant["xai_experiment.1.player.priorImpGreen"],
            "PostBeliefCity": participant[f"xai_experiment.1.player.postLocation{i + 1}"],
            "PostBeliefBalcony": participant[f"xai_experiment.1.player.postBalcony{i + 1}"],
            "PostBeliefGreen": participant[f"xai_experiment.1.player.postGreen{i + 1}"],
            "PostConfCity": participant[f"xai_experiment.1.player.postConfLocation{i + 1}"],
            "PostConfBalcony": participant[f"xai_experiment.1.player.postConfBalcony{i + 1}"],
            "PostConfGreen": participant[f"xai_experiment.1.player.postConfGreen{i + 1}"],
            "PostPrice": participant[f"xai_experiment.1.player.postPrice{i + 1}"],
            "PostConfPrice": participant[f"xai_experiment.1.player.postConfPrice{i + 1}"],
            "PostImpCity": participant["xai_experiment.1.player.postImpLocation"],
            "PostImpBalcony": participant["xai_experiment.1.player.postImpBalcony"],
            "PostImpGreen": participant["xai_experiment.1.player.postImpGreen"],
            "AvgDiffHumanestiAIPred": diff_pred_AI_vs_human,
            "AvgConfidenceS2": avg_conf_s2,
            "AvgSurpriseS2": avg_surprise_s2,
            "Balcony": apartments_w_pred_prices["balcony"][apt_id],
            "City": "Frankfurt" if apartments_w_pred_prices["Frankfurt"][apt_id] == 1 else "Koeln",
            "Green": apartments_w_pred_prices['Anteil Gruenenwaehler'][apt_id],
            "AvgShapFrankfurt": avg_shap_frankfurt,
            "AvgShapKoeln": avg_shap_koeln,
            "AvgShapBalcony": avg_shap_balcony,
            "AvgShapNobalcony": avg_shap_nobalcony,
            "AvgShapLowGreen": avg_shap_low_green,
            "AvgShapMidGreen": avg_shap_mid_green,
            "AvgShapHighGreen": avg_shap_high_green,
        }

        # add AI survey values
        # (the "Suvery" spelling is kept on purpose: it is the column name
        # written to the output file)
        for column in ai_survey_columns:
            focal_row[f"SuveryAnswersPostStage2_{column.split('.')[-1]}"] = participant[column]

        # add end of experiment survey values
        for column in end_survey_columns:
            focal_row[f"SuveryAnswersLastPage_{column.split('.')[-1]}"] = participant[column]

        rows_stage1_3.append(focal_row)

# One construction instead of one DataFrame.append per row (removed in
# pandas 2.0); column order follows key insertion order and the index is a
# fresh 0..n-1 range, as with append(..., ignore_index=True).
data_for_analysis_stage1_3 = pd.DataFrame(rows_stage1_3)
data_for_analysis_stage1_3.to_csv("Data/ExperimentResults/Stage1_3.csv")