"""Build the stage-2 (treatment stage) analysis table from the raw exports.

The script reads two wide-format result exports, keeps the participants who
reached the final page, joins every stage-2 apartment they priced with the
model prediction and SHAP values shown for it, adds the participant's
aggregated stage-1 beliefs and survey answers, and writes one row per
(participant, stage-2 apartment) to ``Data/ExperimentResults/Stage2.csv``.
"""
import pandas as pd
import numpy as np

# (run_id, path) of each raw export; run_id becomes part of ParticipantID.
RUN_EXPORTS = (
    (1, "C:/Users/morit/OneDrive/Dokumente/Goethe Projects/XAI BauerZahnHinz/ISR revision/Results/all_apps_wide_2022-03-30.csv"),
    (2, "C:/Users/morit/Downloads/all_apps_wide_2022-04-13(7).csv"),
)
PRED_PRICES_CSV = "Frontend/xai_experiment/immobilien_pred_prices.csv"
SHAP_CSV = "Frontend/xai_experiment/immobilien_shap.csv"
STAGE2_OUTPUT_CSV = "Data/ExperimentResults/Stage2.csv"

# Prefix shared by all per-player columns read below.
PLAYER_PREFIX = "xai_experiment.1.player."

# (output key prefix, first column position, number of columns) of the two
# survey blocks, addressed by position in the combined export.
# NOTE(review): the positions depend on the exact column layout of the exports
# (including the appended run_id column) - confirm when the export changes.
# The "Suvery" spelling is kept because these prefixes become CSV column names.
SURVEY_BLOCKS = (
    ("SuveryAnswersPostStage2_", 91, 22),
    ("SuveryAnswersLastPage_", 152, 10),
)


def parse_id_list(raw):
    """Split a bracketed, comma-separated id string such as ``"[3, 1, 2]"``.

    Returns the ids as strings in their original order. An empty list
    (``"[]"``) yields ``[]`` instead of a single empty token.
    """
    inner = raw[1:-1].strip()
    if not inner:
        return []
    return [token.strip() for token in inner.split(",")]


def mean_or_nan(values):
    """Return ``np.average(values)``, or NaN when ``values`` is empty."""
    return np.average(values) if len(values) > 0 else np.nan


def load_completed_participants(run_exports=RUN_EXPORTS):
    """Read all exports, tag each with its run id and keep completed rows.

    A participant counts as complete when ``participant._current_page_name``
    equals ``"ThankYouPage"``.
    """
    frames = []
    for run_id, path in run_exports:
        frame = pd.read_csv(path)
        frame["run_id"] = run_id
        frames.append(frame)
    # pd.concat replaces DataFrame.append, which was removed in pandas 2.0.
    combined = pd.concat(frames)
    return combined[combined["participant._current_page_name"] == "ThankYouPage"]


def stage1_average_beliefs(participant, apartments):
    """Average a participant's stage-1 beliefs per apartment attribute group.

    The j-th stage-1 apartment (1-based) corresponds to the columns
    ``priorBalcony{j}``, ``priorLocation{j}`` and ``priorGreen{j}``. Each
    answer is put into the group matching the apartment's attribute value and
    every group is averaged; a group without answers yields NaN.

    Returns a dict with the seven ``AvgBelief*`` output keys.
    """
    groups = {
        "AvgBeliefBalconyYes": [],
        "AvgBeliefBalconyNo": [],
        "AvgBeliefFrankfurt": [],
        "AvgBeliefKoeln": [],
        "AvgBeliefLowGreen": [],
        "AvgBeliefMidGreen": [],
        "AvgBeliefHighGreen": [],
    }
    has_balcony = apartments["balcony"]
    in_frankfurt = apartments["Frankfurt"]
    green_level = apartments["Anteil Gruenenwaehler"]

    shown = parse_id_list(participant["participant.immoSampleOrderS1"])
    for position, apartment_id in enumerate(shown, start=1):
        apt = int(apartment_id)

        # balcony beliefs
        balcony_key = "AvgBeliefBalconyYes" if has_balcony[apt] == 1 else "AvgBeliefBalconyNo"
        groups[balcony_key].append(participant[f"{PLAYER_PREFIX}priorBalcony{position}"])

        # city beliefs
        city_key = "AvgBeliefFrankfurt" if in_frankfurt[apt] == 1 else "AvgBeliefKoeln"
        groups[city_key].append(participant[f"{PLAYER_PREFIX}priorLocation{position}"])

        # green beliefs: level 1 -> low, level 2 -> mid, anything else -> high
        level = green_level[apt]
        if level == 1:
            green_key = "AvgBeliefLowGreen"
        elif level == 2:
            green_key = "AvgBeliefMidGreen"
        else:
            green_key = "AvgBeliefHighGreen"
        groups[green_key].append(participant[f"{PLAYER_PREFIX}priorGreen{position}"])

    return {key: mean_or_nan(answers) for key, answers in groups.items()}


def survey_answers(participant, columns, survey_blocks=SURVEY_BLOCKS):
    """Collect the survey answers of one participant.

    For every ``(prefix, start, count)`` block the columns at positions
    ``start .. start + count - 1`` of ``columns`` are read; the output key is
    the prefix followed by the last dot-separated part of the column name.
    Raises IndexError when a position lies beyond ``columns``.
    """
    answers = {}
    for prefix, start, count in survey_blocks:
        for offset in range(count):
            column = columns[start + offset]
            answers[f"{prefix}{column.split('.')[-1]}"] = participant[column]
    return answers


def build_stage2_rows(completed, apartments, pred_prices, shap_values,
                      survey_blocks=SURVEY_BLOCKS):
    """Return one dict per (participant, stage-2 apartment).

    ``apartments``, ``pred_prices`` and ``shap_values`` are looked up by the
    integer apartment id; ``completed`` holds one row per participant.
    """
    rows = []
    for label, participant in completed.iterrows():
        print(f"Preprocessing participant {label}...")

        # Participant-level values are identical for all of the participant's
        # apartments, so they are computed once instead of once per row.
        participant_id = f"{participant['run_id']}_{participant['participant.id_in_session']}"
        beliefs = stage1_average_beliefs(participant, apartments)
        surveys = survey_answers(participant, completed.columns, survey_blocks)

        shown = parse_id_list(participant["participant.immoSampleOrderS2"])
        for position, apartment_id in enumerate(shown, start=1):
            apt = int(apartment_id)
            price = participant[f"{PLAYER_PREFIX}treatmentPrice{position}"]
            predicted = pred_prices[apt]
            rows.append({
                "ParticipantID": participant_id,
                "Treatment": participant["participant.treatment"],
                "ApartmentID": apartment_id,
                "Price": price,
                "Conf": participant[f"{PLAYER_PREFIX}treatmentConfPrice{position}"],
                "Surprise": participant[f"{PLAYER_PREFIX}treatmentSurprise{position}"],
                "PricePred": predicted,
                "ShapBalcony": shap_values["balcony"][apt],
                "ShapCity": shap_values["Stadt"][apt],
                "ShapGreen": shap_values["Anteil Gruenenwaehler"][apt],
                "DiffHumanestiAIPred": price - predicted,
                "Balcony": apartments["balcony"][apt],
                "City": "Frankfurt" if apartments["Frankfurt"][apt] == 1 else "Koeln",
                "Green": apartments["Anteil Gruenenwaehler"][apt],
                **beliefs,
                "ImpBalcony": participant[f"{PLAYER_PREFIX}priorImpBalcony"],
                "ImpCity": participant[f"{PLAYER_PREFIX}priorImpLocation"],
                "ImpGreen": participant[f"{PLAYER_PREFIX}priorImpGreen"],
                **surveys,
            })
    return rows


def main():
    """Run the whole preprocessing and write the stage-2 CSV.

    Returns the written DataFrame.
    """
    # only consider completes
    completed = load_completed_participants(RUN_EXPORTS)

    # predictions (for ai and xai treatment) shown in stage 2,
    # rounded to the nearest multiple of 50
    apartments = pd.read_csv(PRED_PRICES_CSV, index_col=0)
    pred_prices = (apartments.predicted / 50).round() * 50

    # shap values (for xai treatment) shown in stage 2
    shap_values = pd.read_csv(SHAP_CSV, index_col=0)

    # Build the frame once from all rows: row-wise DataFrame.append copied the
    # whole frame on every call and no longer exists in pandas 2.0.
    # NOTE(review): dtypes are now inferred once from all rows, so number
    # formatting in the CSV may differ slightly from the old output.
    stage2 = pd.DataFrame(build_stage2_rows(completed, apartments, pred_prices, shap_values))
    stage2.to_csv(STAGE2_OUTPUT_CSV)
    return stage2


if __name__ == "__main__":
    data_for_analysis_stage2 = main()