"""Build the per-apartment stage-4 analysis table from the raw experiment exports.

The script reads two wide-format result exports, keeps only participants who
reached the final page, aggregates their stage-1 and stage-2 answers, and
writes one row per participant and stage-4 apartment (out-of-market first,
then in-market) to ``Data/ExperimentResults/Stage4.csv``.
"""
import pandas as pd
import numpy as np

# Positional offsets of the survey columns inside the wide export.
# NOTE(review): these depend on the exact column order of the export files;
# confirm them whenever the export layout changes.
AI_SURVEY_FIRST_COL = 91
AI_SURVEY_N_COLS = 22
END_SURVEY_FIRST_COL = 152
END_SURVEY_N_COLS = 10

PLAYER_PREFIX = "xai_experiment.1.player."


def _split_bracketed_list(raw):
    """Split a string such as ``"[3, 1, 2]"`` into its items (as strings)."""
    return raw[1:-1].split(", ")


def _mean_abs_adjustment(participant, stem, n_items):
    """Return the mean absolute post-minus-prior difference for one question.

    ``stem`` is the part of the column name after ``post``/``prior``
    (e.g. ``"Balcony"`` or ``"ConfBalcony"``); items are numbered from 1.
    """
    return np.average([
        abs(participant[f"{PLAYER_PREFIX}post{stem}{j}"]
            - participant[f"{PLAYER_PREFIX}prior{stem}{j}"])
        for j in range(1, n_items + 1)
    ])


def _importance_adjustment(participant, stem):
    """Return the signed post-minus-prior change of an importance rating."""
    return (participant[f"{PLAYER_PREFIX}postImp{stem}"]
            - participant[f"{PLAYER_PREFIX}priorImp{stem}"])


# read in file
r1 = pd.read_csv(
    "C:/Users/morit/OneDrive/Dokumente/Goethe Projects/XAI BauerZahnHinz/ISR revision/Results/all_apps_wide_2022-03-30.csv")
r1["run_id"] = 1
r2 = pd.read_csv("C:/Users/morit/Downloads/all_apps_wide_2022-04-13(7).csv")
r2["run_id"] = 2
# DataFrame.append was removed in pandas 2.0; pd.concat stacks the two runs
# the same way (original row labels are kept).
r = pd.concat([r1, r2])

# only consider completes
r_done = r[r['participant._current_page_name'] == "ThankYouPage"]

# read in predictions (for ai and xai treatment) shown in stage 2
apartments_w_pred_prices = pd.read_csv("Frontend/xai_experiment/immobilien_pred_prices.csv", index_col=0)
# predicted prices rounded to the nearest multiple of 50
pred_prices = (apartments_w_pred_prices.predicted / 50).round() * 50

# read in shap values (for xai treatment) shown in stage 2
# (loaded for completeness; not referenced further down in this script)
apartments_w_shap = pd.read_csv("Frontend/xai_experiment/immobilien_shap.csv", index_col=0)

# read in "simulated ground truth" for stage 4
apartment_s4_shap = pd.read_csv("Frontend/xai_experiment/immobilien_stage4_shap.csv", index_col=0)

#
# build data structure for analysis 3
#

# maps (balcony, green) property values to an apartment number within a city
stage4_apartments = {
    0: {1: 1, 2: 2, 3: 3},
    1: {1: 4, 2: 5, 3: 6}
}

# (market, city, apartment id offset) in the order the rows are emitted:
# out-of-market apartment (Chemnitz) first, then in-market (Duesseldorf).
stage4_markets = (
    ("out", "Chemnitz", 6),
    ("in", "Duesseldorf", 0),
)

# The survey columns are addressed by position, so resolve their names once.
ai_survey_columns = [r_done.columns[AI_SURVEY_FIRST_COL + k] for k in range(AI_SURVEY_N_COLS)]
end_survey_columns = [r_done.columns[END_SURVEY_FIRST_COL + k] for k in range(END_SURVEY_N_COLS)]

# Rows are collected in a list and turned into a DataFrame once at the end;
# growing a DataFrame row by row copies the whole frame on every iteration.
stage4_rows = []

for p, participant in r_done.iterrows():
    print(f"Preprocessing participant {p}...")

    # calculate stage 1 aggregated values for focal participant
    n_s1 = len(_split_bracketed_list(participant["participant.immoSampleOrderS1"]))

    avg_belief_adj_balcony = _mean_abs_adjustment(participant, "Balcony", n_s1)
    avg_conf_adj_balcony = _mean_abs_adjustment(participant, "ConfBalcony", n_s1)
    adj_imp_balcony = _importance_adjustment(participant, "Balcony")

    avg_belief_adj_city = _mean_abs_adjustment(participant, "Location", n_s1)
    avg_conf_adj_city = _mean_abs_adjustment(participant, "ConfLocation", n_s1)
    adj_imp_city = _importance_adjustment(participant, "Location")

    avg_belief_adj_green = _mean_abs_adjustment(participant, "Green", n_s1)
    avg_conf_adj_green = _mean_abs_adjustment(participant, "ConfGreen", n_s1)
    adj_imp_green = _importance_adjustment(participant, "Green")

    # calculate stage 2 aggregated values for focal participant
    diff_pred_AI_vs_human, avg_conf_s2, avg_surprise_s2 = [], [], []
    s2_order = _split_bracketed_list(participant["participant.immoSampleOrderS2"])
    for j, apartment_s2 in enumerate(s2_order, start=1):
        price_entered = participant[f"{PLAYER_PREFIX}treatmentPrice{j}"]
        price_ai_pred = pred_prices[int(apartment_s2)]
        diff_pred_AI_vs_human.append(abs(price_entered - price_ai_pred))
        avg_conf_s2.append(participant[f"{PLAYER_PREFIX}treatmentConfPrice{j}"])
        avg_surprise_s2.append(participant[f"{PLAYER_PREFIX}treatmentSurprise{j}"])

    diff_pred_AI_vs_human = np.average(diff_pred_AI_vs_human)
    avg_conf_s2 = np.average(avg_conf_s2)
    avg_surprise_s2 = np.average(avg_surprise_s2)

    # Survey answers are identical for both stage-4 rows of a participant,
    # so extract them once. The "Suvery" spelling is kept on purpose: it is
    # part of the output column names that downstream code may rely on.
    survey_values = {}
    # add AI survey values
    for column in ai_survey_columns:
        survey_values[f"SuveryAnswersPostStage2_{column.split('.')[-1]}"] = participant[column]
    # add end of experiment survey values
    for column in end_survey_columns:
        survey_values[f"SuveryAnswersLastPage_{column.split('.')[-1]}"] = participant[column]

    # iterate through stage 4 apartments and build row to append to dataframe
    for market, city, apartment_offset in stage4_markets:
        balcony_green = [
            int(value)
            for value in _split_bracketed_list(participant[f"participant.{market}MarketProp"])
        ]
        apartment = apartment_offset + stage4_apartments[balcony_green[0]][balcony_green[1]]

        focal_apartment_shap = apartment_s4_shap[
            (apartment_s4_shap[city] == 1) &
            (apartment_s4_shap["balcony"] == balcony_green[0]) &
            (apartment_s4_shap['Anteil Gruenenwaehler'] == balcony_green[1])]

        # .item() raises unless exactly one row matched the filter above
        pred_price = focal_apartment_shap["pred_price"].item()
        price_entered_s4 = participant[f"{PLAYER_PREFIX}{market}Market_Price"]

        focal_row = {
            "ParticipantID": f"{participant.run_id}_{participant['participant.id_in_session']}",
            "Treatment": participant["participant.treatment"],
            "ApartmentID": apartment,
            "Price": price_entered_s4,
            "Conf": participant[f"{PLAYER_PREFIX}{market}Market_ConfPrice"],
            "PricePred": pred_price,
            "ShapBalcony": focal_apartment_shap["shap_balcony"].item(),
            "ShapCity": focal_apartment_shap["shap_Stadt"].item(),
            "ShapGreen": focal_apartment_shap["shap_Anteil Gruenenwaehler"].item(),
            "DiffHumanestiAIPred": price_entered_s4 - pred_price,
            "AvgBeliefAdjCity": avg_belief_adj_city,
            "AvgConfAdjCity": avg_conf_adj_city,
            "AdjImportanceCity": adj_imp_city,
            "AvgBeliefAdjBalcony": avg_belief_adj_balcony,
            "AvgConfAdjBalcony": avg_conf_adj_balcony,
            "AdjImportanceBalcony": adj_imp_balcony,
            "AvgBeliefAdjGreen": avg_belief_adj_green,
            "AvgConfAdjGreen": avg_conf_adj_green,
            "AdjImportanceGreen": adj_imp_green,
            "AvgDiffHumanestiAIPred": diff_pred_AI_vs_human,
            "AvgConfidenceS2": avg_conf_s2,
            "AvgSurpriseS2": avg_surprise_s2,
            "Balcony": balcony_green[0],
            "City": city,
            "Green": balcony_green[1],
        }
        focal_row.update(survey_values)

        stage4_rows.append(focal_row)

data_for_analysis_stage4 = pd.DataFrame(stage4_rows)

data_for_analysis_stage4.to_csv("Data/ExperimentResults/Stage4.csv")