"""Render LIME feature importances as a SHAP-style summary plot and save it as PDF.

A throwaway XGBoost model is fitted only to obtain a ``shap.Explanation``
object; its values, data and feature names are then overwritten with the
LIME importances and characteristics read from the experiment CSV, so that
``shap.summary_plot`` can be reused for drawing.
"""
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
import xgboost
import pandas as pd
import shap

# Columns holding the raw characteristic values, in plotting order.
# NOTE(review): 'Conscentiousness' is spelled this way in the column names
# used below; the spelling is kept so the lookups keep matching the CSV.
CHARACTERISTIC_COLUMNS = [
    'Competitiveness_Score',
    'Openness',
    'Conscentiousness',
    'Agreeableness',
    'Neuroticism',
    'Extraversion',
    'gender_y',
    'financed_by_parents',
    'younger_siblings',
    'older_siblings',
]

# Columns holding the LIME importance of each characteristic, positionally
# aligned with CHARACTERISTIC_COLUMNS.
IMPORTANCE_COLUMNS = [
    'Competitiveness_Score_importance',
    'Openness_importance',
    'Conscentiousness_importance',
    'Agreeableness_importance',
    'Neuroticism_importance',
    'Extraversion_importance',
    'gender_importance',
    'patience',
    'younger_siblings_importance',
    'older_siblings_importance',
]

# Axis labels shown in the plot, positionally aligned with the lists above.
DISPLAY_NAMES = [
    'Competitiveness',
    'Openness',
    'Conscentiousness',
    'Agreeableness',
    'Neuroticism',
    'Extraversion',
    'Gender',
    'Patience',
    'Younger siblings',
    'Older siblings',
]

# Train a "dummy" XGBoost model to get an initial Explanation object.
# NOTE(review): shap.datasets.boston() may be unavailable in recent shap
# releases -- confirm against the pinned shap version.
X, y = shap.datasets.boston()
model = xgboost.XGBRegressor().fit(X, y)
explainer = shap.Explainer(model)
shap_values = explainer(X)

# Load the LIME values of the experiment: one row per trustee.
data = pd.read_csv("Data/transfer_complete_.csv")
data_subset = data[['trustee'] + CHARACTERISTIC_COLUMNS + IMPORTANCE_COLUMNS]
data_subset = data_subset.drop_duplicates()
data_subset = data_subset.set_index('trustee')
data_characteristics = data_subset[CHARACTERISTIC_COLUMNS]
data_char_importance = data_subset[IMPORTANCE_COLUMNS]

# Mean absolute LIME value per characteristic, formatted as a label suffix
# such as " (0.12)".  DataFrame.items() replaces iteritems(), which was
# removed in pandas 2.0.
mean_abs_LIME_val = []
for _column_name, column_data in data_char_importance.items():
    mean_abs = sum(abs(v) for v in column_data) / len(column_data)
    mean_abs_LIME_val.append(" (" + str(round(mean_abs, 2)) + ")")

# Overwrite the SHAP explanation with the LIME data.
# ...base values are all set to 1.
# NOTE(review): the original comment said "base_values are 0" while the code
# wrote 1; the value 1 is kept here -- confirm which one is intended.
shap_values.base_values[:] = 1
# ...LIME feature importance
shap_values.values = data_char_importance.values
# ...borrower characteristics (values)
shap_values.data = data_characteristics.values
# ...characteristics (names), each suffixed with its mean absolute LIME value
shap_values.feature_names = [
    name + suffix for name, suffix in zip(DISPLAY_NAMES, mean_abs_LIME_val)
]

# Plot the "shap" aka LIME values.
# NOTE(review): the 'science' style presumably comes from an extra style
# package, and mz_custom looks like a keyword of a locally modified
# shap.summary_plot -- confirm both are available in the target environment.
plt.style.use('science')
plt.figure()
shap.summary_plot(shap_values, plot_size=(12, 12), show=False, mz_custom=True)

# Set plot properties.
fig, ax = plt.gcf(), plt.gca()

# Title, axis labels and y tick labels use the large font.
for item in [ax.title, ax.xaxis.label, ax.yaxis.label] + ax.get_yticklabels():
    item.set_fontsize(20)
# x tick labels use a slightly smaller font.  The original compared each
# single label with the whole tick-label list, which is never equal, so this
# size was never applied.
for tick_label in ax.get_xticklabels():
    tick_label.set_fontsize(17)

fig.tight_layout()

# Save to PDF; the context manager closes the file even if savefig raises.
with PdfPages('multipage.pdf') as pp:
    fig.savefig(pp, format='pdf')

# fig.show()  # uncomment for an interactive preview