"""Fit an XGBoost regressor on the Boston housing data and plot predictions.

The script trains ``xgb.XGBRegressor`` on an 85/15 train/test split, prints
the training score, a 5-fold cross-validated R^2, and the test MSE/RMSE, then
saves a line plot of true vs. predicted test targets to ``boston.pdf``.
"""
import matplotlib.pyplot as plt
import xgboost as xgb
# NOTE: ``load_boston`` was removed in scikit-learn 1.2; running this script
# unchanged requires an older scikit-learn release.
from sklearn.datasets import load_boston, fetch_california_housing
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.model_selection import cross_val_score, KFold

# NOTE(review): the "science" style is not bundled with matplotlib; it is
# presumably provided by the SciencePlots package -- confirm it is installed.
plt.style.use(["science"])

boston = load_boston()
# NOTE(review): ``cali`` is loaded but never used below; kept so the script's
# module-level names and side effects stay unchanged.
cali = fetch_california_housing()

x, y = boston.data, boston.target
# No random_state is passed, so the split (and every number printed below)
# varies from run to run.
xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.15)

xgbr = xgb.XGBRegressor(verbosity=0)
xgbr.fit(xtrain, ytrain)

# Scored on the same data the model was fitted on, so this is optimistic.
score = xgbr.score(xtrain, ytrain)
print("Training score: ", score)

# 5-fold shuffled cross-validation on the training portion only.
kfold = KFold(n_splits=5, shuffle=True)
kf_cv_scores = cross_val_score(xgbr, xtrain, ytrain, cv=kfold, scoring="r2")
print("K-fold CV average score: %.2f" % kf_cv_scores.mean())

# Held-out evaluation.
ypred = xgbr.predict(xtest)
mse = mean_squared_error(ytest, ypred)
print("MSE: %.2f" % mse)
# Example output from a previous run: MSE: 3.35
print("RMSE: %.2f" % (mse**(1/2.0)))
# Example output from a previous run: RMSE: 1.83

# Plot true vs. predicted targets by test-sample index and save to disk.
x_ax = range(len(ytest))
plt.plot(x_ax, ytest, label="original")
plt.plot(x_ax, ypred, label="predicted")
plt.title("Boston test and predicted data")
plt.legend()
plt.savefig("boston.pdf")
plt.close()