Sachin Shekhar
SachinShekhar@Outlook.com
# Importing essential libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.feature_selection import RFE
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
import statsmodels.api as sm
from statsmodels.stats.outliers_influence import variance_inflation_factor
import warnings
warnings.filterwarnings('ignore')
# Loading data
df = pd.read_csv('day.csv')
df.head()
| instant | dteday | season | yr | mnth | holiday | weekday | workingday | weathersit | temp | atemp | hum | windspeed | casual | registered | cnt | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 01-01-2018 | 1 | 0 | 1 | 0 | 1 | 1 | 2 | 14.110847 | 18.18125 | 80.5833 | 10.749882 | 331 | 654 | 985 |
| 1 | 2 | 02-01-2018 | 1 | 0 | 1 | 0 | 2 | 1 | 2 | 14.902598 | 17.68695 | 69.6087 | 16.652113 | 131 | 670 | 801 |
| 2 | 3 | 03-01-2018 | 1 | 0 | 1 | 0 | 3 | 1 | 1 | 8.050924 | 9.47025 | 43.7273 | 16.636703 | 120 | 1229 | 1349 |
| 3 | 4 | 04-01-2018 | 1 | 0 | 1 | 0 | 4 | 1 | 1 | 8.200000 | 10.60610 | 59.0435 | 10.739832 | 108 | 1454 | 1562 |
| 4 | 5 | 05-01-2018 | 1 | 0 | 1 | 0 | 5 | 1 | 1 | 9.305237 | 11.46350 | 43.6957 | 12.522300 | 82 | 1518 | 1600 |
df.shape
(730, 16)
# Creating list of categorical & numerical variables for easy access
cat_vars = ['season', 'yr', 'mnth', 'holiday', 'weekday', 'workingday', 'weathersit']
num_vars = ['dteday', 'temp', 'atemp', 'hum', 'windspeed', 'casual', 'registered', 'cnt']
df.isnull().sum()
instant 0 dteday 0 season 0 yr 0 mnth 0 holiday 0 weekday 0 workingday 0 weathersit 0 temp 0 atemp 0 hum 0 windspeed 0 casual 0 registered 0 cnt 0 dtype: int64
df.duplicated().sum()
0
# 'instant' is only a running row index, so it carries no information about
# the dependent variable
df.drop(columns='instant', inplace=True)
# 'casual' and 'registered' are already included in the 'cnt' total
df.drop(columns=['casual', 'registered'], inplace=True)
# Keep the numerical-variable list in sync with the columns just removed
# (slice assignment updates the existing list object in place)
num_vars[:] = [name for name in num_vars if name not in ('casual', 'registered')]
df.dtypes
dteday object season int64 yr int64 mnth int64 holiday int64 weekday int64 workingday int64 weathersit int64 temp float64 atemp float64 hum float64 windspeed float64 cnt int64 dtype: object
# Parse the day-month-year strings in 'dteday' into datetime64 values
df['dteday'] = pd.to_datetime(df['dteday'], format='%d-%m-%Y')
# Re-express every date as the whole number of days since the Unix epoch
# (1970-01-01) so the column can be analysed like any other integer variable
df['dteday'] = df['dteday'].sub(pd.Timestamp('1970-01-01')).floordiv(pd.Timedelta('1d'))
df['dteday'].dtypes
dtype('int64')
# Converting categorical variable codes into readable string labels.
# Each column is reassigned rather than calling `replace(..., inplace=True)` on
# `df[col]`: that chained form operates on an intermediate Series, is
# deprecated in recent pandas, and no longer updates `df` once Copy-on-Write
# is enabled.
df['season'] = df['season'].replace({1: 'spring', 2: 'summer', 3: 'fall', 4: 'winter'})
df['yr'] = df['yr'].replace({0: '2018', 1: '2019'})
df['mnth'] = df['mnth'].replace({1: 'january', 2: 'february', 3: 'march', 4: 'april', 5: 'may', 6: 'june', 7: 'july', 8: 'august', 9: 'september', 10: 'october', 11: 'november', 12: 'december'})
df['holiday'] = df['holiday'].replace({0: 'no', 1: 'yes'})
df['weekday'] = df['weekday'].replace({0: 'sunday', 1: 'monday', 2: 'tuesday', 3: 'wednesday', 4: 'thursday', 5: 'friday', 6: 'saturday'})
df['workingday'] = df['workingday'].replace({0: 'no', 1: 'yes'})
df['weathersit'] = df['weathersit'].replace({1: 'clear', 2: 'misty', 3: 'light_weather', 4: 'heavy_weather'})
# Helper functions for Numerical Univariate Analysis
def num_uni_box_analysis(var, friendly_name):
    """Draw a box plot of one column of the module-level ``df``.

    Parameters
    ----------
    var : str
        Name of the column in ``df`` to plot on the x axis.
    friendly_name : str
        Human-readable name used for the axis label and the title.

    Returns
    -------
    The Axes object returned by ``sns.boxplot``, so callers can adjust the
    figure further (consistent with the categorical helpers in this file).
    """
    ax = sns.boxplot(data=df, x=var)
    # Place the ticks at the minimum, the quartiles, the 95th percentile and
    # the maximum of the column instead of at evenly spaced values
    ax.set_xticks(list(df[var].quantile([0, 0.25, 0.5, 0.75, 0.95, 1])))
    ax.set_xlabel(friendly_name)
    ax.set_title(f'{friendly_name} Distribution Univariate Analysis', fontsize=20)
    ax.figure.set_size_inches(24, 8)
    return ax
def num_uni_hist_analysis(var, friendly_name):
    """Draw a histogram of one column of the module-level ``df``.

    Bar heights are expressed as percentages (``stat='percent'``).

    Parameters
    ----------
    var : str
        Name of the column in ``df`` to plot on the x axis.
    friendly_name : str
        Human-readable name used for the axis label and the title.

    Returns
    -------
    The Axes object returned by ``sns.histplot``, so callers can adjust the
    figure further (consistent with the categorical helpers in this file).
    """
    ax = sns.histplot(data=df, x=var, stat='percent')
    ax.set_xlabel(friendly_name)
    ax.set_title(f'{friendly_name} Distribution Histogram', fontsize=20)
    ax.figure.set_size_inches(24, 8)
    return ax
df.describe()
| dteday | temp | atemp | hum | windspeed | cnt | |
|---|---|---|---|---|---|---|
| count | 730.000000 | 730.000000 | 730.000000 | 730.000000 | 730.000000 | 730.000000 |
| mean | 17896.500000 | 20.319259 | 23.726322 | 62.765175 | 12.763620 | 4508.006849 |
| std | 210.877136 | 7.506729 | 8.150308 | 14.237589 | 5.195841 | 1936.011647 |
| min | 17532.000000 | 2.424346 | 3.953480 | 0.000000 | 1.500244 | 22.000000 |
| 25% | 17714.250000 | 13.811885 | 16.889713 | 52.000000 | 9.041650 | 3169.750000 |
| 50% | 17896.500000 | 20.465826 | 24.368225 | 62.625000 | 12.125325 | 4548.500000 |
| 75% | 18078.750000 | 26.880615 | 30.445775 | 72.989575 | 15.625589 | 5966.000000 |
| max | 18261.000000 | 35.328347 | 42.044800 | 97.250000 | 34.000021 | 8714.000000 |
num_uni_box_analysis('cnt', 'Rider Count')
plt.show()
num_uni_hist_analysis('cnt', 'Rider Count')
plt.show()
num_uni_box_analysis('dteday', 'Date')
plt.show()
pd.to_datetime((17896.5 * pd.Timedelta('1d')) + pd.Timestamp('1970-01-01'), unit='s')
Timestamp('2018-12-31 12:00:00')
The median makes sense as the data was collected for two years.
num_uni_hist_analysis('dteday', 'Date')
plt.show()
This uniform distribution makes sense as data was recorded every day.
num_uni_box_analysis('temp', 'Temperature')
plt.show()
num_uni_hist_analysis('temp', 'Temperature')
plt.show()
num_uni_box_analysis('atemp', 'Feel Temperature')
plt.show()
num_uni_hist_analysis('atemp', 'Feel Temperature')
plt.show()
num_uni_box_analysis('hum', 'Humidity')
plt.show()
num_uni_hist_analysis('hum', 'Humidity')
plt.show()
num_uni_box_analysis('windspeed', 'Wind Speed')
plt.show()
num_uni_hist_analysis('windspeed', 'Wind Speed')
plt.show()
Assessment:
The data in the numerical variables makes sense.
# Helper function for Categorical Univariate Analysis
def cat_uni_analysis(var, friendly_name):
    """Plot the percentage share of each category of a column of ``df``.

    Parameters
    ----------
    var : str
        Name of the categorical column in the module-level ``df``.
    friendly_name : str
        Human-readable name used for the axis label and the title.

    Returns
    -------
    The Axes object returned by ``sns.barplot``.
    """
    # Relative frequency of every category, scaled to percent
    shares = df[var].value_counts(normalize=True).mul(100).rename('Percent')
    # Turn the categories into a regular column; if that column comes out
    # named 'index' it is renamed to the variable name
    plot_df = shares.reset_index().rename(columns={'index': var})
    ax = sns.barplot(data=plot_df, x=var, y='Percent')
    # Write each bar's value, rounded to two decimals, centred above the bar
    for bar in ax.patches:
        label = str(bar.get_height().round(2)) + '%'
        centre_x = bar.get_x() + (bar.get_width() / 2)
        top_y = bar.get_height()
        ax.annotate(label, (centre_x, top_y), size=11, ha='center', va='bottom')
    ax.set_xlabel(friendly_name)
    ax.set_title(f'{friendly_name} Univariate Analysis (Normalised)', fontsize=20)
    ax.figure.set_size_inches(16, 8)
    return ax
cat_uni_analysis('season', 'Season')
plt.show()
cat_uni_analysis('yr', 'Year')
plt.show()
cat_uni_analysis('mnth', 'Month')
plt.show()
cat_uni_analysis('holiday', 'Holiday')
plt.show()
cat_uni_analysis('weekday', 'Weekday')
plt.show()
cat_uni_analysis('workingday', 'Working Day')
plt.show()
cat_uni_analysis('weathersit', 'Weather')
plt.show()
Assessment:
The data in the categorical variables makes sense.
ax = sns.pairplot(df)
ax.figure.set_size_inches(24,20)
# Helper function for Categorical Bivariate Analysis
def cat_bi_analysis(var, friendly_name):
    """Draw box plots of 'cnt' for each category of a column of ``df``.

    Parameters
    ----------
    var : str
        Name of the categorical column in the module-level ``df`` (x axis).
    friendly_name : str
        Human-readable name used in the plot title.

    Returns
    -------
    The Axes object returned by ``sns.boxplot``; callers may resize the
    figure through it.
    """
    axes = sns.boxplot(data=df, x=var, y='cnt')
    axes.set_title(f'{friendly_name} vs Rental Count', fontsize=20)
    # Default to a wide figure; individual callers override this when needed
    axes.figure.set_size_inches(24, 12)
    return axes
cat_bi_analysis('season', 'Season')
plt.show()
Assessment:
ax = cat_bi_analysis('yr', 'Year')
ax.figure.set_size_inches(16,12)
plt.show()
Assessment:
2019 saw an increase in rentals over 2018.
cat_bi_analysis('mnth', 'Month')
plt.show()
Assessment:
Starting in January, the number of rentals increases until June. It then
stays steady for several months before falling from
October onwards.
ax = cat_bi_analysis('holiday', 'Holiday')
ax.figure.set_size_inches(16,12)
plt.show()
Assessment:
On non-holidays, the rental count is more consistent.
cat_bi_analysis('weekday', 'Weekday')
plt.show()
Assessment:
Weekdays don't reveal anything notable about the number of rentals,
although Thursdays show more consistent rental counts.
ax = cat_bi_analysis('workingday', 'Working Day')
ax.figure.set_size_inches(16,12)
plt.show()
Assessment:
Working-day status doesn't reveal anything about rental counts.
cat_bi_analysis('weathersit', 'Weather')
plt.show()
Assessment:
# Heatmap of the pairwise correlations between the numerical variables.
# The categorical columns hold string labels at this point, and
# DataFrame.corr() raises on non-numeric columns in pandas >= 2.0 (older
# versions silently skipped them), so the frame is restricted to num_vars.
ax = sns.heatmap(df[num_vars].corr(), cmap='RdYlGn', annot=True)
ax.figure.set_size_inches(12, 10)
plt.show()
# One-hot encode the categorical variables, dropping the first level of each
# to avoid a redundant column. The dtype is pinned to an integer type because
# pandas >= 2.0 would otherwise emit bool columns, which turn the combined
# design matrix into object dtype when it is handed to statsmodels / the VIF
# helper.
cat_dum = pd.get_dummies(df[cat_vars], drop_first=True, dtype=np.uint8)
# Swap the original categorical columns for their dummy encodings
df.drop(columns=cat_vars, inplace=True)
df = pd.concat([df, cat_dum], axis=1)
df.columns
Index(['dteday', 'temp', 'atemp', 'hum', 'windspeed', 'cnt', 'season_spring',
'season_summer', 'season_winter', 'yr_2019', 'mnth_august',
'mnth_december', 'mnth_february', 'mnth_january', 'mnth_july',
'mnth_june', 'mnth_march', 'mnth_may', 'mnth_november', 'mnth_october',
'mnth_september', 'holiday_yes', 'weekday_monday', 'weekday_saturday',
'weekday_sunday', 'weekday_thursday', 'weekday_tuesday',
'weekday_wednesday', 'workingday_yes', 'weathersit_light_weather',
'weathersit_misty'],
dtype='object')
df_train, df_test = train_test_split(df, train_size=0.7, random_state=100)
df_train.shape
(510, 31)
df_test.shape
(220, 31)
# Learn Min & Max values
scaler = MinMaxScaler()
scaler.fit(df_train[num_vars])
MinMaxScaler()In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook.
MinMaxScaler()
# Scale
df_train[num_vars] = scaler.transform(df_train[num_vars])
df_train.head()
| dteday | temp | atemp | hum | windspeed | cnt | season_spring | season_summer | season_winter | yr_2019 | ... | holiday_yes | weekday_monday | weekday_saturday | weekday_sunday | weekday_thursday | weekday_tuesday | weekday_wednesday | workingday_yes | weathersit_light_weather | weathersit_misty | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 576 | 0.791209 | 0.815169 | 0.766351 | 0.725633 | 0.264686 | 0.827658 | 0 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 |
| 426 | 0.585165 | 0.442393 | 0.438975 | 0.640189 | 0.255342 | 0.465255 | 1 | 0 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 728 | 1.000000 | 0.245101 | 0.200348 | 0.498067 | 0.663106 | 0.204096 | 1 | 0 | 0 | 1 | ... | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
| 482 | 0.662088 | 0.395666 | 0.391735 | 0.504508 | 0.188475 | 0.482973 | 0 | 1 | 0 | 1 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
| 111 | 0.152473 | 0.345824 | 0.318819 | 0.751824 | 0.380981 | 0.191095 | 0 | 1 | 0 | 0 | ... | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 1 |
5 rows × 31 columns
# Creating function to easily calculate VIF
def calculate_vif(x_df):
    """Return the variance inflation factor of every column of ``x_df``.

    Parameters
    ----------
    x_df : pandas.DataFrame
        Feature matrix; each column is treated as one explanatory variable.

    Returns
    -------
    pandas.DataFrame
        Columns 'Feature' and 'VIF' (rounded to two decimals), sorted by VIF
        in descending order. The index keeps each feature's original column
        position.
    """
    # Build the numeric matrix once instead of once per feature, and force a
    # float dtype so bool/integer dummy columns cannot yield an object array
    design = x_df.values.astype(float)
    # NOTE(review): the matrix is passed as-is, without an intercept column —
    # confirm this is the intended VIF definition for this analysis.
    vif_values = [variance_inflation_factor(design, idx) for idx in range(design.shape[1])]
    vif_df = pd.DataFrame({'Feature': x_df.columns, 'VIF': vif_values})
    vif_df['VIF'] = vif_df['VIF'].round(2)
    return vif_df.sort_values(by='VIF', ascending=False)
# Creating function to easily train models
def train_model(y_df, x_df):
    """Fit an ordinary least squares regression of ``y_df`` on ``x_df``.

    Parameters
    ----------
    y_df : array-like
        Dependent variable.
    x_df : pandas.DataFrame
        Explanatory variables, without an intercept column.

    Returns
    -------
    The fitted results object returned by ``sm.OLS(...).fit()``.
    """
    # statsmodels does not add an intercept on its own, so append a constant
    design = sm.add_constant(x_df)
    return sm.OLS(y_df, design).fit()
# Extract Target Variable
# pop() removes the 'cnt' column from df_train in place and returns it
y_train = df_train.pop('cnt')
# X_train is a second name for the same df_train object (no copy is made),
# which now holds only the remaining feature columns
X_train = df_train
# Creating function to easily select features
def auto_select_features(n, x_df=X_train):
    """Pick ``n`` features with recursive feature elimination (RFE).

    A linear regression is used as the ranking estimator and is fitted
    against the module-level ``y_train``.

    Parameters
    ----------
    n : int
        Number of features to keep.
    x_df : pandas.DataFrame, optional
        Candidate feature matrix. Defaults to the ``X_train`` object that
        existed when this function was defined.

    Returns
    -------
    list of str
        Names of the selected columns, in their original column order.
    """
    # RFE fits the estimator itself, so no separate fit is needed beforehand
    selector = RFE(LinearRegression(), n_features_to_select=n)
    selector.fit(x_df, y_train)
    # support_ is a boolean mask over the columns marking the kept features
    return x_df.columns[selector.support_].to_list()
X_train_1 = X_train[auto_select_features(15)]
train_model(y_train, X_train_1).summary()
| Dep. Variable: | cnt | R-squared: | 0.845 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.840 |
| Method: | Least Squares | F-statistic: | 179.4 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 8.57e-189 |
| Time: | 01:25:12 | Log-Likelihood: | 514.13 |
| No. Observations: | 510 | AIC: | -996.3 |
| Df Residuals: | 494 | BIC: | -928.5 |
| Df Model: | 15 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2876 | 0.033 | 8.733 | 0.000 | 0.223 | 0.352 |
| dteday | -0.0750 | 0.064 | -1.168 | 0.244 | -0.201 | 0.051 |
| temp | 0.4920 | 0.028 | 17.658 | 0.000 | 0.437 | 0.547 |
| hum | -0.1698 | 0.038 | -4.525 | 0.000 | -0.243 | -0.096 |
| windspeed | -0.1935 | 0.026 | -7.541 | 0.000 | -0.244 | -0.143 |
| season_summer | 0.0875 | 0.013 | 6.684 | 0.000 | 0.062 | 0.113 |
| season_winter | 0.1296 | 0.019 | 6.807 | 0.000 | 0.092 | 0.167 |
| yr_2019 | 0.2675 | 0.033 | 8.143 | 0.000 | 0.203 | 0.332 |
| mnth_august | 0.0586 | 0.017 | 3.500 | 0.001 | 0.026 | 0.092 |
| mnth_february | -0.0392 | 0.023 | -1.711 | 0.088 | -0.084 | 0.006 |
| mnth_january | -0.0671 | 0.024 | -2.785 | 0.006 | -0.114 | -0.020 |
| mnth_october | 0.0373 | 0.018 | 2.104 | 0.036 | 0.002 | 0.072 |
| mnth_september | 0.1272 | 0.017 | 7.592 | 0.000 | 0.094 | 0.160 |
| holiday_yes | -0.0897 | 0.025 | -3.518 | 0.000 | -0.140 | -0.040 |
| weathersit_light_weather | -0.2443 | 0.026 | -9.291 | 0.000 | -0.296 | -0.193 |
| weathersit_misty | -0.0541 | 0.010 | -5.187 | 0.000 | -0.075 | -0.034 |
| Omnibus: | 67.981 | Durbin-Watson: | 2.036 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 155.260 |
| Skew: | -0.713 | Prob(JB): | 1.93e-34 |
| Kurtosis: | 5.296 | Cond. No. | 31.9 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 0 | dteday | 73.85 |
| 6 | yr_2019 | 30.19 |
| 2 | hum | 26.58 |
| 1 | temp | 14.17 |
| 5 | season_winter | 5.67 |
| 3 | windspeed | 4.05 |
| 4 | season_summer | 2.43 |
| 9 | mnth_january | 2.29 |
| 14 | weathersit_misty | 2.26 |
| 8 | mnth_february | 1.72 |
| 7 | mnth_august | 1.69 |
| 10 | mnth_october | 1.67 |
| 11 | mnth_september | 1.41 |
| 13 | weathersit_light_weather | 1.24 |
| 12 | holiday_yes | 1.05 |
# Dropping 'dteday' because it has both high p-value & high VIF
X_train_1 = X_train_1.drop('dteday', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | cnt | R-squared: | 0.844 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.840 |
| Method: | Least Squares | F-statistic: | 192.0 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 1.20e-189 |
| Time: | 01:25:13 | Log-Likelihood: | 513.43 |
| No. Observations: | 510 | AIC: | -996.9 |
| Df Residuals: | 495 | BIC: | -933.3 |
| Df Model: | 14 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2721 | 0.030 | 9.027 | 0.000 | 0.213 | 0.331 |
| temp | 0.4899 | 0.028 | 17.612 | 0.000 | 0.435 | 0.545 |
| hum | -0.1741 | 0.037 | -4.662 | 0.000 | -0.247 | -0.101 |
| windspeed | -0.1922 | 0.026 | -7.495 | 0.000 | -0.243 | -0.142 |
| season_summer | 0.0935 | 0.012 | 7.757 | 0.000 | 0.070 | 0.117 |
| season_winter | 0.1153 | 0.015 | 7.890 | 0.000 | 0.087 | 0.144 |
| yr_2019 | 0.2303 | 0.008 | 28.508 | 0.000 | 0.214 | 0.246 |
| mnth_august | 0.0545 | 0.016 | 3.327 | 0.001 | 0.022 | 0.087 |
| mnth_february | -0.0260 | 0.020 | -1.304 | 0.193 | -0.065 | 0.013 |
| mnth_january | -0.0508 | 0.020 | -2.586 | 0.010 | -0.089 | -0.012 |
| mnth_october | 0.0405 | 0.018 | 2.311 | 0.021 | 0.006 | 0.075 |
| mnth_september | 0.1232 | 0.016 | 7.508 | 0.000 | 0.091 | 0.155 |
| holiday_yes | -0.0915 | 0.025 | -3.593 | 0.000 | -0.141 | -0.041 |
| weathersit_light_weather | -0.2429 | 0.026 | -9.245 | 0.000 | -0.295 | -0.191 |
| weathersit_misty | -0.0534 | 0.010 | -5.124 | 0.000 | -0.074 | -0.033 |
| Omnibus: | 70.142 | Durbin-Watson: | 2.046 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 165.258 |
| Skew: | -0.724 | Prob(JB): | 1.30e-36 |
| Kurtosis: | 5.384 | Cond. No. | 18.6 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 21.40 |
| 0 | temp | 13.18 |
| 2 | windspeed | 3.88 |
| 4 | season_winter | 2.98 |
| 3 | season_summer | 2.20 |
| 13 | weathersit_misty | 2.19 |
| 5 | yr_2019 | 2.06 |
| 8 | mnth_january | 1.79 |
| 6 | mnth_august | 1.63 |
| 9 | mnth_october | 1.60 |
| 7 | mnth_february | 1.47 |
| 10 | mnth_september | 1.37 |
| 12 | weathersit_light_weather | 1.22 |
| 11 | holiday_yes | 1.05 |
# Dropping 'mnth_february' because it has high p-value
X_train_1 = X_train_1.drop('mnth_february', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | cnt | R-squared: | 0.844 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.840 |
| Method: | Least Squares | F-statistic: | 206.3 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 1.90e-190 |
| Time: | 01:25:14 | Log-Likelihood: | 512.56 |
| No. Observations: | 510 | AIC: | -997.1 |
| Df Residuals: | 496 | BIC: | -937.8 |
| Df Model: | 13 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2586 | 0.028 | 9.131 | 0.000 | 0.203 | 0.314 |
| temp | 0.5058 | 0.025 | 20.204 | 0.000 | 0.457 | 0.555 |
| hum | -0.1759 | 0.037 | -4.709 | 0.000 | -0.249 | -0.102 |
| windspeed | -0.1927 | 0.026 | -7.511 | 0.000 | -0.243 | -0.142 |
| season_summer | 0.0992 | 0.011 | 8.831 | 0.000 | 0.077 | 0.121 |
| season_winter | 0.1237 | 0.013 | 9.433 | 0.000 | 0.098 | 0.150 |
| yr_2019 | 0.2299 | 0.008 | 28.458 | 0.000 | 0.214 | 0.246 |
| mnth_august | 0.0568 | 0.016 | 3.485 | 0.001 | 0.025 | 0.089 |
| mnth_january | -0.0392 | 0.018 | -2.236 | 0.026 | -0.074 | -0.005 |
| mnth_october | 0.0388 | 0.017 | 2.220 | 0.027 | 0.004 | 0.073 |
| mnth_september | 0.1255 | 0.016 | 7.684 | 0.000 | 0.093 | 0.158 |
| holiday_yes | -0.0932 | 0.025 | -3.664 | 0.000 | -0.143 | -0.043 |
| weathersit_light_weather | -0.2420 | 0.026 | -9.208 | 0.000 | -0.294 | -0.190 |
| weathersit_misty | -0.0532 | 0.010 | -5.103 | 0.000 | -0.074 | -0.033 |
| Omnibus: | 66.371 | Durbin-Watson: | 2.046 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 154.759 |
| Skew: | -0.691 | Prob(JB): | 2.48e-34 |
| Kurtosis: | 5.318 | Cond. No. | 18.5 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 18.19 |
| 0 | temp | 11.70 |
| 2 | windspeed | 3.62 |
| 4 | season_winter | 2.57 |
| 12 | weathersit_misty | 2.16 |
| 5 | yr_2019 | 2.04 |
| 3 | season_summer | 1.97 |
| 6 | mnth_august | 1.61 |
| 8 | mnth_october | 1.60 |
| 7 | mnth_january | 1.55 |
| 9 | mnth_september | 1.35 |
| 11 | weathersit_light_weather | 1.21 |
| 10 | holiday_yes | 1.04 |
# Dropping 'hum' because it has high VIF
X_train_1 = X_train_1.drop('hum', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | cnt | R-squared: | 0.837 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.833 |
| Method: | Least Squares | F-statistic: | 212.6 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 6.26e-187 |
| Time: | 01:25:14 | Log-Likelihood: | 501.40 |
| No. Observations: | 510 | AIC: | -976.8 |
| Df Residuals: | 497 | BIC: | -921.8 |
| Df Model: | 12 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.1610 | 0.020 | 8.169 | 0.000 | 0.122 | 0.200 |
| temp | 0.4810 | 0.025 | 19.248 | 0.000 | 0.432 | 0.530 |
| windspeed | -0.1597 | 0.025 | -6.338 | 0.000 | -0.209 | -0.110 |
| season_summer | 0.0952 | 0.011 | 8.321 | 0.000 | 0.073 | 0.118 |
| season_winter | 0.1136 | 0.013 | 8.600 | 0.000 | 0.088 | 0.140 |
| yr_2019 | 0.2339 | 0.008 | 28.522 | 0.000 | 0.218 | 0.250 |
| mnth_august | 0.0549 | 0.017 | 3.303 | 0.001 | 0.022 | 0.088 |
| mnth_january | -0.0450 | 0.018 | -2.524 | 0.012 | -0.080 | -0.010 |
| mnth_october | 0.0347 | 0.018 | 1.948 | 0.052 | -0.000 | 0.070 |
| mnth_september | 0.1185 | 0.017 | 7.135 | 0.000 | 0.086 | 0.151 |
| holiday_yes | -0.0945 | 0.026 | -3.638 | 0.000 | -0.146 | -0.043 |
| weathersit_light_weather | -0.2892 | 0.025 | -11.656 | 0.000 | -0.338 | -0.240 |
| weathersit_misty | -0.0810 | 0.009 | -9.230 | 0.000 | -0.098 | -0.064 |
| Omnibus: | 64.117 | Durbin-Watson: | 2.056 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 148.090 |
| Skew: | -0.672 | Prob(JB): | 6.96e-33 |
| Kurtosis: | 5.272 | Cond. No. | 12.2 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 5.15 |
| 1 | windspeed | 3.35 |
| 4 | yr_2019 | 2.04 |
| 3 | season_winter | 1.99 |
| 2 | season_summer | 1.91 |
| 5 | mnth_august | 1.61 |
| 7 | mnth_october | 1.59 |
| 11 | weathersit_misty | 1.54 |
| 8 | mnth_september | 1.34 |
| 6 | mnth_january | 1.22 |
| 10 | weathersit_light_weather | 1.10 |
| 9 | holiday_yes | 1.04 |
# Dropping 'mnth_october' because it has high p-value
X_train_1 = X_train_1.drop('mnth_october', axis=1)
# Re-train model
train_model(y_train, X_train_1).summary()
| Dep. Variable: | cnt | R-squared: | 0.836 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.832 |
| Method: | Least Squares | F-statistic: | 230.3 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 2.65e-187 |
| Time: | 01:25:15 | Log-Likelihood: | 499.46 |
| No. Observations: | 510 | AIC: | -974.9 |
| Df Residuals: | 498 | BIC: | -924.1 |
| Df Model: | 11 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.1550 | 0.020 | 7.942 | 0.000 | 0.117 | 0.193 |
| temp | 0.4911 | 0.025 | 20.028 | 0.000 | 0.443 | 0.539 |
| windspeed | -0.1570 | 0.025 | -6.223 | 0.000 | -0.207 | -0.107 |
| season_summer | 0.0941 | 0.011 | 8.212 | 0.000 | 0.072 | 0.117 |
| season_winter | 0.1261 | 0.012 | 10.869 | 0.000 | 0.103 | 0.149 |
| yr_2019 | 0.2335 | 0.008 | 28.400 | 0.000 | 0.217 | 0.250 |
| mnth_august | 0.0520 | 0.017 | 3.131 | 0.002 | 0.019 | 0.085 |
| mnth_january | -0.0422 | 0.018 | -2.368 | 0.018 | -0.077 | -0.007 |
| mnth_september | 0.1137 | 0.016 | 6.902 | 0.000 | 0.081 | 0.146 |
| holiday_yes | -0.0957 | 0.026 | -3.674 | 0.000 | -0.147 | -0.045 |
| weathersit_light_weather | -0.2834 | 0.025 | -11.473 | 0.000 | -0.332 | -0.235 |
| weathersit_misty | -0.0801 | 0.009 | -9.118 | 0.000 | -0.097 | -0.063 |
| Omnibus: | 56.180 | Durbin-Watson: | 2.067 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 121.643 |
| Skew: | -0.614 | Prob(JB): | 3.85e-27 |
| Kurtosis: | 5.053 | Cond. No. | 11.8 |
calculate_vif(X_train_1)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 5.05 |
| 1 | windspeed | 3.34 |
| 4 | yr_2019 | 2.04 |
| 2 | season_summer | 1.89 |
| 5 | mnth_august | 1.59 |
| 3 | season_winter | 1.55 |
| 10 | weathersit_misty | 1.54 |
| 7 | mnth_september | 1.31 |
| 6 | mnth_january | 1.22 |
| 9 | weathersit_light_weather | 1.08 |
| 8 | holiday_yes | 1.04 |
lr_model_1 = train_model(y_train, X_train_1)
X_train_2 = X_train_1.drop('temp', axis=1)
train_model(y_train, X_train_2).summary()
| Dep. Variable: | cnt | R-squared: | 0.703 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.697 |
| Method: | Least Squares | F-statistic: | 118.3 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 8.53e-125 |
| Time: | 01:25:15 | Log-Likelihood: | 348.80 |
| No. Observations: | 510 | AIC: | -675.6 |
| Df Residuals: | 499 | BIC: | -629.0 |
| Df Model: | 10 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.4553 | 0.017 | 27.130 | 0.000 | 0.422 | 0.488 |
| windspeed | -0.2593 | 0.033 | -7.815 | 0.000 | -0.324 | -0.194 |
| season_summer | 0.1169 | 0.015 | 7.637 | 0.000 | 0.087 | 0.147 |
| season_winter | 0.0686 | 0.015 | 4.548 | 0.000 | 0.039 | 0.098 |
| yr_2019 | 0.2502 | 0.011 | 22.795 | 0.000 | 0.229 | 0.272 |
| mnth_august | 0.1711 | 0.021 | 8.218 | 0.000 | 0.130 | 0.212 |
| mnth_january | -0.2037 | 0.021 | -9.541 | 0.000 | -0.246 | -0.162 |
| mnth_september | 0.1936 | 0.021 | 9.028 | 0.000 | 0.151 | 0.236 |
| holiday_yes | -0.1134 | 0.035 | -3.245 | 0.001 | -0.182 | -0.045 |
| weathersit_light_weather | -0.2900 | 0.033 | -8.747 | 0.000 | -0.355 | -0.225 |
| weathersit_misty | -0.0989 | 0.012 | -8.429 | 0.000 | -0.122 | -0.076 |
| Omnibus: | 5.753 | Durbin-Watson: | 1.932 |
|---|---|---|---|
| Prob(Omnibus): | 0.056 | Jarque-Bera (JB): | 5.805 |
| Skew: | -0.200 | Prob(JB): | 0.0549 |
| Kurtosis: | 3.337 | Cond. No. | 8.87 |
calculate_vif(X_train_2)
| Feature | VIF | |
|---|---|---|
| 0 | windspeed | 2.60 |
| 3 | yr_2019 | 1.84 |
| 1 | season_summer | 1.63 |
| 9 | weathersit_misty | 1.53 |
| 2 | season_winter | 1.47 |
| 5 | mnth_january | 1.21 |
| 4 | mnth_august | 1.19 |
| 6 | mnth_september | 1.12 |
| 8 | weathersit_light_weather | 1.08 |
| 7 | holiday_yes | 1.04 |
lr_model_2 = train_model(y_train, X_train_2)
# Starting without 'dteday'
X_train_3 = X_train.drop('dteday', axis=1)
X_train_3 = X_train_3[auto_select_features(15, X_train_3)]
# Train model
train_model(y_train, X_train_3).summary()
| Dep. Variable: | cnt | R-squared: | 0.845 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.840 |
| Method: | Least Squares | F-statistic: | 179.4 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 8.15e-189 |
| Time: | 01:25:16 | Log-Likelihood: | 514.19 |
| No. Observations: | 510 | AIC: | -996.4 |
| Df Residuals: | 494 | BIC: | -928.6 |
| Df Model: | 15 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3197 | 0.036 | 8.859 | 0.000 | 0.249 | 0.391 |
| temp | 0.4815 | 0.037 | 13.005 | 0.000 | 0.409 | 0.554 |
| hum | -0.1622 | 0.038 | -4.291 | 0.000 | -0.236 | -0.088 |
| windspeed | -0.1887 | 0.026 | -7.315 | 0.000 | -0.239 | -0.138 |
| season_spring | -0.0613 | 0.021 | -2.881 | 0.004 | -0.103 | -0.019 |
| season_summer | 0.0423 | 0.015 | 2.761 | 0.006 | 0.012 | 0.072 |
| season_winter | 0.1019 | 0.018 | 5.656 | 0.000 | 0.067 | 0.137 |
| yr_2019 | 0.2304 | 0.008 | 28.487 | 0.000 | 0.215 | 0.246 |
| mnth_december | -0.0355 | 0.018 | -2.024 | 0.043 | -0.070 | -0.001 |
| mnth_january | -0.0434 | 0.018 | -2.393 | 0.017 | -0.079 | -0.008 |
| mnth_july | -0.0553 | 0.018 | -3.030 | 0.003 | -0.091 | -0.019 |
| mnth_november | -0.0387 | 0.019 | -2.057 | 0.040 | -0.076 | -0.002 |
| mnth_september | 0.0755 | 0.017 | 4.466 | 0.000 | 0.042 | 0.109 |
| holiday_yes | -0.0911 | 0.026 | -3.557 | 0.000 | -0.141 | -0.041 |
| weathersit_light_weather | -0.2465 | 0.026 | -9.331 | 0.000 | -0.298 | -0.195 |
| weathersit_misty | -0.0543 | 0.010 | -5.194 | 0.000 | -0.075 | -0.034 |
| Omnibus: | 66.656 | Durbin-Watson: | 2.025 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 161.040 |
| Skew: | -0.682 | Prob(JB): | 1.07e-35 |
| Kurtosis: | 5.392 | Cond. No. | 20.8 |
calculate_vif(X_train_3)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 30.89 |
| 0 | temp | 17.79 |
| 2 | windspeed | 4.72 |
| 3 | season_spring | 4.37 |
| 5 | season_winter | 4.06 |
| 4 | season_summer | 2.82 |
| 14 | weathersit_misty | 2.32 |
| 6 | yr_2019 | 2.09 |
| 10 | mnth_november | 1.85 |
| 8 | mnth_january | 1.75 |
| 9 | mnth_july | 1.59 |
| 7 | mnth_december | 1.56 |
| 11 | mnth_september | 1.41 |
| 13 | weathersit_light_weather | 1.28 |
| 12 | holiday_yes | 1.06 |
# Dropping 'hum' due to high VIF
X_train_3 = X_train_3.drop('hum', axis=1)
# Re-train model
train_model(y_train, X_train_3).summary()
| Dep. Variable: | cnt | R-squared: | 0.839 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.835 |
| Method: | Least Squares | F-statistic: | 184.5 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 4.75e-186 |
| Time: | 01:25:17 | Log-Likelihood: | 504.85 |
| No. Observations: | 510 | AIC: | -979.7 |
| Df Residuals: | 495 | BIC: | -916.2 |
| Df Model: | 14 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2450 | 0.032 | 7.618 | 0.000 | 0.182 | 0.308 |
| temp | 0.4387 | 0.036 | 12.093 | 0.000 | 0.367 | 0.510 |
| windspeed | -0.1585 | 0.025 | -6.276 | 0.000 | -0.208 | -0.109 |
| season_spring | -0.0713 | 0.021 | -3.314 | 0.001 | -0.113 | -0.029 |
| season_summer | 0.0349 | 0.015 | 2.251 | 0.025 | 0.004 | 0.065 |
| season_winter | 0.0869 | 0.018 | 4.831 | 0.000 | 0.052 | 0.122 |
| yr_2019 | 0.2345 | 0.008 | 28.687 | 0.000 | 0.218 | 0.251 |
| mnth_december | -0.0428 | 0.018 | -2.413 | 0.016 | -0.078 | -0.008 |
| mnth_january | -0.0500 | 0.018 | -2.719 | 0.007 | -0.086 | -0.014 |
| mnth_july | -0.0500 | 0.019 | -2.703 | 0.007 | -0.086 | -0.014 |
| mnth_november | -0.0395 | 0.019 | -2.064 | 0.040 | -0.077 | -0.002 |
| mnth_september | 0.0687 | 0.017 | 4.015 | 0.000 | 0.035 | 0.102 |
| holiday_yes | -0.0918 | 0.026 | -3.522 | 0.000 | -0.143 | -0.041 |
| weathersit_light_weather | -0.2917 | 0.025 | -11.840 | 0.000 | -0.340 | -0.243 |
| weathersit_misty | -0.0801 | 0.009 | -9.198 | 0.000 | -0.097 | -0.063 |
| Omnibus: | 69.242 | Durbin-Watson: | 2.024 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 171.476 |
| Skew: | -0.698 | Prob(JB): | 5.81e-38 |
| Kurtosis: | 5.473 | Cond. No. | 18.9 |
calculate_vif(X_train_3)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 5.17 |
| 1 | windspeed | 4.67 |
| 4 | season_winter | 2.95 |
| 2 | season_spring | 2.89 |
| 3 | season_summer | 2.24 |
| 5 | yr_2019 | 2.07 |
| 9 | mnth_november | 1.81 |
| 7 | mnth_january | 1.66 |
| 8 | mnth_july | 1.59 |
| 13 | weathersit_misty | 1.57 |
| 6 | mnth_december | 1.47 |
| 10 | mnth_september | 1.35 |
| 12 | weathersit_light_weather | 1.09 |
| 11 | holiday_yes | 1.06 |
lr_model_3 = train_model(y_train, X_train_3)
# Continuing further with Model 3
X_train_4 = X_train_3.drop('temp', axis=1)
# Train model
train_model(y_train, X_train_4).summary()
| Dep. Variable: | cnt | R-squared: | 0.792 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.786 |
| Method: | Least Squares | F-statistic: | 144.9 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 1.82e-159 |
| Time: | 01:25:17 | Log-Likelihood: | 438.84 |
| No. Observations: | 510 | AIC: | -849.7 |
| Df Residuals: | 496 | BIC: | -790.4 |
| Df Model: | 13 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.5954 | 0.016 | 37.460 | 0.000 | 0.564 | 0.627 |
| windspeed | -0.1902 | 0.029 | -6.662 | 0.000 | -0.246 | -0.134 |
| season_spring | -0.2503 | 0.018 | -14.125 | 0.000 | -0.285 | -0.216 |
| season_summer | -0.0497 | 0.016 | -3.160 | 0.002 | -0.081 | -0.019 |
| season_winter | -0.0231 | 0.018 | -1.307 | 0.192 | -0.058 | 0.012 |
| yr_2019 | 0.2457 | 0.009 | 26.616 | 0.000 | 0.228 | 0.264 |
| mnth_december | -0.1086 | 0.019 | -5.660 | 0.000 | -0.146 | -0.071 |
| mnth_january | -0.1202 | 0.020 | -6.056 | 0.000 | -0.159 | -0.081 |
| mnth_july | -0.0182 | 0.021 | -0.875 | 0.382 | -0.059 | 0.023 |
| mnth_november | -0.0995 | 0.021 | -4.731 | 0.000 | -0.141 | -0.058 |
| mnth_september | 0.0534 | 0.019 | 2.750 | 0.006 | 0.015 | 0.092 |
| holiday_yes | -0.0855 | 0.030 | -2.887 | 0.004 | -0.144 | -0.027 |
| weathersit_light_weather | -0.3117 | 0.028 | -11.150 | 0.000 | -0.367 | -0.257 |
| weathersit_misty | -0.0869 | 0.010 | -8.800 | 0.000 | -0.106 | -0.068 |
| Omnibus: | 56.274 | Durbin-Watson: | 1.943 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 132.272 |
| Skew: | -0.588 | Prob(JB): | 1.89e-29 |
| Kurtosis: | 5.200 | Cond. No. | 9.27 |
calculate_vif(X_train_4)
| Feature | VIF | |
|---|---|---|
| 0 | windspeed | 3.89 |
| 1 | season_spring | 2.89 |
| 3 | season_winter | 2.85 |
| 2 | season_summer | 2.02 |
| 4 | yr_2019 | 1.83 |
| 8 | mnth_november | 1.80 |
| 6 | mnth_january | 1.64 |
| 12 | weathersit_misty | 1.53 |
| 5 | mnth_december | 1.46 |
| 7 | mnth_july | 1.20 |
| 9 | mnth_september | 1.18 |
| 11 | weathersit_light_weather | 1.09 |
| 10 | holiday_yes | 1.06 |
# Dropping 'mnth_july' due to high p-value
X_train_4 = X_train_4.drop('mnth_july', axis=1)
# Re-train model
train_model(y_train, X_train_4).summary()
| Dep. Variable: | cnt | R-squared: | 0.791 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.786 |
| Method: | Least Squares | F-statistic: | 157.0 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 2.08e-160 |
| Time: | 01:25:18 | Log-Likelihood: | 438.45 |
| No. Observations: | 510 | AIC: | -850.9 |
| Df Residuals: | 497 | BIC: | -795.9 |
| Df Model: | 12 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.5886 | 0.014 | 42.474 | 0.000 | 0.561 | 0.616 |
| windspeed | -0.1901 | 0.029 | -6.659 | 0.000 | -0.246 | -0.134 |
| season_spring | -0.2439 | 0.016 | -15.125 | 0.000 | -0.276 | -0.212 |
| season_summer | -0.0432 | 0.014 | -3.116 | 0.002 | -0.070 | -0.016 |
| season_winter | -0.0173 | 0.016 | -1.058 | 0.290 | -0.050 | 0.015 |
| yr_2019 | 0.2458 | 0.009 | 26.633 | 0.000 | 0.228 | 0.264 |
| mnth_december | -0.1081 | 0.019 | -5.635 | 0.000 | -0.146 | -0.070 |
| mnth_january | -0.1201 | 0.020 | -6.057 | 0.000 | -0.159 | -0.081 |
| mnth_november | -0.0987 | 0.021 | -4.699 | 0.000 | -0.140 | -0.057 |
| mnth_september | 0.0586 | 0.018 | 3.172 | 0.002 | 0.022 | 0.095 |
| holiday_yes | -0.0852 | 0.030 | -2.877 | 0.004 | -0.143 | -0.027 |
| weathersit_light_weather | -0.3118 | 0.028 | -11.156 | 0.000 | -0.367 | -0.257 |
| weathersit_misty | -0.0863 | 0.010 | -8.762 | 0.000 | -0.106 | -0.067 |
| Omnibus: | 56.410 | Durbin-Watson: | 1.953 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 131.512 |
| Skew: | -0.592 | Prob(JB): | 2.77e-29 |
| Kurtosis: | 5.188 | Cond. No. | 8.93 |
# Re-check VIF of the remaining features after removing 'mnth_july'
calculate_vif(X_train_4)
| Feature | VIF | |
|---|---|---|
| 0 | windspeed | 3.52 |
| 3 | season_winter | 2.75 |
| 1 | season_spring | 2.71 |
| 2 | season_summer | 1.87 |
| 7 | mnth_november | 1.80 |
| 4 | yr_2019 | 1.77 |
| 6 | mnth_january | 1.64 |
| 11 | weathersit_misty | 1.52 |
| 5 | mnth_december | 1.46 |
| 8 | mnth_september | 1.16 |
| 10 | weathersit_light_weather | 1.09 |
| 9 | holiday_yes | 1.06 |
# 'season_winter' has a high p-value in the summary above (0.290 in the
# recorded run), so it is removed from the Model 4 feature set.
X_train_4 = X_train_4.drop(columns=['season_winter'])
# Fit again on the reduced feature set and display the OLS summary
train_model(y_train, X_train_4).summary()
| Dep. Variable: | cnt | R-squared: | 0.791 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.786 |
| Method: | Least Squares | F-statistic: | 171.2 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 2.71e-161 |
| Time: | 01:25:19 | Log-Likelihood: | 437.88 |
| No. Observations: | 510 | AIC: | -851.8 |
| Df Residuals: | 498 | BIC: | -800.9 |
| Df Model: | 11 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.5834 | 0.013 | 44.987 | 0.000 | 0.558 | 0.609 |
| windspeed | -0.1910 | 0.029 | -6.694 | 0.000 | -0.247 | -0.135 |
| season_spring | -0.2367 | 0.015 | -16.212 | 0.000 | -0.265 | -0.208 |
| season_summer | -0.0374 | 0.013 | -2.937 | 0.003 | -0.062 | -0.012 |
| yr_2019 | 0.2459 | 0.009 | 26.646 | 0.000 | 0.228 | 0.264 |
| mnth_december | -0.1166 | 0.017 | -6.689 | 0.000 | -0.151 | -0.082 |
| mnth_january | -0.1216 | 0.020 | -6.143 | 0.000 | -0.160 | -0.083 |
| mnth_november | -0.1101 | 0.018 | -6.120 | 0.000 | -0.145 | -0.075 |
| mnth_september | 0.0607 | 0.018 | 3.304 | 0.001 | 0.025 | 0.097 |
| holiday_yes | -0.0854 | 0.030 | -2.883 | 0.004 | -0.144 | -0.027 |
| weathersit_light_weather | -0.3155 | 0.028 | -11.374 | 0.000 | -0.370 | -0.261 |
| weathersit_misty | -0.0872 | 0.010 | -8.883 | 0.000 | -0.106 | -0.068 |
| Omnibus: | 60.252 | Durbin-Watson: | 1.945 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 145.320 |
| Skew: | -0.619 | Prob(JB): | 2.78e-32 |
| Kurtosis: | 5.303 | Cond. No. | 8.79 |
# Re-check VIF of the remaining features after removing 'season_winter'
calculate_vif(X_train_4)
| Feature | VIF | |
|---|---|---|
| 0 | windspeed | 3.23 |
| 1 | season_spring | 2.37 |
| 3 | yr_2019 | 1.74 |
| 2 | season_summer | 1.70 |
| 5 | mnth_january | 1.62 |
| 10 | weathersit_misty | 1.48 |
| 6 | mnth_november | 1.23 |
| 7 | mnth_september | 1.16 |
| 4 | mnth_december | 1.13 |
| 9 | weathersit_light_weather | 1.07 |
| 8 | holiday_yes | 1.06 |
# Fit the final Model 4 feature set once more and keep the returned model in
# a variable (the summary cells above display the result without storing it).
lr_model_4 = train_model(y_train, X_train_4)
# Model 5 branches off Model 4's features with the objective of keeping every
# VIF within 2.5; 'windspeed' (VIF 3.23 in the recorded run) is the only
# feature above that limit, so it is removed.
X_train_5 = X_train_4.drop(columns=['windspeed'])
# Fit on the reduced feature set and display the OLS summary
train_model(y_train, X_train_5).summary()
| Dep. Variable: | cnt | R-squared: | 0.772 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.767 |
| Method: | Least Squares | F-statistic: | 169.0 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 3.79e-153 |
| Time: | 01:25:19 | Log-Likelihood: | 415.91 |
| No. Observations: | 510 | AIC: | -809.8 |
| Df Residuals: | 499 | BIC: | -763.2 |
| Df Model: | 10 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.5309 | 0.011 | 49.274 | 0.000 | 0.510 | 0.552 |
| season_spring | -0.2616 | 0.015 | -17.771 | 0.000 | -0.291 | -0.233 |
| season_summer | -0.0521 | 0.013 | -3.988 | 0.000 | -0.078 | -0.026 |
| yr_2019 | 0.2452 | 0.010 | 25.469 | 0.000 | 0.226 | 0.264 |
| mnth_december | -0.1109 | 0.018 | -6.108 | 0.000 | -0.147 | -0.075 |
| mnth_january | -0.1062 | 0.021 | -5.178 | 0.000 | -0.146 | -0.066 |
| mnth_november | -0.1189 | 0.019 | -6.349 | 0.000 | -0.156 | -0.082 |
| mnth_september | 0.0662 | 0.019 | 3.461 | 0.001 | 0.029 | 0.104 |
| holiday_yes | -0.0879 | 0.031 | -2.844 | 0.005 | -0.149 | -0.027 |
| weathersit_light_weather | -0.3349 | 0.029 | -11.639 | 0.000 | -0.391 | -0.278 |
| weathersit_misty | -0.0861 | 0.010 | -8.412 | 0.000 | -0.106 | -0.066 |
| Omnibus: | 68.305 | Durbin-Watson: | 1.986 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 198.368 |
| Skew: | -0.637 | Prob(JB): | 8.41e-44 |
| Kurtosis: | 5.777 | Cond. No. | 8.28 |
# Verify the VIF objective for Model 5 (every value in the recorded output is below 2.5)
calculate_vif(X_train_5)
| Feature | VIF | |
|---|---|---|
| 0 | season_spring | 1.84 |
| 4 | mnth_january | 1.61 |
| 9 | weathersit_misty | 1.44 |
| 1 | season_summer | 1.31 |
| 5 | mnth_november | 1.12 |
| 6 | mnth_september | 1.12 |
| 3 | mnth_december | 1.10 |
| 7 | holiday_yes | 1.06 |
| 8 | weathersit_light_weather | 1.03 |
| 2 | yr_2019 | 0.02 |
# Fit the Model 5 feature set once more and keep the returned model in a
# variable (the summary cell above displayed the result without storing it).
lr_model_5 = train_model(y_train, X_train_5)
# Model 6: restrict X_train to the columns returned by auto_select_features(10).
# NOTE(review): presumably the argument is the number of features to keep
# (the recorded summary lists 10 predictors) — confirm against the helper.
X_train_6 = X_train[auto_select_features(10)]
# Fit on the selected features and display the OLS summary
train_model(y_train, X_train_6).summary()
| Dep. Variable: | cnt | R-squared: | 0.833 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.830 |
| Method: | Least Squares | F-statistic: | 248.8 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 1.02e-186 |
| Time: | 01:25:20 | Log-Likelihood: | 495.21 |
| No. Observations: | 510 | AIC: | -968.4 |
| Df Residuals: | 499 | BIC: | -921.8 |
| Df Model: | 10 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2755 | 0.026 | 10.547 | 0.000 | 0.224 | 0.327 |
| temp | 0.5660 | 0.022 | 25.833 | 0.000 | 0.523 | 0.609 |
| hum | -0.2848 | 0.032 | -9.026 | 0.000 | -0.347 | -0.223 |
| windspeed | -0.2012 | 0.026 | -7.729 | 0.000 | -0.252 | -0.150 |
| season_summer | 0.1011 | 0.011 | 9.084 | 0.000 | 0.079 | 0.123 |
| season_winter | 0.1508 | 0.011 | 13.840 | 0.000 | 0.129 | 0.172 |
| yr_2019 | 0.2264 | 0.008 | 27.262 | 0.000 | 0.210 | 0.243 |
| mnth_august | 0.0496 | 0.017 | 2.973 | 0.003 | 0.017 | 0.082 |
| mnth_september | 0.1190 | 0.017 | 7.174 | 0.000 | 0.086 | 0.152 |
| holiday_yes | -0.0893 | 0.026 | -3.409 | 0.001 | -0.141 | -0.038 |
| weathersit_light_weather | -0.1916 | 0.026 | -7.496 | 0.000 | -0.242 | -0.141 |
| Omnibus: | 56.891 | Durbin-Watson: | 2.021 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 105.867 |
| Skew: | -0.673 | Prob(JB): | 1.03e-23 |
| Kurtosis: | 4.781 | Cond. No. | 15.4 |
# Multicollinearity check: VIF of every feature selected for Model 6
calculate_vif(X_train_6)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 10.02 |
| 0 | temp | 8.52 |
| 2 | windspeed | 3.57 |
| 5 | yr_2019 | 2.01 |
| 3 | season_summer | 1.82 |
| 4 | season_winter | 1.73 |
| 6 | mnth_august | 1.57 |
| 7 | mnth_september | 1.30 |
| 9 | weathersit_light_weather | 1.09 |
| 8 | holiday_yes | 1.04 |
# 'hum' has the highest VIF in the table above (10.02 in the recorded run),
# so it is removed from the Model 6 feature set.
X_train_6 = X_train_6.drop(columns=['hum'])
# Fit again on the reduced feature set and display the OLS summary
train_model(y_train, X_train_6).summary()
| Dep. Variable: | cnt | R-squared: | 0.806 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.802 |
| Method: | Least Squares | F-statistic: | 230.3 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 1.39e-171 |
| Time: | 01:25:21 | Log-Likelihood: | 456.65 |
| No. Observations: | 510 | AIC: | -893.3 |
| Df Residuals: | 500 | BIC: | -851.0 |
| Df Model: | 9 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.0900 | 0.017 | 5.181 | 0.000 | 0.056 | 0.124 |
| temp | 0.5464 | 0.023 | 23.262 | 0.000 | 0.500 | 0.593 |
| windspeed | -0.1427 | 0.027 | -5.252 | 0.000 | -0.196 | -0.089 |
| season_summer | 0.0905 | 0.012 | 7.588 | 0.000 | 0.067 | 0.114 |
| season_winter | 0.1316 | 0.012 | 11.430 | 0.000 | 0.109 | 0.154 |
| yr_2019 | 0.2332 | 0.009 | 26.169 | 0.000 | 0.216 | 0.251 |
| mnth_august | 0.0394 | 0.018 | 2.196 | 0.029 | 0.004 | 0.075 |
| mnth_september | 0.1002 | 0.018 | 5.650 | 0.000 | 0.065 | 0.135 |
| holiday_yes | -0.0848 | 0.028 | -3.005 | 0.003 | -0.140 | -0.029 |
| weathersit_light_weather | -0.2520 | 0.027 | -9.483 | 0.000 | -0.304 | -0.200 |
| Omnibus: | 63.117 | Durbin-Watson: | 1.996 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 111.174 |
| Skew: | -0.757 | Prob(JB): | 7.23e-25 |
| Kurtosis: | 4.714 | Cond. No. | 9.93 |
# Re-check VIF of the remaining features after removing 'hum'
calculate_vif(X_train_6)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 4.99 |
| 1 | windspeed | 3.01 |
| 4 | yr_2019 | 2.00 |
| 2 | season_summer | 1.79 |
| 5 | mnth_august | 1.57 |
| 3 | season_winter | 1.44 |
| 6 | mnth_september | 1.29 |
| 8 | weathersit_light_weather | 1.06 |
| 7 | holiday_yes | 1.04 |
# Fit the final Model 6 feature set once more and keep the returned model in
# a variable (the summary cells above display the result without storing it).
lr_model_6 = train_model(y_train, X_train_6)
# Model 7: restrict X_train to the columns returned by auto_select_features(5).
# NOTE(review): presumably the argument is the number of features to keep
# (the recorded summary lists 5 predictors) — confirm against the helper.
X_train_7 = X_train[auto_select_features(5)]
# Fit on the selected features and display the OLS summary
train_model(y_train, X_train_7).summary()
| Dep. Variable: | cnt | R-squared: | 0.750 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.748 |
| Method: | Least Squares | F-statistic: | 302.8 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 2.85e-149 |
| Time: | 01:25:21 | Log-Likelihood: | 392.69 |
| No. Observations: | 510 | AIC: | -773.4 |
| Df Residuals: | 504 | BIC: | -748.0 |
| Df Model: | 5 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2832 | 0.031 | 9.044 | 0.000 | 0.222 | 0.345 |
| temp | 0.5684 | 0.023 | 24.875 | 0.000 | 0.524 | 0.613 |
| hum | -0.1791 | 0.037 | -4.785 | 0.000 | -0.253 | -0.106 |
| windspeed | -0.2185 | 0.031 | -6.989 | 0.000 | -0.280 | -0.157 |
| yr_2019 | 0.2295 | 0.010 | 22.722 | 0.000 | 0.210 | 0.249 |
| weathersit_light_weather | -0.1752 | 0.031 | -5.661 | 0.000 | -0.236 | -0.114 |
| Omnibus: | 8.563 | Durbin-Watson: | 1.996 |
|---|---|---|---|
| Prob(Omnibus): | 0.014 | Jarque-Bera (JB): | 8.626 |
| Skew: | -0.276 | Prob(JB): | 0.0134 |
| Kurtosis: | 3.316 | Cond. No. | 14.7 |
# Multicollinearity check: VIF of every feature selected for Model 7
calculate_vif(X_train_7)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 8.17 |
| 0 | temp | 6.40 |
| 2 | windspeed | 3.40 |
| 3 | yr_2019 | 2.01 |
| 4 | weathersit_light_weather | 1.08 |
# 'hum' has the highest VIF in the table above (8.17 in the recorded run),
# so it is removed from the Model 7 feature set.
X_train_7 = X_train_7.drop(columns=['hum'])
# Fit again on the reduced feature set and display the OLS summary
train_model(y_train, X_train_7).summary()
| Dep. Variable: | cnt | R-squared: | 0.739 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.737 |
| Method: | Least Squares | F-statistic: | 357.3 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 1.01e-145 |
| Time: | 01:25:22 | Log-Likelihood: | 381.36 |
| No. Observations: | 510 | AIC: | -752.7 |
| Df Residuals: | 505 | BIC: | -731.5 |
| Df Model: | 4 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.1603 | 0.018 | 8.767 | 0.000 | 0.124 | 0.196 |
| temp | 0.5541 | 0.023 | 23.946 | 0.000 | 0.509 | 0.600 |
| windspeed | -0.1774 | 0.031 | -5.777 | 0.000 | -0.238 | -0.117 |
| yr_2019 | 0.2337 | 0.010 | 22.744 | 0.000 | 0.214 | 0.254 |
| weathersit_light_weather | -0.2165 | 0.030 | -7.135 | 0.000 | -0.276 | -0.157 |
| Omnibus: | 10.058 | Durbin-Watson: | 1.979 |
|---|---|---|---|
| Prob(Omnibus): | 0.007 | Jarque-Bera (JB): | 10.298 |
| Skew: | -0.300 | Prob(JB): | 0.00580 |
| Kurtosis: | 3.352 | Cond. No. | 8.98 |
# Re-check VIF of the remaining features after removing 'hum'
calculate_vif(X_train_7)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 3.06 |
| 1 | windspeed | 2.73 |
| 2 | yr_2019 | 1.98 |
| 3 | weathersit_light_weather | 1.04 |
# Fit the final Model 7 feature set once more and keep the returned model in
# a variable (the summary cells above display the result without storing it).
lr_model_7 = train_model(y_train, X_train_7)
Going fully manual: start from all features and eliminate them one at a time
# Manual elimination starts from the complete X_train feature set.
# This is a second name for the same object (no copy is made); the later
# cells rebind X_train_8 to the new frame returned by drop(), so those drops
# do not remove columns from X_train itself.
X_train_8 = X_train
# Fit on all features and display the OLS summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.850 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.841 |
| Method: | Least Squares | F-statistic: | 90.67 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 1.15e-176 |
| Time: | 01:25:22 | Log-Likelihood: | 523.13 |
| No. Observations: | 510 | AIC: | -984.3 |
| Df Residuals: | 479 | BIC: | -853.0 |
| Df Model: | 30 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.4066 | 0.097 | 4.212 | 0.000 | 0.217 | 0.596 |
| dteday | -0.1107 | 0.330 | -0.335 | 0.738 | -0.759 | 0.538 |
| temp | 0.3893 | 0.144 | 2.708 | 0.007 | 0.107 | 0.672 |
| atemp | 0.0523 | 0.140 | 0.374 | 0.708 | -0.222 | 0.327 |
| hum | -0.1570 | 0.039 | -4.014 | 0.000 | -0.234 | -0.080 |
| windspeed | -0.1800 | 0.027 | -6.734 | 0.000 | -0.233 | -0.127 |
| season_spring | -0.0415 | 0.030 | -1.365 | 0.173 | -0.101 | 0.018 |
| season_summer | 0.0459 | 0.026 | 1.735 | 0.083 | -0.006 | 0.098 |
| season_winter | 0.1115 | 0.028 | 3.932 | 0.000 | 0.056 | 0.167 |
| yr_2019 | 0.2867 | 0.166 | 1.729 | 0.084 | -0.039 | 0.613 |
| mnth_august | 0.0478 | 0.066 | 0.729 | 0.466 | -0.081 | 0.177 |
| mnth_december | -0.0068 | 0.117 | -0.058 | 0.954 | -0.237 | 0.224 |
| mnth_february | -0.0458 | 0.042 | -1.092 | 0.275 | -0.128 | 0.037 |
| mnth_january | -0.0771 | 0.052 | -1.493 | 0.136 | -0.178 | 0.024 |
| mnth_july | -0.0134 | 0.055 | -0.244 | 0.808 | -0.121 | 0.094 |
| mnth_june | 0.0192 | 0.038 | 0.506 | 0.613 | -0.055 | 0.094 |
| mnth_march | -0.0033 | 0.028 | -0.117 | 0.907 | -0.058 | 0.052 |
| mnth_may | 0.0300 | 0.025 | 1.193 | 0.234 | -0.019 | 0.079 |
| mnth_november | -0.0062 | 0.106 | -0.059 | 0.953 | -0.214 | 0.202 |
| mnth_october | 0.0347 | 0.094 | 0.371 | 0.711 | -0.149 | 0.218 |
| mnth_september | 0.1149 | 0.077 | 1.498 | 0.135 | -0.036 | 0.266 |
| holiday_yes | -0.1344 | 0.066 | -2.050 | 0.041 | -0.263 | -0.006 |
| weekday_monday | -0.0247 | 0.015 | -1.692 | 0.091 | -0.053 | 0.004 |
| weekday_saturday | -0.0554 | 0.072 | -0.771 | 0.441 | -0.197 | 0.086 |
| weekday_sunday | -0.0496 | 0.072 | -0.691 | 0.490 | -0.191 | 0.091 |
| weekday_thursday | 0.0043 | 0.015 | 0.285 | 0.776 | -0.025 | 0.034 |
| weekday_tuesday | -0.0261 | 0.015 | -1.775 | 0.077 | -0.055 | 0.003 |
| weekday_wednesday | -0.0102 | 0.015 | -0.663 | 0.508 | -0.040 | 0.020 |
| workingday_yes | -0.0602 | 0.071 | -0.845 | 0.399 | -0.200 | 0.080 |
| weathersit_light_weather | -0.2496 | 0.027 | -9.321 | 0.000 | -0.302 | -0.197 |
| weathersit_misty | -0.0582 | 0.011 | -5.487 | 0.000 | -0.079 | -0.037 |
| Omnibus: | 77.579 | Durbin-Watson: | 2.009 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 204.012 |
| Skew: | -0.758 | Prob(JB): | 5.01e-45 |
| Kurtosis: | 5.702 | Cond. No. | 207. |
# Multicollinearity check: VIF of every feature in the full Model 8 feature set
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 0 | dteday | 1828.85 |
| 8 | yr_2019 | 697.07 |
| 1 | temp | 444.57 |
| 2 | atemp | 383.10 |
| 27 | workingday_yes | 102.24 |
| 10 | mnth_december | 63.07 |
| 17 | mnth_november | 52.74 |
| 3 | hum | 42.29 |
| 18 | mnth_october | 40.63 |
| 19 | mnth_september | 26.66 |
| 9 | mnth_august | 23.76 |
| 23 | weekday_sunday | 19.90 |
| 22 | weekday_saturday | 18.97 |
| 13 | mnth_july | 13.64 |
| 5 | season_spring | 13.56 |
| 7 | season_winter | 12.43 |
| 12 | mnth_january | 11.31 |
| 6 | season_summer | 10.15 |
| 14 | mnth_june | 6.33 |
| 11 | mnth_february | 6.04 |
| 4 | windspeed | 5.92 |
| 15 | mnth_march | 4.26 |
| 20 | holiday_yes | 3.76 |
| 16 | mnth_may | 3.16 |
| 29 | weathersit_misty | 2.47 |
| 21 | weekday_monday | 2.09 |
| 25 | weekday_tuesday | 2.09 |
| 24 | weekday_thursday | 2.04 |
| 26 | weekday_wednesday | 1.99 |
| 28 | weathersit_light_weather | 1.34 |
# 'dteday' is both insignificant and highly collinear in the outputs above
# (p-value 0.738, VIF 1828.85 in the recorded run), so it is removed.
X_train_8 = X_train_8.drop(columns=['dteday'])
# Fit again on the reduced feature set and display the OLS summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.850 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.841 |
| Method: | Least Squares | F-statistic: | 93.97 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 1.24e-177 |
| Time: | 01:25:23 | Log-Likelihood: | 523.07 |
| No. Observations: | 510 | AIC: | -986.1 |
| Df Residuals: | 480 | BIC: | -859.1 |
| Df Model: | 29 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3915 | 0.085 | 4.593 | 0.000 | 0.224 | 0.559 |
| temp | 0.3873 | 0.143 | 2.699 | 0.007 | 0.105 | 0.669 |
| atemp | 0.0547 | 0.139 | 0.392 | 0.695 | -0.219 | 0.329 |
| hum | -0.1579 | 0.039 | -4.050 | 0.000 | -0.234 | -0.081 |
| windspeed | -0.1802 | 0.027 | -6.751 | 0.000 | -0.233 | -0.128 |
| season_spring | -0.0409 | 0.030 | -1.349 | 0.178 | -0.100 | 0.019 |
| season_summer | 0.0458 | 0.026 | 1.733 | 0.084 | -0.006 | 0.098 |
| season_winter | 0.1121 | 0.028 | 3.969 | 0.000 | 0.057 | 0.168 |
| yr_2019 | 0.2312 | 0.008 | 28.360 | 0.000 | 0.215 | 0.247 |
| mnth_august | 0.0291 | 0.034 | 0.849 | 0.396 | -0.038 | 0.096 |
| mnth_december | -0.0444 | 0.034 | -1.308 | 0.191 | -0.111 | 0.022 |
| mnth_february | -0.0373 | 0.033 | -1.119 | 0.264 | -0.103 | 0.028 |
| mnth_january | -0.0640 | 0.034 | -1.887 | 0.060 | -0.131 | 0.003 |
| mnth_july | -0.0274 | 0.035 | -0.773 | 0.440 | -0.097 | 0.042 |
| mnth_june | 0.0097 | 0.025 | 0.384 | 0.701 | -0.040 | 0.059 |
| mnth_march | 0.0010 | 0.025 | 0.041 | 0.967 | -0.048 | 0.050 |
| mnth_may | 0.0256 | 0.021 | 1.196 | 0.232 | -0.016 | 0.068 |
| mnth_november | -0.0395 | 0.037 | -1.076 | 0.282 | -0.112 | 0.033 |
| mnth_october | 0.0058 | 0.036 | 0.159 | 0.874 | -0.065 | 0.077 |
| mnth_september | 0.0916 | 0.032 | 2.837 | 0.005 | 0.028 | 0.155 |
| holiday_yes | -0.1345 | 0.066 | -2.054 | 0.041 | -0.263 | -0.006 |
| weekday_monday | -0.0247 | 0.015 | -1.688 | 0.092 | -0.053 | 0.004 |
| weekday_saturday | -0.0557 | 0.072 | -0.777 | 0.438 | -0.197 | 0.085 |
| weekday_sunday | -0.0499 | 0.072 | -0.696 | 0.487 | -0.191 | 0.091 |
| weekday_thursday | 0.0042 | 0.015 | 0.280 | 0.780 | -0.025 | 0.034 |
| weekday_tuesday | -0.0261 | 0.015 | -1.776 | 0.076 | -0.055 | 0.003 |
| weekday_wednesday | -0.0103 | 0.015 | -0.670 | 0.503 | -0.040 | 0.020 |
| workingday_yes | -0.0605 | 0.071 | -0.849 | 0.396 | -0.200 | 0.079 |
| weathersit_light_weather | -0.2492 | 0.027 | -9.324 | 0.000 | -0.302 | -0.197 |
| weathersit_misty | -0.0578 | 0.011 | -5.486 | 0.000 | -0.079 | -0.037 |
| Omnibus: | 78.724 | Durbin-Watson: | 2.008 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 207.783 |
| Skew: | -0.768 | Prob(JB): | 7.59e-46 |
| Kurtosis: | 5.724 | Cond. No. | 92.4 |
# Re-check VIF of the remaining features after removing 'dteday'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 441.52 |
| 1 | atemp | 382.19 |
| 26 | workingday_yes | 63.90 |
| 2 | hum | 41.23 |
| 4 | season_spring | 13.52 |
| 6 | season_winter | 12.43 |
| 22 | weekday_sunday | 12.35 |
| 21 | weekday_saturday | 11.84 |
| 5 | season_summer | 9.88 |
| 16 | mnth_november | 7.16 |
| 8 | mnth_august | 6.92 |
| 17 | mnth_october | 6.82 |
| 11 | mnth_january | 6.09 |
| 12 | mnth_july | 5.95 |
| 3 | windspeed | 5.87 |
| 9 | mnth_december | 5.78 |
| 18 | mnth_september | 5.04 |
| 10 | mnth_february | 4.50 |
| 14 | mnth_march | 3.72 |
| 13 | mnth_june | 3.09 |
| 19 | holiday_yes | 2.82 |
| 15 | mnth_may | 2.45 |
| 28 | weathersit_misty | 2.43 |
| 7 | yr_2019 | 2.14 |
| 20 | weekday_monday | 2.09 |
| 24 | weekday_tuesday | 2.09 |
| 23 | weekday_thursday | 2.04 |
| 25 | weekday_wednesday | 1.97 |
| 27 | weathersit_light_weather | 1.34 |
# 'atemp' is both insignificant and highly collinear in the outputs above
# (p-value 0.695, VIF 382.19 in the recorded run), so it is removed.
X_train_8 = X_train_8.drop(columns=['atemp'])
# Fit again on the reduced feature set and display the OLS summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.850 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.841 |
| Method: | Least Squares | F-statistic: | 97.49 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 1.34e-178 |
| Time: | 01:25:24 | Log-Likelihood: | 522.99 |
| No. Observations: | 510 | AIC: | -988.0 |
| Df Residuals: | 481 | BIC: | -865.2 |
| Df Model: | 28 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3912 | 0.085 | 4.594 | 0.000 | 0.224 | 0.558 |
| temp | 0.4405 | 0.047 | 9.412 | 0.000 | 0.349 | 0.532 |
| hum | -0.1574 | 0.039 | -4.043 | 0.000 | -0.234 | -0.081 |
| windspeed | -0.1825 | 0.026 | -7.003 | 0.000 | -0.234 | -0.131 |
| season_spring | -0.0404 | 0.030 | -1.335 | 0.182 | -0.100 | 0.019 |
| season_summer | 0.0464 | 0.026 | 1.761 | 0.079 | -0.005 | 0.098 |
| season_winter | 0.1126 | 0.028 | 3.994 | 0.000 | 0.057 | 0.168 |
| yr_2019 | 0.2311 | 0.008 | 28.384 | 0.000 | 0.215 | 0.247 |
| mnth_august | 0.0273 | 0.034 | 0.806 | 0.421 | -0.039 | 0.094 |
| mnth_december | -0.0444 | 0.034 | -1.309 | 0.191 | -0.111 | 0.022 |
| mnth_february | -0.0374 | 0.033 | -1.123 | 0.262 | -0.103 | 0.028 |
| mnth_january | -0.0642 | 0.034 | -1.894 | 0.059 | -0.131 | 0.002 |
| mnth_july | -0.0285 | 0.035 | -0.808 | 0.420 | -0.098 | 0.041 |
| mnth_june | 0.0084 | 0.025 | 0.336 | 0.737 | -0.041 | 0.058 |
| mnth_march | 0.0008 | 0.025 | 0.032 | 0.974 | -0.048 | 0.050 |
| mnth_may | 0.0247 | 0.021 | 1.164 | 0.245 | -0.017 | 0.066 |
| mnth_november | -0.0396 | 0.037 | -1.077 | 0.282 | -0.112 | 0.033 |
| mnth_october | 0.0056 | 0.036 | 0.156 | 0.876 | -0.066 | 0.077 |
| mnth_september | 0.0908 | 0.032 | 2.822 | 0.005 | 0.028 | 0.154 |
| holiday_yes | -0.1348 | 0.065 | -2.059 | 0.040 | -0.263 | -0.006 |
| weekday_monday | -0.0246 | 0.015 | -1.687 | 0.092 | -0.053 | 0.004 |
| weekday_saturday | -0.0557 | 0.072 | -0.777 | 0.437 | -0.197 | 0.085 |
| weekday_sunday | -0.0495 | 0.072 | -0.691 | 0.490 | -0.190 | 0.091 |
| weekday_thursday | 0.0042 | 0.015 | 0.282 | 0.778 | -0.025 | 0.034 |
| weekday_tuesday | -0.0261 | 0.015 | -1.774 | 0.077 | -0.055 | 0.003 |
| weekday_wednesday | -0.0102 | 0.015 | -0.669 | 0.504 | -0.040 | 0.020 |
| workingday_yes | -0.0600 | 0.071 | -0.844 | 0.399 | -0.200 | 0.080 |
| weathersit_light_weather | -0.2499 | 0.027 | -9.380 | 0.000 | -0.302 | -0.198 |
| weathersit_misty | -0.0578 | 0.011 | -5.495 | 0.000 | -0.079 | -0.037 |
| Omnibus: | 78.047 | Durbin-Watson: | 2.007 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 205.442 |
| Skew: | -0.762 | Prob(JB): | 2.45e-45 |
| Kurtosis: | 5.710 | Cond. No. | 70.4 |
# Re-check VIF of the remaining features after removing 'atemp'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 25 | workingday_yes | 63.89 |
| 0 | temp | 43.77 |
| 1 | hum | 41.18 |
| 3 | season_spring | 13.50 |
| 5 | season_winter | 12.40 |
| 21 | weekday_sunday | 12.34 |
| 20 | weekday_saturday | 11.83 |
| 4 | season_summer | 9.85 |
| 15 | mnth_november | 7.16 |
| 16 | mnth_october | 6.82 |
| 7 | mnth_august | 6.79 |
| 10 | mnth_january | 6.09 |
| 11 | mnth_july | 5.91 |
| 8 | mnth_december | 5.78 |
| 2 | windspeed | 5.59 |
| 17 | mnth_september | 5.02 |
| 9 | mnth_february | 4.50 |
| 13 | mnth_march | 3.72 |
| 12 | mnth_june | 3.04 |
| 18 | holiday_yes | 2.81 |
| 14 | mnth_may | 2.43 |
| 27 | weathersit_misty | 2.43 |
| 6 | yr_2019 | 2.14 |
| 19 | weekday_monday | 2.09 |
| 23 | weekday_tuesday | 2.09 |
| 22 | weekday_thursday | 2.04 |
| 24 | weekday_wednesday | 1.97 |
| 26 | weathersit_light_weather | 1.33 |
# 'workingday_yes' is both insignificant and highly collinear in the outputs
# above (p-value 0.399, VIF 63.89 in the recorded run), so it is removed.
X_train_8 = X_train_8.drop(columns=['workingday_yes'])
# Fit again on the reduced feature set and display the OLS summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.850 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.842 |
| Method: | Least Squares | F-statistic: | 101.1 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 1.88e-179 |
| Time: | 01:25:24 | Log-Likelihood: | 522.61 |
| No. Observations: | 510 | AIC: | -989.2 |
| Df Residuals: | 482 | BIC: | -870.7 |
| Df Model: | 27 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3301 | 0.045 | 7.359 | 0.000 | 0.242 | 0.418 |
| temp | 0.4421 | 0.047 | 9.455 | 0.000 | 0.350 | 0.534 |
| hum | -0.1565 | 0.039 | -4.025 | 0.000 | -0.233 | -0.080 |
| windspeed | -0.1831 | 0.026 | -7.034 | 0.000 | -0.234 | -0.132 |
| season_spring | -0.0401 | 0.030 | -1.326 | 0.185 | -0.100 | 0.019 |
| season_summer | 0.0466 | 0.026 | 1.766 | 0.078 | -0.005 | 0.098 |
| season_winter | 0.1119 | 0.028 | 3.971 | 0.000 | 0.057 | 0.167 |
| yr_2019 | 0.2315 | 0.008 | 28.493 | 0.000 | 0.216 | 0.248 |
| mnth_august | 0.0266 | 0.034 | 0.785 | 0.433 | -0.040 | 0.093 |
| mnth_december | -0.0437 | 0.034 | -1.288 | 0.198 | -0.110 | 0.023 |
| mnth_february | -0.0369 | 0.033 | -1.109 | 0.268 | -0.102 | 0.029 |
| mnth_january | -0.0638 | 0.034 | -1.884 | 0.060 | -0.130 | 0.003 |
| mnth_july | -0.0292 | 0.035 | -0.827 | 0.409 | -0.098 | 0.040 |
| mnth_june | 0.0077 | 0.025 | 0.309 | 0.757 | -0.041 | 0.057 |
| mnth_march | 0.0007 | 0.025 | 0.027 | 0.979 | -0.048 | 0.049 |
| mnth_may | 0.0241 | 0.021 | 1.134 | 0.257 | -0.018 | 0.066 |
| mnth_november | -0.0408 | 0.037 | -1.111 | 0.267 | -0.113 | 0.031 |
| mnth_october | 0.0062 | 0.036 | 0.172 | 0.864 | -0.065 | 0.077 |
| mnth_september | 0.0909 | 0.032 | 2.824 | 0.005 | 0.028 | 0.154 |
| holiday_yes | -0.0842 | 0.026 | -3.202 | 0.001 | -0.136 | -0.033 |
| weekday_monday | -0.0247 | 0.015 | -1.692 | 0.091 | -0.053 | 0.004 |
| weekday_saturday | 0.0034 | 0.015 | 0.226 | 0.822 | -0.026 | 0.033 |
| weekday_sunday | 0.0096 | 0.015 | 0.640 | 0.522 | -0.020 | 0.039 |
| weekday_thursday | 0.0041 | 0.015 | 0.274 | 0.785 | -0.025 | 0.034 |
| weekday_tuesday | -0.0258 | 0.015 | -1.758 | 0.079 | -0.055 | 0.003 |
| weekday_wednesday | -0.0094 | 0.015 | -0.618 | 0.537 | -0.039 | 0.021 |
| weathersit_light_weather | -0.2498 | 0.027 | -9.378 | 0.000 | -0.302 | -0.197 |
| weathersit_misty | -0.0577 | 0.011 | -5.482 | 0.000 | -0.078 | -0.037 |
| Omnibus: | 77.558 | Durbin-Watson: | 2.007 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 200.597 |
| Skew: | -0.764 | Prob(JB): | 2.76e-44 |
| Kurtosis: | 5.666 | Cond. No. | 35.1 |
# Re-check VIF of the remaining features after removing 'workingday_yes'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 37.70 |
| 0 | temp | 36.94 |
| 3 | season_spring | 11.85 |
| 5 | season_winter | 11.71 |
| 4 | season_summer | 7.02 |
| 16 | mnth_october | 6.32 |
| 15 | mnth_november | 6.07 |
| 7 | mnth_august | 5.89 |
| 11 | mnth_july | 5.30 |
| 10 | mnth_january | 5.22 |
| 2 | windspeed | 5.11 |
| 8 | mnth_december | 4.92 |
| 17 | mnth_september | 4.27 |
| 9 | mnth_february | 4.00 |
| 13 | mnth_march | 3.36 |
| 12 | mnth_june | 2.93 |
| 14 | mnth_may | 2.43 |
| 26 | weathersit_misty | 2.39 |
| 6 | yr_2019 | 2.14 |
| 23 | weekday_tuesday | 2.06 |
| 19 | weekday_monday | 2.03 |
| 22 | weekday_thursday | 1.99 |
| 24 | weekday_wednesday | 1.94 |
| 21 | weekday_sunday | 1.92 |
| 20 | weekday_saturday | 1.85 |
| 25 | weathersit_light_weather | 1.32 |
| 18 | holiday_yes | 1.12 |
# 'season_spring' is both insignificant and collinear in the outputs above
# (p-value 0.185, VIF 11.85 in the recorded run), so it is removed.
X_train_8 = X_train_8.drop(columns=['season_spring'])
# Fit again on the reduced feature set and display the OLS summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.849 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.841 |
| Method: | Least Squares | F-statistic: | 104.8 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 4.33e-180 |
| Time: | 01:25:25 | Log-Likelihood: | 521.68 |
| No. Observations: | 510 | AIC: | -989.4 |
| Df Residuals: | 483 | BIC: | -875.0 |
| Df Model: | 26 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3058 | 0.041 | 7.461 | 0.000 | 0.225 | 0.386 |
| temp | 0.4498 | 0.046 | 9.690 | 0.000 | 0.359 | 0.541 |
| hum | -0.1584 | 0.039 | -4.072 | 0.000 | -0.235 | -0.082 |
| windspeed | -0.1859 | 0.026 | -7.155 | 0.000 | -0.237 | -0.135 |
| season_summer | 0.0694 | 0.020 | 3.472 | 0.001 | 0.030 | 0.109 |
| season_winter | 0.1343 | 0.023 | 5.947 | 0.000 | 0.090 | 0.179 |
| yr_2019 | 0.2312 | 0.008 | 28.445 | 0.000 | 0.215 | 0.247 |
| mnth_august | 0.0468 | 0.030 | 1.544 | 0.123 | -0.013 | 0.106 |
| mnth_december | -0.0475 | 0.034 | -1.407 | 0.160 | -0.114 | 0.019 |
| mnth_february | -0.0527 | 0.031 | -1.695 | 0.091 | -0.114 | 0.008 |
| mnth_january | -0.0791 | 0.032 | -2.480 | 0.013 | -0.142 | -0.016 |
| mnth_july | -0.0095 | 0.032 | -0.296 | 0.767 | -0.072 | 0.053 |
| mnth_june | 0.0135 | 0.025 | 0.550 | 0.582 | -0.035 | 0.062 |
| mnth_march | -0.0099 | 0.023 | -0.423 | 0.673 | -0.056 | 0.036 |
| mnth_may | 0.0227 | 0.021 | 1.068 | 0.286 | -0.019 | 0.064 |
| mnth_november | -0.0397 | 0.037 | -1.082 | 0.280 | -0.112 | 0.032 |
| mnth_october | 0.0062 | 0.036 | 0.170 | 0.865 | -0.065 | 0.077 |
| mnth_september | 0.1069 | 0.030 | 3.581 | 0.000 | 0.048 | 0.166 |
| holiday_yes | -0.0846 | 0.026 | -3.217 | 0.001 | -0.136 | -0.033 |
| weekday_monday | -0.0255 | 0.015 | -1.749 | 0.081 | -0.054 | 0.003 |
| weekday_saturday | 0.0028 | 0.015 | 0.186 | 0.853 | -0.027 | 0.033 |
| weekday_sunday | 0.0098 | 0.015 | 0.649 | 0.516 | -0.020 | 0.039 |
| weekday_thursday | 0.0044 | 0.015 | 0.296 | 0.767 | -0.025 | 0.034 |
| weekday_tuesday | -0.0261 | 0.015 | -1.774 | 0.077 | -0.055 | 0.003 |
| weekday_wednesday | -0.0094 | 0.015 | -0.618 | 0.537 | -0.039 | 0.021 |
| weathersit_light_weather | -0.2475 | 0.027 | -9.303 | 0.000 | -0.300 | -0.195 |
| weathersit_misty | -0.0573 | 0.011 | -5.441 | 0.000 | -0.078 | -0.037 |
| Omnibus: | 75.649 | Durbin-Watson: | 2.012 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 192.184 |
| Skew: | -0.751 | Prob(JB): | 1.85e-42 |
| Kurtosis: | 5.605 | Cond. No. | 34.4 |
# Re-check VIF of the remaining features after removing 'season_spring'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 36.69 |
| 1 | hum | 36.05 |
| 4 | season_winter | 8.03 |
| 15 | mnth_october | 6.16 |
| 14 | mnth_november | 5.87 |
| 6 | mnth_august | 5.21 |
| 3 | season_summer | 4.83 |
| 2 | windspeed | 4.81 |
| 10 | mnth_july | 4.75 |
| 7 | mnth_december | 4.38 |
| 16 | mnth_september | 4.03 |
| 9 | mnth_january | 3.12 |
| 11 | mnth_june | 2.90 |
| 8 | mnth_february | 2.48 |
| 13 | mnth_may | 2.42 |
| 25 | weathersit_misty | 2.37 |
| 12 | mnth_march | 2.35 |
| 5 | yr_2019 | 2.13 |
| 22 | weekday_tuesday | 2.05 |
| 18 | weekday_monday | 2.00 |
| 21 | weekday_thursday | 1.98 |
| 23 | weekday_wednesday | 1.93 |
| 20 | weekday_sunday | 1.90 |
| 19 | weekday_saturday | 1.83 |
| 24 | weathersit_light_weather | 1.31 |
| 17 | holiday_yes | 1.12 |
# 'mnth_october' is both insignificant and collinear in the outputs above
# (p-value 0.865, VIF 6.16 in the recorded run), so it is removed.
X_train_8 = X_train_8.drop(columns=['mnth_october'])
# Fit again on the reduced feature set and display the OLS summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.849 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.842 |
| Method: | Least Squares | F-statistic: | 109.2 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 4.16e-181 |
| Time: | 01:25:25 | Log-Likelihood: | 521.67 |
| No. Observations: | 510 | AIC: | -991.3 |
| Df Residuals: | 484 | BIC: | -881.2 |
| Df Model: | 25 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3083 | 0.038 | 8.062 | 0.000 | 0.233 | 0.383 |
| temp | 0.4496 | 0.046 | 9.699 | 0.000 | 0.358 | 0.541 |
| hum | -0.1576 | 0.039 | -4.086 | 0.000 | -0.233 | -0.082 |
| windspeed | -0.1858 | 0.026 | -7.161 | 0.000 | -0.237 | -0.135 |
| season_summer | 0.0675 | 0.017 | 4.067 | 0.000 | 0.035 | 0.100 |
| season_winter | 0.1366 | 0.018 | 7.645 | 0.000 | 0.101 | 0.172 |
| yr_2019 | 0.2312 | 0.008 | 28.491 | 0.000 | 0.215 | 0.247 |
| mnth_august | 0.0440 | 0.025 | 1.732 | 0.084 | -0.006 | 0.094 |
| mnth_december | -0.0521 | 0.020 | -2.558 | 0.011 | -0.092 | -0.012 |
| mnth_february | -0.0556 | 0.026 | -2.137 | 0.033 | -0.107 | -0.004 |
| mnth_january | -0.0820 | 0.027 | -3.060 | 0.002 | -0.135 | -0.029 |
| mnth_july | -0.0122 | 0.028 | -0.444 | 0.657 | -0.066 | 0.042 |
| mnth_june | 0.0120 | 0.023 | 0.525 | 0.600 | -0.033 | 0.057 |
| mnth_march | -0.0121 | 0.020 | -0.616 | 0.538 | -0.051 | 0.026 |
| mnth_may | 0.0217 | 0.020 | 1.063 | 0.288 | -0.018 | 0.062 |
| mnth_november | -0.0450 | 0.020 | -2.292 | 0.022 | -0.084 | -0.006 |
| mnth_september | 0.1035 | 0.022 | 4.677 | 0.000 | 0.060 | 0.147 |
| holiday_yes | -0.0844 | 0.026 | -3.216 | 0.001 | -0.136 | -0.033 |
| weekday_monday | -0.0255 | 0.015 | -1.747 | 0.081 | -0.054 | 0.003 |
| weekday_saturday | 0.0029 | 0.015 | 0.189 | 0.850 | -0.027 | 0.033 |
| weekday_sunday | 0.0098 | 0.015 | 0.652 | 0.515 | -0.020 | 0.039 |
| weekday_thursday | 0.0045 | 0.015 | 0.298 | 0.766 | -0.025 | 0.034 |
| weekday_tuesday | -0.0261 | 0.015 | -1.778 | 0.076 | -0.055 | 0.003 |
| weekday_wednesday | -0.0094 | 0.015 | -0.617 | 0.537 | -0.039 | 0.021 |
| weathersit_light_weather | -0.2473 | 0.027 | -9.311 | 0.000 | -0.300 | -0.195 |
| weathersit_misty | -0.0573 | 0.011 | -5.454 | 0.000 | -0.078 | -0.037 |
| Omnibus: | 75.109 | Durbin-Watson: | 2.011 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 190.219 |
| Skew: | -0.747 | Prob(JB): | 4.95e-42 |
| Kurtosis: | 5.592 | Cond. No. | 25.3 |
# Recompute the VIF table for the predictors left after dropping 'mnth_october'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 35.77 |
| 1 | hum | 32.56 |
| 2 | windspeed | 4.67 |
| 4 | season_winter | 4.54 |
| 6 | mnth_august | 3.92 |
| 3 | season_summer | 3.77 |
| 10 | mnth_july | 3.71 |
| 11 | mnth_june | 2.55 |
| 9 | mnth_january | 2.53 |
| 15 | mnth_september | 2.46 |
| 24 | weathersit_misty | 2.35 |
| 13 | mnth_may | 2.23 |
| 5 | yr_2019 | 2.13 |
| 21 | weekday_tuesday | 2.04 |
| 8 | mnth_february | 2.00 |
| 17 | weekday_monday | 1.97 |
| 20 | weekday_thursday | 1.96 |
| 14 | mnth_november | 1.92 |
| 22 | weekday_wednesday | 1.91 |
| 19 | weekday_sunday | 1.89 |
| 12 | mnth_march | 1.88 |
| 18 | weekday_saturday | 1.81 |
| 7 | mnth_december | 1.79 |
| 23 | weathersit_light_weather | 1.31 |
| 16 | holiday_yes | 1.12 |
# Remove 'weekday_saturday': it has the largest p-value (0.850) in the previous fit
X_train_8 = X_train_8.drop(columns=['weekday_saturday'])
# Refit on the reduced feature set and display the regression summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.849 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.842 |
| Method: | Least Squares | F-statistic: | 114.0 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 3.92e-182 |
| Time: | 01:25:26 | Log-Likelihood: | 521.65 |
| No. Observations: | 510 | AIC: | -993.3 |
| Df Residuals: | 485 | BIC: | -887.4 |
| Df Model: | 24 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3099 | 0.037 | 8.312 | 0.000 | 0.237 | 0.383 |
| temp | 0.4496 | 0.046 | 9.710 | 0.000 | 0.359 | 0.541 |
| hum | -0.1580 | 0.038 | -4.105 | 0.000 | -0.234 | -0.082 |
| windspeed | -0.1859 | 0.026 | -7.173 | 0.000 | -0.237 | -0.135 |
| season_summer | 0.0676 | 0.017 | 4.075 | 0.000 | 0.035 | 0.100 |
| season_winter | 0.1367 | 0.018 | 7.657 | 0.000 | 0.102 | 0.172 |
| yr_2019 | 0.2312 | 0.008 | 28.519 | 0.000 | 0.215 | 0.247 |
| mnth_august | 0.0440 | 0.025 | 1.735 | 0.083 | -0.006 | 0.094 |
| mnth_december | -0.0522 | 0.020 | -2.567 | 0.011 | -0.092 | -0.012 |
| mnth_february | -0.0557 | 0.026 | -2.142 | 0.033 | -0.107 | -0.005 |
| mnth_january | -0.0821 | 0.027 | -3.064 | 0.002 | -0.135 | -0.029 |
| mnth_july | -0.0121 | 0.028 | -0.440 | 0.660 | -0.066 | 0.042 |
| mnth_june | 0.0118 | 0.023 | 0.518 | 0.605 | -0.033 | 0.057 |
| mnth_march | -0.0122 | 0.020 | -0.620 | 0.536 | -0.051 | 0.026 |
| mnth_may | 0.0216 | 0.020 | 1.063 | 0.288 | -0.018 | 0.062 |
| mnth_november | -0.0451 | 0.020 | -2.300 | 0.022 | -0.084 | -0.007 |
| mnth_september | 0.1035 | 0.022 | 4.681 | 0.000 | 0.060 | 0.147 |
| holiday_yes | -0.0843 | 0.026 | -3.218 | 0.001 | -0.136 | -0.033 |
| weekday_monday | -0.0268 | 0.013 | -2.114 | 0.035 | -0.052 | -0.002 |
| weekday_sunday | 0.0084 | 0.013 | 0.640 | 0.522 | -0.017 | 0.034 |
| weekday_thursday | 0.0031 | 0.013 | 0.237 | 0.813 | -0.023 | 0.029 |
| weekday_tuesday | -0.0275 | 0.013 | -2.144 | 0.033 | -0.053 | -0.002 |
| weekday_wednesday | -0.0108 | 0.013 | -0.802 | 0.423 | -0.037 | 0.016 |
| weathersit_light_weather | -0.2474 | 0.027 | -9.323 | 0.000 | -0.300 | -0.195 |
| weathersit_misty | -0.0572 | 0.010 | -5.456 | 0.000 | -0.078 | -0.037 |
| Omnibus: | 75.078 | Durbin-Watson: | 2.012 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 190.847 |
| Skew: | -0.746 | Prob(JB): | 3.62e-42 |
| Kurtosis: | 5.599 | Cond. No. | 25.1 |
# Recompute the VIF table for the predictors left after dropping 'weekday_saturday'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 35.18 |
| 1 | hum | 32.43 |
| 2 | windspeed | 4.64 |
| 4 | season_winter | 4.50 |
| 6 | mnth_august | 3.92 |
| 3 | season_summer | 3.72 |
| 10 | mnth_july | 3.70 |
| 11 | mnth_june | 2.55 |
| 15 | mnth_september | 2.46 |
| 9 | mnth_january | 2.46 |
| 23 | weathersit_misty | 2.35 |
| 13 | mnth_may | 2.23 |
| 5 | yr_2019 | 2.13 |
| 8 | mnth_february | 1.96 |
| 14 | mnth_november | 1.91 |
| 12 | mnth_march | 1.86 |
| 7 | mnth_december | 1.78 |
| 20 | weekday_tuesday | 1.60 |
| 17 | weekday_monday | 1.55 |
| 19 | weekday_thursday | 1.55 |
| 21 | weekday_wednesday | 1.53 |
| 18 | weekday_sunday | 1.50 |
| 22 | weathersit_light_weather | 1.30 |
| 16 | holiday_yes | 1.12 |
# Remove 'weekday_thursday': it has the largest p-value (0.813) in the previous fit
X_train_8 = X_train_8.drop(columns=['weekday_thursday'])
# Refit on the reduced feature set and display the regression summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.849 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.842 |
| Method: | Least Squares | F-statistic: | 119.2 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 3.65e-183 |
| Time: | 01:25:26 | Log-Likelihood: | 521.62 |
| No. Observations: | 510 | AIC: | -995.2 |
| Df Residuals: | 486 | BIC: | -893.6 |
| Df Model: | 23 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3109 | 0.037 | 8.405 | 0.000 | 0.238 | 0.384 |
| temp | 0.4497 | 0.046 | 9.723 | 0.000 | 0.359 | 0.541 |
| hum | -0.1580 | 0.038 | -4.112 | 0.000 | -0.234 | -0.083 |
| windspeed | -0.1860 | 0.026 | -7.183 | 0.000 | -0.237 | -0.135 |
| season_summer | 0.0678 | 0.017 | 4.097 | 0.000 | 0.035 | 0.100 |
| season_winter | 0.1368 | 0.018 | 7.679 | 0.000 | 0.102 | 0.172 |
| yr_2019 | 0.2312 | 0.008 | 28.567 | 0.000 | 0.215 | 0.247 |
| mnth_august | 0.0440 | 0.025 | 1.738 | 0.083 | -0.006 | 0.094 |
| mnth_december | -0.0525 | 0.020 | -2.584 | 0.010 | -0.092 | -0.013 |
| mnth_february | -0.0555 | 0.026 | -2.137 | 0.033 | -0.106 | -0.004 |
| mnth_january | -0.0821 | 0.027 | -3.067 | 0.002 | -0.135 | -0.029 |
| mnth_july | -0.0120 | 0.028 | -0.435 | 0.664 | -0.066 | 0.042 |
| mnth_june | 0.0116 | 0.023 | 0.511 | 0.610 | -0.033 | 0.056 |
| mnth_march | -0.0122 | 0.020 | -0.620 | 0.536 | -0.051 | 0.026 |
| mnth_may | 0.0216 | 0.020 | 1.064 | 0.288 | -0.018 | 0.062 |
| mnth_november | -0.0452 | 0.020 | -2.308 | 0.021 | -0.084 | -0.007 |
| mnth_september | 0.1035 | 0.022 | 4.686 | 0.000 | 0.060 | 0.147 |
| holiday_yes | -0.0845 | 0.026 | -3.229 | 0.001 | -0.136 | -0.033 |
| weekday_monday | -0.0279 | 0.012 | -2.346 | 0.019 | -0.051 | -0.005 |
| weekday_sunday | 0.0074 | 0.012 | 0.596 | 0.552 | -0.017 | 0.032 |
| weekday_tuesday | -0.0285 | 0.012 | -2.386 | 0.017 | -0.052 | -0.005 |
| weekday_wednesday | -0.0118 | 0.013 | -0.932 | 0.352 | -0.037 | 0.013 |
| weathersit_light_weather | -0.2480 | 0.026 | -9.403 | 0.000 | -0.300 | -0.196 |
| weathersit_misty | -0.0574 | 0.010 | -5.482 | 0.000 | -0.078 | -0.037 |
| Omnibus: | 75.371 | Durbin-Watson: | 2.012 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 191.496 |
| Skew: | -0.749 | Prob(JB): | 2.61e-42 |
| Kurtosis: | 5.602 | Cond. No. | 25.0 |
# Recompute the VIF table for the predictors left after dropping 'weekday_thursday'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 34.95 |
| 1 | hum | 32.34 |
| 2 | windspeed | 4.63 |
| 4 | season_winter | 4.47 |
| 6 | mnth_august | 3.92 |
| 10 | mnth_july | 3.70 |
| 3 | season_summer | 3.68 |
| 11 | mnth_june | 2.54 |
| 15 | mnth_september | 2.45 |
| 9 | mnth_january | 2.44 |
| 22 | weathersit_misty | 2.33 |
| 13 | mnth_may | 2.23 |
| 5 | yr_2019 | 2.13 |
| 8 | mnth_february | 1.93 |
| 14 | mnth_november | 1.91 |
| 12 | mnth_march | 1.85 |
| 7 | mnth_december | 1.78 |
| 19 | weekday_tuesday | 1.40 |
| 17 | weekday_monday | 1.38 |
| 20 | weekday_wednesday | 1.38 |
| 18 | weekday_sunday | 1.34 |
| 21 | weathersit_light_weather | 1.29 |
| 16 | holiday_yes | 1.12 |
# Remove 'mnth_july': it has the largest p-value (0.664) in the previous fit
X_train_8 = X_train_8.drop(columns=['mnth_july'])
# Refit on the reduced feature set and display the regression summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.849 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.843 |
| Method: | Least Squares | F-statistic: | 124.8 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 3.55e-184 |
| Time: | 01:25:27 | Log-Likelihood: | 521.52 |
| No. Observations: | 510 | AIC: | -997.0 |
| Df Residuals: | 487 | BIC: | -899.6 |
| Df Model: | 22 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3098 | 0.037 | 8.402 | 0.000 | 0.237 | 0.382 |
| temp | 0.4389 | 0.039 | 11.276 | 0.000 | 0.362 | 0.515 |
| hum | -0.1559 | 0.038 | -4.093 | 0.000 | -0.231 | -0.081 |
| windspeed | -0.1844 | 0.026 | -7.198 | 0.000 | -0.235 | -0.134 |
| season_summer | 0.0707 | 0.015 | 4.674 | 0.000 | 0.041 | 0.100 |
| season_winter | 0.1404 | 0.016 | 8.902 | 0.000 | 0.109 | 0.171 |
| yr_2019 | 0.2315 | 0.008 | 28.716 | 0.000 | 0.216 | 0.247 |
| mnth_august | 0.0521 | 0.017 | 3.007 | 0.003 | 0.018 | 0.086 |
| mnth_december | -0.0520 | 0.020 | -2.567 | 0.011 | -0.092 | -0.012 |
| mnth_february | -0.0531 | 0.025 | -2.094 | 0.037 | -0.103 | -0.003 |
| mnth_january | -0.0804 | 0.026 | -3.039 | 0.003 | -0.132 | -0.028 |
| mnth_june | 0.0175 | 0.018 | 0.960 | 0.338 | -0.018 | 0.053 |
| mnth_march | -0.0094 | 0.019 | -0.508 | 0.612 | -0.046 | 0.027 |
| mnth_may | 0.0250 | 0.019 | 1.332 | 0.184 | -0.012 | 0.062 |
| mnth_november | -0.0453 | 0.020 | -2.316 | 0.021 | -0.084 | -0.007 |
| mnth_september | 0.1095 | 0.017 | 6.341 | 0.000 | 0.076 | 0.143 |
| holiday_yes | -0.0835 | 0.026 | -3.205 | 0.001 | -0.135 | -0.032 |
| weekday_monday | -0.0282 | 0.012 | -2.380 | 0.018 | -0.051 | -0.005 |
| weekday_sunday | 0.0071 | 0.012 | 0.577 | 0.564 | -0.017 | 0.031 |
| weekday_tuesday | -0.0288 | 0.012 | -2.417 | 0.016 | -0.052 | -0.005 |
| weekday_wednesday | -0.0120 | 0.013 | -0.948 | 0.344 | -0.037 | 0.013 |
| weathersit_light_weather | -0.2485 | 0.026 | -9.438 | 0.000 | -0.300 | -0.197 |
| weathersit_misty | -0.0577 | 0.010 | -5.532 | 0.000 | -0.078 | -0.037 |
| Omnibus: | 76.510 | Durbin-Watson: | 2.016 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 196.590 |
| Skew: | -0.756 | Prob(JB): | 2.05e-43 |
| Kurtosis: | 5.639 | Cond. No. | 24.5 |
# Recompute the VIF table for the predictors left after dropping 'mnth_july'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 31.94 |
| 0 | temp | 19.57 |
| 2 | windspeed | 4.56 |
| 4 | season_winter | 3.48 |
| 3 | season_summer | 3.06 |
| 9 | mnth_january | 2.40 |
| 21 | weathersit_misty | 2.32 |
| 5 | yr_2019 | 2.11 |
| 13 | mnth_november | 1.91 |
| 12 | mnth_may | 1.90 |
| 8 | mnth_february | 1.84 |
| 6 | mnth_august | 1.84 |
| 7 | mnth_december | 1.78 |
| 11 | mnth_march | 1.65 |
| 10 | mnth_june | 1.63 |
| 14 | mnth_september | 1.51 |
| 18 | weekday_tuesday | 1.40 |
| 19 | weekday_wednesday | 1.38 |
| 16 | weekday_monday | 1.37 |
| 17 | weekday_sunday | 1.34 |
| 20 | weathersit_light_weather | 1.28 |
| 15 | holiday_yes | 1.11 |
# Remove 'mnth_march': it has the largest p-value (0.612) in the previous fit
X_train_8 = X_train_8.drop(columns=['mnth_march'])
# Refit on the reduced feature set and display the regression summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.849 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.843 |
| Method: | Least Squares | F-statistic: | 130.9 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 3.48e-185 |
| Time: | 01:25:27 | Log-Likelihood: | 521.38 |
| No. Observations: | 510 | AIC: | -998.8 |
| Df Residuals: | 488 | BIC: | -905.6 |
| Df Model: | 21 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3007 | 0.032 | 9.366 | 0.000 | 0.238 | 0.364 |
| temp | 0.4492 | 0.033 | 13.519 | 0.000 | 0.384 | 0.514 |
| hum | -0.1575 | 0.038 | -4.152 | 0.000 | -0.232 | -0.083 |
| windspeed | -0.1846 | 0.026 | -7.210 | 0.000 | -0.235 | -0.134 |
| season_summer | 0.0728 | 0.015 | 5.017 | 0.000 | 0.044 | 0.101 |
| season_winter | 0.1439 | 0.014 | 10.158 | 0.000 | 0.116 | 0.172 |
| yr_2019 | 0.2313 | 0.008 | 28.759 | 0.000 | 0.215 | 0.247 |
| mnth_august | 0.0540 | 0.017 | 3.202 | 0.001 | 0.021 | 0.087 |
| mnth_december | -0.0478 | 0.018 | -2.591 | 0.010 | -0.084 | -0.012 |
| mnth_february | -0.0460 | 0.021 | -2.175 | 0.030 | -0.088 | -0.004 |
| mnth_january | -0.0725 | 0.021 | -3.386 | 0.001 | -0.115 | -0.030 |
| mnth_june | 0.0183 | 0.018 | 1.008 | 0.314 | -0.017 | 0.054 |
| mnth_may | 0.0264 | 0.019 | 1.425 | 0.155 | -0.010 | 0.063 |
| mnth_november | -0.0427 | 0.019 | -2.263 | 0.024 | -0.080 | -0.006 |
| mnth_september | 0.1118 | 0.017 | 6.722 | 0.000 | 0.079 | 0.144 |
| holiday_yes | -0.0832 | 0.026 | -3.199 | 0.001 | -0.134 | -0.032 |
| weekday_monday | -0.0277 | 0.012 | -2.346 | 0.019 | -0.051 | -0.004 |
| weekday_sunday | 0.0072 | 0.012 | 0.584 | 0.559 | -0.017 | 0.032 |
| weekday_tuesday | -0.0286 | 0.012 | -2.400 | 0.017 | -0.052 | -0.005 |
| weekday_wednesday | -0.0118 | 0.013 | -0.930 | 0.353 | -0.037 | 0.013 |
| weathersit_light_weather | -0.2478 | 0.026 | -9.431 | 0.000 | -0.299 | -0.196 |
| weathersit_misty | -0.0575 | 0.010 | -5.522 | 0.000 | -0.078 | -0.037 |
| Omnibus: | 74.968 | Durbin-Watson: | 2.019 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 187.078 |
| Skew: | -0.751 | Prob(JB): | 2.38e-41 |
| Kurtosis: | 5.559 | Cond. No. | 19.8 |
# Recompute the VIF table for the predictors left after dropping 'mnth_march'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 26.45 |
| 0 | temp | 17.72 |
| 2 | windspeed | 4.20 |
| 4 | season_winter | 3.11 |
| 3 | season_summer | 3.03 |
| 20 | weathersit_misty | 2.27 |
| 5 | yr_2019 | 2.09 |
| 9 | mnth_january | 1.99 |
| 12 | mnth_november | 1.88 |
| 11 | mnth_may | 1.80 |
| 6 | mnth_august | 1.76 |
| 7 | mnth_december | 1.67 |
| 10 | mnth_june | 1.61 |
| 8 | mnth_february | 1.58 |
| 13 | mnth_september | 1.43 |
| 17 | weekday_tuesday | 1.40 |
| 18 | weekday_wednesday | 1.38 |
| 15 | weekday_monday | 1.37 |
| 16 | weekday_sunday | 1.33 |
| 19 | weathersit_light_weather | 1.26 |
| 14 | holiday_yes | 1.11 |
# Remove 'weekday_sunday': it has the largest p-value (0.559) in the previous fit
X_train_8 = X_train_8.drop(columns=['weekday_sunday'])
# Refit on the reduced feature set and display the regression summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.849 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.843 |
| Method: | Least Squares | F-statistic: | 137.6 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 3.47e-186 |
| Time: | 01:25:28 | Log-Likelihood: | 521.21 |
| No. Observations: | 510 | AIC: | -1000. |
| Df Residuals: | 489 | BIC: | -911.5 |
| Df Model: | 20 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.3028 | 0.032 | 9.500 | 0.000 | 0.240 | 0.365 |
| temp | 0.4494 | 0.033 | 13.536 | 0.000 | 0.384 | 0.515 |
| hum | -0.1584 | 0.038 | -4.184 | 0.000 | -0.233 | -0.084 |
| windspeed | -0.1843 | 0.026 | -7.206 | 0.000 | -0.235 | -0.134 |
| season_summer | 0.0726 | 0.014 | 5.010 | 0.000 | 0.044 | 0.101 |
| season_winter | 0.1440 | 0.014 | 10.171 | 0.000 | 0.116 | 0.172 |
| yr_2019 | 0.2313 | 0.008 | 28.789 | 0.000 | 0.216 | 0.247 |
| mnth_august | 0.0538 | 0.017 | 3.192 | 0.002 | 0.021 | 0.087 |
| mnth_december | -0.0475 | 0.018 | -2.579 | 0.010 | -0.084 | -0.011 |
| mnth_february | -0.0461 | 0.021 | -2.181 | 0.030 | -0.088 | -0.005 |
| mnth_january | -0.0726 | 0.021 | -3.393 | 0.001 | -0.115 | -0.031 |
| mnth_june | 0.0182 | 0.018 | 1.002 | 0.317 | -0.017 | 0.054 |
| mnth_may | 0.0262 | 0.019 | 1.416 | 0.157 | -0.010 | 0.063 |
| mnth_november | -0.0433 | 0.019 | -2.301 | 0.022 | -0.080 | -0.006 |
| mnth_september | 0.1116 | 0.017 | 6.718 | 0.000 | 0.079 | 0.144 |
| holiday_yes | -0.0830 | 0.026 | -3.191 | 0.002 | -0.134 | -0.032 |
| weekday_monday | -0.0294 | 0.011 | -2.585 | 0.010 | -0.052 | -0.007 |
| weekday_tuesday | -0.0304 | 0.012 | -2.640 | 0.009 | -0.053 | -0.008 |
| weekday_wednesday | -0.0135 | 0.012 | -1.106 | 0.269 | -0.038 | 0.011 |
| weathersit_light_weather | -0.2481 | 0.026 | -9.450 | 0.000 | -0.300 | -0.197 |
| weathersit_misty | -0.0570 | 0.010 | -5.497 | 0.000 | -0.077 | -0.037 |
| Omnibus: | 73.000 | Durbin-Watson: | 2.014 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 183.029 |
| Skew: | -0.730 | Prob(JB): | 1.80e-40 |
| Kurtosis: | 5.545 | Cond. No. | 19.7 |
# Recompute the VIF table for the predictors left after dropping 'weekday_sunday'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 26.42 |
| 0 | temp | 17.60 |
| 2 | windspeed | 4.17 |
| 4 | season_winter | 3.10 |
| 3 | season_summer | 3.03 |
| 19 | weathersit_misty | 2.26 |
| 5 | yr_2019 | 2.09 |
| 9 | mnth_january | 1.99 |
| 12 | mnth_november | 1.88 |
| 11 | mnth_may | 1.80 |
| 6 | mnth_august | 1.76 |
| 7 | mnth_december | 1.66 |
| 10 | mnth_june | 1.61 |
| 8 | mnth_february | 1.58 |
| 13 | mnth_september | 1.43 |
| 16 | weekday_tuesday | 1.31 |
| 17 | weekday_wednesday | 1.30 |
| 15 | weekday_monday | 1.28 |
| 18 | weathersit_light_weather | 1.26 |
| 14 | holiday_yes | 1.11 |
# Remove 'mnth_june': it has the largest p-value (0.317) in the previous fit
X_train_8 = X_train_8.drop(columns=['mnth_june'])
# Refit on the reduced feature set and display the regression summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.849 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.843 |
| Method: | Least Squares | F-statistic: | 144.8 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 4.67e-187 |
| Time: | 01:25:29 | Log-Likelihood: | 520.68 |
| No. Observations: | 510 | AIC: | -1001. |
| Df Residuals: | 490 | BIC: | -916.7 |
| Df Model: | 19 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2991 | 0.032 | 9.447 | 0.000 | 0.237 | 0.361 |
| temp | 0.4630 | 0.030 | 15.290 | 0.000 | 0.404 | 0.523 |
| hum | -0.1618 | 0.038 | -4.290 | 0.000 | -0.236 | -0.088 |
| windspeed | -0.1861 | 0.026 | -7.290 | 0.000 | -0.236 | -0.136 |
| season_summer | 0.0771 | 0.014 | 5.593 | 0.000 | 0.050 | 0.104 |
| season_winter | 0.1437 | 0.014 | 10.154 | 0.000 | 0.116 | 0.172 |
| yr_2019 | 0.2309 | 0.008 | 28.780 | 0.000 | 0.215 | 0.247 |
| mnth_august | 0.0494 | 0.016 | 3.036 | 0.003 | 0.017 | 0.081 |
| mnth_december | -0.0453 | 0.018 | -2.478 | 0.014 | -0.081 | -0.009 |
| mnth_february | -0.0435 | 0.021 | -2.074 | 0.039 | -0.085 | -0.002 |
| mnth_january | -0.0691 | 0.021 | -3.272 | 0.001 | -0.111 | -0.028 |
| mnth_may | 0.0196 | 0.017 | 1.131 | 0.258 | -0.014 | 0.054 |
| mnth_november | -0.0418 | 0.019 | -2.226 | 0.026 | -0.079 | -0.005 |
| mnth_september | 0.1089 | 0.016 | 6.644 | 0.000 | 0.077 | 0.141 |
| holiday_yes | -0.0840 | 0.026 | -3.231 | 0.001 | -0.135 | -0.033 |
| weekday_monday | -0.0291 | 0.011 | -2.559 | 0.011 | -0.051 | -0.007 |
| weekday_tuesday | -0.0304 | 0.012 | -2.643 | 0.008 | -0.053 | -0.008 |
| weekday_wednesday | -0.0134 | 0.012 | -1.093 | 0.275 | -0.037 | 0.011 |
| weathersit_light_weather | -0.2480 | 0.026 | -9.447 | 0.000 | -0.300 | -0.196 |
| weathersit_misty | -0.0567 | 0.010 | -5.470 | 0.000 | -0.077 | -0.036 |
| Omnibus: | 72.757 | Durbin-Watson: | 2.015 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 177.468 |
| Skew: | -0.737 | Prob(JB): | 2.91e-39 |
| Kurtosis: | 5.485 | Cond. No. | 19.4 |
# Recompute the VIF table for the predictors left after dropping 'mnth_june'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 25.37 |
| 0 | temp | 14.62 |
| 2 | windspeed | 4.08 |
| 4 | season_winter | 3.10 |
| 3 | season_summer | 2.77 |
| 18 | weathersit_misty | 2.25 |
| 5 | yr_2019 | 2.08 |
| 9 | mnth_january | 1.96 |
| 11 | mnth_november | 1.87 |
| 7 | mnth_december | 1.65 |
| 6 | mnth_august | 1.64 |
| 10 | mnth_may | 1.58 |
| 8 | mnth_february | 1.57 |
| 12 | mnth_september | 1.39 |
| 15 | weekday_tuesday | 1.31 |
| 16 | weekday_wednesday | 1.30 |
| 14 | weekday_monday | 1.28 |
| 17 | weathersit_light_weather | 1.26 |
| 13 | holiday_yes | 1.11 |
# Remove 'weekday_wednesday': it has the largest p-value (0.275) in the previous fit
X_train_8 = X_train_8.drop(columns=['weekday_wednesday'])
# Refit on the reduced feature set and display the regression summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.848 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.843 |
| Method: | Least Squares | F-statistic: | 152.7 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 6.74e-188 |
| Time: | 01:25:29 | Log-Likelihood: | 520.06 |
| No. Observations: | 510 | AIC: | -1002. |
| Df Residuals: | 491 | BIC: | -921.7 |
| Df Model: | 18 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2964 | 0.032 | 9.390 | 0.000 | 0.234 | 0.358 |
| temp | 0.4644 | 0.030 | 15.347 | 0.000 | 0.405 | 0.524 |
| hum | -0.1629 | 0.038 | -4.319 | 0.000 | -0.237 | -0.089 |
| windspeed | -0.1849 | 0.026 | -7.250 | 0.000 | -0.235 | -0.135 |
| season_summer | 0.0768 | 0.014 | 5.571 | 0.000 | 0.050 | 0.104 |
| season_winter | 0.1427 | 0.014 | 10.102 | 0.000 | 0.115 | 0.170 |
| yr_2019 | 0.2307 | 0.008 | 28.758 | 0.000 | 0.215 | 0.246 |
| mnth_august | 0.0488 | 0.016 | 3.000 | 0.003 | 0.017 | 0.081 |
| mnth_december | -0.0446 | 0.018 | -2.441 | 0.015 | -0.081 | -0.009 |
| mnth_february | -0.0435 | 0.021 | -2.075 | 0.039 | -0.085 | -0.002 |
| mnth_january | -0.0694 | 0.021 | -3.288 | 0.001 | -0.111 | -0.028 |
| mnth_may | 0.0192 | 0.017 | 1.110 | 0.267 | -0.015 | 0.053 |
| mnth_november | -0.0406 | 0.019 | -2.165 | 0.031 | -0.077 | -0.004 |
| mnth_september | 0.1092 | 0.016 | 6.661 | 0.000 | 0.077 | 0.141 |
| holiday_yes | -0.0895 | 0.025 | -3.510 | 0.000 | -0.140 | -0.039 |
| weekday_monday | -0.0267 | 0.011 | -2.389 | 0.017 | -0.049 | -0.005 |
| weekday_tuesday | -0.0277 | 0.011 | -2.466 | 0.014 | -0.050 | -0.006 |
| weathersit_light_weather | -0.2477 | 0.026 | -9.434 | 0.000 | -0.299 | -0.196 |
| weathersit_misty | -0.0566 | 0.010 | -5.452 | 0.000 | -0.077 | -0.036 |
| Omnibus: | 71.655 | Durbin-Watson: | 2.017 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 178.070 |
| Skew: | -0.721 | Prob(JB): | 2.15e-39 |
| Kurtosis: | 5.511 | Cond. No. | 19.4 |
# Recompute the VIF table for the predictors left after dropping 'weekday_wednesday'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 25.16 |
| 0 | temp | 14.62 |
| 2 | windspeed | 4.08 |
| 4 | season_winter | 3.08 |
| 3 | season_summer | 2.77 |
| 17 | weathersit_misty | 2.25 |
| 5 | yr_2019 | 2.08 |
| 9 | mnth_january | 1.95 |
| 11 | mnth_november | 1.87 |
| 7 | mnth_december | 1.65 |
| 6 | mnth_august | 1.64 |
| 10 | mnth_may | 1.58 |
| 8 | mnth_february | 1.57 |
| 12 | mnth_september | 1.39 |
| 15 | weekday_tuesday | 1.25 |
| 16 | weathersit_light_weather | 1.25 |
| 14 | weekday_monday | 1.24 |
| 13 | holiday_yes | 1.07 |
# Remove 'mnth_may': it is the only predictor left with p > 0.05 (0.267) in the previous fit
X_train_8 = X_train_8.drop(columns=['mnth_may'])
# Refit on the reduced feature set and display the regression summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.848 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.843 |
| Method: | Least Squares | F-statistic: | 161.6 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 9.63e-189 |
| Time: | 01:25:30 | Log-Likelihood: | 519.42 |
| No. Observations: | 510 | AIC: | -1003. |
| Df Residuals: | 492 | BIC: | -926.6 |
| Df Model: | 17 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2908 | 0.031 | 9.331 | 0.000 | 0.230 | 0.352 |
| temp | 0.4686 | 0.030 | 15.603 | 0.000 | 0.410 | 0.528 |
| hum | -0.1568 | 0.037 | -4.201 | 0.000 | -0.230 | -0.083 |
| windspeed | -0.1859 | 0.025 | -7.290 | 0.000 | -0.236 | -0.136 |
| season_summer | 0.0835 | 0.012 | 6.729 | 0.000 | 0.059 | 0.108 |
| season_winter | 0.1427 | 0.014 | 10.098 | 0.000 | 0.115 | 0.170 |
| yr_2019 | 0.2303 | 0.008 | 28.730 | 0.000 | 0.215 | 0.246 |
| mnth_august | 0.0477 | 0.016 | 2.939 | 0.003 | 0.016 | 0.080 |
| mnth_december | -0.0437 | 0.018 | -2.391 | 0.017 | -0.080 | -0.008 |
| mnth_february | -0.0417 | 0.021 | -1.992 | 0.047 | -0.083 | -0.001 |
| mnth_january | -0.0675 | 0.021 | -3.209 | 0.001 | -0.109 | -0.026 |
| mnth_november | -0.0396 | 0.019 | -2.114 | 0.035 | -0.076 | -0.003 |
| mnth_september | 0.1084 | 0.016 | 6.616 | 0.000 | 0.076 | 0.141 |
| holiday_yes | -0.0901 | 0.025 | -3.534 | 0.000 | -0.140 | -0.040 |
| weekday_monday | -0.0272 | 0.011 | -2.440 | 0.015 | -0.049 | -0.005 |
| weekday_tuesday | -0.0281 | 0.011 | -2.501 | 0.013 | -0.050 | -0.006 |
| weathersit_light_weather | -0.2497 | 0.026 | -9.530 | 0.000 | -0.301 | -0.198 |
| weathersit_misty | -0.0568 | 0.010 | -5.479 | 0.000 | -0.077 | -0.036 |
| Omnibus: | 71.020 | Durbin-Watson: | 2.022 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 170.053 |
| Skew: | -0.727 | Prob(JB): | 1.18e-37 |
| Kurtosis: | 5.427 | Cond. No. | 19.0 |
# Recompute the VIF table for the predictors left after dropping 'mnth_may'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 1 | hum | 25.08 |
| 0 | temp | 14.58 |
| 2 | windspeed | 4.01 |
| 4 | season_winter | 3.08 |
| 3 | season_summer | 2.30 |
| 16 | weathersit_misty | 2.25 |
| 5 | yr_2019 | 2.07 |
| 9 | mnth_january | 1.95 |
| 10 | mnth_november | 1.87 |
| 7 | mnth_december | 1.65 |
| 6 | mnth_august | 1.63 |
| 8 | mnth_february | 1.57 |
| 11 | mnth_september | 1.39 |
| 14 | weekday_tuesday | 1.25 |
| 15 | weathersit_light_weather | 1.25 |
| 13 | weekday_monday | 1.23 |
| 12 | holiday_yes | 1.07 |
# All remaining p-values are below 0.05, so switch to VIF: remove 'hum', which has the highest VIF (25.08)
X_train_8 = X_train_8.drop(columns=['hum'])
# Refit on the reduced feature set and display the regression summary
train_model(y_train, X_train_8).summary()
| Dep. Variable: | cnt | R-squared: | 0.843 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.838 |
| Method: | Least Squares | F-statistic: | 165.0 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 4.10e-186 |
| Time: | 01:25:30 | Log-Likelihood: | 510.43 |
| No. Observations: | 510 | AIC: | -986.9 |
| Df Residuals: | 493 | BIC: | -914.9 |
| Df Model: | 16 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2110 | 0.025 | 8.399 | 0.000 | 0.162 | 0.260 |
| temp | 0.4391 | 0.030 | 14.790 | 0.000 | 0.381 | 0.497 |
| windspeed | -0.1576 | 0.025 | -6.303 | 0.000 | -0.207 | -0.108 |
| season_summer | 0.0780 | 0.013 | 6.215 | 0.000 | 0.053 | 0.103 |
| season_winter | 0.1315 | 0.014 | 9.322 | 0.000 | 0.104 | 0.159 |
| yr_2019 | 0.2341 | 0.008 | 28.905 | 0.000 | 0.218 | 0.250 |
| mnth_august | 0.0454 | 0.016 | 2.750 | 0.006 | 0.013 | 0.078 |
| mnth_december | -0.0515 | 0.018 | -2.788 | 0.006 | -0.088 | -0.015 |
| mnth_february | -0.0475 | 0.021 | -2.238 | 0.026 | -0.089 | -0.006 |
| mnth_january | -0.0776 | 0.021 | -3.649 | 0.000 | -0.119 | -0.036 |
| mnth_november | -0.0397 | 0.019 | -2.086 | 0.038 | -0.077 | -0.002 |
| mnth_september | 0.1014 | 0.017 | 6.119 | 0.000 | 0.069 | 0.134 |
| holiday_yes | -0.0910 | 0.026 | -3.509 | 0.000 | -0.142 | -0.040 |
| weekday_monday | -0.0275 | 0.011 | -2.428 | 0.016 | -0.050 | -0.005 |
| weekday_tuesday | -0.0318 | 0.011 | -2.791 | 0.005 | -0.054 | -0.009 |
| weathersit_light_weather | -0.2935 | 0.024 | -12.005 | 0.000 | -0.342 | -0.245 |
| weathersit_misty | -0.0817 | 0.009 | -9.434 | 0.000 | -0.099 | -0.065 |
| Omnibus: | 69.898 | Durbin-Watson: | 2.030 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 166.985 |
| Skew: | -0.717 | Prob(JB): | 5.49e-37 |
| Kurtosis: | 5.409 | Cond. No. | 16.2 |
# Recompute the VIF table for the predictors left after dropping 'hum'
calculate_vif(X_train_8)
| Feature | VIF | |
|---|---|---|
| 0 | temp | 5.24 |
| 1 | windspeed | 3.98 |
| 3 | season_winter | 2.63 |
| 4 | yr_2019 | 2.06 |
| 2 | season_summer | 2.04 |
| 9 | mnth_november | 1.82 |
| 5 | mnth_august | 1.63 |
| 15 | weathersit_misty | 1.57 |
| 6 | mnth_december | 1.42 |
| 10 | mnth_september | 1.35 |
| 8 | mnth_january | 1.30 |
| 7 | mnth_february | 1.27 |
| 12 | weekday_monday | 1.23 |
| 13 | weekday_tuesday | 1.23 |
| 14 | weathersit_light_weather | 1.09 |
| 11 | holiday_yes | 1.07 |
# Store the fitted Model 8
lr_model_8 = train_model(y_train, X_train_8)
# Model 9: start from Model 8's features and drop 'temp' (VIF 5.24) so that every VIF stays below 5
X_train_9 = X_train_8.drop(columns=['temp'])
# Fit Model 9 and display the regression summary
train_model(y_train, X_train_9).summary()
| Dep. Variable: | cnt | R-squared: | 0.773 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.766 |
| Method: | Least Squares | F-statistic: | 112.0 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 4.27e-148 |
| Time: | 01:25:31 | Log-Likelihood: | 416.79 |
| No. Observations: | 510 | AIC: | -801.6 |
| Df Residuals: | 494 | BIC: | -733.8 |
| Df Model: | 15 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.5261 | 0.016 | 32.917 | 0.000 | 0.495 | 0.558 |
| windspeed | -0.2304 | 0.029 | -7.827 | 0.000 | -0.288 | -0.173 |
| season_summer | 0.0449 | 0.015 | 3.032 | 0.003 | 0.016 | 0.074 |
| season_winter | 0.0841 | 0.016 | 5.097 | 0.000 | 0.052 | 0.116 |
| yr_2019 | 0.2461 | 0.010 | 25.453 | 0.000 | 0.227 | 0.265 |
| mnth_august | 0.1007 | 0.019 | 5.223 | 0.000 | 0.063 | 0.139 |
| mnth_december | -0.1661 | 0.020 | -8.251 | 0.000 | -0.206 | -0.127 |
| mnth_february | -0.2087 | 0.022 | -9.550 | 0.000 | -0.252 | -0.166 |
| mnth_january | -0.2760 | 0.020 | -13.935 | 0.000 | -0.315 | -0.237 |
| mnth_november | -0.1121 | 0.022 | -5.075 | 0.000 | -0.155 | -0.069 |
| mnth_september | 0.1198 | 0.020 | 6.037 | 0.000 | 0.081 | 0.159 |
| holiday_yes | -0.0855 | 0.031 | -2.748 | 0.006 | -0.147 | -0.024 |
| weekday_monday | -0.0329 | 0.014 | -2.418 | 0.016 | -0.060 | -0.006 |
| weekday_tuesday | -0.0280 | 0.014 | -2.047 | 0.041 | -0.055 | -0.001 |
| weathersit_light_weather | -0.3117 | 0.029 | -10.634 | 0.000 | -0.369 | -0.254 |
| weathersit_misty | -0.0934 | 0.010 | -9.024 | 0.000 | -0.114 | -0.073 |
| Omnibus: | 55.168 | Durbin-Watson: | 1.933 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 90.368 |
| Skew: | -0.702 | Prob(JB): | 2.38e-20 |
| Kurtosis: | 4.510 | Cond. No. | 9.34 |
# Compute the VIF table for Model 9's predictors (Model 8's features without 'temp')
calculate_vif(X_train_9)
| Feature | VIF | |
|---|---|---|
| 0 | windspeed | 3.21 |
| 2 | season_winter | 2.54 |
| 3 | yr_2019 | 1.86 |
| 1 | season_summer | 1.82 |
| 8 | mnth_november | 1.81 |
| 14 | weathersit_misty | 1.56 |
| 5 | mnth_december | 1.41 |
| 7 | mnth_january | 1.28 |
| 6 | mnth_february | 1.26 |
| 4 | mnth_august | 1.24 |
| 11 | weekday_monday | 1.21 |
| 12 | weekday_tuesday | 1.21 |
| 9 | mnth_september | 1.17 |
| 13 | weathersit_light_weather | 1.09 |
| 10 | holiday_yes | 1.07 |
# Store the fitted Model 9
lr_model_9 = train_model(y_train, X_train_9)
# Creating helper functions to compute training residuals and to plot their distribution and scatter
def get_res(x_df, lr_model):
    """Return the predictions and residuals of a fitted model on the training set.

    Parameters
    ----------
    x_df : feature data passed to ``sm.add_constant`` before prediction.
    lr_model : fitted model exposing ``predict`` (presumably the object
        returned by ``train_model`` -- confirm against its definition).

    Returns
    -------
    tuple
        ``(predicted, residuals)`` with ``residuals = y_train - predicted``.
        ``y_train`` is read from module scope, not passed in.
    """
    # The model is queried with a design matrix that includes the constant term
    design_matrix = sm.add_constant(x_df)
    predicted = lr_model.predict(design_matrix)
    return predicted, y_train - predicted
def plot_res_dist(x_df, lr_model):
    """Plot the distribution of the training residuals of ``lr_model``.

    Parameters
    ----------
    x_df : feature data forwarded to ``get_res``.
    lr_model : fitted model forwarded to ``get_res``.

    Returns
    -------
    The axes object returned by ``sns.distplot``, resized to 10x8 inches
    and with its x-axis labelled 'Residuals'.
    """
    # Only the residuals (second element) are needed for this plot
    _, residuals = get_res(x_df, lr_model)
    axes = sns.distplot(residuals)
    axes.set_xlabel('Residuals')
    axes.figure.set_size_inches(10, 8)
    return axes
def plot_res_scatter(x_df, lr_model):
    """Scatter training residuals against predicted values for a fitted model.

    Used to inspect homoscedasticity: the residuals should be spread evenly
    around the red zero line across the whole range of predictions.

    Parameters
    ----------
    x_df : predictor columns the model was trained on (without the constant).
    lr_model : fitted model passed through to ``get_res``.

    Returns
    -------
    The matplotlib Axes holding the plot (10 x 8 inch figure).
    """
    # Compute predictions and residuals once instead of once per axis.
    predicted, residuals = get_res(x_df, lr_model)
    # Keyword arguments: newer seaborn releases no longer accept x/y positionally.
    ax = sns.scatterplot(x=predicted, y=residuals)
    ax.figure.set_size_inches(10, 8)
    ax.set_xlabel('Predicted')
    ax.set_ylabel('Residuals')
    # Zero reference line across the full predicted-value axis; the x-axis
    # shows predictions, so the line must not be tied to the range of y_train.
    ax.axhline(0, color='red')
    return ax
# Model 1: distribution of the training residuals
plot_res_dist(X_train_1, lr_model_1)
plt.show()
Assessment:
Residuals are normally distributed around the mean 0. So, the
"Normality of Residual" assumption holds.
# Model 1: training residuals vs. predicted values
plot_res_scatter(X_train_1, lr_model_1)
plt.show()
Assessment:
Homoscedasticity assumption holds.
# Model 2: distribution of the training residuals
plot_res_dist(X_train_2, lr_model_2)
plt.show()
Assessment:
Residuals are normally distributed around the mean 0. So, the
"Normality of Residual" assumption holds.
# Model 2: training residuals vs. predicted values
plot_res_scatter(X_train_2, lr_model_2)
plt.show()
Assessment:
Homoscedasticity assumption holds.
# Model 3: distribution of the training residuals
plot_res_dist(X_train_3, lr_model_3)
plt.show()
Assessment:
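Residuals are approximately normally distributed around a mean of 0, so the
"Normality of Residuals" assumption holds approximately. Note that the Omnibus
and Jarque-Bera tests in the Model 3 summary (Prob(JB) = 5.81e-38, skew =
-0.698, kurtosis = 5.473) reject exact normality: the residuals are slightly
left-skewed with heavier tails than a normal distribution.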
# Model 3: training residuals vs. predicted values
plot_res_scatter(X_train_3, lr_model_3)
plt.show()
Assessment:
Homoscedasticity assumption holds.
# Model 4: distribution of the training residuals
plot_res_dist(X_train_4, lr_model_4)
plt.show()
Assessment:
Residuals are normally distributed around the mean 0. So, the
"Normality of Residual" assumption holds.
# Model 4: training residuals vs. predicted values
plot_res_scatter(X_train_4, lr_model_4)
plt.show()
Assessment:
Homoscedasticity assumption holds.
# Model 5: distribution of the training residuals
plot_res_dist(X_train_5, lr_model_5)
plt.show()
Assessment:
Residuals are normally distributed around the mean 0. So, the
"Normality of Residual" assumption holds.
# Model 5: training residuals vs. predicted values
plot_res_scatter(X_train_5, lr_model_5)
plt.show()
Assessment:
Homoscedasticity assumption holds.
# Model 6: distribution of the training residuals
plot_res_dist(X_train_6, lr_model_6)
plt.show()
Assessment:
Residuals are normally distributed around the mean 0. So, the
"Normality of Residual" assumption holds.
# Model 6: training residuals vs. predicted values
plot_res_scatter(X_train_6, lr_model_6)
plt.show()
Assessment:
Homoscedasticity assumption holds.
# Model 7: distribution of the training residuals
plot_res_dist(X_train_7, lr_model_7)
plt.show()
Assessment:
Residuals are normally distributed around the mean 0. So, the
"Normality of Residual" assumption holds.
# Model 7: training residuals vs. predicted values
plot_res_scatter(X_train_7, lr_model_7)
plt.show()
Assessment:
Homoscedasticity assumption holds.
# Model 8: distribution of the training residuals
plot_res_dist(X_train_8, lr_model_8)
plt.show()
Assessment:
Residuals are normally distributed around the mean 0. So, the
"Normality of Residual" assumption holds.
# Model 8: training residuals vs. predicted values
plot_res_scatter(X_train_8, lr_model_8)
plt.show()
Assessment:
Homoscedasticity assumption holds.
# Model 9: distribution of the training residuals
plot_res_dist(X_train_9, lr_model_9)
plt.show()
Assessment:
Residuals are normally distributed around the mean 0. So, the
"Normality of Residual" assumption holds.
# Model 9: training residuals vs. predicted values
plot_res_scatter(X_train_9, lr_model_9)
plt.show()
Assessment:
Homoscedasticity assumption holds.
# Scale the test-set columns listed in num_vars with the existing `scaler`
# (transform only; the scaler is not refitted here).
df_test[num_vars] = scaler.transform(df_test[num_vars])
# pop() removes 'cnt' from df_test in place, so X_test (an alias of df_test)
# ends up holding only the remaining columns.
y_test = df_test.pop('cnt')
X_test = df_test
# Helper to score a fitted model on the held-out test set
def r2_on_test_set(lr_model, train_df):
    """Return the R-squared of a fitted model on the test set.

    Parameters
    ----------
    lr_model : fitted statsmodels results object exposing ``predict``.
    train_df : training predictors of that model; only its column names are
        used, to select the matching columns of the global ``X_test``.

    Returns
    -------
    R-squared of the predictions against the global ``y_test``.
    """
    # Restrict the test set to the model's predictors and add the constant.
    test_design = sm.add_constant(X_test[train_df.columns])
    return r2_score(y_test, lr_model.predict(test_design))
def get_adjusted_r2(r2, p, n=None):
    """Return the adjusted R-squared for a given R-squared.

    Computes ``1 - (1 - r2) * (n - 1) / (n - p - 1)``.

    Parameters
    ----------
    r2 : plain R-squared value.
    p : number of predictors, excluding the intercept.
    n : number of observations the R-squared was computed on. When omitted,
        the size of the global ``y_test`` is used, as before.

    Returns
    -------
    The adjusted R-squared.
    """
    if n is None:
        # Default kept for existing callers, which score on the test set.
        n = y_test.shape[0]
    return 1 - ((1 - r2) * (n - 1)) / (n - p - 1)
# Compare every candidate model on the training and test sets
model_names = [f'Model {i}' for i in range(1, 10)]
lr_models = [lr_model_1, lr_model_2, lr_model_3, lr_model_4, lr_model_5, lr_model_6, lr_model_7, lr_model_8, lr_model_9]
train_sets = [X_train_1, X_train_2, X_train_3, X_train_4, X_train_5, X_train_6, X_train_7, X_train_8, X_train_9]

test_r2_scores = []
num_predictors = []
test_adjusted_r2_scores = []
train_r2_scores = []
train_adjusted_r2_scores = []

for lr_model, train_set in zip(lr_models, train_sets):
    r2 = r2_on_test_set(lr_model, train_set)
    # Count predictors from the training design matrix. lr_model.params also
    # contains the intercept ('const'), so len(lr_model.params) overstates the
    # number of predictors by one and biases the adjusted R-squared downwards.
    p = train_set.shape[1]
    adjusted_r2 = get_adjusted_r2(r2, p)
    test_r2_scores.append(r2)
    num_predictors.append(p)
    test_adjusted_r2_scores.append(adjusted_r2)
    train_r2_scores.append(lr_model.rsquared)
    train_adjusted_r2_scores.append(lr_model.rsquared_adj)

result_df = pd.DataFrame({
    'Model': model_names,
    'No. of Predictors': num_predictors,
    'R-squared (Train)': train_r2_scores,
    'R-squared (Test)': test_r2_scores,
    'Adj. R-squared (Train)': train_adjusted_r2_scores,
    'Adj. R-squared (Test)': test_adjusted_r2_scores,
})
result_df
| Model | No. of Predictors | R-squared (Train) | R-squared (Test) | Adj. R-squared (Train) | Adj. R-squared (Test) | |
|---|---|---|---|---|---|---|
| 0 | Model 1 | 12 | 0.835710 | 0.791771 | 0.832081 | 0.779700 |
| 1 | Model 2 | 11 | 0.703381 | 0.558414 | 0.697437 | 0.535061 |
| 2 | Model 3 | 15 | 0.839147 | 0.813676 | 0.834598 | 0.799976 |
| 3 | Model 4 | 12 | 0.790832 | 0.779084 | 0.786211 | 0.766277 |
| 4 | Model 5 | 11 | 0.772009 | 0.791792 | 0.767440 | 0.780781 |
| 5 | Model 6 | 10 | 0.805678 | 0.773139 | 0.802180 | 0.762284 |
| 6 | Model 7 | 5 | 0.738932 | 0.693713 | 0.736865 | 0.686557 |
| 7 | Model 8 | 17 | 0.842630 | 0.810699 | 0.837522 | 0.794768 |
| 8 | Model 9 | 16 | 0.772802 | 0.730680 | 0.765904 | 0.709452 |
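Assessment:
Model 3 has the highest R-squared on the test set (0.8137) and also the highest
adjusted R-squared on the test set, and its train and test R-squared values are
close (0.8391 vs. 0.8137), so it generalises best and is selected as the final model.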
# Model 3 is kept as the final model: it has the highest test R-squared in
# the comparison table above.
final_model = lr_model_3
final_model.summary()
| Dep. Variable: | cnt | R-squared: | 0.839 |
|---|---|---|---|
| Model: | OLS | Adj. R-squared: | 0.835 |
| Method: | Least Squares | F-statistic: | 184.5 |
| Date: | Tue, 30 Aug 2022 | Prob (F-statistic): | 4.75e-186 |
| Time: | 01:25:41 | Log-Likelihood: | 504.85 |
| No. Observations: | 510 | AIC: | -979.7 |
| Df Residuals: | 495 | BIC: | -916.2 |
| Df Model: | 14 | ||
| Covariance Type: | nonrobust |
| coef | std err | t | P>|t| | [0.025 | 0.975] | |
|---|---|---|---|---|---|---|
| const | 0.2450 | 0.032 | 7.618 | 0.000 | 0.182 | 0.308 |
| temp | 0.4387 | 0.036 | 12.093 | 0.000 | 0.367 | 0.510 |
| windspeed | -0.1585 | 0.025 | -6.276 | 0.000 | -0.208 | -0.109 |
| season_spring | -0.0713 | 0.021 | -3.314 | 0.001 | -0.113 | -0.029 |
| season_summer | 0.0349 | 0.015 | 2.251 | 0.025 | 0.004 | 0.065 |
| season_winter | 0.0869 | 0.018 | 4.831 | 0.000 | 0.052 | 0.122 |
| yr_2019 | 0.2345 | 0.008 | 28.687 | 0.000 | 0.218 | 0.251 |
| mnth_december | -0.0428 | 0.018 | -2.413 | 0.016 | -0.078 | -0.008 |
| mnth_january | -0.0500 | 0.018 | -2.719 | 0.007 | -0.086 | -0.014 |
| mnth_july | -0.0500 | 0.019 | -2.703 | 0.007 | -0.086 | -0.014 |
| mnth_november | -0.0395 | 0.019 | -2.064 | 0.040 | -0.077 | -0.002 |
| mnth_september | 0.0687 | 0.017 | 4.015 | 0.000 | 0.035 | 0.102 |
| holiday_yes | -0.0918 | 0.026 | -3.522 | 0.000 | -0.143 | -0.041 |
| weathersit_light_weather | -0.2917 | 0.025 | -11.840 | 0.000 | -0.340 | -0.243 |
| weathersit_misty | -0.0801 | 0.009 | -9.198 | 0.000 | -0.097 | -0.063 |
| Omnibus: | 69.242 | Durbin-Watson: | 2.024 |
|---|---|---|---|
| Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 171.476 |
| Skew: | -0.698 | Prob(JB): | 5.81e-38 |
| Kurtosis: | 5.473 | Cond. No. | 18.9 |