James McCool
committed on
Commit
·
5a6e98e
1
Parent(s):
d7354fe
removing prints from regressors
Browse files
- src/sports/NBA_own_regress.py +1 -127
- src/sports/NHL_own_regress.py +1 -122
src/sports/NBA_own_regress.py
CHANGED
|
@@ -100,130 +100,4 @@ knn_model = KNeighborsRegressor(
|
|
| 100 |
|
| 101 |
knn_model.fit(X_train, y_train)
|
| 102 |
|
| 103 |
-
__all__ = ['xgb_model', 'lgb_model', 'knn_model']
|
| 104 |
-
|
| 105 |
-
if __name__ == '__main__':
|
| 106 |
-
X_full = df_clean[feature_cols]
|
| 107 |
-
y_full = df_clean['Exposure Overall']
|
| 108 |
-
|
| 109 |
-
# Get predictions from all your models on the full dataset
|
| 110 |
-
y_pred_xgb_full = np.clip(xgb_model.predict(X_full), 0, 100)
|
| 111 |
-
y_pred_lgb_full = np.clip(lgb_model.predict(X_full), 0, 100) * 100
|
| 112 |
-
y_pred_knn_full = np.clip(knn_model.predict(X_full), 0, 100)
|
| 113 |
-
|
| 114 |
-
# Create combo prediction
|
| 115 |
-
y_pred_combo_full = (y_pred_xgb_full + y_pred_lgb_full + y_pred_knn_full) / 3
|
| 116 |
-
|
| 117 |
-
# Create full comparison DataFrame
|
| 118 |
-
comparison_full = pd.DataFrame({
|
| 119 |
-
'Actual_Exposure': y_full.values,
|
| 120 |
-
'XGB': y_pred_xgb_full,
|
| 121 |
-
'LGB': y_pred_lgb_full,
|
| 122 |
-
'KNN': y_pred_knn_full,
|
| 123 |
-
'Combo': y_pred_combo_full,
|
| 124 |
-
'Abs_Error': np.abs(y_full.values - y_pred_combo_full)
|
| 125 |
-
})
|
| 126 |
-
|
| 127 |
-
# Add back the full features for context
|
| 128 |
-
comparison_full = pd.concat([
|
| 129 |
-
X_full.reset_index(drop=True),
|
| 130 |
-
comparison_full.reset_index(drop=True)
|
| 131 |
-
], axis=1)
|
| 132 |
-
|
| 133 |
-
# You can also add the original columns from df_clean for more context
|
| 134 |
-
comparison_full['Player'] = df_clean['Player'].values
|
| 135 |
-
comparison_full['Contest_Date'] = df_clean['Contest Date'].values
|
| 136 |
-
comparison_full['Pos'] = df_clean['Pos'].values
|
| 137 |
-
|
| 138 |
-
# Overall performance metrics on full dataset
|
| 139 |
-
print("\n=== Full Dataset Performance ===")
|
| 140 |
-
for model_name, predictions in [('XGB', y_pred_xgb_full), ('LGB', y_pred_lgb_full), ('KNN', y_pred_knn_full), ('Combo', y_pred_combo_full)]:
|
| 141 |
-
rmse = np.sqrt(mean_squared_error(y_full, predictions))
|
| 142 |
-
mae = mean_absolute_error(y_full, predictions)
|
| 143 |
-
r2 = r2_score(y_full, predictions)
|
| 144 |
-
print(f"{model_name:8} - RMSE: {rmse:6.2f}, MAE: {mae:6.2f}, R²: {r2:6.3f}")
|
| 145 |
-
|
| 146 |
-
# Analysis on full dataset
|
| 147 |
-
print("\n=== Highest Ownership (Full Data) ===")
|
| 148 |
-
print(comparison_full.sort_values('Actual_Exposure', ascending=False).head(10)[
|
| 149 |
-
['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
|
| 150 |
-
])
|
| 151 |
-
|
| 152 |
-
print("\n=== Highest Predicted Ownership (Full Data) ===")
|
| 153 |
-
print(comparison_full.sort_values('Combo', ascending=False).head(10)[
|
| 154 |
-
['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
|
| 155 |
-
])
|
| 156 |
-
|
| 157 |
-
print("\n=== Worst Predictions (Full Data) ===")
|
| 158 |
-
print(comparison_full.nlargest(10, 'Abs_Error')[
|
| 159 |
-
['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
|
| 160 |
-
])
|
| 161 |
-
|
| 162 |
-
print("\n=== Best Predictions (Full Data) ===")
|
| 163 |
-
print(comparison_full.nsmallest(10, 'Abs_Error')[
|
| 164 |
-
['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
|
| 165 |
-
])
|
| 166 |
-
|
| 167 |
-
# Prepare the current data with the same feature engineering
|
| 168 |
-
current_projections['Actual'] = current_projections['Median'] # Rename to match training
|
| 169 |
-
current_projections['value'] = current_projections['Actual'] / (current_projections['Salary'] / 1000)
|
| 170 |
-
current_projections['value_adv'] = current_projections['value'] - current_projections['value'].mean()
|
| 171 |
-
current_projections['actual_adv'] = current_projections['Actual'] - current_projections['Actual'].mean()
|
| 172 |
-
|
| 173 |
-
# Create the same engineered features
|
| 174 |
-
# Assuming all rows are from the same contest (current slate)
|
| 175 |
-
current_projections['contest_size'] = len(current_projections) # All players in current slate
|
| 176 |
-
|
| 177 |
-
# Create value_play feature (same logic as training)
|
| 178 |
-
current_projections['value_play'] = np.where(
|
| 179 |
-
(current_projections['Salary'] <= 4000) &
|
| 180 |
-
(current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 6.0),
|
| 181 |
-
1, 0
|
| 182 |
-
)
|
| 183 |
-
|
| 184 |
-
current_projections['value_density'] = current_projections['value_play'].sum() / current_projections['Player'].count()
|
| 185 |
-
|
| 186 |
-
current_projections['base_ownership'] = 800.0 / current_projections['contest_size']
|
| 187 |
-
|
| 188 |
-
current_projections['strong_play'] = np.where((current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 6.0), 1, 0)
|
| 189 |
-
current_projections['punt_play'] = np.where((current_projections['Salary'] < 3500) & (current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 5.0), 1, 0)
|
| 190 |
-
|
| 191 |
-
current_projections['ownership_share'] = current_projections['Own'].sum() / current_projections['Player'].count() * 800
|
| 192 |
-
# Prepare features in the same order as training
|
| 193 |
-
X_current = current_projections[feature_cols]
|
| 194 |
-
|
| 195 |
-
# Make predictions with all your models
|
| 196 |
-
current_projections['XGB'] = np.clip(xgb_model.predict(X_current), 0, 100)
|
| 197 |
-
current_projections['LGB'] = np.clip(lgb_model.predict(X_current), 0, 100) * 100
|
| 198 |
-
current_projections['KNN'] = np.clip(knn_model.predict(X_current), 0, 100)
|
| 199 |
-
|
| 200 |
-
# Create combo prediction
|
| 201 |
-
current_projections['Combo'] = (
|
| 202 |
-
(current_projections['XGB'] * .30) +
|
| 203 |
-
(current_projections['LGB'] * .30) +
|
| 204 |
-
(current_projections['KNN'] * .40)
|
| 205 |
-
)
|
| 206 |
-
|
| 207 |
-
current_projections['Combo'] = np.where((current_projections['value'] < 5.0) & (current_projections['Salary'] < 9000), current_projections['Combo'] * .75, current_projections['Combo'])
|
| 208 |
-
current_projections['Combo'] = np.where((current_projections['Median'] < 18.0), current_projections['Combo'] * .33, current_projections['Combo'])
|
| 209 |
-
current_projections['Combo'] = np.where((current_projections['Salary'] > 5000) & (current_projections['value'] < 4.5), 1, current_projections['Combo'])
|
| 210 |
-
current_projections['Combo'] = np.where(current_projections['value'] > 6.0, current_projections['Combo'] * ((current_projections['value'] / 6.0)), current_projections['Combo'])
|
| 211 |
-
current_projections['Combo'] = np.where((current_projections['Salary'] > 9000), current_projections['Combo'] * .75, current_projections['Combo'])
|
| 212 |
-
current_projections['Combo'] = np.where((current_projections['Salary'] > 9000) & (current_projections['Combo'] < current_projections['value']), current_projections['value'], current_projections['Combo'])
|
| 213 |
-
current_projections['Combo'] = np.where((current_projections['Median'] > 20.0) & (current_projections['Salary'] < 3500), current_projections['Combo'] * (10 - current_projections['strong_play'].sum()).clip(0, 3), current_projections['Combo'])
|
| 214 |
-
current_projections['Combo'] = np.where(current_projections['Position'].str.contains('/'), current_projections['Combo'] * 1.25, current_projections['Combo'] * .75)
|
| 215 |
-
|
| 216 |
-
power_scale = 1.10
|
| 217 |
-
combo_powered = current_projections['Combo'] ** power_scale
|
| 218 |
-
|
| 219 |
-
norm_var = 800.0 / combo_powered.sum()
|
| 220 |
-
current_projections['Combo_powered'] = combo_powered * norm_var
|
| 221 |
-
|
| 222 |
-
# Display predictions sorted by predicted ownership
|
| 223 |
-
print("\n=== Current Slate - Predicted Ownership ===")
|
| 224 |
-
print(f'the strong_play count is {current_projections['strong_play'].sum()}')
|
| 225 |
-
display_cols = ['Player', 'Position', 'Salary', 'Median', 'value', 'Own', 'XGB', 'LGB', 'KNN', 'Combo', 'Combo_powered']
|
| 226 |
-
print(f'sum of Own is {current_projections['Own'].sum()} while sum of combo is {current_projections['Combo'].sum()} while combo_powered is {current_projections['Combo_powered'].sum()}')
|
| 227 |
-
print(f'sum of position C is {current_projections[current_projections['Position'] == 'C']['Combo_powered'].sum()}')
|
| 228 |
-
print(current_projections.sort_values('Combo_powered', ascending=False)[display_cols].head(20))
|
| 229 |
-
print(current_projections[current_projections['Position'] == 'C'].sort_values('Combo_powered', ascending=False)[display_cols].head(20))
|
|
|
|
| 100 |
|
| 101 |
knn_model.fit(X_train, y_train)
|
| 102 |
|
| 103 |
+
__all__ = ['xgb_model', 'lgb_model', 'knn_model']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
src/sports/NHL_own_regress.py
CHANGED
|
@@ -96,125 +96,4 @@ knn_model = KNeighborsRegressor(
|
|
| 96 |
|
| 97 |
knn_model.fit(X_train, y_train)
|
| 98 |
|
| 99 |
-
__all__ = ['xgb_model', 'lgb_model', 'knn_model']
|
| 100 |
-
|
| 101 |
-
if __name__ == '__main__':
|
| 102 |
-
X_full = df_clean[feature_cols]
|
| 103 |
-
y_full = df_clean['Exposure Overall']
|
| 104 |
-
|
| 105 |
-
# Get predictions from all your models on the full dataset
|
| 106 |
-
y_pred_xgb_full = np.clip(xgb_model.predict(X_full), 0, 100)
|
| 107 |
-
y_pred_lgb_full = np.clip(lgb_model.predict(X_full), 0, 100) * 100
|
| 108 |
-
y_pred_knn_full = np.clip(knn_model.predict(X_full), 0, 100)
|
| 109 |
-
|
| 110 |
-
# Create combo prediction
|
| 111 |
-
y_pred_combo_full = (y_pred_xgb_full + y_pred_lgb_full + y_pred_knn_full) / 3
|
| 112 |
-
|
| 113 |
-
# Create full comparison DataFrame
|
| 114 |
-
comparison_full = pd.DataFrame({
|
| 115 |
-
'Actual_Exposure': y_full.values,
|
| 116 |
-
'XGB': y_pred_xgb_full,
|
| 117 |
-
'LGB': y_pred_lgb_full,
|
| 118 |
-
'KNN': y_pred_knn_full,
|
| 119 |
-
'Combo': y_pred_combo_full,
|
| 120 |
-
'Abs_Error': np.abs(y_full.values - y_pred_combo_full)
|
| 121 |
-
})
|
| 122 |
-
|
| 123 |
-
# Add back the full features for context
|
| 124 |
-
comparison_full = pd.concat([
|
| 125 |
-
X_full.reset_index(drop=True),
|
| 126 |
-
comparison_full.reset_index(drop=True)
|
| 127 |
-
], axis=1)
|
| 128 |
-
|
| 129 |
-
# You can also add the original columns from df_clean for more context
|
| 130 |
-
comparison_full['Player'] = df_clean['Player'].values
|
| 131 |
-
comparison_full['Contest_Date'] = df_clean['Contest Date'].values
|
| 132 |
-
comparison_full['Pos'] = df_clean['Pos'].values
|
| 133 |
-
|
| 134 |
-
# Overall performance metrics on full dataset
|
| 135 |
-
print("\n=== Full Dataset Performance ===")
|
| 136 |
-
for model_name, predictions in [('XGB', y_pred_xgb_full), ('LGB', y_pred_lgb_full), ('KNN', y_pred_knn_full), ('Combo', y_pred_combo_full)]:
|
| 137 |
-
rmse = np.sqrt(mean_squared_error(y_full, predictions))
|
| 138 |
-
mae = mean_absolute_error(y_full, predictions)
|
| 139 |
-
r2 = r2_score(y_full, predictions)
|
| 140 |
-
print(f"{model_name:8} - RMSE: {rmse:6.2f}, MAE: {mae:6.2f}, R²: {r2:6.3f}")
|
| 141 |
-
|
| 142 |
-
# Analysis on full dataset
|
| 143 |
-
print("\n=== Highest Ownership (Full Data) ===")
|
| 144 |
-
print(comparison_full.sort_values('Actual_Exposure', ascending=False).head(10)[
|
| 145 |
-
['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
|
| 146 |
-
])
|
| 147 |
-
|
| 148 |
-
print("\n=== Highest Predicted Ownership (Full Data) ===")
|
| 149 |
-
print(comparison_full.sort_values('Combo', ascending=False).head(10)[
|
| 150 |
-
['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
|
| 151 |
-
])
|
| 152 |
-
|
| 153 |
-
print("\n=== Worst Predictions (Full Data) ===")
|
| 154 |
-
print(comparison_full.nlargest(10, 'Abs_Error')[
|
| 155 |
-
['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
|
| 156 |
-
])
|
| 157 |
-
|
| 158 |
-
print("\n=== Best Predictions (Full Data) ===")
|
| 159 |
-
print(comparison_full.nsmallest(10, 'Abs_Error')[
|
| 160 |
-
['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
|
| 161 |
-
])
|
| 162 |
-
|
| 163 |
-
# Prepare the current data with the same feature engineering
|
| 164 |
-
current_projections['Actual'] = current_projections['Median'] # Rename to match training
|
| 165 |
-
current_projections['value'] = current_projections['Actual'] / (current_projections['Salary'] / 1000)
|
| 166 |
-
current_projections['value_adv'] = current_projections['value'] - current_projections['value'].mean()
|
| 167 |
-
current_projections['actual_adv'] = current_projections['Actual'] - current_projections['Actual'].mean()
|
| 168 |
-
|
| 169 |
-
# Create the same engineered features
|
| 170 |
-
# Assuming all rows are from the same contest (current slate)
|
| 171 |
-
current_projections['contest_size'] = len(current_projections) # All players in current slate
|
| 172 |
-
|
| 173 |
-
# Create value_play feature (same logic as training)
|
| 174 |
-
current_projections['value_play'] = np.where(
|
| 175 |
-
(current_projections['Salary'] <= 4500) &
|
| 176 |
-
(current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 2.0),
|
| 177 |
-
1, 0
|
| 178 |
-
)
|
| 179 |
-
|
| 180 |
-
current_projections['value_density'] = current_projections['value_play'].sum() / current_projections['Player'].count()
|
| 181 |
-
|
| 182 |
-
current_projections['base_ownership'] = 900.0 / current_projections['contest_size']
|
| 183 |
-
|
| 184 |
-
current_projections['strong_play'] = np.where((current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 2.0), 1, 0)
|
| 185 |
-
current_projections['punt_play'] = np.where((current_projections['Salary'] < 3500) & (current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 2.0), 1, 0)
|
| 186 |
-
|
| 187 |
-
current_projections['ownership_share'] = current_projections['Own'].sum() / current_projections['Player'].count() * 900
|
| 188 |
-
# Prepare features in the same order as training
|
| 189 |
-
X_current = current_projections[feature_cols]
|
| 190 |
-
|
| 191 |
-
# Make predictions with all your models
|
| 192 |
-
current_projections['XGB'] = np.clip(xgb_model.predict(X_current), 0, 100)
|
| 193 |
-
current_projections['LGB'] = np.clip(lgb_model.predict(X_current), 0, 100) * 100
|
| 194 |
-
current_projections['KNN'] = np.clip(knn_model.predict(X_current), 0, 100)
|
| 195 |
-
|
| 196 |
-
# Create combo prediction
|
| 197 |
-
current_projections['Combo'] = (
|
| 198 |
-
(current_projections['XGB'] * .30) +
|
| 199 |
-
(current_projections['LGB'] * .30) +
|
| 200 |
-
(current_projections['KNN'] * .40)
|
| 201 |
-
)
|
| 202 |
-
|
| 203 |
-
current_projections['Combo'] = np.where((current_projections['value'] < 1.5) & (current_projections['Salary'] < 7500), current_projections['Combo'] * .75, current_projections['Combo'])
|
| 204 |
-
current_projections['Combo'] = np.where((current_projections['Salary'] > 5000) & (current_projections['value'] < 1.5), 1, current_projections['Combo'])
|
| 205 |
-
current_projections['Combo'] = np.where(current_projections['value'] > 2.0, current_projections['Combo'] * (2 + (current_projections['value'] - 2.0)), current_projections['Combo'])
|
| 206 |
-
current_projections['Combo'] = np.where((current_projections['Salary'] > 8500), current_projections['Combo'] * 1.5, current_projections['Combo'])
|
| 207 |
-
|
| 208 |
-
power_scale = 1.50
|
| 209 |
-
combo_powered = current_projections['Combo'] ** power_scale
|
| 210 |
-
|
| 211 |
-
norm_var = 900.0 / combo_powered.sum()
|
| 212 |
-
current_projections['Combo_powered'] = combo_powered * norm_var
|
| 213 |
-
|
| 214 |
-
# Display predictions sorted by predicted ownership
|
| 215 |
-
print("\n=== Current Slate - Predicted Ownership ===")
|
| 216 |
-
display_cols = ['Player', 'Position', 'Salary', 'Median', 'value', 'Own', 'XGB', 'LGB', 'KNN', 'Combo', 'Combo_powered']
|
| 217 |
-
print(f'sum of Own is {current_projections['Own'].sum()} while sum of combo is {current_projections['Combo'].sum()} while combo_powered is {current_projections['Combo_powered'].sum()}')
|
| 218 |
-
print(f'sum of position C is {current_projections[current_projections['Position'] == 'C']['Combo_powered'].sum()}')
|
| 219 |
-
print(current_projections.sort_values('Combo_powered', ascending=False)[display_cols].head(20))
|
| 220 |
-
print(current_projections[current_projections['Position'] == 'C'].sort_values('Combo_powered', ascending=False)[display_cols].head(20))
|
|
|
|
| 96 |
|
| 97 |
knn_model.fit(X_train, y_train)
|
| 98 |
|
| 99 |
+
__all__ = ['xgb_model', 'lgb_model', 'knn_model']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|