James McCool committed on
Commit
5a6e98e
·
1 Parent(s): d7354fe

removing prints from regressors

Browse files
src/sports/NBA_own_regress.py CHANGED
@@ -100,130 +100,4 @@ knn_model = KNeighborsRegressor(
100
 
101
  knn_model.fit(X_train, y_train)
102
 
103
- __all__ = ['xgb_model', 'lgb_model', 'knn_model']
104
-
105
- if __name__ == '__main__':
106
- X_full = df_clean[feature_cols]
107
- y_full = df_clean['Exposure Overall']
108
-
109
- # Get predictions from all your models on the full dataset
110
- y_pred_xgb_full = np.clip(xgb_model.predict(X_full), 0, 100)
111
- y_pred_lgb_full = np.clip(lgb_model.predict(X_full), 0, 100) * 100
112
- y_pred_knn_full = np.clip(knn_model.predict(X_full), 0, 100)
113
-
114
- # Create combo prediction
115
- y_pred_combo_full = (y_pred_xgb_full + y_pred_lgb_full + y_pred_knn_full) / 3
116
-
117
- # Create full comparison DataFrame
118
- comparison_full = pd.DataFrame({
119
- 'Actual_Exposure': y_full.values,
120
- 'XGB': y_pred_xgb_full,
121
- 'LGB': y_pred_lgb_full,
122
- 'KNN': y_pred_knn_full,
123
- 'Combo': y_pred_combo_full,
124
- 'Abs_Error': np.abs(y_full.values - y_pred_combo_full)
125
- })
126
-
127
- # Add back the full features for context
128
- comparison_full = pd.concat([
129
- X_full.reset_index(drop=True),
130
- comparison_full.reset_index(drop=True)
131
- ], axis=1)
132
-
133
- # You can also add the original columns from df_clean for more context
134
- comparison_full['Player'] = df_clean['Player'].values
135
- comparison_full['Contest_Date'] = df_clean['Contest Date'].values
136
- comparison_full['Pos'] = df_clean['Pos'].values
137
-
138
- # Overall performance metrics on full dataset
139
- print("\n=== Full Dataset Performance ===")
140
- for model_name, predictions in [('XGB', y_pred_xgb_full), ('LGB', y_pred_lgb_full), ('KNN', y_pred_knn_full), ('Combo', y_pred_combo_full)]:
141
- rmse = np.sqrt(mean_squared_error(y_full, predictions))
142
- mae = mean_absolute_error(y_full, predictions)
143
- r2 = r2_score(y_full, predictions)
144
- print(f"{model_name:8} - RMSE: {rmse:6.2f}, MAE: {mae:6.2f}, R²: {r2:6.3f}")
145
-
146
- # Analysis on full dataset
147
- print("\n=== Highest Ownership (Full Data) ===")
148
- print(comparison_full.sort_values('Actual_Exposure', ascending=False).head(10)[
149
- ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
150
- ])
151
-
152
- print("\n=== Highest Predicted Ownership (Full Data) ===")
153
- print(comparison_full.sort_values('Combo', ascending=False).head(10)[
154
- ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
155
- ])
156
-
157
- print("\n=== Worst Predictions (Full Data) ===")
158
- print(comparison_full.nlargest(10, 'Abs_Error')[
159
- ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
160
- ])
161
-
162
- print("\n=== Best Predictions (Full Data) ===")
163
- print(comparison_full.nsmallest(10, 'Abs_Error')[
164
- ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
165
- ])
166
-
167
- # Prepare the current data with the same feature engineering
168
- current_projections['Actual'] = current_projections['Median'] # Rename to match training
169
- current_projections['value'] = current_projections['Actual'] / (current_projections['Salary'] / 1000)
170
- current_projections['value_adv'] = current_projections['value'] - current_projections['value'].mean()
171
- current_projections['actual_adv'] = current_projections['Actual'] - current_projections['Actual'].mean()
172
-
173
- # Create the same engineered features
174
- # Assuming all rows are from the same contest (current slate)
175
- current_projections['contest_size'] = len(current_projections) # All players in current slate
176
-
177
- # Create value_play feature (same logic as training)
178
- current_projections['value_play'] = np.where(
179
- (current_projections['Salary'] <= 4000) &
180
- (current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 6.0),
181
- 1, 0
182
- )
183
-
184
- current_projections['value_density'] = current_projections['value_play'].sum() / current_projections['Player'].count()
185
-
186
- current_projections['base_ownership'] = 800.0 / current_projections['contest_size']
187
-
188
- current_projections['strong_play'] = np.where((current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 6.0), 1, 0)
189
- current_projections['punt_play'] = np.where((current_projections['Salary'] < 3500) & (current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 5.0), 1, 0)
190
-
191
- current_projections['ownership_share'] = current_projections['Own'].sum() / current_projections['Player'].count() * 800
192
- # Prepare features in the same order as training
193
- X_current = current_projections[feature_cols]
194
-
195
- # Make predictions with all your models
196
- current_projections['XGB'] = np.clip(xgb_model.predict(X_current), 0, 100)
197
- current_projections['LGB'] = np.clip(lgb_model.predict(X_current), 0, 100) * 100
198
- current_projections['KNN'] = np.clip(knn_model.predict(X_current), 0, 100)
199
-
200
- # Create combo prediction
201
- current_projections['Combo'] = (
202
- (current_projections['XGB'] * .30) +
203
- (current_projections['LGB'] * .30) +
204
- (current_projections['KNN'] * .40)
205
- )
206
-
207
- current_projections['Combo'] = np.where((current_projections['value'] < 5.0) & (current_projections['Salary'] < 9000), current_projections['Combo'] * .75, current_projections['Combo'])
208
- current_projections['Combo'] = np.where((current_projections['Median'] < 18.0), current_projections['Combo'] * .33, current_projections['Combo'])
209
- current_projections['Combo'] = np.where((current_projections['Salary'] > 5000) & (current_projections['value'] < 4.5), 1, current_projections['Combo'])
210
- current_projections['Combo'] = np.where(current_projections['value'] > 6.0, current_projections['Combo'] * ((current_projections['value'] / 6.0)), current_projections['Combo'])
211
- current_projections['Combo'] = np.where((current_projections['Salary'] > 9000), current_projections['Combo'] * .75, current_projections['Combo'])
212
- current_projections['Combo'] = np.where((current_projections['Salary'] > 9000) & (current_projections['Combo'] < current_projections['value']), current_projections['value'], current_projections['Combo'])
213
- current_projections['Combo'] = np.where((current_projections['Median'] > 20.0) & (current_projections['Salary'] < 3500), current_projections['Combo'] * (10 - current_projections['strong_play'].sum()).clip(0, 3), current_projections['Combo'])
214
- current_projections['Combo'] = np.where(current_projections['Position'].str.contains('/'), current_projections['Combo'] * 1.25, current_projections['Combo'] * .75)
215
-
216
- power_scale = 1.10
217
- combo_powered = current_projections['Combo'] ** power_scale
218
-
219
- norm_var = 800.0 / combo_powered.sum()
220
- current_projections['Combo_powered'] = combo_powered * norm_var
221
-
222
- # Display predictions sorted by predicted ownership
223
- print("\n=== Current Slate - Predicted Ownership ===")
224
- print(f'the strong_play count is {current_projections['strong_play'].sum()}')
225
- display_cols = ['Player', 'Position', 'Salary', 'Median', 'value', 'Own', 'XGB', 'LGB', 'KNN', 'Combo', 'Combo_powered']
226
- print(f'sum of Own is {current_projections['Own'].sum()} while sum of combo is {current_projections['Combo'].sum()} while combo_powered is {current_projections['Combo_powered'].sum()}')
227
- print(f'sum of position C is {current_projections[current_projections['Position'] == 'C']['Combo_powered'].sum()}')
228
- print(current_projections.sort_values('Combo_powered', ascending=False)[display_cols].head(20))
229
- print(current_projections[current_projections['Position'] == 'C'].sort_values('Combo_powered', ascending=False)[display_cols].head(20))
 
100
 
101
  knn_model.fit(X_train, y_train)
102
 
103
+ __all__ = ['xgb_model', 'lgb_model', 'knn_model']
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
src/sports/NHL_own_regress.py CHANGED
@@ -96,125 +96,4 @@ knn_model = KNeighborsRegressor(
96
 
97
  knn_model.fit(X_train, y_train)
98
 
99
- __all__ = ['xgb_model', 'lgb_model', 'knn_model']
100
-
101
- if __name__ == '__main__':
102
- X_full = df_clean[feature_cols]
103
- y_full = df_clean['Exposure Overall']
104
-
105
- # Get predictions from all your models on the full dataset
106
- y_pred_xgb_full = np.clip(xgb_model.predict(X_full), 0, 100)
107
- y_pred_lgb_full = np.clip(lgb_model.predict(X_full), 0, 100) * 100
108
- y_pred_knn_full = np.clip(knn_model.predict(X_full), 0, 100)
109
-
110
- # Create combo prediction
111
- y_pred_combo_full = (y_pred_xgb_full + y_pred_lgb_full + y_pred_knn_full) / 3
112
-
113
- # Create full comparison DataFrame
114
- comparison_full = pd.DataFrame({
115
- 'Actual_Exposure': y_full.values,
116
- 'XGB': y_pred_xgb_full,
117
- 'LGB': y_pred_lgb_full,
118
- 'KNN': y_pred_knn_full,
119
- 'Combo': y_pred_combo_full,
120
- 'Abs_Error': np.abs(y_full.values - y_pred_combo_full)
121
- })
122
-
123
- # Add back the full features for context
124
- comparison_full = pd.concat([
125
- X_full.reset_index(drop=True),
126
- comparison_full.reset_index(drop=True)
127
- ], axis=1)
128
-
129
- # You can also add the original columns from df_clean for more context
130
- comparison_full['Player'] = df_clean['Player'].values
131
- comparison_full['Contest_Date'] = df_clean['Contest Date'].values
132
- comparison_full['Pos'] = df_clean['Pos'].values
133
-
134
- # Overall performance metrics on full dataset
135
- print("\n=== Full Dataset Performance ===")
136
- for model_name, predictions in [('XGB', y_pred_xgb_full), ('LGB', y_pred_lgb_full), ('KNN', y_pred_knn_full), ('Combo', y_pred_combo_full)]:
137
- rmse = np.sqrt(mean_squared_error(y_full, predictions))
138
- mae = mean_absolute_error(y_full, predictions)
139
- r2 = r2_score(y_full, predictions)
140
- print(f"{model_name:8} - RMSE: {rmse:6.2f}, MAE: {mae:6.2f}, R²: {r2:6.3f}")
141
-
142
- # Analysis on full dataset
143
- print("\n=== Highest Ownership (Full Data) ===")
144
- print(comparison_full.sort_values('Actual_Exposure', ascending=False).head(10)[
145
- ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
146
- ])
147
-
148
- print("\n=== Highest Predicted Ownership (Full Data) ===")
149
- print(comparison_full.sort_values('Combo', ascending=False).head(10)[
150
- ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
151
- ])
152
-
153
- print("\n=== Worst Predictions (Full Data) ===")
154
- print(comparison_full.nlargest(10, 'Abs_Error')[
155
- ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
156
- ])
157
-
158
- print("\n=== Best Predictions (Full Data) ===")
159
- print(comparison_full.nsmallest(10, 'Abs_Error')[
160
- ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
161
- ])
162
-
163
- # Prepare the current data with the same feature engineering
164
- current_projections['Actual'] = current_projections['Median'] # Rename to match training
165
- current_projections['value'] = current_projections['Actual'] / (current_projections['Salary'] / 1000)
166
- current_projections['value_adv'] = current_projections['value'] - current_projections['value'].mean()
167
- current_projections['actual_adv'] = current_projections['Actual'] - current_projections['Actual'].mean()
168
-
169
- # Create the same engineered features
170
- # Assuming all rows are from the same contest (current slate)
171
- current_projections['contest_size'] = len(current_projections) # All players in current slate
172
-
173
- # Create value_play feature (same logic as training)
174
- current_projections['value_play'] = np.where(
175
- (current_projections['Salary'] <= 4500) &
176
- (current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 2.0),
177
- 1, 0
178
- )
179
-
180
- current_projections['value_density'] = current_projections['value_play'].sum() / current_projections['Player'].count()
181
-
182
- current_projections['base_ownership'] = 900.0 / current_projections['contest_size']
183
-
184
- current_projections['strong_play'] = np.where((current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 2.0), 1, 0)
185
- current_projections['punt_play'] = np.where((current_projections['Salary'] < 3500) & (current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 2.0), 1, 0)
186
-
187
- current_projections['ownership_share'] = current_projections['Own'].sum() / current_projections['Player'].count() * 900
188
- # Prepare features in the same order as training
189
- X_current = current_projections[feature_cols]
190
-
191
- # Make predictions with all your models
192
- current_projections['XGB'] = np.clip(xgb_model.predict(X_current), 0, 100)
193
- current_projections['LGB'] = np.clip(lgb_model.predict(X_current), 0, 100) * 100
194
- current_projections['KNN'] = np.clip(knn_model.predict(X_current), 0, 100)
195
-
196
- # Create combo prediction
197
- current_projections['Combo'] = (
198
- (current_projections['XGB'] * .30) +
199
- (current_projections['LGB'] * .30) +
200
- (current_projections['KNN'] * .40)
201
- )
202
-
203
- current_projections['Combo'] = np.where((current_projections['value'] < 1.5) & (current_projections['Salary'] < 7500), current_projections['Combo'] * .75, current_projections['Combo'])
204
- current_projections['Combo'] = np.where((current_projections['Salary'] > 5000) & (current_projections['value'] < 1.5), 1, current_projections['Combo'])
205
- current_projections['Combo'] = np.where(current_projections['value'] > 2.0, current_projections['Combo'] * (2 + (current_projections['value'] - 2.0)), current_projections['Combo'])
206
- current_projections['Combo'] = np.where((current_projections['Salary'] > 8500), current_projections['Combo'] * 1.5, current_projections['Combo'])
207
-
208
- power_scale = 1.50
209
- combo_powered = current_projections['Combo'] ** power_scale
210
-
211
- norm_var = 900.0 / combo_powered.sum()
212
- current_projections['Combo_powered'] = combo_powered * norm_var
213
-
214
- # Display predictions sorted by predicted ownership
215
- print("\n=== Current Slate - Predicted Ownership ===")
216
- display_cols = ['Player', 'Position', 'Salary', 'Median', 'value', 'Own', 'XGB', 'LGB', 'KNN', 'Combo', 'Combo_powered']
217
- print(f'sum of Own is {current_projections['Own'].sum()} while sum of combo is {current_projections['Combo'].sum()} while combo_powered is {current_projections['Combo_powered'].sum()}')
218
- print(f'sum of position C is {current_projections[current_projections['Position'] == 'C']['Combo_powered'].sum()}')
219
- print(current_projections.sort_values('Combo_powered', ascending=False)[display_cols].head(20))
220
- print(current_projections[current_projections['Position'] == 'C'].sort_values('Combo_powered', ascending=False)[display_cols].head(20))
 
96
 
97
  knn_model.fit(X_train, y_train)
98
 
99
+ __all__ = ['xgb_model', 'lgb_model', 'knn_model']