Spaces:

Multichem-PD
/

Paydirt_model_updates

Sleeping

App Files Community

James McCool committed on Nov 20, 2025

Commit

5a6e98e

1 Parent(s): d7354fe

removing prints from regressors

Browse files

Files changed (2) hide show

src/sports/NBA_own_regress.py +1 -127
src/sports/NHL_own_regress.py +1 -122

src/sports/NBA_own_regress.py CHANGED Viewed

@@ -100,130 +100,4 @@ knn_model = KNeighborsRegressor(
 knn_model.fit(X_train, y_train)
-__all__ = ['xgb_model', 'lgb_model', 'knn_model']
-if __name__ == '__main__':
-    X_full = df_clean[feature_cols]
-    y_full = df_clean['Exposure Overall']
-    # Get predictions from all your models on the full dataset
-    y_pred_xgb_full = np.clip(xgb_model.predict(X_full), 0, 100)
-    y_pred_lgb_full = np.clip(lgb_model.predict(X_full), 0, 100) * 100
-    y_pred_knn_full = np.clip(knn_model.predict(X_full), 0, 100)
-    # Create combo prediction
-    y_pred_combo_full = (y_pred_xgb_full + y_pred_lgb_full + y_pred_knn_full) / 3
-    # Create full comparison DataFrame
-    comparison_full = pd.DataFrame({
-        'Actual_Exposure': y_full.values,
-        'XGB': y_pred_xgb_full,
-        'LGB': y_pred_lgb_full,
-        'KNN': y_pred_knn_full,
-        'Combo': y_pred_combo_full,
-        'Abs_Error': np.abs(y_full.values - y_pred_combo_full)
-    })
-    # Add back the full features for context
-    comparison_full = pd.concat([
-        X_full.reset_index(drop=True),
-        comparison_full.reset_index(drop=True)
-    ], axis=1)
-    # You can also add the original columns from df_clean for more context
-    comparison_full['Player'] = df_clean['Player'].values
-    comparison_full['Contest_Date'] = df_clean['Contest Date'].values
-    comparison_full['Pos'] = df_clean['Pos'].values
-    # Overall performance metrics on full dataset
-    print("\n=== Full Dataset Performance ===")
-    for model_name, predictions in [('XGB', y_pred_xgb_full), ('LGB', y_pred_lgb_full), ('KNN', y_pred_knn_full), ('Combo', y_pred_combo_full)]:
-        rmse = np.sqrt(mean_squared_error(y_full, predictions))
-        mae = mean_absolute_error(y_full, predictions)
-        r2 = r2_score(y_full, predictions)
-        print(f"{model_name:8} - RMSE: {rmse:6.2f}, MAE: {mae:6.2f}, R²: {r2:6.3f}")
-    # Analysis on full dataset
-    print("\n=== Highest Ownership (Full Data) ===")
-    print(comparison_full.sort_values('Actual_Exposure', ascending=False).head(10)[
-        ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
-    ])
-    print("\n=== Highest Predicted Ownership (Full Data) ===")
-    print(comparison_full.sort_values('Combo', ascending=False).head(10)[
-        ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
-    ])
-    print("\n=== Worst Predictions (Full Data) ===")
-    print(comparison_full.nlargest(10, 'Abs_Error')[
-        ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
-    ])
-    print("\n=== Best Predictions (Full Data) ===")
-    print(comparison_full.nsmallest(10, 'Abs_Error')[
-        ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
-    ])
-    # Prepare the current data with the same feature engineering
-    current_projections['Actual'] = current_projections['Median']  # Rename to match training
-    current_projections['value'] = current_projections['Actual'] / (current_projections['Salary'] / 1000)
-    current_projections['value_adv'] = current_projections['value'] - current_projections['value'].mean()
-    current_projections['actual_adv'] = current_projections['Actual'] - current_projections['Actual'].mean()
-    # Create the same engineered features
-    # Assuming all rows are from the same contest (current slate)
-    current_projections['contest_size'] = len(current_projections)  # All players in current slate
-    # Create value_play feature (same logic as training)
-    current_projections['value_play'] = np.where(
-        (current_projections['Salary'] <= 4000) &
-        (current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 6.0),
-        1, 0
-    )
-    current_projections['value_density'] = current_projections['value_play'].sum() / current_projections['Player'].count()
-    current_projections['base_ownership'] = 800.0 / current_projections['contest_size']
-    current_projections['strong_play'] = np.where((current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 6.0), 1, 0)
-    current_projections['punt_play'] = np.where((current_projections['Salary'] < 3500) & (current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 5.0), 1, 0)
-    current_projections['ownership_share'] = current_projections['Own'].sum() / current_projections['Player'].count() * 800
-    # Prepare features in the same order as training
-    X_current = current_projections[feature_cols]
-    # Make predictions with all your models
-    current_projections['XGB'] = np.clip(xgb_model.predict(X_current), 0, 100)
-    current_projections['LGB'] = np.clip(lgb_model.predict(X_current), 0, 100) * 100
-    current_projections['KNN'] = np.clip(knn_model.predict(X_current), 0, 100)
-    # Create combo prediction
-    current_projections['Combo'] = (
-        (current_projections['XGB'] * .30) +
-        (current_projections['LGB'] * .30) +
-        (current_projections['KNN'] * .40)
-    )
-    current_projections['Combo'] = np.where((current_projections['value'] < 5.0) & (current_projections['Salary'] < 9000), current_projections['Combo'] * .75, current_projections['Combo'])
-    current_projections['Combo'] = np.where((current_projections['Median'] < 18.0), current_projections['Combo'] * .33, current_projections['Combo'])
-    current_projections['Combo'] = np.where((current_projections['Salary'] > 5000) & (current_projections['value'] < 4.5), 1, current_projections['Combo'])
-    current_projections['Combo'] = np.where(current_projections['value'] > 6.0, current_projections['Combo'] * ((current_projections['value'] / 6.0)), current_projections['Combo'])
-    current_projections['Combo'] = np.where((current_projections['Salary'] > 9000), current_projections['Combo'] * .75, current_projections['Combo'])
-    current_projections['Combo'] = np.where((current_projections['Salary'] > 9000) & (current_projections['Combo'] < current_projections['value']), current_projections['value'], current_projections['Combo'])
-    current_projections['Combo'] = np.where((current_projections['Median'] > 20.0) & (current_projections['Salary'] < 3500), current_projections['Combo'] * (10 - current_projections['strong_play'].sum()).clip(0, 3), current_projections['Combo'])
-    current_projections['Combo'] = np.where(current_projections['Position'].str.contains('/'), current_projections['Combo'] * 1.25, current_projections['Combo'] * .75)
-    power_scale = 1.10
-    combo_powered = current_projections['Combo'] ** power_scale
-    norm_var = 800.0 / combo_powered.sum()
-    current_projections['Combo_powered'] = combo_powered * norm_var
-    # Display predictions sorted by predicted ownership
-    print("\n=== Current Slate - Predicted Ownership ===")
-    print(f'the strong_play count is {current_projections['strong_play'].sum()}')
-    display_cols = ['Player', 'Position', 'Salary', 'Median', 'value', 'Own', 'XGB', 'LGB', 'KNN', 'Combo', 'Combo_powered']
-    print(f'sum of Own is {current_projections['Own'].sum()} while sum of combo is {current_projections['Combo'].sum()} while combo_powered is {current_projections['Combo_powered'].sum()}')
-    print(f'sum of position C is {current_projections[current_projections['Position'] == 'C']['Combo_powered'].sum()}')
-    print(current_projections.sort_values('Combo_powered', ascending=False)[display_cols].head(20))
-    print(current_projections[current_projections['Position'] == 'C'].sort_values('Combo_powered', ascending=False)[display_cols].head(20))


100
101	knn_model.fit(X_train, y_train)
102
103	+ __all__ = ['xgb_model', 'lgb_model', 'knn_model']

src/sports/NHL_own_regress.py CHANGED Viewed

@@ -96,125 +96,4 @@ knn_model = KNeighborsRegressor(
 knn_model.fit(X_train, y_train)
-__all__ = ['xgb_model', 'lgb_model', 'knn_model']
-if __name__ == '__main__':
-    X_full = df_clean[feature_cols]
-    y_full = df_clean['Exposure Overall']
-    # Get predictions from all your models on the full dataset
-    y_pred_xgb_full = np.clip(xgb_model.predict(X_full), 0, 100)
-    y_pred_lgb_full = np.clip(lgb_model.predict(X_full), 0, 100) * 100
-    y_pred_knn_full = np.clip(knn_model.predict(X_full), 0, 100)
-    # Create combo prediction
-    y_pred_combo_full = (y_pred_xgb_full + y_pred_lgb_full + y_pred_knn_full) / 3
-    # Create full comparison DataFrame
-    comparison_full = pd.DataFrame({
-        'Actual_Exposure': y_full.values,
-        'XGB': y_pred_xgb_full,
-        'LGB': y_pred_lgb_full,
-        'KNN': y_pred_knn_full,
-        'Combo': y_pred_combo_full,
-        'Abs_Error': np.abs(y_full.values - y_pred_combo_full)
-    })
-    # Add back the full features for context
-    comparison_full = pd.concat([
-        X_full.reset_index(drop=True),
-        comparison_full.reset_index(drop=True)
-    ], axis=1)
-    # You can also add the original columns from df_clean for more context
-    comparison_full['Player'] = df_clean['Player'].values
-    comparison_full['Contest_Date'] = df_clean['Contest Date'].values
-    comparison_full['Pos'] = df_clean['Pos'].values
-    # Overall performance metrics on full dataset
-    print("\n=== Full Dataset Performance ===")
-    for model_name, predictions in [('XGB', y_pred_xgb_full), ('LGB', y_pred_lgb_full), ('KNN', y_pred_knn_full), ('Combo', y_pred_combo_full)]:
-        rmse = np.sqrt(mean_squared_error(y_full, predictions))
-        mae = mean_absolute_error(y_full, predictions)
-        r2 = r2_score(y_full, predictions)
-        print(f"{model_name:8} - RMSE: {rmse:6.2f}, MAE: {mae:6.2f}, R²: {r2:6.3f}")
-    # Analysis on full dataset
-    print("\n=== Highest Ownership (Full Data) ===")
-    print(comparison_full.sort_values('Actual_Exposure', ascending=False).head(10)[
-        ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
-    ])
-    print("\n=== Highest Predicted Ownership (Full Data) ===")
-    print(comparison_full.sort_values('Combo', ascending=False).head(10)[
-        ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
-    ])
-    print("\n=== Worst Predictions (Full Data) ===")
-    print(comparison_full.nlargest(10, 'Abs_Error')[
-        ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
-    ])
-    print("\n=== Best Predictions (Full Data) ===")
-    print(comparison_full.nsmallest(10, 'Abs_Error')[
-        ['Player', 'Pos', 'Salary', 'Actual', 'value', 'Actual_Exposure', 'XGB', 'LGB', 'KNN', 'Combo', 'Abs_Error']
-    ])
-    # Prepare the current data with the same feature engineering
-    current_projections['Actual'] = current_projections['Median']  # Rename to match training
-    current_projections['value'] = current_projections['Actual'] / (current_projections['Salary'] / 1000)
-    current_projections['value_adv'] = current_projections['value'] - current_projections['value'].mean()
-    current_projections['actual_adv'] = current_projections['Actual'] - current_projections['Actual'].mean()
-    # Create the same engineered features
-    # Assuming all rows are from the same contest (current slate)
-    current_projections['contest_size'] = len(current_projections)  # All players in current slate
-    # Create value_play feature (same logic as training)
-    current_projections['value_play'] = np.where(
-        (current_projections['Salary'] <= 4500) &
-        (current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 2.0),
-        1, 0
-    )
-    current_projections['value_density'] = current_projections['value_play'].sum() / current_projections['Player'].count()
-    current_projections['base_ownership'] = 900.0 / current_projections['contest_size']
-    current_projections['strong_play'] = np.where((current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 2.0), 1, 0)
-    current_projections['punt_play'] = np.where((current_projections['Salary'] < 3500) & (current_projections['Actual'] / (current_projections['Salary'] / 1000) >= 2.0), 1, 0)
-    current_projections['ownership_share'] = current_projections['Own'].sum() / current_projections['Player'].count() * 900
-    # Prepare features in the same order as training
-    X_current = current_projections[feature_cols]
-    # Make predictions with all your models
-    current_projections['XGB'] = np.clip(xgb_model.predict(X_current), 0, 100)
-    current_projections['LGB'] = np.clip(lgb_model.predict(X_current), 0, 100) * 100
-    current_projections['KNN'] = np.clip(knn_model.predict(X_current), 0, 100)
-    # Create combo prediction
-    current_projections['Combo'] = (
-        (current_projections['XGB'] * .30) +
-        (current_projections['LGB'] * .30) +
-        (current_projections['KNN'] * .40)
-    )
-    current_projections['Combo'] = np.where((current_projections['value'] < 1.5) & (current_projections['Salary'] < 7500), current_projections['Combo'] * .75, current_projections['Combo'])
-    current_projections['Combo'] = np.where((current_projections['Salary'] > 5000) & (current_projections['value'] < 1.5), 1, current_projections['Combo'])
-    current_projections['Combo'] = np.where(current_projections['value'] > 2.0, current_projections['Combo'] * (2 + (current_projections['value'] - 2.0)), current_projections['Combo'])
-    current_projections['Combo'] = np.where((current_projections['Salary'] > 8500), current_projections['Combo'] * 1.5, current_projections['Combo'])
-    power_scale = 1.50
-    combo_powered = current_projections['Combo'] ** power_scale
-    norm_var = 900.0 / combo_powered.sum()
-    current_projections['Combo_powered'] = combo_powered * norm_var
-    # Display predictions sorted by predicted ownership
-    print("\n=== Current Slate - Predicted Ownership ===")
-    display_cols = ['Player', 'Position', 'Salary', 'Median', 'value', 'Own', 'XGB', 'LGB', 'KNN', 'Combo', 'Combo_powered']
-    print(f'sum of Own is {current_projections['Own'].sum()} while sum of combo is {current_projections['Combo'].sum()} while combo_powered is {current_projections['Combo_powered'].sum()}')
-    print(f'sum of position C is {current_projections[current_projections['Position'] == 'C']['Combo_powered'].sum()}')
-    print(current_projections.sort_values('Combo_powered', ascending=False)[display_cols].head(20))
-    print(current_projections[current_projections['Position'] == 'C'].sort_values('Combo_powered', ascending=False)[display_cols].head(20))


96
97	knn_model.fit(X_train, y_train)
98
99	+ __all__ = ['xgb_model', 'lgb_model', 'knn_model']