|
|
|
|
|
""" |
|
|
Hugging Face Compatible NBA Performance Predictor |
|
|
Description: Wrapper for NBA XGBoost model to work with Hugging Face Hub |
|
|
""" |
|
|
|
|
|
import os |
|
|
import json |
|
|
import numpy as np |
|
|
import pandas as pd |
|
|
import xgboost as xgb |
|
|
import joblib |
|
|
from typing import Dict, List, Union, Any |
|
|
from huggingface_hub import PyTorchModelHubMixin |
|
|
|
|
|
|
|
|
class NBAPerformancePredictorHF(PyTorchModelHubMixin):
    """
    Hugging Face compatible NBA Performance Predictor using XGBoost.

    The wrapper holds an ``xgb.XGBRegressor`` plus two optional companions
    read from the same directory: a JSON metadata file (feature names, target
    column, model type) and a joblib-serialized scaler.
    """

    # Artifact file names shared by load_model / _save_pretrained /
    # _from_pretrained so the three stay in agreement.
    _MODEL_FILE = "xgboost_model.json"
    _METADATA_FILE = "model_metadata.json"
    _SCALER_FILE = "scaler.joblib"

    def __init__(self, model_dir: Union[str, None] = None, **kwargs):
        """
        Initialize the Hugging Face compatible model.

        Args:
            model_dir (str): Directory containing the saved model files. When
                it is omitted or does not exist, the instance is created
                empty and ``load_model`` must be called before ``predict``.
            **kwargs: Accepted for call-site compatibility and ignored.
        """
        super().__init__()
        self.model = None
        self.scaler = None
        self.feature_names = None
        self.target_column = 'PTS'
        self.model_metadata = {}

        if model_dir and os.path.exists(model_dir):
            self.load_model(model_dir)

    def load_model(self, model_dir: str):
        """
        Load the saved XGBoost model and preprocessing components.

        Each artifact is optional: whatever is present in ``model_dir`` is
        loaded, and attributes for missing files are left untouched.

        Args:
            model_dir (str): Directory containing the saved model files
        """
        metadata_path = os.path.join(model_dir, self._METADATA_FILE)
        if os.path.exists(metadata_path):
            with open(metadata_path, 'r', encoding='utf-8') as f:
                self.model_metadata = json.load(f)

            self.feature_names = self.model_metadata.get('feature_names', [])
            self.target_column = self.model_metadata.get('target_column', 'PTS')

        model_path = os.path.join(model_dir, self._MODEL_FILE)
        model_found = os.path.exists(model_path)
        if model_found:
            self.model = xgb.XGBRegressor()
            self.model.load_model(model_path)

        scaler_path = os.path.join(model_dir, self._SCALER_FILE)
        if os.path.exists(scaler_path):
            # SECURITY: joblib.load unpickles the file and can therefore run
            # arbitrary code. Only load scalers from sources you trust.
            self.scaler = joblib.load(scaler_path)

        # Report success only when a model file was actually read; otherwise
        # the message would hide the fact that predict() cannot work yet.
        if model_found:
            print(f"Model loaded successfully from {model_dir}/")
        else:
            print(f"Warning: {self._MODEL_FILE} not found in {model_dir}/; no model was loaded")

    def _resolve_feature_names(self) -> List[str]:
        """
        Return the ordered feature names to feed the model.

        Uses the names read from the metadata file when available, otherwise
        falls back to the names stored on the fitted estimator.

        Raises:
            ValueError: If neither source provides any feature name.
        """
        if self.feature_names:
            return list(self.feature_names)

        # getattr with a default also covers estimators that expose no names.
        estimator_names = getattr(self.model, "feature_names_in_", None)
        if estimator_names is not None and len(estimator_names) > 0:
            return list(estimator_names)

        raise ValueError(
            "Feature names are unknown: no 'feature_names' in the model "
            "metadata and none stored in the model itself."
        )

    def predict(self, player_stats: Union[Dict, List[Dict]]) -> Union[float, List[float]]:
        """
        Make predictions for NBA player performance.

        Features missing from an input dictionary are filled with 0 and keys
        that are not model features are ignored.

        Args:
            player_stats: Dictionary or list of dictionaries with player statistics

        Returns:
            Predicted points per game (float for a single dictionary,
            list of floats for a list; an empty list for an empty list)

        Raises:
            ValueError: If no model is loaded or the feature names are unknown.
        """
        if self.model is None:
            raise ValueError("Model not loaded! Please load a trained model first.")

        single_input = isinstance(player_stats, dict)
        rows = [player_stats] if single_input else list(player_stats)
        if not rows:
            return []

        feature_names = self._resolve_feature_names()

        # One frame for the whole batch, with columns in the model's order.
        # Defaults are applied per row so a feature absent from one row
        # becomes 0 rather than NaN.
        input_df = pd.DataFrame(
            [{name: stats.get(name, 0) for name in feature_names} for stats in rows],
            columns=feature_names,
        )

        # NOTE(review): self.scaler is loaded but not applied here; confirm
        # against the training pipeline whether inputs should be scaled.
        predictions = [float(value) for value in self.model.predict(input_df)]

        return predictions[0] if single_input else predictions

    def predict_batch(self, player_stats_list: List[Dict]) -> List[Dict]:
        """
        Make batch predictions with detailed output.

        Args:
            player_stats_list: List of player statistics dictionaries

        Returns:
            List of prediction results with metadata. Each entry holds the
            input position, the prediction rounded to 2 decimals, the player
            name (``Player`` key, or ``Player_<i>``), a ``confidence`` label
            and the count of non-zero input values.
        """
        predictions = self.predict(player_stats_list)

        return [
            {
                'input_id': i,
                'predicted_points': round(pred, 2),
                'player_name': stats.get('Player', f'Player_{i}'),
                # NOTE: this label is only a sign check on the prediction,
                # not a statistical confidence estimate.
                'confidence': 'high' if pred > 0 else 'low',
                'input_features': sum(1 for value in stats.values() if value != 0),
            }
            for i, (stats, pred) in enumerate(zip(player_stats_list, predictions))
        ]

    def get_feature_info(self) -> Dict:
        """
        Get information about the features used by the model.

        Returns:
            Dictionary with feature information. ``feature_names`` is
            truncated to the first 20 names; ``total_features`` is the full
            count.
        """
        return {
            'total_features': len(self.feature_names) if self.feature_names else 0,
            'feature_names': self.feature_names[:20] if self.feature_names else [],
            'target_variable': self.target_column,
            'model_type': self.model_metadata.get('model_type', 'XGBRegressor'),
            'required_features': [
                'Age', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG_1',
                'Pos_encoded', 'Team_encoded', 'Age_category_encoded'
            ]
        }

    def create_example_input(self) -> Dict:
        """
        Create an example input for testing the model.

        Returns:
            Dictionary with example player statistics
        """
        return {
            'Age': 27,
            'G': 75,
            'GS': 70,
            'MP': 35.0,
            'FG': 8.5,
            'FGA': 18.0,
            'FG_1': 0.472,
            'Pos_encoded': 2,
            'Team_encoded': 15,
            'Age_category_encoded': 1,
            'PTS_lag_1': 22.5,
            'PTS_lag_2': 21.0,
            'TRB_lag_1': 7.2,
            'AST_lag_1': 4.8,
            'Points_per_minute_lag_1': 0.64,
            'Efficiency_lag_1': 1.0
        }

    def _save_pretrained(self, save_directory: str, **kwargs):
        """
        Save the model for Hugging Face Hub (required by PyTorchModelHubMixin).

        Writes whichever of the model, metadata and scaler are present.

        Args:
            save_directory: Target directory (str or path-like); created if
                it does not exist.
            **kwargs: Accepted for interface compatibility and ignored.
        """
        save_directory = os.fspath(save_directory)
        os.makedirs(save_directory, exist_ok=True)

        # Explicit None checks: an estimator or scaler should never be
        # skipped just because it happens to evaluate as falsy.
        if self.model is not None:
            self.model.save_model(os.path.join(save_directory, self._MODEL_FILE))

        if self.model_metadata:
            metadata_path = os.path.join(save_directory, self._METADATA_FILE)
            with open(metadata_path, 'w', encoding='utf-8') as f:
                json.dump(self.model_metadata, f, indent=2)

        if self.scaler is not None:
            joblib.dump(self.scaler, os.path.join(save_directory, self._SCALER_FILE))

        print(f"Model saved to {save_directory}")

    @classmethod
    def _from_pretrained(cls, *, model_id: str, revision: Union[str, None] = None,
                         cache_dir: Union[str, None] = None,
                         force_download: bool = False, proxies: Union[Dict, None] = None,
                         resume_download: Union[bool, None] = None,
                         local_files_only: bool = False,
                         token: Union[str, bool, None] = None, **model_kwargs):
        """
        Load the model from Hugging Face Hub (required by PyTorchModelHubMixin).

        Args:
            model_id: Local directory holding the model files, or a Hub repo id.
            revision: Repo revision to download (Hub repos only).
            cache_dir: Download cache location (Hub repos only).
            force_download: Re-download even if the files are cached.
            proxies: Optional proxy mapping, forwarded only when provided.
            resume_download: Accepted for signature compatibility; not forwarded.
            local_files_only: Use only files already present in the cache.
            token: Authentication token for private repos.
            **model_kwargs: Extra keyword arguments passed to the constructor.

        Returns:
            A new instance loaded from the resolved directory.
        """
        # The directory resolved below must win; a stray `model_dir` entry
        # (e.g. replayed from a saved config) would otherwise raise a
        # duplicate-keyword TypeError in the constructor call.
        model_kwargs.pop("model_dir", None)

        if os.path.isdir(model_id):
            model_dir = model_id
        else:
            # Imported here so the rest of the class does not depend on it.
            from huggingface_hub import snapshot_download

            download_kwargs = {
                "repo_id": model_id,
                "revision": revision,
                "cache_dir": cache_dir,
                "force_download": force_download,
                "local_files_only": local_files_only,
                "token": token,
                # Only the artifacts load_model reads are needed.
                "allow_patterns": [cls._MODEL_FILE, cls._METADATA_FILE, cls._SCALER_FILE],
            }
            if proxies is not None:
                download_kwargs["proxies"] = proxies
            model_dir = snapshot_download(**download_kwargs)

        return cls(model_dir=model_dir, **model_kwargs)
|
|
|
|
|
|
|
|
def create_model_card(model_dir: str = "nba_model", output_path: str = "README.md"):
    """
    Create a model card for Hugging Face Hub.

    The card text is a fixed Markdown template; it is written to
    ``output_path`` as UTF-8 because it contains non-ASCII characters.

    Args:
        model_dir (str): Directory containing the model. Currently unused;
            kept so existing callers keep working.
        output_path (str): Path to save the model card
    """
    model_card_content = """
# NBA Player Performance Predictor

## Model Description

This model predicts NBA player points per game (PPG) using XGBoost regression with time-series features. The model uses historical player statistics, lag features, and engineered metrics to make predictions.

## Model Details

- **Model Type**: XGBoost Regressor
- **Task**: Regression (Predicting NBA player points per game)
- **Framework**: scikit-learn, XGBoost
- **Performance**: RMSE ~3-5 points per game, R² ~0.6-0.8

## Features

The model uses various features including:
- Basic stats: Age, Games, Minutes Played, Field Goals, etc.
- Lag features: Previous season performance metrics
- Rolling averages: 2-3 year performance averages
- Efficiency metrics: Points per minute, overall efficiency
- Categorical encodings: Position, Team, Age category

## Usage

```python
from huggingface_model import NBAPerformancePredictorHF

# Load the model
model = NBAPerformancePredictorHF("path/to/model")

# Example prediction
player_stats = {
    'Age': 27,
    'G': 75,
    'GS': 70,
    'MP': 35.0,
    'FG': 8.5,
    'FGA': 18.0,
    'FG_1': 0.472,
    'Pos_encoded': 2,
    'Team_encoded': 15,
    'Age_category_encoded': 1,
    'PTS_lag_1': 22.5,
    'PTS_lag_2': 21.0,
    'TRB_lag_1': 7.2,
    'AST_lag_1': 4.8
}

predicted_points = model.predict(player_stats)
print(f"Predicted PPG: {predicted_points:.2f}")
```

## Training Data

The model was trained on NBA player statistics from multiple seasons, including:
- Regular season statistics
- Playoff performance data
- Historical player performance trends

## Limitations

- Requires historical data (lag features) for accurate predictions
- Performance may vary for rookie players or players with limited history
- Model is trained on specific NBA eras and may need retraining for different time periods

## Ethical Considerations

This model is for educational and analytical purposes. It should not be used for:
- Player salary negotiations
- Draft decisions without additional context
- Any form of discrimination or bias

## Citation

```
@misc{nba_performance_predictor,
  title={NBA Player Performance Predictor using XGBoost},
  year={2024},
  publisher={Hugging Face},
  howpublished={\\url{https://huggingface.co/your-username/nba-performance-predictor}}
}
```
"""

    # Explicit UTF-8: the card contains characters such as "²", and the
    # platform default encoding is not guaranteed to handle them.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(model_card_content)

    print(f"Model card created: {output_path}")
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Script entry point: run one example prediction when a trained model
    # directory is available, otherwise tell the user how to produce one.
    print("NBA Performance Predictor - Hugging Face Compatible Version")

    model_dir = "nba_model"
    if not os.path.exists(model_dir):
        print(f"Model directory '{model_dir}' not found. Train the model first using nba_xgboost_predictor.py")
    else:
        model = NBAPerformancePredictorHF(model_dir)

        # Predict on the built-in sample input.
        prediction = model.predict(model.create_example_input())
        print(f"Example prediction: {prediction:.2f} PPG")

        # Report how many features the loaded model expects.
        feature_info = model.get_feature_info()
        print(f"Model uses {feature_info['total_features']} features")