nba-performance-predictor / huggingface_model.py
arvindrangarajan's picture
Upload folder using huggingface_hub
060d2a9 verified
#!/usr/bin/env python3
"""
Hugging Face Compatible NBA Performance Predictor
Description: Wrapper for NBA XGBoost model to work with Hugging Face Hub
"""
import os
import json
import numpy as np
import pandas as pd
import xgboost as xgb
import joblib
from typing import Dict, List, Union, Any
from huggingface_hub import PyTorchModelHubMixin
class NBAPerformancePredictorHF(PyTorchModelHubMixin):
    """
    Hugging Face compatible NBA Performance Predictor using XGBoost

    Bundles three artifacts that live side by side in one directory:
    the XGBoost model (``xgboost_model.json``), an optional scaler
    (``scaler.joblib``) and optional metadata (``model_metadata.json``)
    holding the feature names and the target column.
    """

    # File names shared by load_model() and _save_pretrained() so both sides
    # always agree on the on-disk layout.
    _MODEL_FILE = "xgboost_model.json"
    _METADATA_FILE = "model_metadata.json"
    _SCALER_FILE = "scaler.joblib"

    def __init__(self, model_dir: Union[str, None] = None, **kwargs):
        """
        Initialize the Hugging Face compatible model

        Args:
            model_dir (str): Directory containing the saved model files.
                When omitted, or when the path does not exist, the instance
                is created empty and ``load_model`` must be called later.
            **kwargs: Accepted and ignored.
        """
        super().__init__()
        self.model = None
        self.scaler = None
        self.feature_names = None
        self.target_column = 'PTS'
        self.model_metadata = {}

        if model_dir and os.path.exists(model_dir):
            self.load_model(model_dir)

    def load_model(self, model_dir: str):
        """
        Load the saved XGBoost model and preprocessing components

        Each artifact is optional: a missing file leaves the corresponding
        attribute unchanged.

        Args:
            model_dir (str): Directory containing the saved model files
        """
        # Load metadata
        metadata_path = os.path.join(model_dir, self._METADATA_FILE)
        if os.path.exists(metadata_path):
            with open(metadata_path, 'r', encoding='utf-8') as f:
                self.model_metadata = json.load(f)
            self.feature_names = self.model_metadata.get('feature_names', [])
            self.target_column = self.model_metadata.get('target_column', 'PTS')

        # Load the XGBoost model
        model_loaded = False
        model_path = os.path.join(model_dir, self._MODEL_FILE)
        if os.path.exists(model_path):
            self.model = xgb.XGBRegressor()
            self.model.load_model(model_path)
            model_loaded = True

        # Load the scaler
        scaler_path = os.path.join(model_dir, self._SCALER_FILE)
        if os.path.exists(scaler_path):
            # NOTE(security): joblib.load unpickles the file and can execute
            # arbitrary code; only load scalers from sources you trust.
            self.scaler = joblib.load(scaler_path)

        # Only claim success when a model file was actually read.
        if model_loaded:
            print(f"Model loaded successfully from {model_dir}/")
        else:
            print(f"Warning: no XGBoost model file found at {model_path}")

    def predict(self, player_stats: Union[Dict, List[Dict]]) -> Union[float, List[float]]:
        """
        Make predictions for NBA player performance

        Features listed in ``self.feature_names`` that are absent from an
        input dictionary default to 0; keys that are not model features are
        ignored.

        Args:
            player_stats: Dictionary or list of dictionaries with player statistics

        Returns:
            Predicted points per game (float or list of floats)

        Raises:
            ValueError: If no model is loaded, or if the feature names are
                unknown (no metadata was loaded).
        """
        if self.model is None:
            raise ValueError("Model not loaded! Please load a trained model first.")

        # Handle single input
        single_input = isinstance(player_stats, dict)
        if single_input:
            player_stats = [player_stats]
        else:
            player_stats = list(player_stats)

        # Nothing to predict; avoid handing an empty frame to the model.
        if not player_stats:
            return []

        if not self.feature_names:
            raise ValueError(
                "Feature names are not available; load model_metadata.json "
                "before calling predict()."
            )

        # Build one frame with the training column order and predict once,
        # instead of constructing a DataFrame and calling the model per row.
        rows = [
            {feature: stats.get(feature, 0) for feature in self.feature_names}
            for stats in player_stats
        ]
        input_df = pd.DataFrame(rows, columns=self.feature_names)

        # NOTE(review): self.scaler is loaded but never applied here —
        # confirm whether the training pipeline scaled its features.
        predictions = [float(value) for value in self.model.predict(input_df)]

        return predictions[0] if single_input else predictions

    def predict_batch(self, player_stats_list: List[Dict]) -> List[Dict]:
        """
        Make batch predictions with detailed output

        Args:
            player_stats_list: List of player statistics dictionaries

        Returns:
            List of prediction results with metadata: the input position,
            the prediction rounded to 2 decimals, the player name (falling
            back to ``Player_<index>``), a coarse confidence label and the
            number of non-zero input values.
        """
        predictions = self.predict(player_stats_list)

        return [
            {
                'input_id': i,
                'predicted_points': round(pred, 2),
                'player_name': stats.get('Player', f'Player_{i}'),
                # Simple confidence measure: any positive prediction is "high"
                'confidence': 'high' if pred > 0 else 'low',
                'input_features': sum(1 for v in stats.values() if v != 0),
            }
            for i, (stats, pred) in enumerate(zip(player_stats_list, predictions))
        ]

    def get_feature_info(self) -> Dict:
        """
        Get information about the features used by the model

        Returns:
            Dictionary with feature information. ``feature_names`` is
            truncated to the first 20 entries; ``required_features`` is a
            fixed list and is not derived from the loaded model.
        """
        return {
            'total_features': len(self.feature_names) if self.feature_names else 0,
            'feature_names': self.feature_names[:20] if self.feature_names else [],  # First 20
            'target_variable': self.target_column,
            'model_type': self.model_metadata.get('model_type', 'XGBRegressor'),
            'required_features': [
                'Age', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG_1',
                'Pos_encoded', 'Team_encoded', 'Age_category_encoded'
            ]
        }

    def create_example_input(self) -> Dict:
        """
        Create an example input for testing the model

        Returns:
            Dictionary with example player statistics
        """
        return {
            'Age': 27,
            'G': 75,
            'GS': 70,
            'MP': 35.0,
            'FG': 8.5,
            'FGA': 18.0,
            'FG_1': 0.472,
            'Pos_encoded': 2,  # Forward
            'Team_encoded': 15,
            'Age_category_encoded': 1,  # Prime
            'PTS_lag_1': 22.5,
            'PTS_lag_2': 21.0,
            'TRB_lag_1': 7.2,
            'AST_lag_1': 4.8,
            'Points_per_minute_lag_1': 0.64,
            'Efficiency_lag_1': 1.0
        }

    def _save_pretrained(self, save_directory: str, **kwargs):
        """
        Save the model for Hugging Face Hub (required by PyTorchModelHubMixin)

        Writes only the artifacts that are present on the instance.

        Args:
            save_directory (str): Target directory; created if missing.
            **kwargs: Accepted and ignored.
        """
        # Allow direct calls with a directory that does not exist yet.
        os.makedirs(save_directory, exist_ok=True)

        # Save the XGBoost model
        if self.model is not None:
            self.model.save_model(os.path.join(save_directory, self._MODEL_FILE))

        # Save preprocessing components and metadata
        if self.model_metadata:
            metadata_path = os.path.join(save_directory, self._METADATA_FILE)
            with open(metadata_path, 'w', encoding='utf-8') as f:
                json.dump(self.model_metadata, f, indent=2)

        # Save the scaler
        if self.scaler is not None:
            joblib.dump(self.scaler, os.path.join(save_directory, self._SCALER_FILE))

        print(f"Model saved to {save_directory}")

    @classmethod
    def _from_pretrained(cls, *, model_id: str, revision: str, cache_dir: str,
                         force_download: bool, proxies: Union[Dict, None] = None,
                         resume_download: Union[bool, None] = None,
                         local_files_only: bool, token: str, **model_kwargs):
        """
        Load the model from Hugging Face Hub (required by PyTorchModelHubMixin)

        Args:
            model_id (str): Local directory holding the model files, or a
                Hub repository id to download them from.
            revision (str): Repository revision forwarded to the download.
            cache_dir (str): Download cache location forwarded to the download.
            force_download (bool): Forwarded to the download.
            proxies (Dict): Accepted for interface compatibility; not used.
            resume_download (bool): Accepted for interface compatibility; not used.
            local_files_only (bool): Forwarded to the download.
            token (str): Access token forwarded to the download.
            **model_kwargs: Extra keyword arguments passed to the constructor.

        Returns:
            A new instance initialised from the resolved model directory.
        """
        # Local import: the file-level import only brings in the mixin.
        from huggingface_hub import snapshot_download

        # The directory is resolved below; drop any `model_dir` that arrives
        # through model_kwargs so the constructor does not get it twice.
        # NOTE(review): presumably this can come from a stored config — confirm.
        model_kwargs.pop("model_dir", None)

        if os.path.isdir(model_id):
            model_dir = model_id
        else:
            # cache_dir is only where downloads are cached; the usable
            # snapshot directory is what snapshot_download returns.
            model_dir = snapshot_download(
                repo_id=model_id,
                revision=revision,
                cache_dir=cache_dir,
                force_download=force_download,
                local_files_only=local_files_only,
                token=token,
                allow_patterns=[cls._MODEL_FILE, cls._METADATA_FILE, cls._SCALER_FILE],
            )

        return cls(model_dir=model_dir, **model_kwargs)
def create_model_card(model_dir: str = "nba_model", output_path: str = "README.md"):
    """
    Create a model card for Hugging Face Hub

    Writes a fixed Markdown template to ``output_path`` (overwriting any
    existing file) and prints a confirmation line.

    Args:
        model_dir (str): Directory containing the model. Currently unused by
            the template; kept so existing callers keep working.
        output_path (str): Path to save the model card
    """
    model_card_content = """
# NBA Player Performance Predictor
## Model Description
This model predicts NBA player points per game (PPG) using XGBoost regression with time-series features. The model uses historical player statistics, lag features, and engineered metrics to make predictions.
## Model Details
- **Model Type**: XGBoost Regressor
- **Task**: Regression (Predicting NBA player points per game)
- **Framework**: scikit-learn, XGBoost
- **Performance**: RMSE ~3-5 points per game, R² ~0.6-0.8
## Features
The model uses various features including:
- Basic stats: Age, Games, Minutes Played, Field Goals, etc.
- Lag features: Previous season performance metrics
- Rolling averages: 2-3 year performance averages
- Efficiency metrics: Points per minute, overall efficiency
- Categorical encodings: Position, Team, Age category
## Usage
```python
from huggingface_model import NBAPerformancePredictorHF
# Load the model
model = NBAPerformancePredictorHF("path/to/model")
# Example prediction
player_stats = {
'Age': 27,
'G': 75,
'GS': 70,
'MP': 35.0,
'FG': 8.5,
'FGA': 18.0,
'FG_1': 0.472,
'Pos_encoded': 2,
'Team_encoded': 15,
'Age_category_encoded': 1,
'PTS_lag_1': 22.5,
'PTS_lag_2': 21.0,
'TRB_lag_1': 7.2,
'AST_lag_1': 4.8
}
predicted_points = model.predict(player_stats)
print(f"Predicted PPG: {predicted_points:.2f}")
```
## Training Data
The model was trained on NBA player statistics from multiple seasons, including:
- Regular season statistics
- Playoff performance data
- Historical player performance trends
## Limitations
- Requires historical data (lag features) for accurate predictions
- Performance may vary for rookie players or players with limited history
- Model is trained on specific NBA eras and may need retraining for different time periods
## Ethical Considerations
This model is for educational and analytical purposes. It should not be used for:
- Player salary negotiations
- Draft decisions without additional context
- Any form of discrimination or bias
## Citation
```
@misc{nba_performance_predictor,
title={NBA Player Performance Predictor using XGBoost},
year={2024},
publisher={Hugging Face},
howpublished={\\url{https://huggingface.co/your-username/nba-performance-predictor}}
}
```
"""
    # The template contains non-ASCII text ("R²"); write UTF-8 explicitly so
    # the result does not depend on the platform's default encoding.
    with open(output_path, 'w', encoding='utf-8') as f:
        f.write(model_card_content)
    print(f"Model card created: {output_path}")
if __name__ == "__main__":
    # Demo entry point: load a previously saved model, run one sample
    # prediction and report how many features the model uses.
    print("NBA Performance Predictor - Hugging Face Compatible Version")

    # Directory expected to hold an already trained and saved model
    model_dir = "nba_model"

    if not os.path.exists(model_dir):
        # Nothing to demo without the saved artifacts.
        print(f"Model directory '{model_dir}' not found. Train the model first using nba_xgboost_predictor.py")
    else:
        predictor = NBAPerformancePredictorHF(model_dir)

        # Run the built-in sample input through the model
        sample_input = predictor.create_example_input()
        predicted_ppg = predictor.predict(sample_input)
        print(f"Example prediction: {predicted_ppg:.2f} PPG")

        # Summarise the feature set
        info = predictor.get_feature_info()
        print(f"Model uses {info['total_features']} features")