Spaces:

arvindrangarajan
/

nba-performance-predictor

Sleeping

App Files Files Community

nba-performance-predictor / huggingface_model.py

arvindrangarajan

Upload folder using huggingface_hub

060d2a9 verified 6 months ago

raw

history blame contribute delete

10.8 kB

	#!/usr/bin/env python3
	"""
	Hugging Face Compatible NBA Performance Predictor
	Description: Wrapper for NBA XGBoost model to work with Hugging Face Hub
	"""

	import os
	import json
	import numpy as np
	import pandas as pd
	import xgboost as xgb
	import joblib
	from typing import Dict, List, Union, Any
	from huggingface_hub import PyTorchModelHubMixin


	class NBAPerformancePredictorHF(PyTorchModelHubMixin):
	    """
	    Hugging Face compatible NBA Performance Predictor using XGBoost.

	    Bundles an ``xgb.XGBRegressor``, an optional scaler and a JSON metadata
	    file (feature names, target column, ...) in one directory, and exposes
	    the ``_save_pretrained`` / ``_from_pretrained`` hooks used by the Hub
	    mixin so that directory can be pushed to / pulled from the Hub.
	    """

	    # NOTE(review): the wrapped model is XGBoost, not a torch module; the
	    # framework-agnostic ``ModelHubMixin`` may be a better base - confirm.

	    def __init__(self, model_dir: str = None, **kwargs):
	        """
	        Initialize the Hugging Face compatible model.

	        Args:
	            model_dir (str): Directory containing the saved model files.
	                When omitted, or when the path does not exist, an empty
	                (unloaded) predictor is created.
	            **kwargs: Accepted and ignored, so extra keyword arguments
	                forwarded by the loading hook do not raise.
	        """
	        super().__init__()
	        self.model = None
	        self.scaler = None
	        self.feature_names = None
	        self.target_column = 'PTS'
	        self.model_metadata = {}

	        if model_dir and os.path.exists(model_dir):
	            self.load_model(model_dir)

	    def load_model(self, model_dir: str):
	        """
	        Load the saved XGBoost model and preprocessing components.

	        Each of the three files (``model_metadata.json``,
	        ``xgboost_model.json``, ``scaler.joblib``) is optional; a missing
	        file is skipped and the matching attribute keeps its current value.

	        Args:
	            model_dir (str): Directory containing the saved model files.
	        """
	        # Metadata carries the feature order and the target column name.
	        metadata_path = os.path.join(model_dir, "model_metadata.json")
	        if os.path.exists(metadata_path):
	            with open(metadata_path, 'r', encoding='utf-8') as f:
	                self.model_metadata = json.load(f)

	            self.feature_names = self.model_metadata.get('feature_names', [])
	            self.target_column = self.model_metadata.get('target_column', 'PTS')

	        # The regressor itself, stored in XGBoost's JSON format.
	        model_path = os.path.join(model_dir, "xgboost_model.json")
	        if os.path.exists(model_path):
	            self.model = xgb.XGBRegressor()
	            self.model.load_model(model_path)

	        # SECURITY: joblib.load unpickles arbitrary objects; only point this
	        # at directories / Hub repositories you trust.
	        scaler_path = os.path.join(model_dir, "scaler.joblib")
	        if os.path.exists(scaler_path):
	            self.scaler = joblib.load(scaler_path)

	        # Only claim success when a model was actually loaded.
	        if self.model is None:
	            print(f"Warning: no XGBoost model file found at {model_path}; "
	                  "predictions are unavailable until a model is loaded.")
	        else:
	            print(f"Model loaded successfully from {model_dir}/")

	    def _build_feature_frame(self, rows: List[Dict]) -> pd.DataFrame:
	        """Return a DataFrame with exactly ``self.feature_names`` as columns, filling absent keys with 0."""
	        if not self.feature_names:
	            raise ValueError(
	                "Feature names are not available! Load model_metadata.json "
	                "(with 'feature_names') before predicting."
	            )
	        # Keys missing from a row default to 0; keys that are not training
	        # features (e.g. a player name) are dropped.
	        aligned = [
	            {feature: stats.get(feature, 0) for feature in self.feature_names}
	            for stats in rows
	        ]
	        return pd.DataFrame(aligned, columns=self.feature_names)

	    def predict(self, player_stats: Union[Dict, List[Dict]]) -> Union[float, List[float]]:
	        """
	        Make predictions for NBA player performance.

	        Args:
	            player_stats: Dictionary or list of dictionaries with player
	                statistics. Features missing from a dictionary default to 0.

	        Returns:
	            A float for a single dictionary, otherwise a list of floats
	            (empty when the input list is empty).

	        Raises:
	            ValueError: If no model is loaded, or no feature names are known.
	        """
	        if self.model is None:
	            raise ValueError("Model not loaded! Please load a trained model first.")

	        # Normalize to a list so single and batch inputs share one code path.
	        single_input = isinstance(player_stats, dict)
	        rows = [player_stats] if single_input else list(player_stats)
	        if not rows:
	            return []

	        # NOTE(review): self.scaler is loaded and saved but never applied
	        # here - confirm whether the model was trained on scaled features.
	        input_df = self._build_feature_frame(rows)

	        # One vectorized call instead of one DataFrame + predict per row.
	        predictions = [float(value) for value in self.model.predict(input_df)]

	        return predictions[0] if single_input else predictions

	    def predict_batch(self, player_stats_list: List[Dict]) -> List[Dict]:
	        """
	        Make batch predictions with detailed output.

	        Args:
	            player_stats_list: List of player statistics dictionaries.

	        Returns:
	            One dictionary per input with the keys ``input_id``,
	            ``predicted_points`` (rounded to 2 decimals), ``player_name``
	            (the ``'Player'`` entry, or ``Player_<index>``), ``confidence``
	            and ``input_features`` (number of non-zero input values).
	        """
	        predictions = self.predict(player_stats_list)

	        return [
	            {
	                'input_id': i,
	                'predicted_points': round(pred, 2),
	                'player_name': stats.get('Player', f'Player_{i}'),
	                # Placeholder heuristic only: any positive prediction is 'high'.
	                'confidence': 'high' if pred > 0 else 'low',
	                'input_features': sum(1 for value in stats.values() if value != 0),
	            }
	            for i, (stats, pred) in enumerate(zip(player_stats_list, predictions))
	        ]

	    def get_feature_info(self) -> Dict:
	        """
	        Get information about the features used by the model.

	        Returns:
	            Dictionary with the feature count, the first 20 feature names,
	            the target column, the model type recorded in the metadata
	            (default ``'XGBRegressor'``) and a hard-coded list of required
	            feature names.
	        """
	        names = self.feature_names or []
	        return {
	            'total_features': len(names),
	            'feature_names': names[:20],  # First 20
	            'target_variable': self.target_column,
	            'model_type': self.model_metadata.get('model_type', 'XGBRegressor'),
	            'required_features': [
	                'Age', 'G', 'GS', 'MP', 'FG', 'FGA', 'FG_1',
	                'Pos_encoded', 'Team_encoded', 'Age_category_encoded'
	            ]
	        }

	    def create_example_input(self) -> Dict:
	        """
	        Create an example input for testing the model.

	        Returns:
	            Dictionary with example player statistics.
	        """
	        return {
	            'Age': 27,
	            'G': 75,
	            'GS': 70,
	            'MP': 35.0,
	            'FG': 8.5,
	            'FGA': 18.0,
	            'FG_1': 0.472,
	            'Pos_encoded': 2,  # Forward
	            'Team_encoded': 15,
	            'Age_category_encoded': 1,  # Prime
	            'PTS_lag_1': 22.5,
	            'PTS_lag_2': 21.0,
	            'TRB_lag_1': 7.2,
	            'AST_lag_1': 4.8,
	            'Points_per_minute_lag_1': 0.64,
	            'Efficiency_lag_1': 1.0
	        }

	    def _save_pretrained(self, save_directory: str, **kwargs):
	        """
	        Save the model for Hugging Face Hub (hook called by the Hub mixin).

	        Writes ``xgboost_model.json``, ``model_metadata.json`` and
	        ``scaler.joblib`` into ``save_directory``; each file is written only
	        when the corresponding attribute is set.

	        Args:
	            save_directory (str): Target directory (created if missing).
	        """
	        # Harmless when the mixin already created it; needed for direct calls.
	        os.makedirs(save_directory, exist_ok=True)

	        # Identity checks: do not rely on the truthiness of estimator objects.
	        if self.model is not None:
	            model_path = os.path.join(save_directory, "xgboost_model.json")
	            self.model.save_model(model_path)

	        if self.model_metadata:
	            metadata_path = os.path.join(save_directory, "model_metadata.json")
	            with open(metadata_path, 'w', encoding='utf-8') as f:
	                json.dump(self.model_metadata, f, indent=2)

	        if self.scaler is not None:
	            scaler_path = os.path.join(save_directory, "scaler.joblib")
	            joblib.dump(self.scaler, scaler_path)

	        print(f"Model saved to {save_directory}")

	    @classmethod
	    def _from_pretrained(cls, *, model_id: str, revision: str = None, cache_dir: str = None,
	                         force_download: bool = False, proxies: Dict = None,
	                         resume_download: bool = None, local_files_only: bool = False,
	                         token: Union[str, bool] = None, **model_kwargs):
	        """
	        Load the model from Hugging Face Hub (hook called by the Hub mixin).

	        Args:
	            model_id (str): Local directory, or Hub repository id to download.
	            revision (str): Repository revision to download.
	            cache_dir (str): Download cache location for Hub files.
	            force_download (bool): Re-download even when cached.
	            proxies (Dict): Accepted for signature compatibility; not forwarded.
	            resume_download (bool): Accepted for signature compatibility;
	                not forwarded.
	            local_files_only (bool): Never hit the network; use the cache.
	            token: Authentication token for private repositories.
	            **model_kwargs: Extra keyword arguments passed to the constructor.

	        Returns:
	            A predictor initialized from the resolved model directory.
	        """
	        # The directory is resolved below; a 'model_dir' forwarded via
	        # model_kwargs would otherwise collide with the explicit argument.
	        model_kwargs.pop("model_dir", None)

	        if os.path.isdir(model_id):
	            model_dir = model_id
	        else:
	            # Local import: keeps the file's top-level import block untouched.
	            from huggingface_hub import snapshot_download

	            # cache_dir is only where downloads are cached; the model files
	            # live in the snapshot folder this call returns.
	            model_dir = snapshot_download(
	                repo_id=model_id,
	                revision=revision,
	                cache_dir=cache_dir,
	                force_download=force_download,
	                local_files_only=local_files_only,
	                token=token,
	            )

	        return cls(model_dir=model_dir, **model_kwargs)


	def create_model_card(model_dir: str = "nba_model", output_path: str = "README.md"):
	    """
	    Create a model card for Hugging Face Hub.

	    Writes a fixed Markdown template to ``output_path`` and prints the
	    location. The file is written as UTF-8 because the template contains
	    non-ASCII characters and must not depend on the platform's default
	    encoding.

	    Args:
	        model_dir (str): Directory containing the model. Accepted for
	            interface compatibility; the template does not read from it.
	        output_path (str): Path to save the model card.
	    """
	    model_card_content = """
	# NBA Player Performance Predictor

	## Model Description

	This model predicts NBA player points per game (PPG) using XGBoost regression with time-series features. The model uses historical player statistics, lag features, and engineered metrics to make predictions.

	## Model Details

	- Model Type: XGBoost Regressor
	- Task: Regression (Predicting NBA player points per game)
	- Framework: scikit-learn, XGBoost
	- Performance: RMSE ~3-5 points per game, R² ~0.6-0.8

	## Features

	The model uses various features including:
	- Basic stats: Age, Games, Minutes Played, Field Goals, etc.
	- Lag features: Previous season performance metrics
	- Rolling averages: 2-3 year performance averages
	- Efficiency metrics: Points per minute, overall efficiency
	- Categorical encodings: Position, Team, Age category

	## Usage

	```python
	from huggingface_model import NBAPerformancePredictorHF

	# Load the model
	model = NBAPerformancePredictorHF("path/to/model")

	# Example prediction
	player_stats = {
	'Age': 27,
	'G': 75,
	'GS': 70,
	'MP': 35.0,
	'FG': 8.5,
	'FGA': 18.0,
	'FG_1': 0.472,
	'Pos_encoded': 2,
	'Team_encoded': 15,
	'Age_category_encoded': 1,
	'PTS_lag_1': 22.5,
	'PTS_lag_2': 21.0,
	'TRB_lag_1': 7.2,
	'AST_lag_1': 4.8
	}

	predicted_points = model.predict(player_stats)
	print(f"Predicted PPG: {predicted_points:.2f}")
	```

	## Training Data

	The model was trained on NBA player statistics from multiple seasons, including:
	- Regular season statistics
	- Playoff performance data
	- Historical player performance trends

	## Limitations

	- Requires historical data (lag features) for accurate predictions
	- Performance may vary for rookie players or players with limited history
	- Model is trained on specific NBA eras and may need retraining for different time periods

	## Ethical Considerations

	This model is for educational and analytical purposes. It should not be used for:
	- Player salary negotiations
	- Draft decisions without additional context
	- Any form of discrimination or bias

	## Citation

	```
	@misc{nba_performance_predictor,
	title={NBA Player Performance Predictor using XGBoost},
	year={2024},
	publisher={Hugging Face},
	howpublished={\\url{https://huggingface.co/your-username/nba-performance-predictor}}
	}
	```
	"""

	    # Explicit UTF-8: the template contains "²", and the default encoding
	    # of open() varies by platform/locale.
	    with open(output_path, 'w', encoding='utf-8') as f:
	        f.write(model_card_content)

	    print(f"Model card created: {output_path}")


	if __name__ == "__main__":
	    # Script entry point: announce the tool, then run one sample
	    # prediction if a saved model directory is available.
	    print("NBA Performance Predictor - Hugging Face Compatible Version")

	    # Location where a previously trained model is expected to live.
	    model_dir = "nba_model"

	    if not os.path.exists(model_dir):
	        # Nothing to load: tell the user how to produce the model first.
	        print(f"Model directory '{model_dir}' not found. Train the model first using nba_xgboost_predictor.py")
	    else:
	        model = NBAPerformancePredictorHF(model_dir)

	        # Run the built-in example through the predictor.
	        prediction = model.predict(model.create_example_input())
	        print(f"Example prediction: {prediction:.2f} PPG")

	        # Report how many features the loaded model expects.
	        feature_info = model.get_feature_info()
	        print(f"Model uses {feature_info['total_features']} features")