Spaces:

jashdoshi77
/

NBA_PREDICTOR

Running

App Files Files Community

NBA_PREDICTOR / src /visualization.py

jashdoshi77

Initial commit: NBA Sage Predictor for Hugging Face Spaces (with LFS for large files)

c095e08 10 days ago

raw

history blame contribute delete

21.6 kB

	"""
	NBA ML Prediction System - Data Visualization
	==============================================
	Create and save visualizations for analysis and reporting.
	All graphs are saved to the 'graphs' folder.
	"""

	import pandas as pd
	import numpy as np
	import matplotlib.pyplot as plt
	import matplotlib.dates as mdates
	import seaborn as sns
	from pathlib import Path
	from typing import Optional, List, Dict
	from datetime import datetime
	import logging

	from src.config import PROJECT_ROOT, RAW_DATA_DIR, PROCESSED_DATA_DIR

	logger = logging.getLogger(__name__)

	# =============================================================================
	# CONFIGURATION
	# =============================================================================
	# Output folder for rendered charts; created on import if it is missing.
	GRAPHS_DIR = PROJECT_ROOT / "graphs"
	GRAPHS_DIR.mkdir(exist_ok=True)

	# Style settings: dark matplotlib theme plus the palette shared by the plots.
	plt.style.use('dark_background')
	COLORS = dict(
	    primary='#7c3aed',
	    secondary='#00d4ff',
	    success='#10b981',
	    warning='#f59e0b',
	    danger='#ef4444',
	    gradient=['#7c3aed', '#00d4ff', '#f472b6'],
	)

	def save_figure(fig, name: str, dpi: int = 150) -> Path:
	    """Save a figure as a PNG in the graphs folder and close it.

	    Args:
	        fig: Matplotlib figure to write out.
	        name: File name without extension; ".png" is appended.
	        dpi: Resolution forwarded to ``fig.savefig``.

	    Returns:
	        Path of the PNG file inside ``GRAPHS_DIR``.
	    """
	    path = GRAPHS_DIR / f"{name}.png"
	    try:
	        fig.savefig(path, dpi=dpi, bbox_inches='tight',
	                    facecolor='#1a1a2e', edgecolor='none')
	    finally:
	        # Close the figure even when saving fails; otherwise pyplot keeps
	        # it registered and repeated failures accumulate open figures.
	        plt.close(fig)
	    logger.info("Saved graph to %s", path)
	    return path


	# =============================================================================
	# TEAM PERFORMANCE VISUALIZATIONS
	# =============================================================================
	class TeamVisualizer:
	    """Visualization for team-level statistics."""

	    def plot_elo_history(self, elo_history: pd.DataFrame,
	                         team_abbrev: Optional[str] = None) -> Path:
	        """
	        Plot ELO rating history over time.

	        With ``team_abbrev`` a single team's curve is drawn and shaded
	        against the 1500 baseline. Without it, the five teams with the
	        highest most-recent ELO are drawn together.

	        Args:
	            elo_history: DataFrame with columns [date, team, elo]
	            team_abbrev: Team to plot; a falsy value selects the top-five view.

	        Returns:
	            Path of the saved figure, named ``elo_history_<team or 'all'>``.
	        """
	        fig, ax = plt.subplots(figsize=(14, 7))

	        if team_abbrev:
	            data = elo_history[elo_history['team'] == team_abbrev]
	            ax.plot(data['date'], data['elo'], color=COLORS['primary'],
	                    linewidth=2, label=team_abbrev)
	            ax.fill_between(data['date'], 1500, data['elo'], alpha=0.3,
	                            color=COLORS['primary'])
	            title = f"{team_abbrev} ELO Rating History"
	        else:
	            # Rank teams by their latest rating so the chart really shows the
	            # top teams rather than the first five group keys.
	            latest = elo_history.sort_values('date').groupby('team').tail(1)
	            top_teams = latest.sort_values('elo', ascending=False).head(5)['team']
	            for team in top_teams:
	                data = elo_history[elo_history['team'] == team]
	                ax.plot(data['date'], data['elo'], label=team, linewidth=2)
	            title = "ELO Rating History - Top Teams"

	        ax.axhline(y=1500, color='white', linestyle='--', alpha=0.3, label='Average')
	        # Build the legend after the baseline exists so 'Average' is listed.
	        ax.legend(loc='upper left')
	        ax.set_xlabel('Date', fontsize=12)
	        ax.set_ylabel('ELO Rating', fontsize=12)
	        ax.set_title(title, fontsize=16, fontweight='bold')
	        ax.grid(True, alpha=0.2)

	        return save_figure(fig, f"elo_history_{team_abbrev or 'all'}")

	    def plot_team_comparison(self, team_stats: pd.DataFrame,
	                             metrics: Optional[List[str]] = None) -> Path:
	        """
	        Radar chart comparing multiple teams.

	        Each metric is min-max scaled across all rows of ``team_stats``;
	        the first five rows are then drawn as polygons.

	        Args:
	            team_stats: One row per team containing the ``metrics`` columns.
	                A 'TEAM_ABBREVIATION' column is used for labels when present.
	            metrics: Columns to plot; defaults to
	                ['PTS', 'AST', 'REB', 'STL', 'BLK', 'FG_PCT'].

	        Returns:
	            Path of the saved figure ("team_comparison_radar").
	        """
	        if metrics is None:
	            metrics = ['PTS', 'AST', 'REB', 'STL', 'BLK', 'FG_PCT']

	        # Normalize metrics to 0-1 scale
	        normalized = team_stats[metrics].copy()
	        for col in metrics:
	            low = normalized[col].min()
	            span = normalized[col].max() - low
	            # A constant metric has zero span; pin it to 0 instead of
	            # dividing by zero and producing NaN vertices.
	            normalized[col] = (normalized[col] - low) / span if span else 0.0

	        # Create radar chart
	        fig, ax = plt.subplots(figsize=(10, 10), subplot_kw=dict(polar=True))

	        angles = np.linspace(0, 2 * np.pi, len(metrics), endpoint=False).tolist()
	        angles += angles[:1]  # repeat the first angle to close the polygon

	        for i, (idx, row) in enumerate(team_stats.head(5).iterrows()):
	            values = normalized.loc[idx, metrics].tolist()
	            values += values[:1]
	            ax.plot(angles, values, linewidth=2,
	                    label=row.get('TEAM_ABBREVIATION', f'Team {i+1}'))
	            ax.fill(angles, values, alpha=0.1)

	        ax.set_xticks(angles[:-1])
	        ax.set_xticklabels(metrics)
	        ax.legend(loc='upper right', bbox_to_anchor=(1.3, 1))
	        ax.set_title('Team Comparison', fontsize=16, fontweight='bold', pad=20)

	        return save_figure(fig, "team_comparison_radar")

	    def plot_standings(self, standings: pd.DataFrame) -> Path:
	        """
	        Horizontal bar chart of team standings by win percentage.

	        Only the 15 rows with the highest 'W_PCT' are shown.

	        Args:
	            standings: DataFrame with 'TEAM_ABBREVIATION' and 'W_PCT' columns.

	        Returns:
	            Path of the saved figure ("standings").
	        """
	        fig, ax = plt.subplots(figsize=(12, 10))

	        data = standings.sort_values('W_PCT', ascending=True).tail(15)
	        # Bars are ordered worst-to-best; positions 7 and above get the
	        # primary colour, the rest the secondary colour.
	        colors = [COLORS['primary'] if i >= 7 else COLORS['secondary']
	                  for i in range(len(data))]

	        bars = ax.barh(data['TEAM_ABBREVIATION'], data['W_PCT'], color=colors,
	                       edgecolor='white', linewidth=0.5)

	        # Add playoff line
	        ax.axvline(x=0.5, color=COLORS['warning'], linestyle='--', linewidth=2,
	                   label='Playoff Cutoff')

	        ax.set_xlabel('Win Percentage', fontsize=12)
	        ax.set_title('Team Standings', fontsize=16, fontweight='bold')
	        ax.set_xlim(0, 1)
	        ax.legend()
	        ax.grid(axis='x', alpha=0.2)

	        # Add value labels
	        for bar, val in zip(bars, data['W_PCT']):
	            ax.text(val + 0.02, bar.get_y() + bar.get_height() / 2,
	                    f'{val:.1%}', va='center', fontsize=10)

	        return save_figure(fig, "standings")


	# =============================================================================
	# GAME PREDICTION VISUALIZATIONS
	# =============================================================================
	class GameVisualizer:
	    """Visualization for game predictions and analysis."""

	    def plot_prediction_calibration(self, predictions: pd.DataFrame) -> Path:
	        """
	        Calibration curve - how well do probabilities match actual outcomes.

	        Predictions are grouped into ten equal-width probability bins and the
	        mean of 'actual' in each bin is plotted against the bin centre.

	        Args:
	            predictions: DataFrame with 'predicted_proba' and 'actual' columns.

	        Returns:
	            Path of the saved figure ("calibration_curve").
	        """
	        fig, ax = plt.subplots(figsize=(10, 10))

	        # Bin predictions
	        n_bins = 10
	        bins = np.linspace(0, 1, n_bins + 1)
	        bin_centers = (bins[:-1] + bins[1:]) / 2

	        predicted_proba = predictions['predicted_proba']
	        actual = predictions['actual']

	        # digitize is 1-based; the clip folds a probability of exactly 1.0
	        # into the last bin instead of a bin of its own.
	        bin_indices = np.digitize(predicted_proba, bins) - 1
	        bin_indices = np.clip(bin_indices, 0, n_bins - 1)

	        actual_fractions = []
	        for i in range(n_bins):
	            mask = bin_indices == i
	            # Empty bins become NaN so the curve skips them.
	            actual_fractions.append(actual[mask].mean() if mask.sum() > 0 else np.nan)

	        # Perfect calibration line
	        ax.plot([0, 1], [0, 1], 'w--', linewidth=2, label='Perfect Calibration')

	        # Actual calibration
	        ax.plot(bin_centers, actual_fractions, 'o-', color=COLORS['primary'],
	                linewidth=3, markersize=10, label='Model Calibration')

	        ax.set_xlabel('Predicted Probability', fontsize=12)
	        ax.set_ylabel('Actual Win Rate', fontsize=12)
	        ax.set_title('Prediction Calibration Curve', fontsize=16, fontweight='bold')
	        ax.legend()
	        ax.grid(True, alpha=0.2)
	        ax.set_xlim(0, 1)
	        ax.set_ylim(0, 1)

	        return save_figure(fig, "calibration_curve")

	    def plot_accuracy_by_confidence(self, predictions: pd.DataFrame) -> Path:
	        """
	        How does accuracy change with prediction confidence?

	        Confidence is the distance of 'predicted_proba' from 0.5, split into
	        five equal-width bins. The left panel shows accuracy per bin, the
	        right panel the number of predictions per bin.

	        Args:
	            predictions: DataFrame with 'predicted_proba', 'predicted' and
	                'actual' columns. The input is copied, not modified.

	        Returns:
	            Path of the saved figure ("accuracy_by_confidence").
	        """
	        fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(14, 6))

	        # Calculate confidence as distance from 0.5
	        predictions = predictions.copy()
	        predictions['confidence'] = (predictions['predicted_proba'] - 0.5).abs()
	        predictions['correct'] = predictions['predicted'] == predictions['actual']

	        # Bin by confidence
	        level_names = ['Low', 'Med-Low', 'Medium', 'Med-High', 'High']
	        bins = np.linspace(0, 0.5, len(level_names) + 1)
	        # include_lowest keeps coin-flip predictions (confidence exactly 0),
	        # which the default right-closed first interval would drop.
	        predictions['confidence_bin'] = pd.cut(predictions['confidence'], bins,
	                                               include_lowest=True)

	        # observed=False keeps empty bins in the result, so there are always
	        # as many bars as there are fixed tick labels.
	        by_bin = predictions.groupby('confidence_bin', observed=False)
	        accuracy_by_conf = by_bin['correct'].mean()
	        count_by_conf = by_bin.size()

	        # Accuracy plot
	        ax1.bar(range(len(accuracy_by_conf)), accuracy_by_conf.values,
	                color=COLORS['primary'], edgecolor='white')
	        ax1.set_xticks(range(len(accuracy_by_conf)))
	        ax1.set_xticklabels(level_names, rotation=45)
	        ax1.set_ylabel('Accuracy', fontsize=12)
	        ax1.set_title('Accuracy by Confidence Level', fontsize=14, fontweight='bold')
	        ax1.set_ylim(0, 1)
	        ax1.axhline(y=0.5, color='white', linestyle='--', alpha=0.3)

	        # Count plot
	        ax2.bar(range(len(count_by_conf)), count_by_conf.values,
	                color=COLORS['secondary'], edgecolor='white')
	        ax2.set_xticks(range(len(count_by_conf)))
	        ax2.set_xticklabels(level_names, rotation=45)
	        ax2.set_ylabel('Number of Predictions', fontsize=12)
	        ax2.set_title('Prediction Distribution', fontsize=14, fontweight='bold')

	        plt.tight_layout()
	        return save_figure(fig, "accuracy_by_confidence")

	    def plot_feature_importance(self, importance_df: pd.DataFrame, top_n: int = 15) -> Path:
	        """
	        Bar chart of feature importance.

	        Takes the first ``top_n`` rows of ``importance_df`` as given and
	        draws them with the largest importance at the top.

	        Args:
	            importance_df: DataFrame with 'feature' and 'avg_importance' columns.
	            top_n: Number of leading rows to include.

	        Returns:
	            Path of the saved figure ("feature_importance").
	        """
	        fig, ax = plt.subplots(figsize=(12, 8))

	        data = importance_df.head(top_n).sort_values('avg_importance', ascending=True)

	        ax.barh(data['feature'], data['avg_importance'],
	                color=COLORS['primary'], edgecolor='white', linewidth=0.5)

	        ax.set_xlabel('Importance Score', fontsize=12)
	        ax.set_title('Top Features for Game Prediction', fontsize=16, fontweight='bold')
	        ax.grid(axis='x', alpha=0.2)

	        return save_figure(fig, "feature_importance")


	# =============================================================================
	# MVP VISUALIZATIONS
	# =============================================================================
	class MVPVisualizer:
	    """Charts describing the MVP race."""

	    def plot_mvp_race(self, mvp_df: pd.DataFrame) -> Path:
	        """
	        Draw the MVP standings as horizontal bars.

	        Uses the first ten rows of ``mvp_df`` ('PLAYER_NAME', 'mvp_score'),
	        ordered so the highest score is at the top, and saves "mvp_race".
	        """
	        fig, ax = plt.subplots(figsize=(12, 8))

	        leaders = mvp_df.head(10).sort_values('mvp_score', ascending=True)
	        scores = leaders['mvp_score']
	        shades = plt.cm.Purples(np.linspace(0.3, 0.9, len(leaders)))

	        rects = ax.barh(leaders['PLAYER_NAME'], scores, color=shades, edgecolor='white')

	        ax.set_xlabel('MVP Score', fontsize=12)
	        ax.set_title('MVP Race 2024-25', fontsize=16, fontweight='bold')
	        ax.grid(axis='x', alpha=0.2)

	        # Print each score just past the end of its bar.
	        for rect, score in zip(rects, scores):
	            mid_y = rect.get_y() + rect.get_height()/2
	            ax.text(score + 0.5, mid_y, f'{score:.1f}', va='center', fontsize=10)

	        return save_figure(fig, "mvp_race")

	    def plot_mvp_similarity(self, mvp_df: pd.DataFrame) -> Path:
	        """
	        Scatter MVP score against similarity to historical MVPs.

	        Marker size scales with 'PTS' and colour with 'mvp_score'; the first
	        five rows are annotated with 'PLAYER_NAME'. Saves
	        "mvp_similarity_scatter".
	        """
	        fig, ax = plt.subplots(figsize=(10, 8))

	        points = ax.scatter(
	            mvp_df['mvp_similarity'],
	            mvp_df['mvp_score'],
	            s=mvp_df['PTS'] * 10,
	            c=mvp_df['mvp_score'],
	            cmap='Purples',
	            alpha=0.7,
	            edgecolor='white',
	        )

	        # Name the leading candidates next to their markers.
	        for _, player in mvp_df.head(5).iterrows():
	            anchor = (player['mvp_similarity'], player['mvp_score'])
	            ax.annotate(player['PLAYER_NAME'], anchor,
	                        xytext=(10, 10), textcoords='offset points',
	                        fontsize=10, color='white')

	        ax.set_xlabel('Similarity to Historical MVPs', fontsize=12)
	        ax.set_ylabel('MVP Score', fontsize=12)
	        ax.set_title('MVP Score vs Historical Similarity', fontsize=16, fontweight='bold')
	        ax.grid(True, alpha=0.2)

	        colorbar = plt.colorbar(points, ax=ax)
	        colorbar.set_label('MVP Score', fontsize=10)

	        return save_figure(fig, "mvp_similarity_scatter")

	    def plot_stat_comparison(self, mvp_df: pd.DataFrame,
	                             stats: List[str] = None) -> Path:
	        """
	        One bar chart per stat for the first five MVP candidates.

	        ``stats`` defaults to ['PTS', 'REB', 'AST']; each stat gets its own
	        panel in a single row. Saves "mvp_stat_comparison".
	        """
	        if stats is None:
	            stats = ['PTS', 'REB', 'AST']

	        panel_count = len(stats)
	        fig, axes = plt.subplots(1, panel_count, figsize=(5 * panel_count, 6))
	        # subplots returns a bare Axes for a single panel; wrap it for zip().
	        axes = [axes] if panel_count == 1 else axes

	        candidates = mvp_df.head(5)

	        for panel, stat in zip(axes, stats):
	            stat_values = candidates[stat]
	            shades = plt.cm.Purples(np.linspace(0.4, 0.9, len(candidates)))
	            rects = panel.bar(candidates['PLAYER_NAME'], stat_values,
	                              color=shades, edgecolor='white')
	            panel.set_ylabel(stat, fontsize=12)
	            panel.set_title(f'{stat} Comparison', fontsize=14, fontweight='bold')
	            panel.tick_params(axis='x', rotation=45)

	            # Value label centred above each bar.
	            for rect, value in zip(rects, stat_values):
	                center_x = rect.get_x() + rect.get_width()/2
	                panel.text(center_x, rect.get_height() + 0.5,
	                           f'{value:.1f}', ha='center', fontsize=10)

	        plt.tight_layout()
	        return save_figure(fig, "mvp_stat_comparison")


	# =============================================================================
	# CHAMPIONSHIP VISUALIZATIONS
	# =============================================================================
	class ChampionshipVisualizer:
	    """Charts describing championship predictions."""

	    def plot_championship_odds(self, champ_df: pd.DataFrame) -> Path:
	        """
	        Pie chart of championship probabilities.

	        Uses the first eight rows of ``champ_df`` ('TEAM_ABBREVIATION',
	        'champ_probability') and saves "championship_odds_pie".
	        """
	        fig, ax = plt.subplots(figsize=(10, 10))

	        contenders = champ_df.head(8)
	        slice_count = len(contenders)
	        shades = plt.cm.Purples(np.linspace(0.3, 0.9, slice_count))

	        _, team_labels, pct_labels = ax.pie(
	            contenders['champ_probability'],
	            labels=contenders['TEAM_ABBREVIATION'],
	            autopct='%1.1f%%',
	            colors=shades,
	            explode=[0.05] * slice_count,
	            shadow=True,
	            startangle=90
	        )

	        # Team names slightly larger than the percentage text; both white.
	        for label_group, size in ((team_labels, 12), (pct_labels, 10)):
	            for label in label_group:
	                label.set_fontsize(size)
	                label.set_color('white')

	        ax.set_title('Championship Probabilities', fontsize=16, fontweight='bold')

	        return save_figure(fig, "championship_odds_pie")

	    def plot_strength_vs_experience(self, champ_df: pd.DataFrame) -> Path:
	        """
	        Scatter team strength against playoff experience.

	        Marker size and colour both follow 'champ_probability'; every row
	        is annotated with 'TEAM_ABBREVIATION'. Saves "strength_vs_experience".
	        """
	        fig, ax = plt.subplots(figsize=(12, 8))

	        odds = champ_df['champ_probability']
	        points = ax.scatter(
	            champ_df['playoff_experience'],
	            champ_df['strength_rating'],
	            s=odds * 3000,
	            c=odds,
	            cmap='Purples',
	            alpha=0.7,
	            edgecolor='white',
	            linewidth=2
	        )

	        # Label every team beside its marker.
	        for _, team in champ_df.iterrows():
	            anchor = (team['playoff_experience'], team['strength_rating'])
	            ax.annotate(
	                team['TEAM_ABBREVIATION'],
	                anchor,
	                xytext=(10, 5), textcoords='offset points',
	                fontsize=11, color='white', fontweight='bold'
	            )

	        ax.set_xlabel('Playoff Experience Index', fontsize=12)
	        ax.set_ylabel('Strength Rating', fontsize=12)
	        ax.set_title('Championship Contenders: Strength vs Experience', fontsize=16, fontweight='bold')
	        ax.grid(True, alpha=0.2)

	        colorbar = plt.colorbar(points, ax=ax)
	        colorbar.set_label('Championship Probability', fontsize=10)

	        return save_figure(fig, "strength_vs_experience")


	# =============================================================================
	# SEASON ANALYSIS VISUALIZATIONS
	# =============================================================================
	class SeasonVisualizer:
	    """Charts for historical, season-level analysis."""

	    def plot_scoring_trends(self, season_data: pd.DataFrame) -> Path:
	        """
	        Line chart of average points per game by season.

	        Reads the 'season' and 'avg_pts' columns, shades the area under the
	        line, and saves "scoring_trends".
	        """
	        fig, ax = plt.subplots(figsize=(14, 7))

	        seasons = season_data['season']
	        points = season_data['avg_pts']
	        accent = COLORS['primary']

	        ax.plot(seasons, points, color=accent, linewidth=3, marker='o', markersize=8)
	        ax.fill_between(seasons, points, alpha=0.3, color=accent)

	        ax.set_xlabel('Season', fontsize=12)
	        ax.set_ylabel('Average Points Per Game', fontsize=12)
	        ax.set_title('NBA Scoring Trends Over Time', fontsize=16, fontweight='bold')
	        ax.tick_params(axis='x', rotation=45)
	        ax.grid(True, alpha=0.2)

	        return save_figure(fig, "scoring_trends")

	    def plot_three_point_revolution(self, season_data: pd.DataFrame) -> Path:
	        """
	        Dual-axis chart of 3-point attempts (bars) and 3-point % (line).

	        Reads 'season', 'avg_3pa' and 'avg_3pct'; saves
	        "three_point_revolution".
	        """
	        fig, attempts_ax = plt.subplots(figsize=(14, 7))
	        # Second y-axis sharing the same x-axis for the percentage line.
	        pct_ax = attempts_ax.twinx()

	        seasons = season_data['season']
	        bar_color = COLORS['secondary']
	        line_color = COLORS['primary']

	        attempts_ax.bar(seasons, season_data['avg_3pa'],
	                        color=bar_color, alpha=0.7, label='3-Point Attempts')
	        pct_ax.plot(seasons, season_data['avg_3pct'],
	                    color=line_color, linewidth=3, marker='o', label='3-Point %')

	        attempts_ax.set_xlabel('Season', fontsize=12)
	        attempts_ax.set_ylabel('3-Point Attempts', fontsize=12, color=bar_color)
	        pct_ax.set_ylabel('3-Point Percentage', fontsize=12, color=line_color)
	        attempts_ax.set_title('The 3-Point Revolution', fontsize=16, fontweight='bold')
	        attempts_ax.tick_params(axis='x', rotation=45)

	        # Merge the entries of both axes into a single legend.
	        bar_handles, bar_labels = attempts_ax.get_legend_handles_labels()
	        line_handles, line_labels = pct_ax.get_legend_handles_labels()
	        attempts_ax.legend(bar_handles + line_handles, bar_labels + line_labels,
	                           loc='upper left')

	        return save_figure(fig, "three_point_revolution")


	# =============================================================================
	# MASTER VISUALIZER
	# =============================================================================
	class NBAVisualizer:
	    """Master class combining all visualization capabilities."""

	    def __init__(self):
	        """Create one instance of each specialised visualizer."""
	        self.team = TeamVisualizer()
	        self.game = GameVisualizer()
	        self.mvp = MVPVisualizer()
	        self.championship = ChampionshipVisualizer()
	        self.season = SeasonVisualizer()

	    def generate_all_visualizations(self, data: Dict[str, pd.DataFrame]) -> List[Path]:
	        """
	        Generate all available visualizations from provided data.

	        Recognised keys and the charts each one produces:
	            'standings'    -> team standings
	            'mvp'          -> MVP race, MVP stat comparison
	            'championship' -> championship odds, strength vs experience
	            'predictions'  -> calibration curve, accuracy by confidence
	        Other keys are ignored.

	        Args:
	            data: Dict mapping the keys above to their DataFrames.

	        Returns:
	            List of paths to saved graphs
	        """
	        saved_paths = []

	        if 'standings' in data:
	            saved_paths.append(self.team.plot_standings(data['standings']))

	        if 'mvp' in data:
	            saved_paths.append(self.mvp.plot_mvp_race(data['mvp']))
	            saved_paths.append(self.mvp.plot_stat_comparison(data['mvp']))

	        if 'championship' in data:
	            saved_paths.append(self.championship.plot_championship_odds(data['championship']))
	            saved_paths.append(self.championship.plot_strength_vs_experience(data['championship']))

	        if 'predictions' in data:
	            # GameVisualizer names this method plot_prediction_calibration;
	            # the former plot_calibration call raised AttributeError.
	            saved_paths.append(self.game.plot_prediction_calibration(data['predictions']))
	            saved_paths.append(self.game.plot_accuracy_by_confidence(data['predictions']))

	        logger.info("Generated %d visualizations", len(saved_paths))
	        return saved_paths


	# =============================================================================
	# CLI INTERFACE
	# =============================================================================
	if __name__ == "__main__":
	    print(f"Generating sample visualizations to {GRAPHS_DIR}...")

	    # Hand-written sample frames so the demo runs without any data files.
	    sample_mvp = pd.DataFrame({
	        'PLAYER_NAME': [
	            'Shai Gilgeous-Alexander',
	            'Nikola Jokic',
	            'Jayson Tatum',
	            'Luka Doncic',
	            'Giannis Antetokounmpo',
	        ],
	        'PTS': [31.5, 26.8, 27.2, 28.5, 30.5],
	        'REB': [5.5, 12.5, 8.2, 8.8, 11.5],
	        'AST': [6.0, 9.2, 4.8, 8.2, 6.5],
	        'mvp_score': [85.2, 82.1, 78.5, 77.2, 76.8],
	        'mvp_similarity': [0.92, 0.95, 0.85, 0.88, 0.90],
	    })

	    sample_champ = pd.DataFrame({
	        'TEAM_ABBREVIATION': ['OKC', 'CLE', 'BOS', 'DEN', 'MEM', 'HOU', 'NYK', 'GSW'],
	        'W_PCT': [0.70, 0.68, 0.65, 0.62, 0.60, 0.58, 0.55, 0.52],
	        'playoff_experience': [0.3, 0.5, 0.8, 0.9, 0.4, 0.2, 0.5, 0.95],
	        'strength_rating': [45, 42, 40, 38, 35, 33, 30, 28],
	        'champ_probability': [0.18, 0.15, 0.14, 0.12, 0.10, 0.09, 0.08, 0.07],
	    })

	    viz = NBAVisualizer()

	    # Each step: progress message, plotting method, and the frame it draws.
	    demo_steps = [
	        ("Creating MVP race chart...", viz.mvp.plot_mvp_race, sample_mvp),
	        ("Creating MVP stat comparison...", viz.mvp.plot_stat_comparison, sample_mvp),
	        ("Creating championship odds pie chart...",
	         viz.championship.plot_championship_odds, sample_champ),
	        ("Creating strength vs experience chart...",
	         viz.championship.plot_strength_vs_experience, sample_champ),
	    ]
	    for message, render, frame in demo_steps:
	        print(message)
	        render(frame)

	    print(f"\n✅ Visualizations saved to: {GRAPHS_DIR}")