Spaces:
Running
Running
File size: 6,548 Bytes
c095e08 fd577ad c095e08 fd577ad c095e08 fd577ad c095e08 f475b4e c095e08 f475b4e c095e08 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
"""
NBA ML Prediction System - Configuration
=========================================
Central configuration for data collection, model training, and predictions.
"""
from pathlib import Path
from dataclasses import dataclass, field
from typing import List
# =============================================================================
# PATHS
# =============================================================================
import os
# Project root: the parent of the directory that contains this module.
PROJECT_ROOT = Path(__file__).parent.parent

# Storage roots. Each can be redirected with an environment variable
# (NBA_ML_DATA_DIR is used for HF Spaces persistent storage); otherwise the
# in-repo "data" / "models" folders are used.
DATA_DIR = Path(os.getenv("NBA_ML_DATA_DIR", str(PROJECT_ROOT / "data")))
MODELS_DIR = Path(os.getenv("NBA_ML_MODELS_DIR", str(PROJECT_ROOT / "models")))

# Sub-folders of the data root.
RAW_DATA_DIR = DATA_DIR / "raw"
PROCESSED_DATA_DIR = DATA_DIR / "processed"
API_CACHE_DIR = DATA_DIR / "api_data"

# Make sure every working directory exists as soon as the module is imported.
# parents=True also creates DATA_DIR itself; exist_ok=True makes this repeatable.
for dir_path in (RAW_DATA_DIR, PROCESSED_DATA_DIR, API_CACHE_DIR, MODELS_DIR):
    dir_path.mkdir(parents=True, exist_ok=True)
# =============================================================================
# SEASONS
# =============================================================================
# Extended dataset: 23 years (2003-2026) for comprehensive training
# Year bounds for season generation. The end year is exclusive, so 2026 makes
# "2025-26" the last generated season.
INITIAL_SEASON_START = 2003
INITIAL_SEASON_END = 2026  # Current 2025-26 season
FULL_SEASON_START = 2003  # Full dataset starts from 2003


def get_season_strings(start_year: int = INITIAL_SEASON_START,
                       end_year: int = INITIAL_SEASON_END) -> List[str]:
    """Build season labels such as '2003-04', '2004-05', ...

    Args:
        start_year: Calendar year in which the first season begins.
        end_year: Exclusive upper bound; the last label begins in
            ``end_year - 1``.

    Returns:
        One label per starting year in ascending order; an empty list when
        ``start_year >= end_year``.
    """
    seasons: List[str] = []
    for first_year in range(start_year, end_year):
        # The suffix is the last two digits of the year the season ends in.
        closing_suffix = str(first_year + 1)[-2:]
        seasons.append(f"{first_year}-{closing_suffix}")
    return seasons


# Labels for the default range, computed once at import time.
SEASON_STRINGS = get_season_strings()
# =============================================================================
# CHROMADB CONFIGURATION
# =============================================================================
import os
@dataclass
class ChromaDBConfig:
    """Connection settings for the ChromaDB prediction-tracking store.

    ``tenant``, ``database`` and ``api_key`` are looked up in the environment
    (``CHROMADB_TENANT``, ``CHROMADB_DATABASE``, ``CHROMADB_API_KEY``) every
    time an instance is created, so variables exported after this module was
    imported are still honoured. Explicit constructor arguments take
    precedence over both the environment and the fallback values.

    SECURITY: the fallback tenant ID and API key below are credentials
    committed to source control. They are kept only so deployments that do
    not set the environment variables keep working; rotate the key, supply it
    through ``CHROMADB_API_KEY`` and then remove the fallback.
    """

    tenant: str = field(
        default_factory=lambda: os.environ.get(
            "CHROMADB_TENANT", "70e82e68-9fa7-4224-9975-d49d355f6328"
        )
    )
    database: str = field(
        default_factory=lambda: os.environ.get("CHROMADB_DATABASE", "NBA_ML")
    )
    # repr=False keeps the secret out of logs/tracebacks that print the config.
    api_key: str = field(
        default_factory=lambda: os.environ.get(
            "CHROMADB_API_KEY", "ck-2bXunZK4X3BFSPHtwLG2Ki9xr5r6ZPxzADESDperHweT"
        ),
        repr=False,
    )
    collection_name: str = "predictions"


# Shared instance; its environment lookup happens here, at import time.
CHROMADB_CONFIG = ChromaDBConfig()
# =============================================================================
# LIVE DATA CONFIGURATION
# =============================================================================
# Interval, in seconds, between live score refreshes.
LIVE_REFRESH_INTERVAL = 15
# =============================================================================
# API CONFIGURATION
# =============================================================================
@dataclass
class APIConfig:
    """Throttling, retry and timeout settings for NBA API requests."""

    # Base delay between requests, in seconds.
    base_delay: float = 0.6
    # Maximum number of retry attempts.
    max_retries: int = 3
    # Initial backoff, in seconds.
    initial_backoff: float = 2.0
    # Maximum backoff, in seconds.
    max_backoff: float = 60.0
    # Multiplier for exponential backoff.
    backoff_multiplier: float = 2.0
    # Request timeout, in seconds.
    timeout: int = 30


# Module-level instance built from the defaults above.
API_CONFIG = APIConfig()
# =============================================================================
# ELO CONFIGURATION
# =============================================================================
@dataclass
class ELOConfig:
    """Tunable constants for the ELO rating calculations."""

    # Rating given to a team that has no rating yet.
    initial_rating: float = 1500.0
    # How much ratings change per game.
    k_factor: float = 20.0
    # ELO points added for the home team.
    home_advantage: float = 100.0
    # Regression-to-the-mean factor applied at season start. The original note
    # reads "85% = only 15% carryover" -- NOTE(review): confirm the direction
    # against the code that consumes this value.
    season_regression: float = 0.85


# Module-level instance built from the defaults above.
ELO_CONFIG = ELOConfig()
# =============================================================================
# FEATURE CONFIGURATION
# =============================================================================
# Default rolling-window sizes; copied into a new list for every instance.
_DEFAULT_ROLLING_WINDOWS = (5, 10, 20)


@dataclass
class FeatureConfig:
    """Settings that drive feature engineering."""

    # Rolling-window sizes (presumably counted in games -- confirm with the
    # feature code). Each instance receives its own list.
    rolling_windows: List[int] = field(
        default_factory=lambda: list(_DEFAULT_ROLLING_WINDOWS)
    )
    # Minimum games before generating features.
    min_games_for_features: int = 5


# Module-level instance built from the defaults above.
FEATURE_CONFIG = FeatureConfig()
# =============================================================================
# MODEL CONFIGURATION
# =============================================================================
def _default_xgb_params() -> dict:
    """Return a new dict holding the XGBoost default parameters."""
    return {
        "n_estimators": 500,
        "max_depth": 6,
        "learning_rate": 0.05,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "random_state": 42,
    }


def _default_lgb_params() -> dict:
    """Return a new dict holding the LightGBM default parameters."""
    return {
        "n_estimators": 500,
        "max_depth": 6,
        "learning_rate": 0.05,
        "subsample": 0.8,
        "colsample_bytree": 0.8,
        "random_state": 42,
        "verbose": -1,
    }


@dataclass
class ModelConfig:
    """Settings for model training.

    Every list/dict field is produced by a factory, so instances never share
    mutable defaults.
    """

    # Season labels held out for testing and for validation.
    test_seasons: List[str] = field(default_factory=lambda: ["2024-25"])
    val_seasons: List[str] = field(default_factory=lambda: ["2023-24"])
    random_state: int = 42
    # XGBoost defaults
    xgb_params: dict = field(default_factory=_default_xgb_params)
    # LightGBM defaults
    lgb_params: dict = field(default_factory=_default_lgb_params)


# Module-level instance built from the defaults above.
MODEL_CONFIG = ModelConfig()
# =============================================================================
# TEAM MAPPINGS
# =============================================================================
# NBA Team IDs (for reference)
# Maps each numeric NBA team ID to its three-letter abbreviation.
NBA_TEAMS = {
    1610612737: "ATL",
    1610612738: "BOS",
    1610612739: "CLE",
    1610612740: "NOP",
    1610612741: "CHI",
    1610612742: "DAL",
    1610612743: "DEN",
    1610612744: "GSW",
    1610612745: "HOU",
    1610612746: "LAC",
    1610612747: "LAL",
    1610612748: "MIA",
    1610612749: "MIL",
    1610612750: "MIN",
    1610612751: "BKN",
    1610612752: "NYK",
    1610612753: "ORL",
    1610612754: "IND",
    1610612755: "PHI",
    1610612756: "PHX",
    1610612757: "POR",
    1610612758: "SAC",
    1610612759: "SAS",
    1610612760: "OKC",
    1610612761: "TOR",
    1610612762: "UTA",
    1610612763: "MEM",
    1610612764: "WAS",
    1610612765: "DET",
    1610612766: "CHA",
}
# =============================================================================
# INJURY STATUS WEIGHTS
# =============================================================================
# Weight per injury-report status, from 1.0 ("Out": full impact, player not
# available) down to 0.0 ("Available").
INJURY_IMPACT = {
    "Out": 1.0,
    "Doubtful": 0.8,
    "Questionable": 0.5,
    "Probable": 0.2,
    "Available": 0.0,
}
|