Update src/streamlit_app.py
Browse files- src/streamlit_app.py +173 -570
src/streamlit_app.py
CHANGED
|
@@ -4,11 +4,13 @@ import numpy as np
|
|
| 4 |
import requests
|
| 5 |
import os
|
| 6 |
from datetime import datetime
|
| 7 |
-
from bs4 import BeautifulSoup, Comment
|
| 8 |
-
import re
|
| 9 |
-
import plotly.express as px
|
| 10 |
-
import plotly.graph_objects as go
|
| 11 |
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# Page configuration
|
| 14 |
st.set_page_config(
|
|
@@ -33,551 +35,188 @@ if 'chat_history' not in st.session_state:
|
|
| 33 |
st.session_state.chat_history = []
|
| 34 |
|
| 35 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 36 |
-
#
|
| 37 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
import time
|
| 45 |
-
import random
|
| 46 |
-
from urllib.parse import urljoin
|
| 47 |
-
|
| 48 |
-
@st.cache_data(ttl=3600)
|
| 49 |
-
def fetch_html(url):
|
| 50 |
-
"""Fetch raw HTML for a URL (with error handling and rate limiting)."""
|
| 51 |
-
try:
|
| 52 |
-
# Add random delay to be respectful to basketball-reference.com
|
| 53 |
-
time.sleep(random.uniform(0.5, 1.5))
|
| 54 |
-
|
| 55 |
-
headers = {
|
| 56 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
| 57 |
-
}
|
| 58 |
-
|
| 59 |
-
resp = requests.get(url, timeout=30, headers=headers)
|
| 60 |
-
resp.raise_for_status()
|
| 61 |
-
return resp.text
|
| 62 |
-
except requests.exceptions.RequestException as e:
|
| 63 |
-
st.error(f"Failed to fetch {url}: {e}")
|
| 64 |
-
return ""
|
| 65 |
-
except Exception as e:
|
| 66 |
-
st.error(f"An unexpected error occurred while fetching {url}: {e}")
|
| 67 |
-
return ""
|
| 68 |
-
|
| 69 |
-
def parse_table(html, table_id=None):
|
| 70 |
-
"""
|
| 71 |
-
Given raw HTML and optional table_id, locate that <table>,
|
| 72 |
-
handling cases where it's commented out, then parse it with pandas.read_html.
|
| 73 |
-
"""
|
| 74 |
-
if not html:
|
| 75 |
-
return pd.DataFrame()
|
| 76 |
-
|
| 77 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 78 |
-
tbl_html = ""
|
| 79 |
-
|
| 80 |
-
if table_id:
|
| 81 |
-
# First, try to find the table directly
|
| 82 |
-
tbl = soup.find("table", {"id": table_id})
|
| 83 |
-
if tbl:
|
| 84 |
-
tbl_html = str(tbl)
|
| 85 |
-
else:
|
| 86 |
-
# If not found directly, search for it within HTML comments
|
| 87 |
-
comments = soup.find_all(string=lambda text: isinstance(text, Comment))
|
| 88 |
-
for comment in comments:
|
| 89 |
-
comment_soup = BeautifulSoup(comment, "html.parser")
|
| 90 |
-
tbl = comment_soup.find("table", {"id": table_id})
|
| 91 |
-
if tbl:
|
| 92 |
-
tbl_html = str(tbl)
|
| 93 |
-
break
|
| 94 |
-
else:
|
| 95 |
-
# fallback: first table on page (only if no table_id specified)
|
| 96 |
-
first = soup.find("table")
|
| 97 |
-
if first:
|
| 98 |
-
tbl_html = str(first)
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
return pd.DataFrame()
|
| 110 |
-
except ValueError as e:
|
| 111 |
-
# No tables found in the provided HTML string
|
| 112 |
-
st.warning(f"No tables found in HTML: {e}")
|
| 113 |
-
return pd.DataFrame()
|
| 114 |
-
except Exception as e:
|
| 115 |
-
st.error(f"Error parsing table with pandas: {e}")
|
| 116 |
-
return pd.DataFrame(
|
| 117 |
|
| 118 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 119 |
-
# Basketball-Reference Data Fetching Utilities
|
| 120 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 121 |
@st.cache_data(ttl=3600)
|
| 122 |
-
def
|
| 123 |
"""
|
| 124 |
-
|
| 125 |
-
|
| 126 |
"""
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
continue
|
| 135 |
-
|
| 136 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 137 |
-
# The players table is usually directly available, not commented out.
|
| 138 |
-
table = soup.find("table", {"id": "players"})
|
| 139 |
-
if not table:
|
| 140 |
-
continue
|
| 141 |
-
|
| 142 |
-
# Look for both tbody and direct tr children
|
| 143 |
-
rows = table.select("tbody tr") if table.select("tbody tr") else table.select("tr")
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
continue
|
| 152 |
-
name = a.text.strip()
|
| 153 |
-
href = a["href"].strip()
|
| 154 |
-
full_url = urljoin("https://www.basketball-reference.com", href)
|
| 155 |
-
records.append({"name": name, "url": full_url})
|
| 156 |
-
|
| 157 |
-
return pd.DataFrame(records)
|
| 158 |
-
|
| 159 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 160 |
-
@st.cache_data(ttl=300)
|
| 161 |
-
def player_season_stats(bbr_url):
|
| 162 |
-
"""
|
| 163 |
-
Scrapes a player's perβseason table (id="per_game") from their BBR page.
|
| 164 |
-
Returns cleaned DataFrame with season-by-season averages.
|
| 165 |
-
"""
|
| 166 |
-
html = fetch_html(bbr_url)
|
| 167 |
-
if not html:
|
| 168 |
-
return pd.DataFrame()
|
| 169 |
-
|
| 170 |
-
# Use the parse_table function for consistency
|
| 171 |
-
df = parse_table(html, "per_game")
|
| 172 |
-
|
| 173 |
-
# If per_game table not found, try alternatives
|
| 174 |
-
if df.empty:
|
| 175 |
-
# Try other common table IDs for season stats
|
| 176 |
-
for table_id in ["stats", "per_game_stats", "totals", "advanced"]:
|
| 177 |
-
df = parse_table(html, table_id)
|
| 178 |
-
if not df.empty:
|
| 179 |
-
break
|
| 180 |
-
|
| 181 |
-
# If still empty, try to find any table with Season column
|
| 182 |
-
if df.empty:
|
| 183 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 184 |
-
all_tables = soup.find_all("table")
|
| 185 |
-
for table in all_tables:
|
| 186 |
-
try:
|
| 187 |
-
temp_df = pd.read_html(str(table), header=0)[0]
|
| 188 |
-
# Check if it has Season column and looks like season stats
|
| 189 |
-
if not temp_df.empty and 'Season' in temp_df.columns and 'Date' not in temp_df.columns:
|
| 190 |
-
df = temp_df
|
| 191 |
-
break
|
| 192 |
-
except:
|
| 193 |
-
continue
|
| 194 |
-
|
| 195 |
-
if df.empty:
|
| 196 |
-
st.warning(f"Could not find season stats table at {bbr_url}")
|
| 197 |
-
return pd.DataFrame()
|
| 198 |
-
|
| 199 |
-
# Handle potential MultiIndex columns
|
| 200 |
-
if isinstance(df.columns, pd.MultiIndex):
|
| 201 |
-
df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and 'Unnamed' not in str(col))
|
| 202 |
-
for cols in df.columns.values]
|
| 203 |
-
|
| 204 |
-
# Clean column names
|
| 205 |
-
df.columns = [str(col).strip() for col in df.columns]
|
| 206 |
-
|
| 207 |
-
# Verify we have the right table (should have Season column, not Date)
|
| 208 |
-
if 'Season' not in df.columns:
|
| 209 |
-
st.warning(f"Table found but no Season column. Available columns: {df.columns.tolist()}")
|
| 210 |
-
return pd.DataFrame()
|
| 211 |
-
|
| 212 |
-
if 'Date' in df.columns:
|
| 213 |
-
st.warning("Found game log table instead of season stats. Skipping.")
|
| 214 |
-
return pd.DataFrame()
|
| 215 |
-
|
| 216 |
-
# Clean the data - remove header rows
|
| 217 |
-
df = df[df["Season"].astype(str) != "Season"].copy()
|
| 218 |
-
df = df[df["Season"].notna()].copy()
|
| 219 |
-
|
| 220 |
-
# Remove any completely empty rows
|
| 221 |
-
df = df.dropna(how='all').copy()
|
| 222 |
-
|
| 223 |
-
if df.empty:
|
| 224 |
-
st.warning("No valid season data found after cleaning")
|
| 225 |
-
return pd.DataFrame()
|
| 226 |
-
|
| 227 |
-
# Clean season format
|
| 228 |
-
df["Season"] = df["Season"].astype(str).str.strip()
|
| 229 |
-
df['Season'] = df['Season'].str.replace('-', 'β') # Ensure en-dash for consistency
|
| 230 |
-
|
| 231 |
-
# Standardize column names to match expected format
|
| 232 |
-
column_mapping = {
|
| 233 |
-
'G': 'GP', 'GS': 'GS', 'MP': 'MIN',
|
| 234 |
-
'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
|
| 235 |
-
'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
|
| 236 |
-
'PF': 'PF', 'PTS': 'PTS',
|
| 237 |
-
'Age': 'AGE', 'Tm': 'TEAM_ABBREVIATION', 'Lg': 'LEAGUE_ID', 'Pos': 'POSITION',
|
| 238 |
-
'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
|
| 239 |
-
'2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
|
| 240 |
-
'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
|
| 241 |
-
}
|
| 242 |
-
|
| 243 |
-
# Apply column mapping only for columns that exist
|
| 244 |
-
for old_col, new_col in column_mapping.items():
|
| 245 |
-
if old_col in df.columns:
|
| 246 |
-
df = df.rename(columns={old_col: new_col})
|
| 247 |
-
|
| 248 |
-
# Clean team names if TEAM_ABBREVIATION column exists
|
| 249 |
-
if 'TEAM_ABBREVIATION' in df.columns:
|
| 250 |
-
df['TEAM_ABBREVIATION'] = df['TEAM_ABBREVIATION'].apply(clean_team_name)
|
| 251 |
-
|
| 252 |
-
# Convert numeric columns
|
| 253 |
-
non_numeric_cols = {'Season', 'TEAM_ABBREVIATION', 'LEAGUE_ID', 'POSITION', 'Player'}
|
| 254 |
-
for col in df.columns:
|
| 255 |
-
if col not in non_numeric_cols:
|
| 256 |
-
df[col] = pd.to_numeric(df[col], errors="coerce")
|
| 257 |
-
|
| 258 |
-
return df
|
| 259 |
|
| 260 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 261 |
@st.cache_data(ttl=300)
|
| 262 |
-
def
|
| 263 |
"""
|
| 264 |
-
|
| 265 |
-
|
| 266 |
"""
|
| 267 |
-
if not player_name or not player_name.strip():
|
| 268 |
-
st.warning("Please provide a valid player name")
|
| 269 |
-
return pd.DataFrame()
|
| 270 |
-
|
| 271 |
-
# Get player index
|
| 272 |
try:
|
| 273 |
-
|
| 274 |
-
if
|
| 275 |
-
st.error("Could not load player index. Please try again later.")
|
| 276 |
-
return pd.DataFrame()
|
| 277 |
-
except Exception as e:
|
| 278 |
-
st.error(f"Error loading player index: {e}")
|
| 279 |
-
return pd.DataFrame()
|
| 280 |
-
|
| 281 |
-
# Clean the search term
|
| 282 |
-
search_name = player_name.strip()
|
| 283 |
-
|
| 284 |
-
# Search for player (case insensitive, exact match first)
|
| 285 |
-
exact_matches = player_index[player_index['name'].str.lower() == search_name.lower()]
|
| 286 |
-
|
| 287 |
-
if not exact_matches.empty:
|
| 288 |
-
matches = exact_matches
|
| 289 |
-
else:
|
| 290 |
-
# Try partial matching
|
| 291 |
-
matches = player_index[player_index['name'].str.contains(search_name, case=False, na=False, regex=False)]
|
| 292 |
-
|
| 293 |
-
if matches.empty:
|
| 294 |
-
st.warning(f"No player found matching '{player_name}'. Please check the spelling and try again.")
|
| 295 |
-
return pd.DataFrame()
|
| 296 |
-
|
| 297 |
-
if len(matches) > 1:
|
| 298 |
-
st.info(f"Multiple players found matching '{player_name}':")
|
| 299 |
-
for i, (_, row) in enumerate(matches.head(5).iterrows(), 1):
|
| 300 |
-
st.info(f"{i}. {row['name']}")
|
| 301 |
-
st.info("Using the first match. For more specific results, try using the full player name.")
|
| 302 |
-
|
| 303 |
-
# Get stats for the first match
|
| 304 |
-
try:
|
| 305 |
-
player_url = matches.iloc[0]['url']
|
| 306 |
-
player_stats = player_season_stats(player_url)
|
| 307 |
-
|
| 308 |
-
if player_stats.empty:
|
| 309 |
-
st.warning(f"Could not retrieve stats for {matches.iloc[0]['name']}")
|
| 310 |
return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
|
| 312 |
-
#
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
except Exception as e:
|
| 318 |
-
st.error(f"Error
|
| 319 |
return pd.DataFrame()
|
| 320 |
|
| 321 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 322 |
@st.cache_data(ttl=300)
|
| 323 |
-
def
|
| 324 |
"""
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
Returns cleaned DataFrame.
|
| 328 |
"""
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
# Try multiple possible table IDs for team stats
|
| 335 |
-
possible_table_ids = ["per_game-team", "per_game_team", "team-stats-per_game", "teams_per_game"]
|
| 336 |
-
df = pd.DataFrame()
|
| 337 |
-
|
| 338 |
-
for table_id in possible_table_ids:
|
| 339 |
-
df = parse_table(html, table_id=table_id)
|
| 340 |
-
if not df.empty:
|
| 341 |
-
break
|
| 342 |
-
|
| 343 |
-
# If no specific table found, try to find any table with team data
|
| 344 |
-
if df.empty:
|
| 345 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 346 |
-
tables = soup.find_all("table")
|
| 347 |
-
for table in tables:
|
| 348 |
-
if table.find("th", string=lambda text: text and "team" in text.lower()):
|
| 349 |
-
df = parse_table(str(table))
|
| 350 |
-
if not df.empty:
|
| 351 |
-
break
|
| 352 |
-
|
| 353 |
-
if df.empty:
|
| 354 |
-
st.warning(f"Could not find team stats table for {year}")
|
| 355 |
-
return pd.DataFrame()
|
| 356 |
-
|
| 357 |
-
# Handle potential MultiIndex columns
|
| 358 |
-
if isinstance(df.columns, pd.MultiIndex):
|
| 359 |
-
df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
|
| 360 |
-
for cols in df.columns.values]
|
| 361 |
-
|
| 362 |
-
# Clean column names
|
| 363 |
-
df.columns = [str(col).strip() for col in df.columns]
|
| 364 |
-
|
| 365 |
-
# Find team column
|
| 366 |
-
team_col = None
|
| 367 |
-
for col in df.columns:
|
| 368 |
-
if 'team' in col.lower() or col in ['Team', 'Tm']:
|
| 369 |
-
team_col = col
|
| 370 |
-
break
|
| 371 |
-
|
| 372 |
-
if team_col is None:
|
| 373 |
-
st.warning(f"Could not find team column in team stats. Available columns: {df.columns.tolist()}")
|
| 374 |
-
return pd.DataFrame()
|
| 375 |
-
|
| 376 |
-
# Rename team column to standard name
|
| 377 |
-
if team_col != 'Team':
|
| 378 |
-
df = df.rename(columns={team_col: 'Team'})
|
| 379 |
-
|
| 380 |
-
# Remove header rows
|
| 381 |
-
df = df[df["Team"].astype(str) != "Team"].copy()
|
| 382 |
-
df = df[df["Team"].notna()].copy()
|
| 383 |
-
|
| 384 |
-
# Rename Team to Tm for consistency
|
| 385 |
-
df = df.rename(columns={"Team": "Tm"})
|
| 386 |
-
|
| 387 |
-
# Standardize column names
|
| 388 |
-
column_mapping = {
|
| 389 |
-
'G': 'GP', 'MP': 'MIN',
|
| 390 |
-
'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
|
| 391 |
-
'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
|
| 392 |
-
'PF': 'PF', 'PTS': 'PTS',
|
| 393 |
-
'Rk': 'RANK', 'W': 'WINS', 'L': 'LOSSES', 'W/L%': 'WIN_LOSS_PCT',
|
| 394 |
-
'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
|
| 395 |
-
'2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
|
| 396 |
-
'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
|
| 397 |
-
}
|
| 398 |
-
|
| 399 |
-
# Apply column mapping only for columns that exist
|
| 400 |
-
for old_col, new_col in column_mapping.items():
|
| 401 |
-
if old_col in df.columns:
|
| 402 |
-
df = df.rename(columns={old_col: new_col})
|
| 403 |
-
|
| 404 |
-
# Clean team names
|
| 405 |
-
df['Tm'] = df['Tm'].apply(clean_team_name)
|
| 406 |
-
|
| 407 |
-
# Convert numeric columns
|
| 408 |
-
non_numeric_cols = {"Tm", "RANK"}
|
| 409 |
-
for col in df.columns:
|
| 410 |
-
if col not in non_numeric_cols:
|
| 411 |
-
df[col] = pd.to_numeric(df[col], errors="coerce")
|
| 412 |
-
|
| 413 |
-
return df
|
| 414 |
-
|
| 415 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 416 |
-
# Additional utility functions for team data processing
|
| 417 |
-
|
| 418 |
-
@st.cache_data(ttl=300)
|
| 419 |
-
def team_opponent_stats(year):
|
| 420 |
-
"""
|
| 421 |
-
Scrapes the league's opponent perβgame team stats table from:
|
| 422 |
-
https://www.basketball-reference.com/leagues/NBA_{year}_opp_per_game.html
|
| 423 |
-
Returns cleaned DataFrame with opponent stats.
|
| 424 |
-
"""
|
| 425 |
-
url = f"https://www.basketball-reference.com/leagues/NBA_{year}_opp_per_game.html"
|
| 426 |
-
html = fetch_html(url)
|
| 427 |
-
if not html:
|
| 428 |
-
return pd.DataFrame()
|
| 429 |
-
|
| 430 |
-
# Try multiple possible table IDs for opponent stats
|
| 431 |
-
possible_table_ids = ["opp-stats-per_game", "opp_per_game", "opponent-stats-per_game"]
|
| 432 |
-
df = pd.DataFrame()
|
| 433 |
-
|
| 434 |
-
for table_id in possible_table_ids:
|
| 435 |
-
df = parse_table(html, table_id=table_id)
|
| 436 |
-
if not df.empty:
|
| 437 |
-
break
|
| 438 |
-
|
| 439 |
-
if df.empty:
|
| 440 |
-
st.warning(f"Could not find opponent stats table for {year}")
|
| 441 |
-
return pd.DataFrame()
|
| 442 |
-
|
| 443 |
-
# Clean and process the same way as regular team stats
|
| 444 |
-
if isinstance(df.columns, pd.MultiIndex):
|
| 445 |
-
df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
|
| 446 |
-
for cols in df.columns.values]
|
| 447 |
-
|
| 448 |
-
df.columns = [str(col).strip() for col in df.columns]
|
| 449 |
-
|
| 450 |
-
# Find team column
|
| 451 |
-
team_col = None
|
| 452 |
-
for col in df.columns:
|
| 453 |
-
if 'team' in col.lower() or col in ['Team', 'Tm']:
|
| 454 |
-
team_col = col
|
| 455 |
-
break
|
| 456 |
-
|
| 457 |
-
if team_col is None:
|
| 458 |
-
return pd.DataFrame()
|
| 459 |
-
|
| 460 |
-
if team_col != 'Team':
|
| 461 |
-
df = df.rename(columns={team_col: 'Team'})
|
| 462 |
-
|
| 463 |
-
df = df[df["Team"].astype(str) != "Team"].copy()
|
| 464 |
-
df = df[df["Team"].notna()].copy()
|
| 465 |
-
df = df.rename(columns={"Team": "Tm"})
|
| 466 |
-
|
| 467 |
-
# Apply team name cleaning
|
| 468 |
-
df['Tm'] = df['Tm'].apply(clean_team_name)
|
| 469 |
-
|
| 470 |
-
# Same column standardization as regular team stats
|
| 471 |
-
column_mapping = {
|
| 472 |
-
'G': 'OPP_GP', 'MP': 'OPP_MIN',
|
| 473 |
-
'FG%': 'OPP_FG_PCT', '3P%': 'OPP_FG3_PCT', 'FT%': 'OPP_FT_PCT',
|
| 474 |
-
'TRB': 'OPP_REB', 'AST': 'OPP_AST', 'STL': 'OPP_STL', 'BLK': 'OPP_BLK', 'TOV': 'OPP_TO',
|
| 475 |
-
'PF': 'OPP_PF', 'PTS': 'OPP_PTS',
|
| 476 |
-
'FG': 'OPP_FGM', 'FGA': 'OPP_FGA', '3P': 'OPP_FG3M', '3PA': 'OPP_FG3A',
|
| 477 |
-
'2P': 'OPP_FGM2', '2PA': 'OPP_FGA2', '2P%': 'OPP_FG2_PCT', 'eFG%': 'OPP_EFG_PCT',
|
| 478 |
-
'FT': 'OPP_FTM', 'FTA': 'OPP_FTA', 'ORB': 'OPP_OREB', 'DRB': 'OPP_DREB'
|
| 479 |
-
}
|
| 480 |
-
|
| 481 |
-
for old_col, new_col in column_mapping.items():
|
| 482 |
-
if old_col in df.columns:
|
| 483 |
-
df = df.rename(columns={old_col: new_col})
|
| 484 |
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
|
| 491 |
-
|
|
|
|
| 492 |
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
# Look for conference tables
|
| 509 |
-
for conference in ['E', 'W']: # Eastern and Western conference IDs
|
| 510 |
-
table_id = f"standings_{conference}"
|
| 511 |
-
table = soup.find("table", {"id": table_id})
|
| 512 |
-
if table:
|
| 513 |
-
df = parse_table(str(table))
|
| 514 |
-
if not df.empty:
|
| 515 |
-
df['Conference'] = 'Eastern' if conference == 'E' else 'Western'
|
| 516 |
-
standings_data.append(df)
|
| 517 |
-
|
| 518 |
-
if not standings_data:
|
| 519 |
return pd.DataFrame()
|
| 520 |
-
|
| 521 |
-
# Combine conference standings
|
| 522 |
-
df = pd.concat(standings_data, ignore_index=True)
|
| 523 |
-
|
| 524 |
-
# Clean team names if 'Team' column exists
|
| 525 |
-
if 'Team' in df.columns:
|
| 526 |
-
df['Team'] = df['Team'].apply(clean_team_name)
|
| 527 |
-
|
| 528 |
-
return df
|
| 529 |
-
|
| 530 |
-
def validate_dataframe(df, required_columns=None):
|
| 531 |
-
"""
|
| 532 |
-
Validate that a DataFrame has the expected structure and data.
|
| 533 |
-
"""
|
| 534 |
-
if df.empty:
|
| 535 |
-
return False, "DataFrame is empty"
|
| 536 |
-
|
| 537 |
-
if required_columns:
|
| 538 |
-
missing_cols = [col for col in required_columns if col not in df.columns]
|
| 539 |
-
if missing_cols:
|
| 540 |
-
return False, f"Missing required columns: {missing_cols}"
|
| 541 |
-
|
| 542 |
-
return True, "DataFrame is valid"
|
| 543 |
-
|
| 544 |
-
def clean_team_name(team_name):
|
| 545 |
-
"""
|
| 546 |
-
Clean and standardize team names from Basketball Reference.
|
| 547 |
-
"""
|
| 548 |
-
if pd.isna(team_name):
|
| 549 |
-
return team_name
|
| 550 |
-
|
| 551 |
-
# Remove any asterisks or other symbols
|
| 552 |
-
team_name = str(team_name).strip().replace('*', '')
|
| 553 |
-
|
| 554 |
-
# Handle special cases for team name variations
|
| 555 |
-
team_mapping = {
|
| 556 |
-
'NOP': 'NO', # New Orleans Pelicans sometimes shown as NOP
|
| 557 |
-
'PHX': 'PHO', # Phoenix Suns sometimes shown as PHX
|
| 558 |
-
'BRK': 'BKN', # Brooklyn Nets sometimes shown as BRK
|
| 559 |
-
'CHA': 'CHO', # Charlotte sometimes inconsistent
|
| 560 |
-
'UTA': 'UTH' # Utah Jazz sometimes shown as UTA
|
| 561 |
-
}
|
| 562 |
-
|
| 563 |
-
return team_mapping.get(team_name, team_name)
|
| 564 |
|
| 565 |
-
def retry_fetch(func, *args, max_retries=3, **kwargs):
|
| 566 |
-
"""
|
| 567 |
-
Retry a function call with exponential backoff.
|
| 568 |
-
"""
|
| 569 |
-
for attempt in range(max_retries):
|
| 570 |
-
try:
|
| 571 |
-
result = func(*args, **kwargs)
|
| 572 |
-
if not (isinstance(result, pd.DataFrame) and result.empty):
|
| 573 |
-
return result
|
| 574 |
-
except Exception as e:
|
| 575 |
-
if attempt == max_retries - 1:
|
| 576 |
-
st.error(f"Failed after {max_retries} attempts: {e}")
|
| 577 |
-
return pd.DataFrame()
|
| 578 |
-
time.sleep(2 ** attempt) # Exponential backoff
|
| 579 |
-
|
| 580 |
-
return pd.DataFrame()
|
| 581 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 582 |
# Perplexity integration
|
| 583 |
PERP_KEY = os.getenv("PERPLEXITY_API_KEY")
|
|
@@ -589,7 +228,7 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
|
|
| 589 |
return ""
|
| 590 |
hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
|
| 591 |
payload = {
|
| 592 |
-
"model":"sonar-
|
| 593 |
"messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
|
| 594 |
"max_tokens":max_tokens, "temperature":temp
|
| 595 |
}
|
|
@@ -611,28 +250,6 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
|
|
| 611 |
st.error(f"An unexpected error occurred with Perplexity API: {e}")
|
| 612 |
return ""
|
| 613 |
|
| 614 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 615 |
-
# Helper for dynamic season generation
|
| 616 |
-
def get_available_seasons(num_seasons=6):
|
| 617 |
-
"""Generates a list of recent NBA seasons in 'YYYYβYY' format."""
|
| 618 |
-
current_year = datetime.now().year
|
| 619 |
-
current_month = datetime.now().month
|
| 620 |
-
|
| 621 |
-
# Determine the latest season end year.
|
| 622 |
-
# If it's before July (e.g., May 2025), the current season is 2024-25 (ends 2025).
|
| 623 |
-
# If it's July or later (e.g., July 2025), the 2024-25 season just finished,
|
| 624 |
-
# and the next season (2025-26) is considered the "current" one for future projections.
|
| 625 |
-
latest_season_end_year = current_year
|
| 626 |
-
if current_month >= 7: # NBA season typically ends in June
|
| 627 |
-
latest_season_end_year += 1
|
| 628 |
-
|
| 629 |
-
seasons_list = []
|
| 630 |
-
for i in range(num_seasons):
|
| 631 |
-
end_year = latest_season_end_year - i
|
| 632 |
-
start_year = end_year - 1
|
| 633 |
-
seasons_list.append(f"{start_year}β{end_year}")
|
| 634 |
-
return sorted(seasons_list, reverse=True) # Sort to show most recent first
|
| 635 |
-
|
| 636 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 637 |
# Plotting functions (retained from previous version)
|
| 638 |
def create_comparison_chart(data, players_names, metric):
|
|
@@ -716,7 +333,7 @@ def main():
|
|
| 716 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 717 |
def player_vs_player():
|
| 718 |
st.markdown('<h2 class="section-header">Player vs Player Comparison</h2>', unsafe_allow_html=True)
|
| 719 |
-
idx =
|
| 720 |
names = idx['name'].tolist()
|
| 721 |
selected_players = st.multiselect("Select Players (up to 4)", names, max_selections=4)
|
| 722 |
|
|
@@ -730,18 +347,11 @@ def player_vs_player():
|
|
| 730 |
|
| 731 |
stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
|
| 732 |
all_player_season_data = [] # To store individual season rows for each player
|
| 733 |
-
|
| 734 |
-
players_with_no_season_data = []
|
| 735 |
|
| 736 |
with st.spinner("Fetching player data..."):
|
| 737 |
for player_name in selected_players:
|
| 738 |
-
|
| 739 |
-
if player_url_row.empty:
|
| 740 |
-
players_not_found_in_index.append(player_name)
|
| 741 |
-
continue
|
| 742 |
-
|
| 743 |
-
player_url = player_url_row.iat[0]
|
| 744 |
-
df_player_career = player_season_stats(player_url)
|
| 745 |
|
| 746 |
if not df_player_career.empty:
|
| 747 |
# Filter for selected seasons. The player_season_stats function
|
|
@@ -749,22 +359,16 @@ def player_vs_player():
|
|
| 749 |
filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
|
| 750 |
|
| 751 |
if not filtered_df.empty:
|
| 752 |
-
|
| 753 |
all_player_season_data.append(filtered_df)
|
| 754 |
else:
|
| 755 |
-
|
| 756 |
-
players_with_no_season_data.append(player_name)
|
| 757 |
else:
|
| 758 |
-
|
| 759 |
-
players_with_no_season_data.append(player_name) # Treat as no data for selected seasons
|
| 760 |
-
|
| 761 |
-
# Report on players not found in index
|
| 762 |
-
if players_not_found_in_index:
|
| 763 |
-
st.error(f"The following players were not found in the Basketball-Reference index: {', '.join(players_not_found_in_index)}. Please check spelling.")
|
| 764 |
|
| 765 |
# Report on players with no data for selected seasons
|
| 766 |
-
if
|
| 767 |
-
st.info(f"No data found for the selected seasons ({', '.join(selected_seasons)}) for: {', '.join(
|
| 768 |
|
| 769 |
if not all_player_season_data:
|
| 770 |
st.error("No data available for any of the selected players and seasons to display. Please adjust your selections.")
|
|
@@ -899,12 +503,12 @@ def team_vs_team():
|
|
| 899 |
# Extract the end year from the season string (e.g., "2024β25" -> 2025)
|
| 900 |
year_for_team_stats = int(selected_season_str.split('β')[1])
|
| 901 |
|
| 902 |
-
tm_df =
|
| 903 |
if tm_df.empty:
|
| 904 |
-
st.info(f"No team data available for the {selected_season_str} season. This might be because the season hasn't started or data is not yet available.")
|
| 905 |
return
|
| 906 |
|
| 907 |
-
teams = tm_df['
|
| 908 |
selected_teams = st.multiselect("Select Teams (up to 4)", teams, max_selections=4)
|
| 909 |
|
| 910 |
if st.button("Run Comparison"):
|
|
@@ -917,11 +521,10 @@ def team_vs_team():
|
|
| 917 |
|
| 918 |
with st.spinner("Fetching team data..."):
|
| 919 |
for t in selected_teams:
|
| 920 |
-
df = tm_df[tm_df.
|
| 921 |
if not df.empty:
|
| 922 |
# For team stats, we usually get one row per team per season from team_per_game
|
| 923 |
# So, no need for .mean() here, just take the row.
|
| 924 |
-
df['Team'] = t # Add 'Team' column for consistency
|
| 925 |
df['Season'] = selected_season_str # Add 'Season' column
|
| 926 |
stats.append(df.iloc[0].to_dict()) # Convert the single row to dict
|
| 927 |
else:
|
|
@@ -1029,7 +632,7 @@ def ai_chat():
|
|
| 1029 |
|
| 1030 |
def young_projections():
|
| 1031 |
st.markdown('<h2 class="section-header">Young Player Projections</h2>', unsafe_allow_html=True)
|
| 1032 |
-
all_p_df =
|
| 1033 |
all_p = all_p_df['name'].tolist()
|
| 1034 |
sp = st.selectbox("Select or enter player", [""]+all_p)
|
| 1035 |
if not sp:
|
|
@@ -1061,7 +664,7 @@ def young_projections():
|
|
| 1061 |
|
| 1062 |
def similar_players():
|
| 1063 |
st.markdown('<h2 class="section-header">Similar Players Finder</h2>', unsafe_allow_html=True)
|
| 1064 |
-
all_p_df =
|
| 1065 |
all_p = all_p_df['name'].tolist()
|
| 1066 |
tp = st.selectbox("Target Player", all_p)
|
| 1067 |
crit = st.multiselect("Criteria",["Position","Height/Weight","Playing Style","Statistical Profile","Age/Experience"],default=["Playing Style","Statistical Profile"])
|
|
|
|
| 4 |
import requests
|
| 5 |
import os
|
| 6 |
from datetime import datetime
|
| 7 |
+
from bs4 import BeautifulSoup, Comment # Keep Comment for parse_table if needed, though BRScraper handles it
|
| 8 |
+
import re
|
| 9 |
+
import plotly.express as px
|
| 10 |
+
import plotly.graph_objects as go
|
| 11 |
|
| 12 |
+
# Import BRScraper
|
| 13 |
+
from BRScraper import nba
|
| 14 |
|
| 15 |
# Page configuration
|
| 16 |
st.set_page_config(
|
|
|
|
| 35 |
st.session_state.chat_history = []
|
| 36 |
|
| 37 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
+
# BRScraper Data Fetching Utilities
|
| 39 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 40 |
|
| 41 |
+
# Helper for dynamic season generation
|
| 42 |
+
def get_available_seasons(num_seasons=6):
|
| 43 |
+
"""Generates a list of recent NBA seasons in 'YYYYβYY' format."""
|
| 44 |
+
current_year = datetime.now().year
|
| 45 |
+
current_month = datetime.now().month
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
# Determine the latest season end year.
|
| 48 |
+
# If it's before July (e.g., May 2025), the current season is 2024-25 (ends 2025).
|
| 49 |
+
# If it's July or later (e.g., July 2025), the 2024-25 season just finished,
|
| 50 |
+
# and the next season (2025-26) is considered the "current" one for future projections.
|
| 51 |
+
latest_season_end_year = current_year
|
| 52 |
+
if current_month >= 7: # NBA season typically ends in June
|
| 53 |
+
latest_season_end_year += 1
|
| 54 |
|
| 55 |
+
seasons_list = []
|
| 56 |
+
for i in range(num_seasons):
|
| 57 |
+
end_year = latest_season_end_year - i
|
| 58 |
+
start_year = end_year - 1
|
| 59 |
+
seasons_list.append(f"{start_year}β{end_year}")
|
| 60 |
+
return sorted(seasons_list, reverse=True) # Sort to show most recent first
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
|
|
|
|
|
|
|
|
|
| 62 |
@st.cache_data(ttl=3600)
def get_player_index_brscraper():
    """Build the player index used by the app's multiselect widgets.

    Pulls per-game stats for the latest season via BRScraper and collects
    the distinct player names into a single-column DataFrame. On any
    failure an empty DataFrame with a 'name' column is returned so callers
    can always do ``idx['name'].tolist()``.
    """
    season_end = int(get_available_seasons(1)[0].split('–')[1])
    try:
        stats = nba.get_stats(season_end, info='per_game', rename=False)
        if not stats.empty:
            # Distinct player names become the index rows.
            return pd.DataFrame({'name': stats['Player'].unique().tolist()})
        st.error(f"Could not fetch player list for {season_end} from BRScraper.")
    except Exception as exc:
        st.error(f"Error fetching player index with BRScraper: {exc}")
    # Shared failure path: empty frame, but with the expected column.
    return pd.DataFrame(columns=['name'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
|
|
|
| 83 |
@st.cache_data(ttl=300)
def get_player_career_stats_brscraper(player_name):
    """Fetch a player's season-by-season career stats via BRScraper.

    Renames Basketball-Reference column headers to the app's canonical
    names, normalizes 'Season' labels to the 'YYYY–YYYY' (en-dash) format
    produced by get_available_seasons(), and coerces stat columns to
    numeric. Returns an empty DataFrame on any failure.

    Args:
        player_name: Player name as understood by BRScraper.

    Returns:
        pd.DataFrame: Career stats (one row per season) with a 'Player'
        column added, or an empty DataFrame if nothing could be fetched.
    """

    def _normalize_season(label):
        # Expand 'YYYY-YY' / 'YYYY–YY' to 'YYYY–YYYY' so labels match
        # get_available_seasons(); other labels just get the en dash.
        label = str(label).replace('-', '–')
        parts = label.split('–')
        if (len(parts) == 2 and parts[0].isdigit() and len(parts[0]) == 4
                and parts[1].isdigit() and len(parts[1]) == 2):
            start = int(parts[0])
            end = start - start % 100 + int(parts[1])
            if end <= start:  # century rollover, e.g. '1999-00' -> 1999–2000
                end += 100
            return f"{start}–{end}"
        return label

    try:
        df = nba.get_player_stats(player_name)
        if df.empty:
            return pd.DataFrame()

        # Map Basketball-Reference headers to the app's canonical names.
        # pandas' rename() silently ignores absent keys, so the full
        # mapping can be applied in a single call.
        column_mapping = {
            'G': 'GP', 'GS': 'GS', 'MP': 'MIN',
            'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
            'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
            'PF': 'PF', 'PTS': 'PTS',
            'Age': 'AGE', 'Tm': 'TEAM_ABBREVIATION', 'Lg': 'LEAGUE_ID', 'Pos': 'POSITION',
            'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
            '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
            'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB',
        }
        df = df.rename(columns=column_mapping)

        # BRScraper seasons look like '2023-24' (assumed — Basketball-
        # Reference convention; confirm against the library's output). The
        # rest of the app filters with 'YYYY–YYYY' labels from
        # get_available_seasons(), so the two-digit end year must be
        # expanded — the previous plain '-'→'–' swap produced '2023–24'
        # and the isin() season filter never matched.
        if 'Season' in df.columns:
            df['Season'] = df['Season'].map(_normalize_season)

        # Coerce every stat column to numeric; identifier columns stay as
        # strings.
        non_numeric_cols = {'Season', 'TEAM_ABBREVIATION', 'LEAGUE_ID', 'POSITION', 'Player'}
        for col in df.columns:
            if col not in non_numeric_cols:
                df[col] = pd.to_numeric(df[col], errors="coerce")

        # Tag every row so multi-player frames can be concatenated later.
        df['Player'] = player_name

        return df
    except Exception as e:
        st.error(f"Error fetching stats for {player_name} with BRScraper: {e}")
        return pd.DataFrame()
|
| 129 |
|
|
|
|
| 130 |
@st.cache_data(ttl=300)
def get_team_season_stats_brscraper(year):
    """Fetch per-game team stats for a season via BRScraper.

    Isolates team rows from BRScraper's combined output, renames
    Basketball-Reference headers to the app's canonical names, coerces
    stat columns to numeric, and strips playoff markers ('*') from team
    names.

    Args:
        year: Season end year (e.g. 2025 for the 2024–25 season).

    Returns:
        pd.DataFrame: One row per team with a clean 'Team' column, or an
        empty DataFrame on failure.
    """
    try:
        df = nba.get_stats(year, info='per_game', rename=False)
        if df.empty:
            return pd.DataFrame()

        # Normalize headers before any filtering.
        df.columns = [str(col).strip() for col in df.columns]
        if 'Tm' in df.columns:
            df = df.rename(columns={'Tm': 'Team'})

        # NOTE(review): assumes BRScraper's get_stats() mixes player and
        # team rows and that team rows have a blank 'Player' cell — confirm
        # against the library's actual output.
        if 'Player' in df.columns:
            team_df = df[df['Player'].isna()].copy()
        else:
            team_df = df.copy()

        # Fallback: identify team rows by a non-numeric 'Rk' cell.
        if 'Team' not in team_df.columns or team_df['Team'].isna().all():
            if 'Rk' in df.columns:
                team_df = df[pd.to_numeric(df['Rk'], errors='coerce').isna()].copy()
                if 'Player' in team_df.columns:
                    team_df = team_df.drop(columns=['Player'])
            else:
                st.warning(f"Could not reliably identify team rows for year {year}. Returning all data.")
                team_df = df.copy()

        if team_df.empty:
            return pd.DataFrame()

        # Map Basketball-Reference headers to the app's canonical names;
        # rename() silently ignores keys that are absent.
        column_mapping = {
            'G': 'GP', 'MP': 'MIN',
            'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
            'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
            'PF': 'PF', 'PTS': 'PTS',
            'Rk': 'RANK', 'W': 'WINS', 'L': 'LOSSES', 'W/L%': 'WIN_LOSS_PCT',
            'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
            '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
            'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB',
        }
        team_df = team_df.rename(columns=column_mapping)

        # Coerce stat columns to numeric; team identifiers stay as strings.
        non_numeric_cols = {"Team", "Tm", "RANK"}
        for col in team_df.columns:
            if col not in non_numeric_cols:
                team_df[col] = pd.to_numeric(team_df[col], errors="coerce")

        # 'Tm' was already renamed to 'Team' above, so only 'Team' can
        # exist at this point (the old 'Tm' fallback branch was dead code).
        if 'Team' not in team_df.columns:
            st.warning(f"Could not find a 'Team' or 'Tm' column in the processed team data for {year}.")
            return pd.DataFrame()

        # regex=False is required: '*' alone is an invalid regex, so the
        # old call could raise under pandas versions where str.replace
        # defaulted to regex=True.
        team_df['Team'] = team_df['Team'].astype(str).str.replace('*', '', regex=False).str.strip()

        return team_df
    except Exception as e:
        st.error(f"Error fetching team stats for {year} with BRScraper: {e}")
        return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 221 |
# Perplexity integration
|
| 222 |
PERP_KEY = os.getenv("PERPLEXITY_API_KEY")
|
|
|
|
| 228 |
return ""
|
| 229 |
hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
|
| 230 |
payload = {
|
| 231 |
+
"model":"sonar-medium-online", # Changed to a commonly available online model
|
| 232 |
"messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
|
| 233 |
"max_tokens":max_tokens, "temperature":temp
|
| 234 |
}
|
|
|
|
| 250 |
st.error(f"An unexpected error occurred with Perplexity API: {e}")
|
| 251 |
return ""
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 254 |
# Plotting functions (retained from previous version)
|
| 255 |
def create_comparison_chart(data, players_names, metric):
|
|
|
|
| 333 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 334 |
def player_vs_player():
|
| 335 |
st.markdown('<h2 class="section-header">Player vs Player Comparison</h2>', unsafe_allow_html=True)
|
| 336 |
+
idx = get_player_index_brscraper() # Use BRScraper for player index
|
| 337 |
names = idx['name'].tolist()
|
| 338 |
selected_players = st.multiselect("Select Players (up to 4)", names, max_selections=4)
|
| 339 |
|
|
|
|
| 347 |
|
| 348 |
stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
|
| 349 |
all_player_season_data = [] # To store individual season rows for each player
|
| 350 |
+
players_with_no_data = []
|
|
|
|
| 351 |
|
| 352 |
with st.spinner("Fetching player data..."):
|
| 353 |
for player_name in selected_players:
|
| 354 |
+
df_player_career = get_player_career_stats_brscraper(player_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
if not df_player_career.empty:
|
| 357 |
# Filter for selected seasons. The player_season_stats function
|
|
|
|
| 359 |
filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
|
| 360 |
|
| 361 |
if not filtered_df.empty:
|
| 362 |
+
# 'Player' column is already added by get_player_career_stats_brscraper
|
| 363 |
all_player_season_data.append(filtered_df)
|
| 364 |
else:
|
| 365 |
+
players_with_no_data.append(player_name)
|
|
|
|
| 366 |
else:
|
| 367 |
+
players_with_no_data.append(player_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
|
| 369 |
# Report on players with no data for selected seasons
|
| 370 |
+
if players_with_no_data:
|
| 371 |
+
st.info(f"No data found for the selected seasons ({', '.join(selected_seasons)}) for: {', '.join(players_with_no_data)}. This might be because the season hasn't started or data is not yet available, or the player name was not found by BRScraper.")
|
| 372 |
|
| 373 |
if not all_player_season_data:
|
| 374 |
st.error("No data available for any of the selected players and seasons to display. Please adjust your selections.")
|
|
|
|
| 503 |
# Extract the end year from the season string (e.g., "2024β25" -> 2025)
|
| 504 |
year_for_team_stats = int(selected_season_str.split('β')[1])
|
| 505 |
|
| 506 |
+
tm_df = get_team_season_stats_brscraper(year_for_team_stats) # Use BRScraper for team stats
|
| 507 |
if tm_df.empty:
|
| 508 |
+
st.info(f"No team data available for the {selected_season_str} season. This might be because the season hasn't started or data is not yet available, or BRScraper could not fetch it.")
|
| 509 |
return
|
| 510 |
|
| 511 |
+
teams = tm_df['Team'].unique().tolist() # Use 'Team' column from BRScraper output
|
| 512 |
selected_teams = st.multiselect("Select Teams (up to 4)", teams, max_selections=4)
|
| 513 |
|
| 514 |
if st.button("Run Comparison"):
|
|
|
|
| 521 |
|
| 522 |
with st.spinner("Fetching team data..."):
|
| 523 |
for t in selected_teams:
|
| 524 |
+
df = tm_df[tm_df.Team == t].copy() # Filter by 'Team' column
|
| 525 |
if not df.empty:
|
| 526 |
# For team stats, we usually get one row per team per season from team_per_game
|
| 527 |
# So, no need for .mean() here, just take the row.
|
|
|
|
| 528 |
df['Season'] = selected_season_str # Add 'Season' column
|
| 529 |
stats.append(df.iloc[0].to_dict()) # Convert the single row to dict
|
| 530 |
else:
|
|
|
|
| 632 |
|
| 633 |
def young_projections():
|
| 634 |
st.markdown('<h2 class="section-header">Young Player Projections</h2>', unsafe_allow_html=True)
|
| 635 |
+
all_p_df = get_player_index_brscraper()
|
| 636 |
all_p = all_p_df['name'].tolist()
|
| 637 |
sp = st.selectbox("Select or enter player", [""]+all_p)
|
| 638 |
if not sp:
|
|
|
|
| 664 |
|
| 665 |
def similar_players():
|
| 666 |
st.markdown('<h2 class="section-header">Similar Players Finder</h2>', unsafe_allow_html=True)
|
| 667 |
+
all_p_df = get_player_index_brscraper()
|
| 668 |
all_p = all_p_df['name'].tolist()
|
| 669 |
tp = st.selectbox("Target Player", all_p)
|
| 670 |
crit = st.multiselect("Criteria",["Position","Height/Weight","Playing Style","Statistical Profile","Age/Experience"],default=["Playing Style","Statistical Profile"])
|