Bb ref
Browse files- src/streamlit_app.py +314 -88
src/streamlit_app.py
CHANGED
|
@@ -5,33 +5,14 @@ import plotly.express as px
|
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
from plotly.subplots import make_subplots
|
| 7 |
import requests
|
| 8 |
-
from
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
leagueleaders, playerestimatedmetrics, teamestimatedmetrics
|
| 12 |
-
)
|
| 13 |
-
from nba_api.stats.static import players, teams
|
| 14 |
-
import time
|
| 15 |
from datetime import datetime
|
| 16 |
import json
|
| 17 |
import os
|
| 18 |
|
| 19 |
-
|
| 20 |
-
# The error "PermissionError: [Errno 13] Permission denied: '/.streamlit'"
|
| 21 |
-
# occurs because Streamlit tries to write to a non-writable directory.
|
| 22 |
-
# To fix this in your Dockerfile or when running your Docker container,
|
| 23 |
-
# you should set one of the following environment variables:
|
| 24 |
-
#
|
| 25 |
-
# Option 1 (Recommended): Set the HOME environment variable to a writable path.
|
| 26 |
-
# In your Dockerfile: ENV HOME /tmp
|
| 27 |
-
# Or when running: docker run -e HOME=/tmp your_image_name
|
| 28 |
-
#
|
| 29 |
-
# Option 2: Disable Streamlit's usage statistics gathering.
|
| 30 |
-
# In your Dockerfile: ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=False
|
| 31 |
-
# Or when running: docker run -e STREAMLIT_BROWSER_GATHER_USAGE_STATS=False your_image_name
|
| 32 |
-
#
|
| 33 |
-
# Option 1 is generally more robust as it provides a writable home directory
|
| 34 |
-
# for any application that might need it.
|
| 35 |
# -----------------------------------------------------------------------
|
| 36 |
|
| 37 |
# Page configuration
|
|
@@ -75,6 +56,34 @@ if 'chat_history' not in st.session_state:
|
|
| 75 |
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
| 76 |
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
| 77 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 78 |
# ---------- Perplexity API Functions ----------
|
| 79 |
def get_perplexity_response(api_key, prompt, system_message="You are a helpful NBA analyst AI.", max_tokens=500, temperature=0.2):
|
| 80 |
"""
|
|
@@ -89,7 +98,7 @@ def get_perplexity_response(api_key, prompt, system_message="You are a helpful N
|
|
| 89 |
'Content-Type': 'application/json'
|
| 90 |
}
|
| 91 |
payload = {
|
| 92 |
-
'model': 'sonar-pro', #
|
| 93 |
'messages': [
|
| 94 |
{'role': 'system', 'content': system_message},
|
| 95 |
{'role': 'user', 'content': prompt}
|
|
@@ -117,36 +126,236 @@ def get_perplexity_response(api_key, prompt, system_message="You are a helpful N
|
|
| 117 |
st.error(f"An unexpected error occurred with Perplexity API: {e}")
|
| 118 |
return None
|
| 119 |
|
|
|
|
|
|
|
| 120 |
@st.cache_data(ttl=3600)
|
| 121 |
-
def
|
| 122 |
-
"""
|
| 123 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
@st.cache_data(ttl=3600)
|
| 126 |
-
def
|
| 127 |
-
"""
|
| 128 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
@st.cache_data(ttl=300)
|
| 131 |
-
def
|
| 132 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
try:
|
| 134 |
-
|
| 135 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 136 |
except Exception as e:
|
| 137 |
-
st.error(f"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
return pd.DataFrame()
|
| 139 |
|
| 140 |
@st.cache_data(ttl=300)
|
| 141 |
-
def
|
| 142 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 143 |
try:
|
| 144 |
-
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
except Exception as e:
|
| 147 |
-
st.error(f"
|
| 148 |
return pd.DataFrame()
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
def create_comparison_chart(data, players_names, metric):
|
| 151 |
"""Create comparison chart for players"""
|
| 152 |
fig = go.Figure()
|
|
@@ -239,7 +448,7 @@ def player_comparison_page():
|
|
| 239 |
st.markdown('<h2 class="section-header">Player vs Player Comparison</h2>', unsafe_allow_html=True)
|
| 240 |
|
| 241 |
# Get all players
|
| 242 |
-
all_players = get_all_players()
|
| 243 |
player_names = [player['full_name'] for player in all_players]
|
| 244 |
|
| 245 |
col1, col2 = st.columns(2)
|
|
@@ -254,7 +463,7 @@ def player_comparison_page():
|
|
| 254 |
with col2:
|
| 255 |
seasons = st.multiselect(
|
| 256 |
"Select Seasons",
|
| 257 |
-
|
| 258 |
default=["2023-24"]
|
| 259 |
)
|
| 260 |
|
|
@@ -264,13 +473,6 @@ def player_comparison_page():
|
|
| 264 |
st.warning("Please select at least one player to compare.")
|
| 265 |
return
|
| 266 |
|
| 267 |
-
# Get player IDs
|
| 268 |
-
player_ids = []
|
| 269 |
-
for name in selected_players:
|
| 270 |
-
player_id = next((p['id'] for p in all_players if p['full_name'] == name), None)
|
| 271 |
-
if player_id:
|
| 272 |
-
player_ids.append(player_id)
|
| 273 |
-
|
| 274 |
# Fetch and display stats
|
| 275 |
stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
|
| 276 |
|
|
@@ -278,19 +480,20 @@ def player_comparison_page():
|
|
| 278 |
st.subheader("Basic Statistics")
|
| 279 |
basic_stats_data = []
|
| 280 |
|
| 281 |
-
for
|
| 282 |
-
|
| 283 |
-
|
| 284 |
-
|
| 285 |
-
|
| 286 |
-
|
| 287 |
-
|
| 288 |
-
|
| 289 |
-
|
|
|
|
| 290 |
|
| 291 |
if basic_stats_data:
|
| 292 |
comparison_df = pd.DataFrame(basic_stats_data)
|
| 293 |
-
basic_cols = ['PLAYER_NAME', 'GP', 'MIN', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'FG_PCT', 'FT_PCT', 'FG3_PCT']
|
| 294 |
display_cols = [col for col in basic_cols if col in comparison_df.columns]
|
| 295 |
st.dataframe(comparison_df[display_cols].round(2), use_container_width=True)
|
| 296 |
else:
|
|
@@ -299,15 +502,20 @@ def player_comparison_page():
|
|
| 299 |
with stats_tabs[1]:
|
| 300 |
st.subheader("Advanced Statistics")
|
| 301 |
if basic_stats_data:
|
| 302 |
-
advanced_df =
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 303 |
# Calculate TS% (True Shooting Percentage)
|
| 304 |
if all(col in advanced_df.columns for col in ['PTS', 'FGA', 'FTA']):
|
| 305 |
-
advanced_df['
|
| 306 |
lambda row: row['PTS'] / (2 * (row['FGA'] + 0.44 * row['FTA'])) if (row['FGA'] + 0.44 * row['FTA']) != 0 else 0,
|
| 307 |
axis=1
|
| 308 |
)
|
| 309 |
|
| 310 |
-
advanced_cols = ['PLAYER_NAME', 'PTS', 'REB', 'AST', 'FG_PCT', '
|
| 311 |
display_cols = [col for col in advanced_cols if col in advanced_df.columns]
|
| 312 |
st.dataframe(advanced_df[display_cols].round(3), use_container_width=True)
|
| 313 |
else:
|
|
@@ -317,6 +525,7 @@ def player_comparison_page():
|
|
| 317 |
st.subheader("Player Comparison Charts")
|
| 318 |
|
| 319 |
if basic_stats_data:
|
|
|
|
| 320 |
metrics = ['PTS', 'REB', 'AST', 'FG_PCT']
|
| 321 |
available_metrics = [m for m in metrics if m in comparison_df.columns]
|
| 322 |
|
|
@@ -324,35 +533,53 @@ def player_comparison_page():
|
|
| 324 |
selected_metric = st.selectbox("Select Metric to Visualize", available_metrics)
|
| 325 |
|
| 326 |
if selected_metric:
|
| 327 |
-
# Bar chart comparison
|
| 328 |
-
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 335 |
st.plotly_chart(fig, use_container_width=True)
|
| 336 |
|
| 337 |
# Radar chart for multi-metric comparison
|
| 338 |
-
# It's crucial to normalize data for radar charts if metrics have vastly different scales.
|
| 339 |
radar_metrics_for_chart = ['PTS', 'REB', 'AST', 'STL', 'BLK']
|
| 340 |
radar_metrics_for_chart = [m for m in radar_metrics_for_chart if m in comparison_df.columns]
|
| 341 |
|
| 342 |
if len(radar_metrics_for_chart) >= 3:
|
| 343 |
radar_data = {}
|
| 344 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 345 |
|
| 346 |
# Simple min-max scaling for radar chart visualization (0-100)
|
| 347 |
for col in radar_metrics_for_chart:
|
| 348 |
-
min_val =
|
| 349 |
-
max_val =
|
| 350 |
if max_val > min_val:
|
| 351 |
-
|
| 352 |
else:
|
| 353 |
-
|
| 354 |
|
| 355 |
-
for _, row in
|
| 356 |
radar_data[row['PLAYER_NAME']] = {
|
| 357 |
metric: row[metric] for metric in radar_metrics_for_chart
|
| 358 |
}
|
|
@@ -373,7 +600,7 @@ def player_comparison_page():
|
|
| 373 |
def team_comparison_page():
|
| 374 |
st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
|
| 375 |
|
| 376 |
-
all_teams = get_all_teams()
|
| 377 |
team_names = [team['full_name'] for team in all_teams]
|
| 378 |
|
| 379 |
col1, col2 = st.columns(2)
|
|
@@ -388,7 +615,7 @@ def team_comparison_page():
|
|
| 388 |
with col2:
|
| 389 |
seasons = st.multiselect(
|
| 390 |
"Select Seasons",
|
| 391 |
-
|
| 392 |
default=["2023-24"]
|
| 393 |
)
|
| 394 |
|
|
@@ -401,15 +628,14 @@ def team_comparison_page():
|
|
| 401 |
team_stats_data = []
|
| 402 |
|
| 403 |
for team_name in selected_teams:
|
| 404 |
-
|
| 405 |
-
|
| 406 |
-
|
| 407 |
-
|
| 408 |
-
|
| 409 |
-
|
| 410 |
-
|
| 411 |
-
|
| 412 |
-
team_stats_data.append(team_avg)
|
| 413 |
|
| 414 |
if team_stats_data:
|
| 415 |
team_df = pd.DataFrame(team_stats_data)
|
|
|
|
| 5 |
import plotly.graph_objects as go
|
| 6 |
from plotly.subplots import make_subplots
|
| 7 |
import requests
|
| 8 |
+
from bs4 import BeautifulSoup # New import
|
| 9 |
+
import re # New import for regex
|
| 10 |
+
import time # For rate limiting
|
|
|
|
|
|
|
|
|
|
|
|
|
| 11 |
from datetime import datetime
|
| 12 |
import json
|
| 13 |
import os
|
| 14 |
|
| 15 |
+
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
# -----------------------------------------------------------------------
|
| 17 |
|
| 18 |
# Page configuration
|
|
|
|
| 56 |
PERPLEXITY_API_KEY = os.getenv("PERPLEXITY_API_KEY")
|
| 57 |
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
|
| 58 |
|
| 59 |
+
# Root URL of Basketball-Reference; scraped paths are appended to it.
BBR_BASE_URL = "https://www.basketball-reference.com"

# Full team name -> Basketball-Reference (BBR) team abbreviation.
# Kept as a hardcoded table because this is more reliable than scraping
# the abbreviations.
TEAM_NAME_TO_BBR_ABBR = {
    "Atlanta Hawks": "ATL",
    "Boston Celtics": "BOS",
    "Brooklyn Nets": "BRK",
    "Charlotte Hornets": "CHO",
    "Chicago Bulls": "CHI",
    "Cleveland Cavaliers": "CLE",
    "Dallas Mavericks": "DAL",
    "Denver Nuggets": "DEN",
    "Detroit Pistons": "DET",
    "Golden State Warriors": "GSW",
    "Houston Rockets": "HOU",
    "Indiana Pacers": "IND",
    "Los Angeles Clippers": "LAC",
    "Los Angeles Lakers": "LAL",
    "Memphis Grizzlies": "MEM",
    "Miami Heat": "MIA",
    "Milwaukee Bucks": "MIL",
    "Minnesota Timberwolves": "MIN",
    "New Orleans Pelicans": "NOP",
    "New York Knicks": "NYK",
    "Oklahoma City Thunder": "OKC",
    "Orlando Magic": "ORL",
    "Philadelphia 76ers": "PHI",
    "Phoenix Suns": "PHO",
    "Portland Trail Blazers": "POR",
    "Sacramento Kings": "SAC",
    "San Antonio Spurs": "SAS",
    "Toronto Raptors": "TOR",
    "Utah Jazz": "UTA",
    "Washington Wizards": "WAS",
}
|
| 76 |
+
|
| 77 |
+
# Season label -> year string used in BBR URLs, i.e. the year in which the
# season ends (e.g., "2023-24" -> "2024").
# Covers 2023-24 back to 2009-10, newest season first.
BBR_SEASON_URL_MAP = {
    f"{end_year - 1}-{end_year % 100:02d}": str(end_year)
    for end_year in range(2024, 2009, -1)
}
|
| 85 |
+
|
| 86 |
+
|
| 87 |
# ---------- Perplexity API Functions ----------
|
| 88 |
def get_perplexity_response(api_key, prompt, system_message="You are a helpful NBA analyst AI.", max_tokens=500, temperature=0.2):
|
| 89 |
"""
|
|
|
|
| 98 |
'Content-Type': 'application/json'
|
| 99 |
}
|
| 100 |
payload = {
|
| 101 |
+
'model': 'sonar-pro', # keep 'sonar-pro'
|
| 102 |
'messages': [
|
| 103 |
{'role': 'system', 'content': system_message},
|
| 104 |
{'role': 'user', 'content': prompt}
|
|
|
|
| 126 |
st.error(f"An unexpected error occurred with Perplexity API: {e}")
|
| 127 |
return None
|
| 128 |
|
| 129 |
+
# ---------- Basketball-Reference Data Fetching Functions ----------
|
| 130 |
+
|
| 131 |
@st.cache_data(ttl=3600)
def get_all_players_bbr():
    """
    Scrape the player list from Basketball-Reference's 2024 per-game stats page.

    Only players listed on that one page are returned, not every historical
    player; a comprehensive list would need a scrape of the A-Z player index
    pages.

    Returns:
        A list of ``{'full_name': ..., 'id': ...}`` dicts with one entry per
        distinct player id, where ``id`` is the last path segment of the
        player's link without ``.html`` (e.g. ``jamesle01``). The list is
        empty when the page cannot be fetched or parsed; the problem is then
        reported through ``st.error`` / ``st.warning``.
    """
    players_list = []
    seen_ids = set()
    url = f"{BBR_BASE_URL}/leagues/NBA_2024_per_game.html"  # Using 2024 for current season
    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')
        table = soup.find('table', {'id': 'per_game_stats'})
        if table:
            for row in table.find_all('tr')[1:]:  # Skip header row
                # Only accept an anchor that really points at a player page.
                # Taking the first <a> blindly could pick up another link, and
                # an anchor without href would raise and abort the whole parse.
                player_name_tag = row.find(
                    'a',
                    href=lambda href: bool(href) and href.startswith('/players/'),
                )
                if not player_name_tag:
                    continue
                # BBR player ID is part of the href (e.g., /players/j/jamesle01.html)
                player_bbr_id = player_name_tag['href'].split('/')[-1].replace('.html', '')
                # The same player link can occur in several rows (presumably one
                # row per team for traded players - TODO confirm); keep one
                # entry per id so the selection widget gets no duplicates.
                if not player_bbr_id or player_bbr_id in seen_ids:
                    continue
                seen_ids.add(player_bbr_id)
                players_list.append({
                    'full_name': player_name_tag.get_text(),
                    'id': player_bbr_id,
                })
            st.success(f"Loaded {len(players_list)} players from Basketball-Reference.")
        else:
            st.warning(f"Could not find player stats table on {url}")
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching player list from Basketball-Reference: {e}")
    except Exception as e:
        # Deliberate best-effort: any parsing problem is shown to the user and
        # whatever was collected so far is still returned.
        st.error(f"An unexpected error occurred while parsing player list: {e}")
    return players_list
|
| 161 |
|
| 162 |
@st.cache_data(ttl=3600)
def get_all_teams_bbr():
    """
    Build the NBA team list from the hardcoded TEAM_NAME_TO_BBR_ABBR mapping.

    Returns:
        A list of ``{'full_name': ..., 'id': ...}`` dicts in mapping order;
        the BBR abbreviation is used as the id for consistency.
    """
    return [
        {'full_name': team_full_name, 'id': team_abbr}
        for team_full_name, team_abbr in TEAM_NAME_TO_BBR_ABBR.items()
    ]
|
| 171 |
|
| 172 |
@st.cache_data(ttl=300)
def get_player_stats_bbr(player_name, season="2023-24"):
    """
    Scrape a player's per-game table from Basketball-Reference and keep one season.

    Args:
        player_name: Player's full name. Used as the BBR search term and
            written into the PLAYER_NAME column of the result.
        season: Season label compared against the table's 'Season' column
            (e.g. "2023-24").

    Returns:
        A DataFrame holding the rows of the 'per_game' table whose season
        matches, with columns renamed to the app's names (GP, MIN, REB, ...)
        plus PLAYER_NAME and SEASON_ID. An empty DataFrame is returned, after
        an ``st.warning`` / ``st.info`` / ``st.error`` message, when the
        player page, the table or the season cannot be found, or when a
        request or the parsing fails.
    """
    # Local imports keep this block self-contained (standard library only).
    from io import StringIO
    from urllib.parse import urlparse

    # Step 1: Find the player's BBR URL by searching
    search_url = f"{BBR_BASE_URL}/search/search.fcgi"
    player_url = None
    player_page = None
    try:
        # Let requests build the query string so that accents, apostrophes,
        # '&' etc. in the name are URL-encoded, not only spaces.
        search_response = requests.get(search_url, params={'search': player_name}, timeout=10)
        search_response.raise_for_status()

        if urlparse(search_response.url).path.startswith('/players/'):
            # The search was redirected straight to a player page (presumably
            # when there is a single unambiguous match - TODO confirm). There
            # is no result list to parse, and the page is already downloaded.
            player_url = search_response.url
            player_page = search_response
        else:
            search_soup = BeautifulSoup(search_response.content, 'lxml')
            # Look for a link to the player's page in the search results
            # This assumes the first search result is the correct player
            player_link_div = search_soup.find('div', {'class': 'search-item-name'})
            if player_link_div:
                player_link = player_link_div.find('a')
                if player_link and player_link.get('href', '').startswith('/players/'):
                    player_url = f"{BBR_BASE_URL}{player_link['href']}"

        if not player_url:
            st.warning(f"Could not find Basketball-Reference page for {player_name}.")
            return pd.DataFrame()
    except requests.exceptions.RequestException as e:
        st.error(f"Error searching for player {player_name} on Basketball-Reference: {e}")
        return pd.DataFrame()
    except Exception as e:
        st.error(f"An unexpected error occurred during player search for {player_name}: {e}")
        return pd.DataFrame()

    # Step 2: Scrape the player's page for career stats
    try:
        if player_page is None:
            player_page = requests.get(player_url, timeout=10)
            player_page.raise_for_status()
        soup = BeautifulSoup(player_page.content, 'lxml')

        # Basketball-Reference often hides tables in comments.
        # Find the comment containing the 'per_game' table
        comment = soup.find(string=lambda text: isinstance(text, str) and 'id="per_game"' in text)
        if comment:
            soup_from_comment = BeautifulSoup(comment, 'lxml')
            table = soup_from_comment.find('table', {'id': 'per_game'})
        else:
            table = soup.find('table', {'id': 'per_game'})  # Try direct find if not in comment

        if not table:
            st.warning(f"Could not find 'per_game' table for {player_name} on Basketball-Reference.")
            return pd.DataFrame()

        # Wrap the markup in StringIO: passing literal HTML strings to
        # read_html is deprecated in recent pandas versions.
        df = pd.read_html(StringIO(str(table)))[0]

        # Flatten multi-level headers into single names, then trim whitespace
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = ['_'.join(col).strip() for col in df.columns.values]
        else:
            df.columns = [str(col).strip() for col in df.columns.values]

        # Standardize column names to match the original app's expectations.
        # Columns whose BBR name already matches (GS, FGA, FTA, AST, STL, BLK,
        # PF, PTS) need no entry here.
        df = df.rename(columns={
            'Season': 'SEASON_ID_BBR',  # Keep original BBR season for filtering
            'Age': 'AGE', 'Tm': 'TEAM_ABBREVIATION', 'Lg': 'LEAGUE_ID', 'Pos': 'POSITION',
            'G': 'GP', 'MP': 'MIN',
            'FG': 'FGM', 'FG%': 'FG_PCT',
            '3P': 'FG3M', '3PA': 'FG3A', '3P%': 'FG3_PCT',
            '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT',
            'eFG%': 'EFG_PCT', 'FT': 'FTM', 'FT%': 'FT_PCT',
            'ORB': 'OREB', 'DRB': 'DREB', 'TRB': 'REB',
            'TOV': 'TO',
        })

        # Filter for the specific season.
        # The table's season values look like '2023-24' (not '2024'), so the
        # caller's season string is compared directly.
        filtered_df = df[df['SEASON_ID_BBR'] == season].copy()

        if filtered_df.empty:
            st.info(f"No stats found for {player_name} in season {season} on Basketball-Reference.")
            return pd.DataFrame()

        # Add PLAYER_NAME and SEASON_ID for consistency with original code
        filtered_df['PLAYER_NAME'] = player_name
        filtered_df['SEASON_ID'] = season  # Keep original season format
        return filtered_df
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching player stats for {player_name} from Basketball-Reference: {e}")
        return pd.DataFrame()
    except Exception as e:
        # Also covers a missing 'Season' column (KeyError) and read_html failures.
        st.error(f"An unexpected error occurred while parsing player stats for {player_name}: {e}")
        return pd.DataFrame()
|
| 262 |
|
| 263 |
@st.cache_data(ttl=300)
def get_team_stats_bbr(team_name, season="2023-24"):
    """
    Scrape one team's season stats row from Basketball-Reference.

    Args:
        team_name: Full team name; must be a key of TEAM_NAME_TO_BBR_ABBR.
        season: Season label; must be a key of BBR_SEASON_URL_MAP
            (e.g. "2023-24").

    Returns:
        A one-row DataFrame taken from the 'team_and_opponent' table: the row
        labelled 'Team' when one can be identified, otherwise the table's
        first row. Columns are renamed to the app's names and TEAM_NAME /
        SEASON are added. An empty DataFrame is returned, after an
        ``st.error`` / ``st.warning`` / ``st.info`` message, when the team or
        season is unknown, the table is missing or empty, or a request or the
        parsing fails.
    """
    # Local import keeps this block self-contained (standard library only).
    from io import StringIO

    team_abbr = TEAM_NAME_TO_BBR_ABBR.get(team_name)
    if not team_abbr:
        st.error(f"Could not find abbreviation for team: {team_name}")
        return pd.DataFrame()

    bbr_season_year = BBR_SEASON_URL_MAP.get(season)
    if not bbr_season_year:
        st.warning(f"Invalid season format for Basketball-Reference: {season}")
        return pd.DataFrame()

    # NOTE(review): a single abbreviation per franchise is used for every
    # season; BBR may use different abbreviations for older seasons of
    # relocated/renamed franchises - confirm for the earliest seasons offered.
    url = f"{BBR_BASE_URL}/teams/{team_abbr}/{bbr_season_year}.html"

    try:
        response = requests.get(url, timeout=10)
        response.raise_for_status()
        soup = BeautifulSoup(response.content, 'lxml')

        # The table may be wrapped in an HTML comment; look there first and
        # fall back to a direct lookup in the parsed page.
        comment = soup.find(string=lambda text: isinstance(text, str) and 'id="team_and_opponent"' in text)
        if comment:
            soup_from_comment = BeautifulSoup(comment, 'lxml')
            table = soup_from_comment.find('table', {'id': 'team_and_opponent'})
        else:
            table = soup.find('table', {'id': 'team_and_opponent'})  # Try direct find

        if not table:
            st.warning(f"Could not find team stats table for {team_name} on Basketball-Reference.")
            return pd.DataFrame()

        # Wrap the markup in StringIO: passing literal HTML strings to
        # read_html is deprecated in recent pandas versions.
        df = pd.read_html(StringIO(str(table)))[0]

        # Flatten multi-level headers into single names, then trim whitespace
        if isinstance(df.columns, pd.MultiIndex):
            df.columns = ['_'.join(col).strip() for col in df.columns.values]
        else:
            df.columns = [str(col).strip() for col in df.columns.values]

        # Standardize column names; columns whose BBR name already matches
        # (FGA, FTA, AST, STL, BLK, PF, PTS) need no entry here.
        df = df.rename(columns={
            'G': 'GP', 'MP': 'MIN', 'FG': 'FGM', 'FG%': 'FG_PCT',
            '3P': 'FG3M', '3PA': 'FG3A', '3P%': 'FG3_PCT', 'FT': 'FTM', 'FT%': 'FT_PCT',
            'TRB': 'REB', 'TOV': 'TO',
        })

        if df.empty:
            st.info(f"No stats found for team {team_name} in season {season} on Basketball-Reference.")
            return pd.DataFrame()

        # The 'team_and_opponent' table has 'Team' and 'Opponent' rows; we want
        # the 'Team' row. Indexing df['Rk'] unconditionally raised KeyError when
        # that column was absent, which skipped the first-row fallback below,
        # so use 'Rk' when present and the first column otherwise.
        label_col = 'Rk' if 'Rk' in df.columns else df.columns[0]
        row_labels = df[label_col].astype(str).str.strip()
        team_stats_row = df[row_labels == 'Team'].copy()
        if team_stats_row.empty:
            # Fallback: no row is labelled 'Team', take the first row
            team_stats_row = df.iloc[[0]].copy()

        # df is non-empty here, so a row is always available at this point.
        team_stats_row['TEAM_NAME'] = team_name
        team_stats_row['SEASON'] = season
        return team_stats_row
    except requests.exceptions.RequestException as e:
        st.error(f"Error fetching team stats for {team_name} from Basketball-Reference: {e}")
        return pd.DataFrame()
    except Exception as e:
        st.error(f"An unexpected error occurred while parsing team stats for {team_name}: {e}")
        return pd.DataFrame()
|
| 336 |
|
| 337 |
+
# Redefine the main data fetching functions to use Basketball-Reference versions
|
| 338 |
+
@st.cache_data(ttl=3600)
def get_all_players():
    """Return the player list produced by the Basketball-Reference scraper."""
    bbr_players = get_all_players_bbr()
    return bbr_players
|
| 342 |
+
|
| 343 |
+
@st.cache_data(ttl=3600)
def get_all_teams():
    """Return the team list produced by the Basketball-Reference helper."""
    bbr_teams = get_all_teams_bbr()
    return bbr_teams
|
| 347 |
+
|
| 348 |
+
@st.cache_data(ttl=300)
def get_player_stats(player_name, season="2023-24"):
    """Return one player's stats for a season via the Basketball-Reference scraper."""
    player_season_df = get_player_stats_bbr(player_name, season)
    return player_season_df
|
| 352 |
+
|
| 353 |
+
@st.cache_data(ttl=300)
def get_team_stats(team_name, season="2023-24"):
    """Return one team's stats for a season via the Basketball-Reference scraper."""
    team_season_df = get_team_stats_bbr(team_name, season)
    return team_season_df
|
| 357 |
+
|
| 358 |
+
|
| 359 |
def create_comparison_chart(data, players_names, metric):
|
| 360 |
"""Create comparison chart for players"""
|
| 361 |
fig = go.Figure()
|
|
|
|
| 448 |
st.markdown('<h2 class="section-header">Player vs Player Comparison</h2>', unsafe_allow_html=True)
|
| 449 |
|
| 450 |
# Get all players
|
| 451 |
+
all_players = get_all_players() # This now calls get_all_players_bbr()
|
| 452 |
player_names = [player['full_name'] for player in all_players]
|
| 453 |
|
| 454 |
col1, col2 = st.columns(2)
|
|
|
|
| 463 |
with col2:
|
| 464 |
seasons = st.multiselect(
|
| 465 |
"Select Seasons",
|
| 466 |
+
list(BBR_SEASON_URL_MAP.keys()), # Use keys from the BBR season map
|
| 467 |
default=["2023-24"]
|
| 468 |
)
|
| 469 |
|
|
|
|
| 473 |
st.warning("Please select at least one player to compare.")
|
| 474 |
return
|
| 475 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
# Fetch and display stats
|
| 477 |
stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
|
| 478 |
|
|
|
|
| 480 |
st.subheader("Basic Statistics")
|
| 481 |
basic_stats_data = []
|
| 482 |
|
| 483 |
+
for player_name in selected_players: # Iterate by name directly
|
| 484 |
+
for season in seasons:
|
| 485 |
+
stats_df = get_player_stats(player_name, season) # Pass name and season
|
| 486 |
+
if not stats_df.empty:
|
| 487 |
+
# BBR returns one row per season, so no need to mean()
|
| 488 |
+
# Ensure numeric columns are actually numeric
|
| 489 |
+
for col in ['GP', 'MIN', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'FG_PCT', 'FT_PCT', 'FG3_PCT']:
|
| 490 |
+
if col in stats_df.columns:
|
| 491 |
+
stats_df[col] = pd.to_numeric(stats_df[col], errors='coerce')
|
| 492 |
+
basic_stats_data.append(stats_df.iloc[0].to_dict()) # Take the first (and only) row
|
| 493 |
|
| 494 |
if basic_stats_data:
|
| 495 |
comparison_df = pd.DataFrame(basic_stats_data)
|
| 496 |
+
basic_cols = ['PLAYER_NAME', 'SEASON_ID', 'GP', 'MIN', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'FG_PCT', 'FT_PCT', 'FG3_PCT']
|
| 497 |
display_cols = [col for col in basic_cols if col in comparison_df.columns]
|
| 498 |
st.dataframe(comparison_df[display_cols].round(2), use_container_width=True)
|
| 499 |
else:
|
|
|
|
| 502 |
with stats_tabs[1]:
|
| 503 |
st.subheader("Advanced Statistics")
|
| 504 |
if basic_stats_data:
|
| 505 |
+
advanced_df = pd.DataFrame(basic_stats_data).copy()
|
| 506 |
+
# Ensure numeric columns for calculations
|
| 507 |
+
for col in ['PTS', 'FGA', 'FTA']:
|
| 508 |
+
if col in advanced_df.columns:
|
| 509 |
+
advanced_df[col] = pd.to_numeric(advanced_df[col], errors='coerce')
|
| 510 |
+
|
| 511 |
# Calculate TS% (True Shooting Percentage)
|
| 512 |
if all(col in advanced_df.columns for col in ['PTS', 'FGA', 'FTA']):
|
| 513 |
+
advanced_df['TS_PCT'] = advanced_df.apply(
|
| 514 |
lambda row: row['PTS'] / (2 * (row['FGA'] + 0.44 * row['FTA'])) if (row['FGA'] + 0.44 * row['FTA']) != 0 else 0,
|
| 515 |
axis=1
|
| 516 |
)
|
| 517 |
|
| 518 |
+
advanced_cols = ['PLAYER_NAME', 'SEASON_ID', 'PTS', 'REB', 'AST', 'FG_PCT', 'TS_PCT'] if 'TS_PCT' in advanced_df.columns else ['PLAYER_NAME', 'SEASON_ID', 'PTS', 'REB', 'AST', 'FG_PCT']
|
| 519 |
display_cols = [col for col in advanced_cols if col in advanced_df.columns]
|
| 520 |
st.dataframe(advanced_df[display_cols].round(3), use_container_width=True)
|
| 521 |
else:
|
|
|
|
| 525 |
st.subheader("Player Comparison Charts")
|
| 526 |
|
| 527 |
if basic_stats_data:
|
| 528 |
+
comparison_df = pd.DataFrame(basic_stats_data)
|
| 529 |
metrics = ['PTS', 'REB', 'AST', 'FG_PCT']
|
| 530 |
available_metrics = [m for m in metrics if m in comparison_df.columns]
|
| 531 |
|
|
|
|
| 533 |
selected_metric = st.selectbox("Select Metric to Visualize", available_metrics)
|
| 534 |
|
| 535 |
if selected_metric:
|
| 536 |
+
# Bar chart comparison (for average over selected seasons if multiple seasons selected)
|
| 537 |
+
# Or for each season if only one player selected
|
| 538 |
+
if len(selected_players) == 1 and len(seasons) > 1:
|
| 539 |
+
# Show trend over seasons for one player
|
| 540 |
+
fig = px.line(
|
| 541 |
+
comparison_df[comparison_df['PLAYER_NAME'] == selected_players[0]],
|
| 542 |
+
x='SEASON_ID',
|
| 543 |
+
y=selected_metric,
|
| 544 |
+
title=f"{selected_players[0]} - {selected_metric} Trend",
|
| 545 |
+
markers=True
|
| 546 |
+
)
|
| 547 |
+
else:
|
| 548 |
+
# Average over selected seasons for multiple players for bar chart
|
| 549 |
+
avg_comparison_df = comparison_df.groupby('PLAYER_NAME')[available_metrics].mean().reset_index()
|
| 550 |
+
fig = px.bar(
|
| 551 |
+
avg_comparison_df,
|
| 552 |
+
x='PLAYER_NAME',
|
| 553 |
+
y=selected_metric,
|
| 554 |
+
title=f"Average {selected_metric} Comparison (Selected Seasons)",
|
| 555 |
+
color='PLAYER_NAME'
|
| 556 |
+
)
|
| 557 |
st.plotly_chart(fig, use_container_width=True)
|
| 558 |
|
| 559 |
# Radar chart for multi-metric comparison
|
|
|
|
| 560 |
radar_metrics_for_chart = ['PTS', 'REB', 'AST', 'STL', 'BLK']
|
| 561 |
radar_metrics_for_chart = [m for m in radar_metrics_for_chart if m in comparison_df.columns]
|
| 562 |
|
| 563 |
if len(radar_metrics_for_chart) >= 3:
|
| 564 |
radar_data = {}
|
| 565 |
+
# Use the averaged data for radar chart if multiple seasons
|
| 566 |
+
if len(seasons) > 1:
|
| 567 |
+
radar_source_df = comparison_df.groupby('PLAYER_NAME')[radar_metrics_for_chart].mean().reset_index()
|
| 568 |
+
else:
|
| 569 |
+
radar_source_df = comparison_df.copy()
|
| 570 |
+
|
| 571 |
+
scaled_radar_df = radar_source_df.copy()
|
| 572 |
|
| 573 |
# Simple min-max scaling for radar chart visualization (0-100)
|
| 574 |
for col in radar_metrics_for_chart:
|
| 575 |
+
min_val = scaled_radar_df[col].min()
|
| 576 |
+
max_val = scaled_radar_df[col].max()
|
| 577 |
if max_val > min_val:
|
| 578 |
+
scaled_radar_df[col] = ((scaled_radar_df[col] - min_val) / (max_val - min_val)) * 100
|
| 579 |
else:
|
| 580 |
+
scaled_radar_df[col] = 0 # Default if all values are the same
|
| 581 |
|
| 582 |
+
for _, row in scaled_radar_df.iterrows():
|
| 583 |
radar_data[row['PLAYER_NAME']] = {
|
| 584 |
metric: row[metric] for metric in radar_metrics_for_chart
|
| 585 |
}
|
|
|
|
| 600 |
def team_comparison_page():
|
| 601 |
st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
|
| 602 |
|
| 603 |
+
all_teams = get_all_teams() # This now calls get_all_teams_bbr()
|
| 604 |
team_names = [team['full_name'] for team in all_teams]
|
| 605 |
|
| 606 |
col1, col2 = st.columns(2)
|
|
|
|
| 615 |
with col2:
|
| 616 |
seasons = st.multiselect(
|
| 617 |
"Select Seasons",
|
| 618 |
+
list(BBR_SEASON_URL_MAP.keys()), # Use keys from the BBR season map
|
| 619 |
default=["2023-24"]
|
| 620 |
)
|
| 621 |
|
|
|
|
| 628 |
team_stats_data = []
|
| 629 |
|
| 630 |
for team_name in selected_teams:
|
| 631 |
+
for season in seasons:
|
| 632 |
+
stats_df = get_team_stats(team_name, season) # Pass name and season
|
| 633 |
+
if not stats_df.empty:
|
| 634 |
+
# Ensure numeric columns are actually numeric
|
| 635 |
+
for col in ['PTS', 'REB', 'AST', 'FG_PCT', 'FG3_PCT', 'FT_PCT']:
|
| 636 |
+
if col in stats_df.columns:
|
| 637 |
+
stats_df[col] = pd.to_numeric(stats_df[col], errors='coerce')
|
| 638 |
+
team_stats_data.append(stats_df.iloc[0].to_dict()) # Take the first (and only) row
|
|
|
|
| 639 |
|
| 640 |
if team_stats_data:
|
| 641 |
team_df = pd.DataFrame(team_stats_data)
|