Update src/streamlit_app.py
Browse files- src/streamlit_app.py +117 -27
src/streamlit_app.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
-
import numpy as np
|
| 4 |
import requests
|
| 5 |
import os
|
| 6 |
from datetime import datetime
|
|
@@ -106,11 +106,32 @@ def parse_table(html, table_id=None):
|
|
| 106 |
st.error(f"Error parsing table with pandas: {e}")
|
| 107 |
return pd.DataFrame()
|
| 108 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 109 |
@st.cache_data(ttl=300)
|
| 110 |
def get_team_stats_bs(year):
|
| 111 |
"""
|
| 112 |
-
Scrapes the league's per-game team stats table from
|
| 113 |
-
|
| 114 |
Returns cleaned DataFrame.
|
| 115 |
"""
|
| 116 |
url = f"https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html"
|
|
@@ -118,25 +139,58 @@ def get_team_stats_bs(year):
|
|
| 118 |
if not html:
|
| 119 |
return pd.DataFrame()
|
| 120 |
|
| 121 |
-
|
| 122 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 123 |
return pd.DataFrame()
|
| 124 |
|
| 125 |
-
#
|
| 126 |
if isinstance(df.columns, pd.MultiIndex):
|
| 127 |
-
df.columns = ['_'.join(col).strip() for col in
|
| 128 |
-
|
| 129 |
-
|
|
|
|
|
|
|
| 130 |
|
| 131 |
-
#
|
| 132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 133 |
return pd.DataFrame()
|
| 134 |
|
| 135 |
-
#
|
| 136 |
-
|
|
|
|
| 137 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 138 |
# Standardize column names
|
| 139 |
-
|
| 140 |
'G': 'GP', 'MP': 'MIN',
|
| 141 |
'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
|
| 142 |
'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
|
|
@@ -145,18 +199,23 @@ def get_team_stats_bs(year):
|
|
| 145 |
'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
|
| 146 |
'2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
|
| 147 |
'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
|
| 148 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 149 |
|
| 150 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
non_numeric_cols = {"Team", "RANK"}
|
| 152 |
for col in df.columns:
|
| 153 |
if col not in non_numeric_cols:
|
| 154 |
df[col] = pd.to_numeric(df[col], errors="coerce")
|
| 155 |
|
| 156 |
-
# Clean team names (remove asterisks)
|
| 157 |
-
if 'Team' in df.columns:
|
| 158 |
-
df['Team'] = df['Team'].astype(str).str.replace('*', '', regex=False).str.strip()
|
| 159 |
-
|
| 160 |
return df
|
| 161 |
|
| 162 |
# ──────────────────────────────────────────────
|
|
@@ -288,7 +347,7 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
|
|
| 288 |
return ""
|
| 289 |
hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
|
| 290 |
payload = {
|
| 291 |
-
"model":"sonar-medium-online",
|
| 292 |
"messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
|
| 293 |
"max_tokens":max_tokens, "temperature":temp
|
| 294 |
}
|
|
@@ -300,17 +359,48 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
|
|
| 300 |
st.error(f"Perplexity API error: {e}")
|
| 301 |
return ""
|
| 302 |
|
| 303 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 304 |
def create_radar_chart(player_stats, categories):
|
|
|
|
| 305 |
fig = go.Figure()
|
| 306 |
-
|
|
|
|
|
|
|
|
|
|
| 307 |
fig.add_trace(go.Scatterpolar(
|
| 308 |
-
r=
|
| 309 |
theta=categories,
|
| 310 |
fill='toself',
|
| 311 |
-
name=
|
| 312 |
opacity=0.7
|
| 313 |
))
|
|
|
|
| 314 |
fig.update_layout(
|
| 315 |
polar=dict(radialaxis=dict(visible=True, range=[0,100])),
|
| 316 |
showlegend=True,
|
|
@@ -473,7 +563,7 @@ def player_vs_player():
|
|
| 473 |
|
| 474 |
def team_vs_team():
|
| 475 |
st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
|
| 476 |
-
#
|
| 477 |
|
| 478 |
seasons = get_available_seasons()
|
| 479 |
selected_season_str = st.selectbox("Select Season", seasons, index=0)
|
|
@@ -507,7 +597,7 @@ def team_vs_team():
|
|
| 507 |
teams_with_no_data.append(t)
|
| 508 |
|
| 509 |
if teams_with_no_data:
|
| 510 |
-
st.info(f"No data found for the selected season ({selected_season_str}) for: {', '.join(teams_with_no_data)}.")
|
| 511 |
|
| 512 |
if not stats:
|
| 513 |
st.error("No data available for the selected teams to display. Please adjust your selections.")
|
|
|
|
| 1 |
import streamlit as st
|
| 2 |
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
import requests
|
| 5 |
import os
|
| 6 |
from datetime import datetime
|
|
|
|
| 106 |
st.error(f"Error parsing table with pandas: {e}")
|
| 107 |
return pd.DataFrame()
|
| 108 |
|
| 109 |
+
def clean_team_name(team_name):
    """
    Clean and standardize a team name scraped from Basketball Reference.

    Missing values (anything ``pd.isna`` reports as NA) are returned
    unchanged. Any other value is converted to ``str``, has every asterisk
    removed, is trimmed of surrounding whitespace, and is finally passed
    through a small abbreviation mapping.

    Args:
        team_name: Raw cell value taken from the team column.

    Returns:
        The cleaned (and possibly remapped) name as ``str``, or the original
        value when it is missing.
    """
    if pd.isna(team_name):
        return team_name

    # Remove asterisks *before* trimming: stripping first would leave behind
    # the whitespace that preceded a trailing marker ("Boston Celtics *"),
    # and the padded result would also miss the mapping lookup below.
    cleaned = str(team_name).replace('*', '').strip()

    # Handle special cases for team name variations (kept for consistency).
    # NOTE(review): the target codes (e.g. 'UTH') are preserved exactly as
    # found; confirm they match what the consumers of this column expect.
    team_mapping = {
        'NOP': 'NO',   # New Orleans Pelicans sometimes shown as NOP
        'PHX': 'PHO',  # Phoenix Suns sometimes shown as PHX
        'BRK': 'BKN',  # Brooklyn Nets sometimes shown as BRK
        'CHA': 'CHO',  # Charlotte sometimes inconsistent
        'UTA': 'UTH',  # Utah Jazz sometimes shown as UTA
    }

    return team_mapping.get(cleaned, cleaned)
|
| 129 |
+
|
| 130 |
@st.cache_data(ttl=300)
|
| 131 |
def get_team_stats_bs(year):
|
| 132 |
"""
|
| 133 |
+
Scrapes the league's per-game team stats table from:
|
| 134 |
+
https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html
|
| 135 |
Returns cleaned DataFrame.
|
| 136 |
"""
|
| 137 |
url = f"https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html"
|
|
|
|
| 139 |
if not html:
|
| 140 |
return pd.DataFrame()
|
| 141 |
|
| 142 |
+
# Try multiple possible table IDs for team stats
|
| 143 |
+
possible_table_ids = ["per_game-team", "per_game_team", "team-stats-per_game", "teams_per_game"]
|
| 144 |
+
df = pd.DataFrame()
|
| 145 |
+
|
| 146 |
+
for table_id in possible_table_ids:
|
| 147 |
+
df = parse_table(html, table_id=table_id)
|
| 148 |
+
if not df.empty:
|
| 149 |
+
break
|
| 150 |
+
|
| 151 |
+
# If no specific table found, try to find any table with team data
|
| 152 |
+
if df.empty:
|
| 153 |
+
soup = BeautifulSoup(html, "lxml") # Use lxml for consistency
|
| 154 |
+
tables = soup.find_all("table")
|
| 155 |
+
for table in tables:
|
| 156 |
+
if table.find("th", string=lambda text: text and "team" in text.lower()):
|
| 157 |
+
df = parse_table(str(table))
|
| 158 |
+
if not df.empty:
|
| 159 |
+
break
|
| 160 |
+
|
| 161 |
+
if df.empty:
|
| 162 |
+
st.warning(f"Could not find team stats table for {year}")
|
| 163 |
return pd.DataFrame()
|
| 164 |
|
| 165 |
+
# Handle potential MultiIndex columns
|
| 166 |
if isinstance(df.columns, pd.MultiIndex):
|
| 167 |
+
df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
|
| 168 |
+
for cols in df.columns.values]
|
| 169 |
+
|
| 170 |
+
# Clean column names
|
| 171 |
+
df.columns = [str(col).strip() for col in df.columns]
|
| 172 |
|
| 173 |
+
# Find team column
|
| 174 |
+
team_col = None
|
| 175 |
+
for col in df.columns:
|
| 176 |
+
if 'team' in col.lower() or col in ['Team', 'Tm']:
|
| 177 |
+
team_col = col
|
| 178 |
+
break
|
| 179 |
+
|
| 180 |
+
if team_col is None:
|
| 181 |
+
st.warning(f"Could not find team column in team stats. Available columns: {df.columns.tolist()}")
|
| 182 |
return pd.DataFrame()
|
| 183 |
|
| 184 |
+
# Rename team column to standard name
|
| 185 |
+
if team_col != 'Team':
|
| 186 |
+
df = df.rename(columns={team_col: 'Team'})
|
| 187 |
|
| 188 |
+
# Remove header rows
|
| 189 |
+
df = df[df["Team"].astype(str) != "Team"].copy()
|
| 190 |
+
df = df[df["Team"].notna()].copy()
|
| 191 |
+
|
| 192 |
# Standardize column names
|
| 193 |
+
column_mapping = {
|
| 194 |
'G': 'GP', 'MP': 'MIN',
|
| 195 |
'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
|
| 196 |
'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
|
|
|
|
| 199 |
'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
|
| 200 |
'2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
|
| 201 |
'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
# Apply column mapping only for columns that exist
|
| 205 |
+
for old_col, new_col in column_mapping.items():
|
| 206 |
+
if old_col in df.columns:
|
| 207 |
+
df = df.rename(columns={old_col: new_col})
|
| 208 |
|
| 209 |
+
# Clean team names
|
| 210 |
+
if 'Team' in df.columns: # Ensure 'Team' column exists before applying
|
| 211 |
+
df['Team'] = df['Team'].apply(clean_team_name)
|
| 212 |
+
|
| 213 |
+
# Convert numeric columns
|
| 214 |
non_numeric_cols = {"Team", "RANK"}
|
| 215 |
for col in df.columns:
|
| 216 |
if col not in non_numeric_cols:
|
| 217 |
df[col] = pd.to_numeric(df[col], errors="coerce")
|
| 218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
return df
|
| 220 |
|
| 221 |
# ──────────────────────────────────────────────
|
|
|
|
| 347 |
return ""
|
| 348 |
hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
|
| 349 |
payload = {
|
| 350 |
+
"model":"sonar-medium-online", # Using a commonly available online model
|
| 351 |
"messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
|
| 352 |
"max_tokens":max_tokens, "temperature":temp
|
| 353 |
}
|
|
|
|
| 359 |
st.error(f"Perplexity API error: {e}")
|
| 360 |
return ""
|
| 361 |
|
| 362 |
+
# ──────────────────────────────────────────────
|
| 363 |
+
# Plotting functions
|
| 364 |
+
def create_comparison_chart(data, players_names, metric):
    """Create a line chart comparing one metric across seasons for players.

    Args:
        data: Table indexed by column name; the 'Player' and 'Season'
            columns and the column named by ``metric`` are read.
            (Presumably a pandas DataFrame - confirm against callers.)
        players_names: Iterable of player names to plot, in trace order.
        metric: Name of the column plotted on the y-axis; also used in the
            chart title and the y-axis label.

    Returns:
        A ``go.Figure`` holding one lines+markers trace for every requested
        player that has at least one row in ``data``.
    """
    fig = go.Figure()

    for player in players_names:
        # Build the row mask once and reuse it for both the "is this player
        # present?" check and the actual filtering.
        is_player = data['Player'] == player
        if not is_player.any():
            continue  # players without rows are silently left off the chart

        player_data = data[is_player]
        fig.add_trace(go.Scatter(
            x=player_data['Season'],
            y=player_data[metric],
            mode='lines+markers',
            name=player,
            line=dict(width=3)
        ))

    fig.update_layout(
        title=f"{metric} Comparison",
        xaxis_title="Season",
        yaxis_title=metric,
        hovermode='x unified',
        height=500
    )

    return fig
|
| 388 |
+
|
| 389 |
def create_radar_chart(player_stats, categories):
|
| 390 |
+
"""Create radar chart for player comparison"""
|
| 391 |
fig = go.Figure()
|
| 392 |
+
|
| 393 |
+
for player_name, stats in player_stats.items():
|
| 394 |
+
r_values = [stats.get(cat,0) for cat in categories]
|
| 395 |
+
|
| 396 |
fig.add_trace(go.Scatterpolar(
|
| 397 |
+
r=r_values,
|
| 398 |
theta=categories,
|
| 399 |
fill='toself',
|
| 400 |
+
name=player_name,
|
| 401 |
opacity=0.7
|
| 402 |
))
|
| 403 |
+
|
| 404 |
fig.update_layout(
|
| 405 |
polar=dict(radialaxis=dict(visible=True, range=[0,100])),
|
| 406 |
showlegend=True,
|
|
|
|
| 563 |
|
| 564 |
def team_vs_team():
|
| 565 |
st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
|
| 566 |
+
# This page uses the custom BeautifulSoup scraper, so no BRSCRAPER_AVAILABLE check here.
|
| 567 |
|
| 568 |
seasons = get_available_seasons()
|
| 569 |
selected_season_str = st.selectbox("Select Season", seasons, index=0)
|
|
|
|
| 597 |
teams_with_no_data.append(t)
|
| 598 |
|
| 599 |
if teams_with_no_data:
|
| 600 |
+
st.info(f"No data found for the selected season ({selected_season_str}) for: {', '.join(teams_with_no_data)}. This might be because the season hasn't started or data is not yet available.")
|
| 601 |
|
| 602 |
if not stats:
|
| 603 |
st.error("No data available for the selected teams to display. Please adjust your selections.")
|