rairo committed on
Commit
2965b0b
·
verified ·
1 Parent(s): f8a8b3d

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +117 -27
src/streamlit_app.py CHANGED
@@ -1,6 +1,6 @@
1
  import streamlit as st
2
  import pandas as pd
3
- import numpy as np # Re-added numpy for general use
4
  import requests
5
  import os
6
  from datetime import datetime
@@ -106,11 +106,32 @@ def parse_table(html, table_id=None):
106
  st.error(f"Error parsing table with pandas: {e}")
107
  return pd.DataFrame()
108
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
109
  @st.cache_data(ttl=300)
110
  def get_team_stats_bs(year):
111
  """
112
- Scrapes the league’s per‑game team stats table from Basketball-Reference
113
- using BeautifulSoup.
114
  Returns cleaned DataFrame.
115
  """
116
  url = f"https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html"
@@ -118,25 +139,58 @@ def get_team_stats_bs(year):
118
  if not html:
119
  return pd.DataFrame()
120
 
121
- df = parse_table(html, table_id="per_game-team")
122
- if df.empty: # Check if df is empty first
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  return pd.DataFrame()
124
 
125
- # Flatten multi-index columns if they exist
126
  if isinstance(df.columns, pd.MultiIndex):
127
- df.columns = ['_'.join(col).strip() for col in df.columns.values]
128
- else:
129
- df.columns = [col.strip() for col in df.columns.values]
 
 
130
 
131
- # Now check for 'Team' column after flattening
132
- if "Team" not in df.columns:
 
 
 
 
 
 
 
133
  return pd.DataFrame()
134
 
135
- # drop repeated headers (e.g., rows where 'Team' is literally 'Team')
136
- df = df[df["Team"] != "Team"].copy()
 
137
 
 
 
 
 
138
  # Standardize column names
139
- df = df.rename(columns={
140
  'G': 'GP', 'MP': 'MIN',
141
  'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
142
  'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
@@ -145,18 +199,23 @@ def get_team_stats_bs(year):
145
  'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
146
  '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
147
  'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
148
- })
 
 
 
 
 
149
 
150
- # coerce numeric columns
 
 
 
 
151
  non_numeric_cols = {"Team", "RANK"}
152
  for col in df.columns:
153
  if col not in non_numeric_cols:
154
  df[col] = pd.to_numeric(df[col], errors="coerce")
155
 
156
- # Clean team names (remove asterisks)
157
- if 'Team' in df.columns:
158
- df['Team'] = df['Team'].astype(str).str.replace('*', '', regex=False).str.strip()
159
-
160
  return df
161
 
162
  # ——————————————————————————————————————————————
@@ -288,7 +347,7 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
288
  return ""
289
  hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
290
  payload = {
291
- "model":"sonar-medium-online",
292
  "messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
293
  "max_tokens":max_tokens, "temperature":temp
294
  }
@@ -300,17 +359,48 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
300
  st.error(f"Perplexity API error: {e}")
301
  return ""
302
 
303
- # Plot helpers
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
304
  def create_radar_chart(player_stats, categories):
 
305
  fig = go.Figure()
306
- for name, stats in player_stats.items():
 
 
 
307
  fig.add_trace(go.Scatterpolar(
308
- r=[stats.get(cat,0) for cat in categories],
309
  theta=categories,
310
  fill='toself',
311
- name=name,
312
  opacity=0.7
313
  ))
 
314
  fig.update_layout(
315
  polar=dict(radialaxis=dict(visible=True, range=[0,100])),
316
  showlegend=True,
@@ -473,7 +563,7 @@ def player_vs_player():
473
 
474
  def team_vs_team():
475
  st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
476
- # No BRSCRAPER_AVAILABLE check here, as it uses custom BS scraper
477
 
478
  seasons = get_available_seasons()
479
  selected_season_str = st.selectbox("Select Season", seasons, index=0)
@@ -507,7 +597,7 @@ def team_vs_team():
507
  teams_with_no_data.append(t)
508
 
509
  if teams_with_no_data:
510
- st.info(f"No data found for the selected season ({selected_season_str}) for: {', '.join(teams_with_no_data)}.")
511
 
512
  if not stats:
513
  st.error("No data available for the selected teams to display. Please adjust your selections.")
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
  import requests
5
  import os
6
  from datetime import datetime
 
106
  st.error(f"Error parsing table with pandas: {e}")
107
  return pd.DataFrame()
108
 
109
# Abbreviation aliases applied after cleaning. Built once at import time
# instead of on every call (the function runs once per row via ``.apply``).
_TEAM_ABBREVIATION_ALIASES = {
    'NOP': 'NO',   # New Orleans Pelicans sometimes shown as NOP
    'PHX': 'PHO',  # Phoenix Suns sometimes shown as PHX
    'BRK': 'BKN',  # Brooklyn Nets sometimes shown as BRK
    'CHA': 'CHO',  # Charlotte sometimes inconsistent
    # NOTE(review): 'UTH' does not look like a standard Utah abbreviation;
    # kept unchanged because callers may rely on it -- confirm.
    'UTA': 'UTH',
}


def clean_team_name(team_name):
    """
    Clean and standardize a team name scraped from Basketball Reference.

    Asterisks are removed and surrounding whitespace is stripped; if the
    cleaned value is a known abbreviation alias it is replaced by its mapped
    form.

    Args:
        team_name: Raw cell value. Non-string values are converted with
            ``str()``.

    Returns:
        The cleaned name as a string, or ``team_name`` unchanged when it is
        missing according to ``pd.isna`` (``None``, ``NaN``, ...).
    """
    if pd.isna(team_name):
        return team_name

    # Remove the asterisk *before* stripping: stripping first would leave the
    # whitespace that preceded a trailing marker ("Celtics *" -> "Celtics ").
    cleaned = str(team_name).replace('*', '').strip()

    return _TEAM_ABBREVIATION_ALIASES.get(cleaned, cleaned)
129
+
130
  @st.cache_data(ttl=300)
131
  def get_team_stats_bs(year):
132
  """
133
+ Scrapes the league’s per‑game team stats table from:
134
+ https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html
135
  Returns cleaned DataFrame.
136
  """
137
  url = f"https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html"
 
139
  if not html:
140
  return pd.DataFrame()
141
 
142
+ # Try multiple possible table IDs for team stats
143
+ possible_table_ids = ["per_game-team", "per_game_team", "team-stats-per_game", "teams_per_game"]
144
+ df = pd.DataFrame()
145
+
146
+ for table_id in possible_table_ids:
147
+ df = parse_table(html, table_id=table_id)
148
+ if not df.empty:
149
+ break
150
+
151
+ # If no specific table found, try to find any table with team data
152
+ if df.empty:
153
+ soup = BeautifulSoup(html, "lxml") # Use lxml for consistency
154
+ tables = soup.find_all("table")
155
+ for table in tables:
156
+ if table.find("th", string=lambda text: text and "team" in text.lower()):
157
+ df = parse_table(str(table))
158
+ if not df.empty:
159
+ break
160
+
161
+ if df.empty:
162
+ st.warning(f"Could not find team stats table for {year}")
163
  return pd.DataFrame()
164
 
165
+ # Handle potential MultiIndex columns
166
  if isinstance(df.columns, pd.MultiIndex):
167
+ df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
168
+ for cols in df.columns.values]
169
+
170
+ # Clean column names
171
+ df.columns = [str(col).strip() for col in df.columns]
172
 
173
+ # Find team column
174
+ team_col = None
175
+ for col in df.columns:
176
+ if 'team' in col.lower() or col in ['Team', 'Tm']:
177
+ team_col = col
178
+ break
179
+
180
+ if team_col is None:
181
+ st.warning(f"Could not find team column in team stats. Available columns: {df.columns.tolist()}")
182
  return pd.DataFrame()
183
 
184
+ # Rename team column to standard name
185
+ if team_col != 'Team':
186
+ df = df.rename(columns={team_col: 'Team'})
187
 
188
+ # Remove header rows
189
+ df = df[df["Team"].astype(str) != "Team"].copy()
190
+ df = df[df["Team"].notna()].copy()
191
+
192
  # Standardize column names
193
+ column_mapping = {
194
  'G': 'GP', 'MP': 'MIN',
195
  'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
196
  'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
 
199
  'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
200
  '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
201
  'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
202
+ }
203
+
204
+ # Apply column mapping only for columns that exist
205
+ for old_col, new_col in column_mapping.items():
206
+ if old_col in df.columns:
207
+ df = df.rename(columns={old_col: new_col})
208
 
209
+ # Clean team names
210
+ if 'Team' in df.columns: # Ensure 'Team' column exists before applying
211
+ df['Team'] = df['Team'].apply(clean_team_name)
212
+
213
+ # Convert numeric columns
214
  non_numeric_cols = {"Team", "RANK"}
215
  for col in df.columns:
216
  if col not in non_numeric_cols:
217
  df[col] = pd.to_numeric(df[col], errors="coerce")
218
 
 
 
 
 
219
  return df
220
 
221
  # ——————————————————————————————————————————————
 
347
  return ""
348
  hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
349
  payload = {
350
+ "model":"sonar-medium-online", # Using a commonly available online model
351
  "messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
352
  "max_tokens":max_tokens, "temperature":temp
353
  }
 
359
  st.error(f"Perplexity API error: {e}")
360
  return ""
361
 
362
+ # ——————————————————————————————————————————————
363
+ # Plotting functions
364
def create_comparison_chart(data, players_names, metric):
    """
    Build a line chart comparing one metric across players.

    For every name in ``players_names`` that appears in ``data['Player']``,
    a lines+markers trace is added with ``data['Season']`` on the x axis and
    ``data[metric]`` on the y axis. Names not present in the data are skipped.

    Args:
        data: DataFrame indexed by column name; the code reads the
            ``'Player'`` and ``'Season'`` columns plus the ``metric`` column.
        players_names: Iterable of player names to plot.
        metric: Column name plotted on the y axis; also used in the title
            and the y-axis label.

    Returns:
        The populated ``go.Figure``.
    """
    chart = go.Figure()

    for name in players_names:
        # Skip players with no rows rather than adding an empty trace.
        if name not in data['Player'].values:
            continue

        rows = data[data['Player'] == name]
        trace = go.Scatter(
            x=rows['Season'],
            y=rows[metric],
            mode='lines+markers',
            name=name,
            line=dict(width=3),
        )
        chart.add_trace(trace)

    layout_options = dict(
        title=f"{metric} Comparison",
        xaxis_title="Season",
        yaxis_title=metric,
        hovermode='x unified',
        height=500,
    )
    chart.update_layout(**layout_options)

    return chart
388
+
389
  def create_radar_chart(player_stats, categories):
390
+ """Create radar chart for player comparison"""
391
  fig = go.Figure()
392
+
393
+ for player_name, stats in player_stats.items():
394
+ r_values = [stats.get(cat,0) for cat in categories]
395
+
396
  fig.add_trace(go.Scatterpolar(
397
+ r=r_values,
398
  theta=categories,
399
  fill='toself',
400
+ name=player_name,
401
  opacity=0.7
402
  ))
403
+
404
  fig.update_layout(
405
  polar=dict(radialaxis=dict(visible=True, range=[0,100])),
406
  showlegend=True,
 
563
 
564
  def team_vs_team():
565
  st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
566
+ # This page uses the custom BeautifulSoup scraper, so no BRSCRAPER_AVAILABLE check here.
567
 
568
  seasons = get_available_seasons()
569
  selected_season_str = st.selectbox("Select Season", seasons, index=0)
 
597
  teams_with_no_data.append(t)
598
 
599
  if teams_with_no_data:
600
+ st.info(f"No data found for the selected season ({selected_season_str}) for: {', '.join(teams_with_no_data)}. This might be because the season hasn't started or data is not yet available.")
601
 
602
  if not stats:
603
  st.error("No data available for the selected teams to display. Please adjust your selections.")