rairo committed on
Commit
5bb37d0
·
verified ·
1 Parent(s): 1c30aa9

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +153 -170
src/streamlit_app.py CHANGED
@@ -4,13 +4,18 @@ import numpy as np
4
  import requests
5
  import os
6
  from datetime import datetime
7
- from bs4 import BeautifulSoup, Comment # Keep Comment for parse_table if needed, though BRScraper handles it
8
- import re
9
  import plotly.express as px
10
  import plotly.graph_objects as go
11
 
12
- # Import BRScraper
13
- from BRScraper import nba
 
 
 
 
 
 
14
 
15
  # Page configuration
16
  st.set_page_config(
@@ -56,7 +61,8 @@ def get_available_seasons(num_seasons=6):
56
  for i in range(num_seasons):
57
  end_year = latest_season_end_year - i
58
  start_year = end_year - 1
59
- seasons_list.append(f"{start_year}–{end_year}")
 
60
  return sorted(seasons_list, reverse=True) # Sort to show most recent first
61
 
62
  @st.cache_data(ttl=3600)
@@ -65,20 +71,41 @@ def get_player_index_brscraper():
65
  Uses BRScraper to get a list of players from a recent season's stats.
66
  This serves as our player index for the multiselect.
67
  """
68
- latest_season_end_year = int(get_available_seasons(1)[0].split('–')[1])
 
 
69
  try:
70
- # Get per_game stats for the latest season to get a list of active players
71
- df = nba.get_stats(latest_season_end_year, info='per_game', rename=False)
72
- if df.empty:
73
- st.error(f"Could not fetch player list for {latest_season_end_year} from BRScraper.")
74
- return pd.DataFrame(columns=['name']) # Return empty with 'name' column
 
 
75
 
76
- # Extract unique player names
77
- player_names = df['Player'].unique().tolist()
 
 
 
 
 
 
 
 
 
 
 
78
  return pd.DataFrame({'name': player_names})
 
79
  except Exception as e:
80
- st.error(f"Error fetching player index with BRScraper: {e}")
81
- return pd.DataFrame(columns=['name'])
 
 
 
 
 
82
 
83
  @st.cache_data(ttl=300)
84
  def get_player_career_stats_brscraper(player_name):
@@ -86,21 +113,26 @@ def get_player_career_stats_brscraper(player_name):
86
  Uses BRScraper to get a player's career stats.
87
  Applies column renaming and numeric conversion.
88
  """
 
 
 
89
  try:
90
- df = nba.get_player_stats(player_name)
 
 
91
  if df.empty:
92
  return pd.DataFrame()
93
 
94
- # Standardize column names
95
  column_mapping = {
96
- 'Season': 'Season', # Keep as is, will convert to en-dash later
97
  'G': 'GP', 'GS': 'GS', 'MP': 'MIN',
98
- 'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
99
  'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
100
  'PF': 'PF', 'PTS': 'PTS',
101
- 'Age': 'AGE', 'Tm': 'TEAM_ABBREVIATION', 'Lg': 'LEAGUE_ID', 'Pos': 'POSITION',
102
- 'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
103
- '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
104
  'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
105
  }
106
 
@@ -114,7 +146,8 @@ def get_player_career_stats_brscraper(player_name):
114
  df['Season'] = df['Season'].astype(str).str.replace('-', '–')
115
 
116
  # Convert numeric columns
117
- non_numeric_cols = {'Season', 'TEAM_ABBREVIATION', 'LEAGUE_ID', 'POSITION', 'Player'}
 
118
  for col in df.columns:
119
  if col not in non_numeric_cols:
120
  df[col] = pd.to_numeric(df[col], errors="coerce")
@@ -123,6 +156,7 @@ def get_player_career_stats_brscraper(player_name):
123
  df['Player'] = player_name
124
 
125
  return df
 
126
  except Exception as e:
127
  st.error(f"Error fetching stats for {player_name} with BRScraper: {e}")
128
  return pd.DataFrame()
@@ -130,89 +164,52 @@ def get_player_career_stats_brscraper(player_name):
130
  @st.cache_data(ttl=300)
131
  def get_team_season_stats_brscraper(year):
132
  """
133
- Uses BRScraper to get per-game team stats for a given season year.
134
  Applies column renaming and numeric conversion.
135
  """
 
 
 
136
  try:
137
- df = nba.get_stats(year, info='per_game', rename=False)
138
- if df.empty:
139
- return pd.DataFrame()
140
-
141
- # Filter for team rows (BRScraper's get_stats includes player rows too)
142
- # Team rows typically have 'Rk' (Rank) as NaN or a non-numeric value after cleaning
143
- # Or they are the first few rows before player data starts.
144
- # A common pattern is that team rows have 'Player' as NaN or a team name.
145
- # Let's assume 'Player' column is NaN for team rows, or 'Tm' is not NaN.
146
-
147
- # First, standardize column names to make filtering easier
148
- df.columns = [str(col).strip() for col in df.columns]
149
 
150
- # Rename 'Tm' to 'Team' for easier filtering if it exists
151
- if 'Tm' in df.columns:
152
- df = df.rename(columns={'Tm': 'Team'})
153
-
154
- # Filter out player rows. Player rows usually have a non-null 'Player' column.
155
- # Team rows might have 'Player' as NaN or the team name itself.
156
- # A more robust way is to look for rows where 'Rk' is not numeric (e.g., 'Rk' for team totals)
157
- # Or where 'Player' is NaN and 'Team' is not NaN.
158
-
159
- # Let's try to identify team rows by checking if 'Player' column is missing or NaN
160
- # and 'Team' column is present and not NaN.
161
- if 'Player' in df.columns:
162
- # Filter out rows where 'Player' is not NaN (these are player stats)
163
- team_df = df[df['Player'].isna()].copy()
164
- else:
165
- # If no 'Player' column, assume all rows are team-related, or need further inspection
166
- team_df = df.copy()
167
-
168
- # Further refine: ensure 'Team' column is present and not NaN
169
- if 'Team' not in team_df.columns or team_df['Team'].isna().all():
170
- # Fallback: if 'Team' column is problematic, try to find rows where 'Rk' is 'Rk' (header)
171
- # or where 'Rk' is not a number (e.g., 'Rk' for team totals)
172
- if 'Rk' in df.columns:
173
- team_df = df[pd.to_numeric(df['Rk'], errors='coerce').isna()].copy()
174
- if 'Player' in team_df.columns: # Remove Player column if it's still there
175
- team_df = team_df.drop(columns=['Player'])
176
- else:
177
- st.warning(f"Could not reliably identify team rows for year {year}. Returning all data.")
178
- team_df = df.copy() # Fallback to returning all data if filtering is hard
179
-
180
- if team_df.empty:
181
  return pd.DataFrame()
182
 
183
- # Standardize column names
184
  column_mapping = {
185
  'G': 'GP', 'MP': 'MIN',
186
- 'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
187
  'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
188
  'PF': 'PF', 'PTS': 'PTS',
189
- 'Rk': 'RANK', 'W': 'WINS', 'L': 'LOSSES', 'W/L%': 'WIN_LOSS_PCT',
190
- 'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
191
- '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
192
- 'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
 
193
  }
194
 
195
  for old_col, new_col in column_mapping.items():
196
- if old_col in team_df.columns:
197
- team_df = team_df.rename(columns={old_col: new_col})
198
 
199
  # Convert numeric columns
200
- non_numeric_cols = {"Team", "Tm", "RANK"} # 'Team' or 'Tm' will be the team identifier
201
- for col in team_df.columns:
 
202
  if col not in non_numeric_cols:
203
- team_df[col] = pd.to_numeric(team_df[col], errors="coerce")
204
 
205
- # Ensure 'Team' column is present and clean it
206
- if 'Team' in team_df.columns:
207
- team_df['Team'] = team_df['Team'].astype(str).str.replace('*', '').str.strip()
208
- elif 'Tm' in team_df.columns: # Fallback if 'Team' was not the original name
209
- team_df = team_df.rename(columns={'Tm': 'Team'})
210
- team_df['Team'] = team_df['Team'].astype(str).str.replace('*', '').str.strip()
211
  else:
212
- st.warning(f"Could not find a 'Team' or 'Tm' column in the processed team data for {year}.")
213
  return pd.DataFrame()
214
 
215
- return team_df
 
216
  except Exception as e:
217
  st.error(f"Error fetching team stats for {year} with BRScraper: {e}")
218
  return pd.DataFrame()
@@ -228,7 +225,7 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
228
  return ""
229
  hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
230
  payload = {
231
- "model":"sonar-medium-online", # Changed to a commonly available online model
232
  "messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
233
  "max_tokens":max_tokens, "temperature":temp
234
  }
@@ -238,9 +235,9 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
238
  return r.json().get("choices", [{}])[0].get("message",{}).get("content","")
239
  except requests.exceptions.RequestException as e:
240
  error_message = f"Error communicating with Perplexity API: {e}"
241
- if r.response is not None:
242
  try:
243
- error_detail = r.response.json().get("error", {}).get("message", r.response.text)
244
  error_message = f"Perplexity API error: {error_detail}"
245
  except ValueError:
246
  error_message = f"Perplexity API error: {r.response.status_code} - {r.response.reason}"
@@ -251,16 +248,16 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
251
  return ""
252
 
253
  # —————————————————————————————————————————————————————————————————————————————
254
- # Plotting functions (retained from previous version)
255
  def create_comparison_chart(data, players_names, metric):
256
  """Create comparison chart for players"""
257
  fig = go.Figure()
258
 
259
  for i, player in enumerate(players_names):
260
- if player in data['Player'].values: # Changed to 'Player' column
261
  player_data = data[data['Player'] == player]
262
  fig.add_trace(go.Scatter(
263
- x=player_data['Season'], # Changed to 'Season' column
264
  y=player_data[metric],
265
  mode='lines+markers',
266
  name=player,
@@ -282,7 +279,6 @@ def create_radar_chart(player_stats, categories):
282
  fig = go.Figure()
283
 
284
  for player_name, stats in player_stats.items():
285
- # Ensure all categories are present, default to 0 if not
286
  r_values = [stats.get(cat, 0) for cat in categories]
287
 
288
  fig.add_trace(go.Scatterpolar(
@@ -297,7 +293,6 @@ def create_radar_chart(player_stats, categories):
297
  polar=dict(
298
  radialaxis=dict(
299
  visible=True,
300
- # The range should be adjusted based on the scaled data (0-100)
301
  range=[0, 100]
302
  )),
303
  showlegend=True,
@@ -310,6 +305,9 @@ def create_radar_chart(player_stats, categories):
310
  # Main App Structure
311
  # —————————————————————————————————————————————————————————————————————————————
312
  def main():
 
 
 
313
  st.markdown('<h1 class="main-header">🏀 NBA Analytics Hub (BBR Edition)</h1>', unsafe_allow_html=True)
314
  st.sidebar.title("Navigation")
315
  page = st.sidebar.radio("", [
@@ -333,12 +331,17 @@ def main():
333
  # —————————————————————————————————————————————————————————————————————————————
334
  def player_vs_player():
335
  st.markdown('<h2 class="section-header">Player vs Player Comparison</h2>', unsafe_allow_html=True)
336
- idx = get_player_index_brscraper() # Use BRScraper for player index
 
 
 
 
 
337
  names = idx['name'].tolist()
338
  selected_players = st.multiselect("Select Players (up to 4)", names, max_selections=4)
339
 
340
  available_seasons = get_available_seasons()
341
- selected_seasons = st.multiselect("Select Seasons", available_seasons, default=[available_seasons[0]])
342
 
343
  if st.button("Run Comparison"):
344
  if not selected_players:
@@ -346,7 +349,7 @@ def player_vs_player():
346
  return
347
 
348
  stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
349
- all_player_season_data = [] # To store individual season rows for each player
350
  players_with_no_data = []
351
 
352
  with st.spinner("Fetching player data..."):
@@ -354,33 +357,26 @@ def player_vs_player():
354
  df_player_career = get_player_career_stats_brscraper(player_name)
355
 
356
  if not df_player_career.empty:
357
- # Filter for selected seasons. The player_season_stats function
358
- # already ensures the 'Season' column uses en-dashes.
359
  filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
360
 
361
  if not filtered_df.empty:
362
- # 'Player' column is already added by get_player_career_stats_brscraper
363
  all_player_season_data.append(filtered_df)
364
  else:
365
  players_with_no_data.append(player_name)
366
  else:
367
  players_with_no_data.append(player_name)
368
 
369
- # Report on players with no data for selected seasons
370
  if players_with_no_data:
371
- st.info(f"No data found for the selected seasons ({', '.join(selected_seasons)}) for: {', '.join(players_with_no_data)}. This might be because the season hasn't started or data is not yet available, or the player name was not found by BRScraper.")
372
 
373
  if not all_player_season_data:
374
- st.error("No data available for any of the selected players and seasons to display. Please adjust your selections.")
375
  return
376
 
377
- # Concatenate all collected season data into one DataFrame
378
  comparison_df_raw = pd.concat(all_player_season_data, ignore_index=True)
379
 
380
  with stats_tabs[0]:
381
  st.subheader("Basic Statistics")
382
- # Group by player and average for the basic stats table if multiple seasons are selected
383
- # Otherwise, show individual season stats if only one season is selected
384
  if len(selected_seasons) > 1:
385
  basic_display_df = comparison_df_raw.groupby('Player').mean(numeric_only=True).reset_index()
386
  basic_cols = ['Player', 'GP', 'MIN', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'FG_PCT', 'FT_PCT', 'FG3_PCT']
@@ -397,7 +393,6 @@ def player_vs_player():
397
  advanced_df = comparison_df_raw.copy()
398
 
399
  # Calculate TS% (True Shooting Percentage)
400
- # Ensure FGA and FTA are numeric and not zero to avoid division by zero
401
  advanced_df['FGA'] = pd.to_numeric(advanced_df['FGA'], errors='coerce').fillna(0)
402
  advanced_df['FTA'] = pd.to_numeric(advanced_df['FTA'], errors='coerce').fillna(0)
403
  advanced_df['PTS'] = pd.to_numeric(advanced_df['PTS'], errors='coerce').fillna(0)
@@ -423,16 +418,14 @@ def player_vs_player():
423
  st.subheader("Player Comparison Charts")
424
 
425
  if not comparison_df_raw.empty:
426
- metrics = ['PTS', 'REB', 'AST', 'FG_PCT', '3P_PCT', 'FT_PCT', 'STL', 'BLK']
427
  available_metrics = [m for m in metrics if m in comparison_df_raw.columns]
428
 
429
  if available_metrics:
430
  selected_metric = st.selectbox("Select Metric to Visualize", available_metrics)
431
 
432
  if selected_metric:
433
- # Determine if we are showing a trend for one player or comparison for multiple
434
  if len(selected_players) == 1 and len(selected_seasons) > 1:
435
- # Show trend over seasons for one player
436
  player_trend_df = comparison_df_raw[comparison_df_raw['Player'] == selected_players[0]].sort_values(by='Season')
437
  fig = px.line(
438
  player_trend_df,
@@ -442,7 +435,6 @@ def player_vs_player():
442
  markers=True
443
  )
444
  else:
445
- # Average over selected seasons for multiple players for bar chart
446
  avg_comparison_df = comparison_df_raw.groupby('Player')[available_metrics].mean(numeric_only=True).reset_index()
447
  fig = px.bar(
448
  avg_comparison_df,
@@ -453,13 +445,12 @@ def player_vs_player():
453
  )
454
  st.plotly_chart(fig, use_container_width=True)
455
 
456
- # Radar chart for multi-metric comparison
457
  radar_metrics_for_chart = ['PTS', 'REB', 'AST', 'STL', 'BLK']
458
  radar_metrics_for_chart = [m for m in radar_metrics_for_chart if m in comparison_df_raw.columns]
459
 
460
  if len(radar_metrics_for_chart) >= 3:
461
  radar_data = {}
462
- # Use the averaged data for radar chart if multiple seasons
463
  if len(selected_seasons) > 1:
464
  radar_source_df = comparison_df_raw.groupby('Player')[radar_metrics_for_chart].mean(numeric_only=True).reset_index()
465
  else:
@@ -467,14 +458,13 @@ def player_vs_player():
467
 
468
  scaled_radar_df = radar_source_df.copy()
469
 
470
- # Simple min-max scaling for radar chart visualization (0-100)
471
  for col in radar_metrics_for_chart:
472
  min_val = scaled_radar_df[col].min()
473
  max_val = scaled_radar_df[col].max()
474
  if max_val > min_val:
475
  scaled_radar_df[col] = ((scaled_radar_df[col] - min_val) / (max_val - min_val)) * 100
476
  else:
477
- scaled_radar_df[col] = 0 # Default if all values are the same
478
 
479
  for _, row in scaled_radar_df.iterrows():
480
  radar_data[row['Player']] = {
@@ -484,31 +474,29 @@ def player_vs_player():
484
  if radar_data:
485
  radar_fig = create_radar_chart(radar_data, radar_metrics_for_chart)
486
  st.plotly_chart(radar_fig, use_container_width=True)
487
- else:
488
- st.info("Could not generate radar chart data.")
489
- else:
490
- st.info("Select at least 3 common metrics for a radar chart (e.g., PTS, REB, AST, STL, BLK).")
491
- else:
492
- st.info("No common metrics available for visualization.")
493
- else:
494
- st.info("No data available for visualizations.")
495
-
496
 
497
  def team_vs_team():
498
  st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
499
 
 
 
 
 
500
  available_seasons = get_available_seasons()
 
 
 
 
501
  selected_season_str = st.selectbox("Select Season", available_seasons, index=0)
502
-
503
- # Extract the end year from the season string (e.g., "2024–25" -> 2025)
504
  year_for_team_stats = int(selected_season_str.split('–')[1])
505
 
506
- tm_df = get_team_season_stats_brscraper(year_for_team_stats) # Use BRScraper for team stats
507
  if tm_df.empty:
508
- st.info(f"No team data available for the {selected_season_str} season. This might be because the season hasn't started or data is not yet available, or BRScraper could not fetch it.")
509
  return
510
 
511
- teams = tm_df['Team'].unique().tolist() # Use 'Team' column from BRScraper output
512
  selected_teams = st.multiselect("Select Teams (up to 4)", teams, max_selections=4)
513
 
514
  if st.button("Run Comparison"):
@@ -521,108 +509,103 @@ def team_vs_team():
521
 
522
  with st.spinner("Fetching team data..."):
523
  for t in selected_teams:
524
- df = tm_df[tm_df.Team == t].copy() # Filter by 'Team' column
525
  if not df.empty:
526
- # For team stats, we usually get one row per team per season from team_per_game
527
- # So, no need for .mean() here, just take the row.
528
- df['Season'] = selected_season_str # Add 'Season' column
529
- stats.append(df.iloc[0].to_dict()) # Convert the single row to dict
530
  else:
531
  teams_with_no_data.append(t)
532
 
533
  if teams_with_no_data:
534
- st.info(f"No data found for the selected season ({selected_season_str}) for: {', '.join(teams_with_no_data)}. This might be because the season hasn't started or data is not yet available.")
535
 
536
  if not stats:
537
- st.error("No data available for the selected teams to display. Please adjust your selections.")
538
  return
539
 
540
  comp = pd.DataFrame(stats)
541
- # Ensure numeric columns are actually numeric for display and calculations
542
- for col in ['PTS', 'REB', 'AST', 'STL', 'BLK', 'FG_PCT', '3P_PCT', 'FT_PCT']:
543
  if col in comp.columns:
544
  comp[col] = pd.to_numeric(comp[col], errors='coerce')
545
 
546
  st.subheader("Team Statistics Comparison")
547
- cols = ['Team', 'Season', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'FG_PCT', '3P_PCT', 'FT_PCT']
548
  display_cols = [col for col in cols if col in comp.columns]
549
  st.dataframe(comp[display_cols].round(2), use_container_width=True)
550
 
551
  st.subheader("Team Performance Visualization")
552
- metric_options = ['PTS', 'REB', 'AST', 'FG_PCT', '3P_PCT', 'FT_PCT']
553
  available_metrics = [m for m in metric_options if m in comp.columns]
554
 
555
  if available_metrics:
556
  selected_metric = st.selectbox("Select Metric", available_metrics)
557
-
558
  fig = px.bar(
559
  comp,
560
  x='Team',
561
  y=selected_metric,
562
- color='Team', # Color by team for clarity
563
  title=f"Team {selected_metric} Comparison ({selected_season_str} Season)",
564
  barmode='group'
565
  )
566
  st.plotly_chart(fig, use_container_width=True)
567
- else:
568
- st.info("No common metrics available for visualization.")
569
-
570
 
571
  def awards_predictor():
572
  st.markdown('<h2 class="section-header">NBA Awards Predictor</h2>', unsafe_allow_html=True)
573
  award = st.selectbox("Select Award", ["MVP","Defensive Player of the Year","Rookie of the Year","6th Man of the Year","All-NBA First Team"])
574
  st.subheader(f"{award} Criteria")
575
- # (same sliders as before…)
576
  criteria = {}
577
  if award=="MVP":
578
- criteria = {
579
- "PPG":st.slider("Min PPG",15.0,35.0,25.0),
580
- "Wins":st.slider("Min Team Wins",35,70,50),
581
- "PER":st.slider("Min PER",15.0,35.0,25.0),
582
- "WS":st.slider("Min Win Shares",5.0,20.0,10.0)
583
- }
584
  elif award=="Defensive Player of the Year":
585
- criteria = {
586
- "BPG":st.slider("Min BPG",0.0,4.0,1.5),
587
- "SPG":st.slider("Min SPG",0.0,3.0,1.0),
588
- "DefRtgMax":st.slider("Max Def Rating",90.0,120.0,105.0),
589
- "DefRankMax":st.slider("Max Team Def Rank",1,30,10)
590
- }
591
  else:
592
- criteria = {
593
- "PPG":st.slider("Min PPG",10.0,30.0,15.0),
594
- "Games":st.slider("Min Games",50,82,65),
595
- "FG%":st.slider("Min FG%",0.35,0.65,0.45)
596
- }
597
 
598
  if st.button("Generate Predictions"):
599
- p = f"Predict top 5 {award} based on {criteria}. Focus on 2023-24 season."
600
  resp = ask_perp(p, system="You are an NBA awards expert AI.", max_tokens=800)
601
  st.markdown("### Predictions")
602
  st.write(resp)
603
 
604
  def ai_chat():
605
  st.markdown('<h2 class="section-header">AI Chat & Insights</h2>', unsafe_allow_html=True)
606
- if 'chat_history' not in st.session_state: st.session_state.chat_history=[]
607
  for msg in st.session_state.chat_history:
608
  with st.chat_message(msg["role"]):
609
  st.write(msg["content"])
610
- if prompt:=st.chat_input("Ask me anything about NBA…"):
 
611
  st.session_state.chat_history.append({"role":"user","content":prompt})
612
  with st.chat_message("user"):
613
- st.write(prompt) # Display user message immediately
614
  with st.chat_message("assistant"):
615
  ans = ask_perp(prompt, system="You are an NBA expert analyst AI.", max_tokens=700)
616
  st.write(ans)
617
  st.session_state.chat_history.append({"role":"assistant","content":ans})
 
618
  st.subheader("Quick Actions")
619
  c1,c2,c3 = st.columns(3)
620
  if c1.button("🏆 Contenders"):
621
- prompt = "Analyze the current NBA championship contenders for 2024. Who are the top 5 teams and why?"
622
  response = ask_perp(prompt)
623
  if response: st.write(response)
624
  if c2.button("⭐ Rising Stars"):
625
- prompt = "Who are the most promising young NBA players to watch in 2024? Focus on players 23 and under."
626
  response = ask_perp(prompt)
627
  if response: st.write(response)
628
  if c3.button("📊 Trades"):
@@ -642,7 +625,7 @@ def young_projections():
642
  yrs = st.number_input("Years in NBA",0,7,2)
643
  ppg = st.number_input("PPG",0.0,40.0,15.0)
644
  rpg = st.number_input("RPG",0.0,20.0,5.0)
645
- apg = st.number_input("APG",0.0,15.0,3.0)
646
  if st.button("Generate AI Projection"):
647
  prompt = (
648
  f"Analyze and project the future potential of NBA player {sp}: "
 
4
  import requests
5
  import os
6
  from datetime import datetime
7
+ # Removed BeautifulSoup, Comment, re as BRScraper handles parsing
 
8
  import plotly.express as px
9
  import plotly.graph_objects as go
10
 
11
+ # Import the basketball_reference_scraper package (called "BRScraper" throughout this file)
12
+ try:
13
+ from basketball_reference_scraper import players, teams, seasons
14
+ BRSCRAPER_AVAILABLE = True
15
+ except ImportError:
16
+ BRSCRAPER_AVAILABLE = False
17
+ # Display error message if BRScraper is not found
18
+ st.error("BRScraper not found. Please install with: pip install basketball-reference-scraper")
19
 
20
  # Page configuration
21
  st.set_page_config(
 
61
  for i in range(num_seasons):
62
  end_year = latest_season_end_year - i
63
  start_year = end_year - 1
64
+ # Use en-dash for consistency with BBR format
65
+ seasons_list.append(f"{start_year}–{str(end_year)[-2:]}")
66
  return sorted(seasons_list, reverse=True) # Sort to show most recent first
67
 
68
  @st.cache_data(ttl=3600)
 
71
  Uses BRScraper to get a list of players from a recent season's stats.
72
  This serves as our player index for the multiselect.
73
  """
74
+ if not BRSCRAPER_AVAILABLE:
75
+ return pd.DataFrame(columns=['name'])
76
+
77
  try:
78
+ # Get the end year of the most recent season for BRScraper
79
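+ # Example: '2024–25' -> 25 (season labels now carry a two-digit end year, so this is NOT 2025 — TODO confirm the scraper accepts that or rebuild the full year)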
80
+ latest_season_end_year = int(get_available_seasons(1)[0].split('–')[1])
81
+
82
+ # Use seasons.get_stats to get a list of players for the latest season
83
+ # BRScraper's get_stats returns a 'PLAYER' column
84
+ df = seasons.get_stats(latest_season_end_year, data_format='per_game', playoffs=False)
85
 
86
+ if df.empty or 'PLAYER' not in df.columns:
87
+ st.warning(f"BRScraper could not fetch player list for {latest_season_end_year}. Falling back to common players.")
88
+ # Fallback to a hardcoded list of common players for demo
89
+ common_players = [
90
+ 'LeBron James', 'Stephen Curry', 'Kevin Durant', 'Giannis Antetokounmpo',
91
+ 'Nikola Jokic', 'Joel Embiid', 'Jayson Tatum', 'Luka Doncic',
92
+ 'Damian Lillard', 'Jimmy Butler', 'Kawhi Leonard', 'Paul George',
93
+ 'Anthony Davis', 'Rudy Gobert', 'Donovan Mitchell', 'Trae Young',
94
+ 'Devin Booker', 'Karl-Anthony Towns', 'Zion Williamson', 'Ja Morant'
95
+ ]
96
+ return pd.DataFrame({'name': common_players})
97
+
98
+ player_names = df['PLAYER'].unique().tolist()
99
  return pd.DataFrame({'name': player_names})
100
+
101
  except Exception as e:
102
+ st.error(f"Error fetching player index with BRScraper: {e}. Falling back to common players.")
103
+ # Fallback to hardcoded list
104
+ fallback_players = [
105
+ 'LeBron James', 'Stephen Curry', 'Kevin Durant', 'Giannis Antetokounmpo',
106
+ 'Nikola Jokic', 'Joel Embiid', 'Jayson Tatum', 'Luka Doncic'
107
+ ]
108
+ return pd.DataFrame({'name': fallback_players})
109
 
110
  @st.cache_data(ttl=300)
111
  def get_player_career_stats_brscraper(player_name):
 
113
  Uses BRScraper to get a player's career stats.
114
  Applies column renaming and numeric conversion.
115
  """
116
+ if not BRSCRAPER_AVAILABLE:
117
+ return pd.DataFrame()
118
+
119
  try:
120
+ # BRScraper's players.get_stats returns a DataFrame with career stats
121
+ df = players.get_stats(player_name, stat_type='PER_GAME')
122
+
123
  if df.empty:
124
  return pd.DataFrame()
125
 
126
+ # Standardize column names from BRScraper output to app's expected format
127
  column_mapping = {
128
+ 'SEASON': 'Season', # BRScraper returns 'SEASON'
129
  'G': 'GP', 'GS': 'GS', 'MP': 'MIN',
130
+ 'FG_PCT': 'FG_PCT', 'FG3_PCT': 'FG3_PCT', 'FT_PCT': 'FT_PCT',
131
  'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
132
  'PF': 'PF', 'PTS': 'PTS',
133
+ 'AGE': 'AGE', 'TEAM': 'TEAM_ABBREVIATION', 'POS': 'POSITION',
134
+ 'FG': 'FGM', 'FGA': 'FGA', 'FG3': 'FG3M', 'FG3A': 'FG3A',
135
+ 'FG2': 'FGM2', 'FG2A': 'FGA2', 'FG2_PCT': 'FG2_PCT', 'EFG_PCT': 'EFG_PCT',
136
  'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
137
  }
138
 
 
146
  df['Season'] = df['Season'].astype(str).str.replace('-', '–')
147
 
148
  # Convert numeric columns
149
+ # Exclude 'Season', 'TEAM_ABBREVIATION', 'POSITION' as they are strings
150
+ non_numeric_cols = {'Season', 'TEAM_ABBREVIATION', 'POSITION'}
151
  for col in df.columns:
152
  if col not in non_numeric_cols:
153
  df[col] = pd.to_numeric(df[col], errors="coerce")
 
156
  df['Player'] = player_name
157
 
158
  return df
159
+
160
  except Exception as e:
161
  st.error(f"Error fetching stats for {player_name} with BRScraper: {e}")
162
  return pd.DataFrame()
 
164
  @st.cache_data(ttl=300)
165
  def get_team_season_stats_brscraper(year):
166
  """
167
+ Uses BRScraper to get team stats for a given season year.
168
  Applies column renaming and numeric conversion.
169
  """
170
+ if not BRSCRAPER_AVAILABLE:
171
+ return pd.DataFrame()
172
+
173
  try:
174
+ # BRScraper's teams.get_team_stats returns a DataFrame with team stats for the year
175
+ df = teams.get_team_stats(year, data_format='per_game')
 
 
 
 
 
 
 
 
 
 
176
 
177
+ if df.empty:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  return pd.DataFrame()
179
 
180
+ # Standardize column names from BRScraper output
181
  column_mapping = {
182
  'G': 'GP', 'MP': 'MIN',
183
+ 'FG_PCT': 'FG_PCT', 'FG3_PCT': 'FG3_PCT', 'FT_PCT': 'FT_PCT',
184
  'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
185
  'PF': 'PF', 'PTS': 'PTS',
186
+ 'RK': 'RANK', 'W': 'WINS', 'L': 'LOSSES', 'W_L_PCT': 'WIN_LOSS_PCT', # BRScraper uses W_L_PCT
187
+ 'FG': 'FGM', 'FGA': 'FGA', 'FG3': 'FG3M', 'FG3A': 'FG3A',
188
+ 'FG2': 'FGM2', 'FG2A': 'FGA2', 'FG2_PCT': 'FG2_PCT', 'EFG_PCT': 'EFG_PCT',
189
+ 'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB',
190
+ 'TEAM': 'Team' # BRScraper returns 'TEAM'
191
  }
192
 
193
  for old_col, new_col in column_mapping.items():
194
+ if old_col in df.columns:
195
+ df = df.rename(columns={old_col: new_col})
196
 
197
  # Convert numeric columns
198
+ # Exclude 'Team' and 'RANK' as they are strings/identifiers
199
+ non_numeric_cols = {"Team", "RANK"}
200
+ for col in df.columns:
201
  if col not in non_numeric_cols:
202
+ df[col] = pd.to_numeric(df[col], errors="coerce")
203
 
204
+ # Ensure 'Team' column is present and clean it (remove asterisks)
205
+ if 'Team' in df.columns:
206
+ df['Team'] = df['Team'].astype(str).str.replace('*', '', regex=False).str.strip()
 
 
 
207
  else:
208
+ st.warning(f"Could not find 'Team' column in BRScraper output for year {year}.")
209
  return pd.DataFrame()
210
 
211
+ return df
212
+
213
  except Exception as e:
214
  st.error(f"Error fetching team stats for {year} with BRScraper: {e}")
215
  return pd.DataFrame()
 
225
  return ""
226
  hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
227
  payload = {
228
+ "model":"sonar-medium-online", # Using a commonly available online model
229
  "messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
230
  "max_tokens":max_tokens, "temperature":temp
231
  }
 
235
  return r.json().get("choices", [{}])[0].get("message",{}).get("content","")
236
  except requests.exceptions.RequestException as e:
237
  error_message = f"Error communicating with Perplexity API: {e}"
238
+ if hasattr(e, 'response') and e.response is not None:
239
  try:
240
+ error_detail = e.response.json().get("error", {}).get("message", e.response.text)
241
  error_message = f"Perplexity API error: {error_detail}"
242
  except ValueError:
243
  error_message = f"Perplexity API error: {r.response.status_code} - {r.response.reason}"
 
248
  return ""
249
 
250
  # —————————————————————————————————————————————————————————————————————————————
251
+ # Plotting functions
252
  def create_comparison_chart(data, players_names, metric):
253
  """Create comparison chart for players"""
254
  fig = go.Figure()
255
 
256
  for i, player in enumerate(players_names):
257
+ if player in data['Player'].values:
258
  player_data = data[data['Player'] == player]
259
  fig.add_trace(go.Scatter(
260
+ x=player_data['Season'],
261
  y=player_data[metric],
262
  mode='lines+markers',
263
  name=player,
 
279
  fig = go.Figure()
280
 
281
  for player_name, stats in player_stats.items():
 
282
  r_values = [stats.get(cat, 0) for cat in categories]
283
 
284
  fig.add_trace(go.Scatterpolar(
 
293
  polar=dict(
294
  radialaxis=dict(
295
  visible=True,
 
296
  range=[0, 100]
297
  )),
298
  showlegend=True,
 
305
  # Main App Structure
306
  # —————————————————————————————————————————————————————————————————————————————
307
  def main():
308
+ if not BRSCRAPER_AVAILABLE:
309
+ st.warning("⚠️ BRScraper is not installed. Some features may be limited. Install with: `pip install basketball-reference-scraper`")
310
+
311
  st.markdown('<h1 class="main-header">🏀 NBA Analytics Hub (BBR Edition)</h1>', unsafe_allow_html=True)
312
  st.sidebar.title("Navigation")
313
  page = st.sidebar.radio("", [
 
331
  # —————————————————————————————————————————————————————————————————————————————
332
  def player_vs_player():
333
  st.markdown('<h2 class="section-header">Player vs Player Comparison</h2>', unsafe_allow_html=True)
334
+
335
+ if not BRSCRAPER_AVAILABLE:
336
+ st.error("BRScraper is required for this feature. Please install basketball-reference-scraper.")
337
+ return
338
+
339
+ idx = get_player_index_brscraper()
340
  names = idx['name'].tolist()
341
  selected_players = st.multiselect("Select Players (up to 4)", names, max_selections=4)
342
 
343
  available_seasons = get_available_seasons()
344
+ selected_seasons = st.multiselect("Select Seasons", available_seasons, default=[available_seasons[0]] if available_seasons else [])
345
 
346
  if st.button("Run Comparison"):
347
  if not selected_players:
 
349
  return
350
 
351
  stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
352
+ all_player_season_data = []
353
  players_with_no_data = []
354
 
355
  with st.spinner("Fetching player data..."):
 
357
  df_player_career = get_player_career_stats_brscraper(player_name)
358
 
359
  if not df_player_career.empty:
 
 
360
  filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
361
 
362
  if not filtered_df.empty:
 
363
  all_player_season_data.append(filtered_df)
364
  else:
365
  players_with_no_data.append(player_name)
366
  else:
367
  players_with_no_data.append(player_name)
368
 
 
369
  if players_with_no_data:
370
+ st.info(f"No data found for the selected seasons ({', '.join(selected_seasons)}) for: {', '.join(players_with_no_data)}.")
371
 
372
  if not all_player_season_data:
373
+ st.error("No data available for any of the selected players and seasons to display.")
374
  return
375
 
 
376
  comparison_df_raw = pd.concat(all_player_season_data, ignore_index=True)
377
 
378
  with stats_tabs[0]:
379
  st.subheader("Basic Statistics")
 
 
380
  if len(selected_seasons) > 1:
381
  basic_display_df = comparison_df_raw.groupby('Player').mean(numeric_only=True).reset_index()
382
  basic_cols = ['Player', 'GP', 'MIN', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'FG_PCT', 'FT_PCT', 'FG3_PCT']
 
393
  advanced_df = comparison_df_raw.copy()
394
 
395
  # Calculate TS% (True Shooting Percentage)
 
396
  advanced_df['FGA'] = pd.to_numeric(advanced_df['FGA'], errors='coerce').fillna(0)
397
  advanced_df['FTA'] = pd.to_numeric(advanced_df['FTA'], errors='coerce').fillna(0)
398
  advanced_df['PTS'] = pd.to_numeric(advanced_df['PTS'], errors='coerce').fillna(0)
 
418
  st.subheader("Player Comparison Charts")
419
 
420
  if not comparison_df_raw.empty:
421
+ metrics = ['PTS', 'REB', 'AST', 'FG_PCT', 'FG3_PCT', 'FT_PCT', 'STL', 'BLK']
422
  available_metrics = [m for m in metrics if m in comparison_df_raw.columns]
423
 
424
  if available_metrics:
425
  selected_metric = st.selectbox("Select Metric to Visualize", available_metrics)
426
 
427
  if selected_metric:
 
428
  if len(selected_players) == 1 and len(selected_seasons) > 1:
 
429
  player_trend_df = comparison_df_raw[comparison_df_raw['Player'] == selected_players[0]].sort_values(by='Season')
430
  fig = px.line(
431
  player_trend_df,
 
435
  markers=True
436
  )
437
  else:
 
438
  avg_comparison_df = comparison_df_raw.groupby('Player')[available_metrics].mean(numeric_only=True).reset_index()
439
  fig = px.bar(
440
  avg_comparison_df,
 
445
  )
446
  st.plotly_chart(fig, use_container_width=True)
447
 
448
+ # Radar chart
449
  radar_metrics_for_chart = ['PTS', 'REB', 'AST', 'STL', 'BLK']
450
  radar_metrics_for_chart = [m for m in radar_metrics_for_chart if m in comparison_df_raw.columns]
451
 
452
  if len(radar_metrics_for_chart) >= 3:
453
  radar_data = {}
 
454
  if len(selected_seasons) > 1:
455
  radar_source_df = comparison_df_raw.groupby('Player')[radar_metrics_for_chart].mean(numeric_only=True).reset_index()
456
  else:
 
458
 
459
  scaled_radar_df = radar_source_df.copy()
460
 
 
461
  for col in radar_metrics_for_chart:
462
  min_val = scaled_radar_df[col].min()
463
  max_val = scaled_radar_df[col].max()
464
  if max_val > min_val:
465
  scaled_radar_df[col] = ((scaled_radar_df[col] - min_val) / (max_val - min_val)) * 100
466
  else:
467
+ scaled_radar_df[col] = 0
468
 
469
  for _, row in scaled_radar_df.iterrows():
470
  radar_data[row['Player']] = {
 
474
  if radar_data:
475
  radar_fig = create_radar_chart(radar_data, radar_metrics_for_chart)
476
  st.plotly_chart(radar_fig, use_container_width=True)
 
 
 
 
 
 
 
 
 
477
 
478
  def team_vs_team():
479
  st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
480
 
481
+ if not BRSCRAPER_AVAILABLE:
482
+ st.error("BRScraper is required for this feature.")
483
+ return
484
+
485
  available_seasons = get_available_seasons()
486
+ if not available_seasons:
487
+ st.error("No seasons available.")
488
+ return
489
+
490
  selected_season_str = st.selectbox("Select Season", available_seasons, index=0)
491
+ # Correctly extract the full end year (e.g., "2024–25" -> 2025) for BRScraper
 
492
  year_for_team_stats = int(selected_season_str.split('–')[1])
493
 
494
+ tm_df = get_team_season_stats_brscraper(year_for_team_stats)
495
  if tm_df.empty:
496
+ st.info(f"No team data available for the {selected_season_str} season.")
497
  return
498
 
499
+ teams = tm_df['Team'].unique().tolist()
500
  selected_teams = st.multiselect("Select Teams (up to 4)", teams, max_selections=4)
501
 
502
  if st.button("Run Comparison"):
 
509
 
510
  with st.spinner("Fetching team data..."):
511
  for t in selected_teams:
512
+ df = tm_df[tm_df.Team == t].copy()
513
  if not df.empty:
514
+ df['Season'] = selected_season_str
515
+ stats.append(df.iloc[0].to_dict())
 
 
516
  else:
517
  teams_with_no_data.append(t)
518
 
519
  if teams_with_no_data:
520
+ st.info(f"No data found for: {', '.join(teams_with_no_data)}")
521
 
522
  if not stats:
523
+ st.error("No data available for the selected teams.")
524
  return
525
 
526
  comp = pd.DataFrame(stats)
527
+ for col in ['PTS', 'REB', 'AST', 'STL', 'BLK', 'FG_PCT', 'FG3_PCT', 'FT_PCT']:
 
528
  if col in comp.columns:
529
  comp[col] = pd.to_numeric(comp[col], errors='coerce')
530
 
531
  st.subheader("Team Statistics Comparison")
532
+ cols = ['Team', 'Season', 'PTS', 'REB', 'AST', 'STL', 'BLK', 'FG_PCT', 'FG3_PCT', 'FT_PCT']
533
  display_cols = [col for col in cols if col in comp.columns]
534
  st.dataframe(comp[display_cols].round(2), use_container_width=True)
535
 
536
  st.subheader("Team Performance Visualization")
537
+ metric_options = ['PTS', 'REB', 'AST', 'FG_PCT', 'FG3_PCT', 'FT_PCT']
538
  available_metrics = [m for m in metric_options if m in comp.columns]
539
 
540
  if available_metrics:
541
  selected_metric = st.selectbox("Select Metric", available_metrics)
 
542
  fig = px.bar(
543
  comp,
544
  x='Team',
545
  y=selected_metric,
546
+ color='Team',
547
  title=f"Team {selected_metric} Comparison ({selected_season_str} Season)",
548
  barmode='group'
549
  )
550
  st.plotly_chart(fig, use_container_width=True)
 
 
 
551
 
552
  def awards_predictor():
553
  st.markdown('<h2 class="section-header">NBA Awards Predictor</h2>', unsafe_allow_html=True)
554
  award = st.selectbox("Select Award", ["MVP","Defensive Player of the Year","Rookie of the Year","6th Man of the Year","All-NBA First Team"])
555
  st.subheader(f"{award} Criteria")
556
+
557
  criteria = {}
558
  if award=="MVP":
559
+ criteria = {
560
+ "PPG":st.slider("Min PPG",15.0,35.0,25.0),
561
+ "Wins":st.slider("Min Team Wins",35,70,50),
562
+ "PER":st.slider("Min PER",15.0,35.0,25.0),
563
+ "WS":st.slider("Min Win Shares",5.0,20.0,10.0)
564
+ }
565
  elif award=="Defensive Player of the Year":
566
+ criteria = {
567
+ "BPG":st.slider("Min BPG",0.0,4.0,1.5),
568
+ "SPG":st.slider("Min SPG",0.0,3.0,1.0),
569
+ "DefRtgMax":st.slider("Max Def Rating",90.0,120.0,105.0),
570
+ "DefRankMax":st.slider("Max Team Def Rank",1,30,10)
571
+ }
572
  else:
573
+ criteria = {
574
+ "PPG":st.slider("Min PPG",10.0,30.0,15.0),
575
+ "Games":st.slider("Min Games",50,82,65),
576
+ "FG%":st.slider("Min FG%",0.35,0.65,0.45)
577
+ }
578
 
579
  if st.button("Generate Predictions"):
580
+ p = f"Predict top 5 {award} candidates based on {criteria}. Focus on 2024-25 season."
581
  resp = ask_perp(p, system="You are an NBA awards expert AI.", max_tokens=800)
582
  st.markdown("### Predictions")
583
  st.write(resp)
584
 
585
  def ai_chat():
586
  st.markdown('<h2 class="section-header">AI Chat & Insights</h2>', unsafe_allow_html=True)
587
+
588
  for msg in st.session_state.chat_history:
589
  with st.chat_message(msg["role"]):
590
  st.write(msg["content"])
591
+
592
+ if prompt := st.chat_input("Ask me anything about NBA…"):
593
  st.session_state.chat_history.append({"role":"user","content":prompt})
594
  with st.chat_message("user"):
595
+ st.write(prompt)
596
  with st.chat_message("assistant"):
597
  ans = ask_perp(prompt, system="You are an NBA expert analyst AI.", max_tokens=700)
598
  st.write(ans)
599
  st.session_state.chat_history.append({"role":"assistant","content":ans})
600
+
601
  st.subheader("Quick Actions")
602
  c1,c2,c3 = st.columns(3)
603
  if c1.button("🏆 Contenders"):
604
+ prompt = "Analyze the current NBA championship contenders for 2025. Who are the top 5 teams and why?"
605
  response = ask_perp(prompt)
606
  if response: st.write(response)
607
  if c2.button("⭐ Rising Stars"):
608
+ prompt = "Who are the most promising young NBA players to watch in 2025? Focus on players 23 and under."
609
  response = ask_perp(prompt)
610
  if response: st.write(response)
611
  if c3.button("📊 Trades"):
 
625
  yrs = st.number_input("Years in NBA",0,7,2)
626
  ppg = st.number_input("PPG",0.0,40.0,15.0)
627
  rpg = st.number_input("RPG",0.0,20.0,5.0)
628
+ apg = st.number_input("APG",0.0,15.0,3.0) # Completed this line
629
  if st.button("Generate AI Projection"):
630
  prompt = (
631
  f"Analyze and project the future potential of NBA player {sp}: "