rairo committed on
Commit
629a2b1
·
verified ·
1 Parent(s): 5964d76

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +173 -570
src/streamlit_app.py CHANGED
@@ -4,11 +4,13 @@ import numpy as np
4
  import requests
5
  import os
6
  from datetime import datetime
7
- from bs4 import BeautifulSoup, Comment
8
- import re # New import for regex operations
9
- import plotly.express as px # Ensure plotly imports are present
10
- import plotly.graph_objects as go # Ensure plotly imports are present
11
 
 
 
12
 
13
  # Page configuration
14
  st.set_page_config(
@@ -33,551 +35,188 @@ if 'chat_history' not in st.session_state:
33
  st.session_state.chat_history = []
34
 
35
  # —————————————————————————————————————————————————————————————————————————————
36
- # Basketball-Reference Data Fetching Utilities
37
  # —————————————————————————————————————————————————————————————————————————————
38
 
39
- import requests
40
- import pandas as pd
41
- import streamlit as st
42
- from bs4 import BeautifulSoup
43
- import re
44
- import time
45
- import random
46
- from urllib.parse import urljoin
47
-
48
- @st.cache_data(ttl=3600)
49
- def fetch_html(url):
50
- """Fetch raw HTML for a URL (with error handling and rate limiting)."""
51
- try:
52
- # Add random delay to be respectful to basketball-reference.com
53
- time.sleep(random.uniform(0.5, 1.5))
54
-
55
- headers = {
56
- 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
57
- }
58
-
59
- resp = requests.get(url, timeout=30, headers=headers)
60
- resp.raise_for_status()
61
- return resp.text
62
- except requests.exceptions.RequestException as e:
63
- st.error(f"Failed to fetch {url}: {e}")
64
- return ""
65
- except Exception as e:
66
- st.error(f"An unexpected error occurred while fetching {url}: {e}")
67
- return ""
68
-
69
- def parse_table(html, table_id=None):
70
- """
71
- Given raw HTML and optional table_id, locate that <table>,
72
- handling cases where it's commented out, then parse it with pandas.read_html.
73
- """
74
- if not html:
75
- return pd.DataFrame()
76
-
77
- soup = BeautifulSoup(html, "html.parser")
78
- tbl_html = ""
79
-
80
- if table_id:
81
- # First, try to find the table directly
82
- tbl = soup.find("table", {"id": table_id})
83
- if tbl:
84
- tbl_html = str(tbl)
85
- else:
86
- # If not found directly, search for it within HTML comments
87
- comments = soup.find_all(string=lambda text: isinstance(text, Comment))
88
- for comment in comments:
89
- comment_soup = BeautifulSoup(comment, "html.parser")
90
- tbl = comment_soup.find("table", {"id": table_id})
91
- if tbl:
92
- tbl_html = str(tbl)
93
- break
94
- else:
95
- # fallback: first table on page (only if no table_id specified)
96
- first = soup.find("table")
97
- if first:
98
- tbl_html = str(first)
99
 
100
- if not tbl_html:
101
- return pd.DataFrame()
 
 
 
 
 
102
 
103
- try:
104
- # pd.read_html returns a list of DataFrames, we want the first one
105
- dfs = pd.read_html(tbl_html, header=0)
106
- if dfs:
107
- return dfs[0]
108
- else:
109
- return pd.DataFrame()
110
- except ValueError as e:
111
- # No tables found in the provided HTML string
112
- st.warning(f"No tables found in HTML: {e}")
113
- return pd.DataFrame()
114
- except Exception as e:
115
- st.error(f"Error parsing table with pandas: {e}")
116
- return pd.DataFrame(
117
 
118
- # —————————————————————————————————————————————————————————————————————————————
119
- # Basketball-Reference Data Fetching Utilities
120
- # ———————————————————————————————————————————————————————————————————————————
121
  @st.cache_data(ttl=3600)
122
- def get_player_index():
123
  """
124
- Scrape the master list of players from BBR (players/a → players/z).
125
- Returns DataFrame with columns ['name','url'].
126
  """
127
- base = "https://www.basketball-reference.com/players/"
128
- records = []
129
-
130
- for letter in map(chr, range(ord('a'), ord('z')+1)):
131
- url = f"{base}{letter}/"
132
- html = fetch_html(url)
133
- if not html:
134
- continue
135
-
136
- soup = BeautifulSoup(html, "html.parser")
137
- # The players table is usually directly available, not commented out.
138
- table = soup.find("table", {"id": "players"})
139
- if not table:
140
- continue
141
-
142
- # Look for both tbody and direct tr children
143
- rows = table.select("tbody tr") if table.select("tbody tr") else table.select("tr")
144
 
145
- for row in rows:
146
- th = row.find("th", {"data-stat": "player"})
147
- if not th:
148
- continue
149
- a = th.find("a", href=True)
150
- if not a:
151
- continue
152
- name = a.text.strip()
153
- href = a["href"].strip()
154
- full_url = urljoin("https://www.basketball-reference.com", href)
155
- records.append({"name": name, "url": full_url})
156
-
157
- return pd.DataFrame(records)
158
-
159
- # —————————————————————————————————————————————————————————————————————————————
160
- @st.cache_data(ttl=300)
161
- def player_season_stats(bbr_url):
162
- """
163
- Scrapes a player's per‑season table (id="per_game") from their BBR page.
164
- Returns cleaned DataFrame with season-by-season averages.
165
- """
166
- html = fetch_html(bbr_url)
167
- if not html:
168
- return pd.DataFrame()
169
-
170
- # Use the parse_table function for consistency
171
- df = parse_table(html, "per_game")
172
-
173
- # If per_game table not found, try alternatives
174
- if df.empty:
175
- # Try other common table IDs for season stats
176
- for table_id in ["stats", "per_game_stats", "totals", "advanced"]:
177
- df = parse_table(html, table_id)
178
- if not df.empty:
179
- break
180
-
181
- # If still empty, try to find any table with Season column
182
- if df.empty:
183
- soup = BeautifulSoup(html, "html.parser")
184
- all_tables = soup.find_all("table")
185
- for table in all_tables:
186
- try:
187
- temp_df = pd.read_html(str(table), header=0)[0]
188
- # Check if it has Season column and looks like season stats
189
- if not temp_df.empty and 'Season' in temp_df.columns and 'Date' not in temp_df.columns:
190
- df = temp_df
191
- break
192
- except:
193
- continue
194
-
195
- if df.empty:
196
- st.warning(f"Could not find season stats table at {bbr_url}")
197
- return pd.DataFrame()
198
-
199
- # Handle potential MultiIndex columns
200
- if isinstance(df.columns, pd.MultiIndex):
201
- df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and 'Unnamed' not in str(col))
202
- for cols in df.columns.values]
203
-
204
- # Clean column names
205
- df.columns = [str(col).strip() for col in df.columns]
206
-
207
- # Verify we have the right table (should have Season column, not Date)
208
- if 'Season' not in df.columns:
209
- st.warning(f"Table found but no Season column. Available columns: {df.columns.tolist()}")
210
- return pd.DataFrame()
211
-
212
- if 'Date' in df.columns:
213
- st.warning("Found game log table instead of season stats. Skipping.")
214
- return pd.DataFrame()
215
-
216
- # Clean the data - remove header rows
217
- df = df[df["Season"].astype(str) != "Season"].copy()
218
- df = df[df["Season"].notna()].copy()
219
-
220
- # Remove any completely empty rows
221
- df = df.dropna(how='all').copy()
222
-
223
- if df.empty:
224
- st.warning("No valid season data found after cleaning")
225
- return pd.DataFrame()
226
-
227
- # Clean season format
228
- df["Season"] = df["Season"].astype(str).str.strip()
229
- df['Season'] = df['Season'].str.replace('-', '–') # Ensure en-dash for consistency
230
-
231
- # Standardize column names to match expected format
232
- column_mapping = {
233
- 'G': 'GP', 'GS': 'GS', 'MP': 'MIN',
234
- 'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
235
- 'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
236
- 'PF': 'PF', 'PTS': 'PTS',
237
- 'Age': 'AGE', 'Tm': 'TEAM_ABBREVIATION', 'Lg': 'LEAGUE_ID', 'Pos': 'POSITION',
238
- 'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
239
- '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
240
- 'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
241
- }
242
-
243
- # Apply column mapping only for columns that exist
244
- for old_col, new_col in column_mapping.items():
245
- if old_col in df.columns:
246
- df = df.rename(columns={old_col: new_col})
247
-
248
- # Clean team names if TEAM_ABBREVIATION column exists
249
- if 'TEAM_ABBREVIATION' in df.columns:
250
- df['TEAM_ABBREVIATION'] = df['TEAM_ABBREVIATION'].apply(clean_team_name)
251
-
252
- # Convert numeric columns
253
- non_numeric_cols = {'Season', 'TEAM_ABBREVIATION', 'LEAGUE_ID', 'POSITION', 'Player'}
254
- for col in df.columns:
255
- if col not in non_numeric_cols:
256
- df[col] = pd.to_numeric(df[col], errors="coerce")
257
-
258
- return df
259
 
260
- # —————————————————————————————————————————————————————————————————————————————
261
  @st.cache_data(ttl=300)
262
- def get_player_stats_by_name(player_name):
263
  """
264
- Get player stats by searching for the player name in the index.
265
- Returns DataFrame with player's career statistics.
266
  """
267
- if not player_name or not player_name.strip():
268
- st.warning("Please provide a valid player name")
269
- return pd.DataFrame()
270
-
271
- # Get player index
272
  try:
273
- player_index = get_player_index()
274
- if player_index.empty:
275
- st.error("Could not load player index. Please try again later.")
276
- return pd.DataFrame()
277
- except Exception as e:
278
- st.error(f"Error loading player index: {e}")
279
- return pd.DataFrame()
280
-
281
- # Clean the search term
282
- search_name = player_name.strip()
283
-
284
- # Search for player (case insensitive, exact match first)
285
- exact_matches = player_index[player_index['name'].str.lower() == search_name.lower()]
286
-
287
- if not exact_matches.empty:
288
- matches = exact_matches
289
- else:
290
- # Try partial matching
291
- matches = player_index[player_index['name'].str.contains(search_name, case=False, na=False, regex=False)]
292
-
293
- if matches.empty:
294
- st.warning(f"No player found matching '{player_name}'. Please check the spelling and try again.")
295
- return pd.DataFrame()
296
-
297
- if len(matches) > 1:
298
- st.info(f"Multiple players found matching '{player_name}':")
299
- for i, (_, row) in enumerate(matches.head(5).iterrows(), 1):
300
- st.info(f"{i}. {row['name']}")
301
- st.info("Using the first match. For more specific results, try using the full player name.")
302
-
303
- # Get stats for the first match
304
- try:
305
- player_url = matches.iloc[0]['url']
306
- player_stats = player_season_stats(player_url)
307
-
308
- if player_stats.empty:
309
- st.warning(f"Could not retrieve stats for {matches.iloc[0]['name']}")
310
  return pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
311
 
312
- # Add player name to the dataframe for reference
313
- player_stats['PLAYER_NAME'] = matches.iloc[0]['name']
 
 
 
 
 
 
314
 
315
- return player_stats
 
 
 
 
316
 
 
 
 
 
317
  except Exception as e:
318
- st.error(f"Error retrieving stats for {matches.iloc[0]['name']}: {e}")
319
  return pd.DataFrame()
320
 
321
- # —————————————————————————————————————————————————————————————————————————————
322
  @st.cache_data(ttl=300)
323
- def team_per_game(year):
324
  """
325
- Scrapes the league's per‑game team stats table from:
326
- https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html
327
- Returns cleaned DataFrame.
328
  """
329
- url = f"https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html"
330
- html = fetch_html(url)
331
- if not html:
332
- return pd.DataFrame()
333
-
334
- # Try multiple possible table IDs for team stats
335
- possible_table_ids = ["per_game-team", "per_game_team", "team-stats-per_game", "teams_per_game"]
336
- df = pd.DataFrame()
337
-
338
- for table_id in possible_table_ids:
339
- df = parse_table(html, table_id=table_id)
340
- if not df.empty:
341
- break
342
-
343
- # If no specific table found, try to find any table with team data
344
- if df.empty:
345
- soup = BeautifulSoup(html, "html.parser")
346
- tables = soup.find_all("table")
347
- for table in tables:
348
- if table.find("th", string=lambda text: text and "team" in text.lower()):
349
- df = parse_table(str(table))
350
- if not df.empty:
351
- break
352
-
353
- if df.empty:
354
- st.warning(f"Could not find team stats table for {year}")
355
- return pd.DataFrame()
356
-
357
- # Handle potential MultiIndex columns
358
- if isinstance(df.columns, pd.MultiIndex):
359
- df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
360
- for cols in df.columns.values]
361
-
362
- # Clean column names
363
- df.columns = [str(col).strip() for col in df.columns]
364
-
365
- # Find team column
366
- team_col = None
367
- for col in df.columns:
368
- if 'team' in col.lower() or col in ['Team', 'Tm']:
369
- team_col = col
370
- break
371
-
372
- if team_col is None:
373
- st.warning(f"Could not find team column in team stats. Available columns: {df.columns.tolist()}")
374
- return pd.DataFrame()
375
-
376
- # Rename team column to standard name
377
- if team_col != 'Team':
378
- df = df.rename(columns={team_col: 'Team'})
379
-
380
- # Remove header rows
381
- df = df[df["Team"].astype(str) != "Team"].copy()
382
- df = df[df["Team"].notna()].copy()
383
-
384
- # Rename Team to Tm for consistency
385
- df = df.rename(columns={"Team": "Tm"})
386
-
387
- # Standardize column names
388
- column_mapping = {
389
- 'G': 'GP', 'MP': 'MIN',
390
- 'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
391
- 'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
392
- 'PF': 'PF', 'PTS': 'PTS',
393
- 'Rk': 'RANK', 'W': 'WINS', 'L': 'LOSSES', 'W/L%': 'WIN_LOSS_PCT',
394
- 'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
395
- '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
396
- 'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
397
- }
398
-
399
- # Apply column mapping only for columns that exist
400
- for old_col, new_col in column_mapping.items():
401
- if old_col in df.columns:
402
- df = df.rename(columns={old_col: new_col})
403
-
404
- # Clean team names
405
- df['Tm'] = df['Tm'].apply(clean_team_name)
406
-
407
- # Convert numeric columns
408
- non_numeric_cols = {"Tm", "RANK"}
409
- for col in df.columns:
410
- if col not in non_numeric_cols:
411
- df[col] = pd.to_numeric(df[col], errors="coerce")
412
-
413
- return df
414
-
415
- # —————————————————————————————————————————————————————————————————————————————
416
- # Additional utility functions for team data processing
417
-
418
- @st.cache_data(ttl=300)
419
- def team_opponent_stats(year):
420
- """
421
- Scrapes the league's opponent per‑game team stats table from:
422
- https://www.basketball-reference.com/leagues/NBA_{year}_opp_per_game.html
423
- Returns cleaned DataFrame with opponent stats.
424
- """
425
- url = f"https://www.basketball-reference.com/leagues/NBA_{year}_opp_per_game.html"
426
- html = fetch_html(url)
427
- if not html:
428
- return pd.DataFrame()
429
-
430
- # Try multiple possible table IDs for opponent stats
431
- possible_table_ids = ["opp-stats-per_game", "opp_per_game", "opponent-stats-per_game"]
432
- df = pd.DataFrame()
433
-
434
- for table_id in possible_table_ids:
435
- df = parse_table(html, table_id=table_id)
436
- if not df.empty:
437
- break
438
-
439
- if df.empty:
440
- st.warning(f"Could not find opponent stats table for {year}")
441
- return pd.DataFrame()
442
-
443
- # Clean and process the same way as regular team stats
444
- if isinstance(df.columns, pd.MultiIndex):
445
- df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
446
- for cols in df.columns.values]
447
-
448
- df.columns = [str(col).strip() for col in df.columns]
449
-
450
- # Find team column
451
- team_col = None
452
- for col in df.columns:
453
- if 'team' in col.lower() or col in ['Team', 'Tm']:
454
- team_col = col
455
- break
456
-
457
- if team_col is None:
458
- return pd.DataFrame()
459
-
460
- if team_col != 'Team':
461
- df = df.rename(columns={team_col: 'Team'})
462
-
463
- df = df[df["Team"].astype(str) != "Team"].copy()
464
- df = df[df["Team"].notna()].copy()
465
- df = df.rename(columns={"Team": "Tm"})
466
-
467
- # Apply team name cleaning
468
- df['Tm'] = df['Tm'].apply(clean_team_name)
469
-
470
- # Same column standardization as regular team stats
471
- column_mapping = {
472
- 'G': 'OPP_GP', 'MP': 'OPP_MIN',
473
- 'FG%': 'OPP_FG_PCT', '3P%': 'OPP_FG3_PCT', 'FT%': 'OPP_FT_PCT',
474
- 'TRB': 'OPP_REB', 'AST': 'OPP_AST', 'STL': 'OPP_STL', 'BLK': 'OPP_BLK', 'TOV': 'OPP_TO',
475
- 'PF': 'OPP_PF', 'PTS': 'OPP_PTS',
476
- 'FG': 'OPP_FGM', 'FGA': 'OPP_FGA', '3P': 'OPP_FG3M', '3PA': 'OPP_FG3A',
477
- '2P': 'OPP_FGM2', '2PA': 'OPP_FGA2', '2P%': 'OPP_FG2_PCT', 'eFG%': 'OPP_EFG_PCT',
478
- 'FT': 'OPP_FTM', 'FTA': 'OPP_FTA', 'ORB': 'OPP_OREB', 'DRB': 'OPP_DREB'
479
- }
480
-
481
- for old_col, new_col in column_mapping.items():
482
- if old_col in df.columns:
483
- df = df.rename(columns={old_col: new_col})
484
 
485
- # Convert numeric columns
486
- non_numeric_cols = {"Tm"}
487
- for col in df.columns:
488
- if col not in non_numeric_cols:
489
- df[col] = pd.to_numeric(df[col], errors="coerce")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
 
491
- return df
 
492
 
493
- @st.cache_data(ttl=300)
494
- def team_standings(year):
495
- """
496
- Scrapes team standings from Basketball Reference.
497
- Returns DataFrame with team records and standings info.
498
- """
499
- url = f"https://www.basketball-reference.com/leagues/NBA_{year}_standings.html"
500
- html = fetch_html(url)
501
- if not html:
502
- return pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
 
504
- # Try to find standings tables (usually split by conference)
505
- soup = BeautifulSoup(html, "html.parser")
506
- standings_data = []
507
-
508
- # Look for conference tables
509
- for conference in ['E', 'W']: # Eastern and Western conference IDs
510
- table_id = f"standings_{conference}"
511
- table = soup.find("table", {"id": table_id})
512
- if table:
513
- df = parse_table(str(table))
514
- if not df.empty:
515
- df['Conference'] = 'Eastern' if conference == 'E' else 'Western'
516
- standings_data.append(df)
517
-
518
- if not standings_data:
519
  return pd.DataFrame()
520
-
521
- # Combine conference standings
522
- df = pd.concat(standings_data, ignore_index=True)
523
-
524
- # Clean team names if 'Team' column exists
525
- if 'Team' in df.columns:
526
- df['Team'] = df['Team'].apply(clean_team_name)
527
-
528
- return df
529
-
530
- def validate_dataframe(df, required_columns=None):
531
- """
532
- Validate that a DataFrame has the expected structure and data.
533
- """
534
- if df.empty:
535
- return False, "DataFrame is empty"
536
-
537
- if required_columns:
538
- missing_cols = [col for col in required_columns if col not in df.columns]
539
- if missing_cols:
540
- return False, f"Missing required columns: {missing_cols}"
541
-
542
- return True, "DataFrame is valid"
543
-
544
- def clean_team_name(team_name):
545
- """
546
- Clean and standardize team names from Basketball Reference.
547
- """
548
- if pd.isna(team_name):
549
- return team_name
550
-
551
- # Remove any asterisks or other symbols
552
- team_name = str(team_name).strip().replace('*', '')
553
-
554
- # Handle special cases for team name variations
555
- team_mapping = {
556
- 'NOP': 'NO', # New Orleans Pelicans sometimes shown as NOP
557
- 'PHX': 'PHO', # Phoenix Suns sometimes shown as PHX
558
- 'BRK': 'BKN', # Brooklyn Nets sometimes shown as BRK
559
- 'CHA': 'CHO', # Charlotte sometimes inconsistent
560
- 'UTA': 'UTH' # Utah Jazz sometimes shown as UTA
561
- }
562
-
563
- return team_mapping.get(team_name, team_name)
564
 
565
- def retry_fetch(func, *args, max_retries=3, **kwargs):
566
- """
567
- Retry a function call with exponential backoff.
568
- """
569
- for attempt in range(max_retries):
570
- try:
571
- result = func(*args, **kwargs)
572
- if not (isinstance(result, pd.DataFrame) and result.empty):
573
- return result
574
- except Exception as e:
575
- if attempt == max_retries - 1:
576
- st.error(f"Failed after {max_retries} attempts: {e}")
577
- return pd.DataFrame()
578
- time.sleep(2 ** attempt) # Exponential backoff
579
-
580
- return pd.DataFrame()
581
  # —————————————————————————————————————————————————————————————————————————————
582
  # Perplexity integration
583
  PERP_KEY = os.getenv("PERPLEXITY_API_KEY")
@@ -589,7 +228,7 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
589
  return ""
590
  hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
591
  payload = {
592
- "model":"sonar-pro", # Changed to a commonly available online model
593
  "messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
594
  "max_tokens":max_tokens, "temperature":temp
595
  }
@@ -611,28 +250,6 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
611
  st.error(f"An unexpected error occurred with Perplexity API: {e}")
612
  return ""
613
 
614
- # —————————————————————————————————————————————————————————————————————————————
615
- # Helper for dynamic season generation
616
- def get_available_seasons(num_seasons=6):
617
- """Generates a list of recent NBA seasons in 'YYYY–YY' format."""
618
- current_year = datetime.now().year
619
- current_month = datetime.now().month
620
-
621
- # Determine the latest season end year.
622
- # If it's before July (e.g., May 2025), the current season is 2024-25 (ends 2025).
623
- # If it's July or later (e.g., July 2025), the 2024-25 season just finished,
624
- # and the next season (2025-26) is considered the "current" one for future projections.
625
- latest_season_end_year = current_year
626
- if current_month >= 7: # NBA season typically ends in June
627
- latest_season_end_year += 1
628
-
629
- seasons_list = []
630
- for i in range(num_seasons):
631
- end_year = latest_season_end_year - i
632
- start_year = end_year - 1
633
- seasons_list.append(f"{start_year}–{end_year}")
634
- return sorted(seasons_list, reverse=True) # Sort to show most recent first
635
-
636
  # —————————————————————————————————————————————————————————————————————————————
637
  # Plotting functions (retained from previous version)
638
  def create_comparison_chart(data, players_names, metric):
@@ -716,7 +333,7 @@ def main():
716
  # —————————————————————————————————————————————————————————————————————————————
717
  def player_vs_player():
718
  st.markdown('<h2 class="section-header">Player vs Player Comparison</h2>', unsafe_allow_html=True)
719
- idx = get_player_index()
720
  names = idx['name'].tolist()
721
  selected_players = st.multiselect("Select Players (up to 4)", names, max_selections=4)
722
 
@@ -730,18 +347,11 @@ def player_vs_player():
730
 
731
  stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
732
  all_player_season_data = [] # To store individual season rows for each player
733
- players_not_found_in_index = []
734
- players_with_no_season_data = []
735
 
736
  with st.spinner("Fetching player data..."):
737
  for player_name in selected_players:
738
- player_url_row = idx.loc[idx.name == player_name, 'url']
739
- if player_url_row.empty:
740
- players_not_found_in_index.append(player_name)
741
- continue
742
-
743
- player_url = player_url_row.iat[0]
744
- df_player_career = player_season_stats(player_url)
745
 
746
  if not df_player_career.empty:
747
  # Filter for selected seasons. The player_season_stats function
@@ -749,22 +359,16 @@ def player_vs_player():
749
  filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
750
 
751
  if not filtered_df.empty:
752
- filtered_df['Player'] = player_name # Add player name for identification
753
  all_player_season_data.append(filtered_df)
754
  else:
755
- # This means player was found, but no data for selected seasons
756
- players_with_no_season_data.append(player_name)
757
  else:
758
- # This means player_season_stats returned an empty DF (fetch/parse failed)
759
- players_with_no_season_data.append(player_name) # Treat as no data for selected seasons
760
-
761
- # Report on players not found in index
762
- if players_not_found_in_index:
763
- st.error(f"The following players were not found in the Basketball-Reference index: {', '.join(players_not_found_in_index)}. Please check spelling.")
764
 
765
  # Report on players with no data for selected seasons
766
- if players_with_no_season_data:
767
- st.info(f"No data found for the selected seasons ({', '.join(selected_seasons)}) for: {', '.join(players_with_no_season_data)}. This might be because the season hasn't started or data is not yet available.")
768
 
769
  if not all_player_season_data:
770
  st.error("No data available for any of the selected players and seasons to display. Please adjust your selections.")
@@ -899,12 +503,12 @@ def team_vs_team():
899
  # Extract the end year from the season string (e.g., "2024–25" -> 2025)
900
  year_for_team_stats = int(selected_season_str.split('–')[1])
901
 
902
- tm_df = team_per_game(year_for_team_stats)
903
  if tm_df.empty:
904
- st.info(f"No team data available for the {selected_season_str} season. This might be because the season hasn't started or data is not yet available.")
905
  return
906
 
907
- teams = tm_df['Tm'].unique().tolist()
908
  selected_teams = st.multiselect("Select Teams (up to 4)", teams, max_selections=4)
909
 
910
  if st.button("Run Comparison"):
@@ -917,11 +521,10 @@ def team_vs_team():
917
 
918
  with st.spinner("Fetching team data..."):
919
  for t in selected_teams:
920
- df = tm_df[tm_df.Tm == t].copy() # Use .copy() to avoid SettingWithCopyWarning
921
  if not df.empty:
922
  # For team stats, we usually get one row per team per season from team_per_game
923
  # So, no need for .mean() here, just take the row.
924
- df['Team'] = t # Add 'Team' column for consistency
925
  df['Season'] = selected_season_str # Add 'Season' column
926
  stats.append(df.iloc[0].to_dict()) # Convert the single row to dict
927
  else:
@@ -1029,7 +632,7 @@ def ai_chat():
1029
 
1030
  def young_projections():
1031
  st.markdown('<h2 class="section-header">Young Player Projections</h2>', unsafe_allow_html=True)
1032
- all_p_df = get_player_index()
1033
  all_p = all_p_df['name'].tolist()
1034
  sp = st.selectbox("Select or enter player", [""]+all_p)
1035
  if not sp:
@@ -1061,7 +664,7 @@ def young_projections():
1061
 
1062
  def similar_players():
1063
  st.markdown('<h2 class="section-header">Similar Players Finder</h2>', unsafe_allow_html=True)
1064
- all_p_df = get_player_index()
1065
  all_p = all_p_df['name'].tolist()
1066
  tp = st.selectbox("Target Player", all_p)
1067
  crit = st.multiselect("Criteria",["Position","Height/Weight","Playing Style","Statistical Profile","Age/Experience"],default=["Playing Style","Statistical Profile"])
 
4
  import requests
5
  import os
6
  from datetime import datetime
7
+ from bs4 import BeautifulSoup, Comment # Keep Comment for parse_table if needed, though BRScraper handles it
8
+ import re
9
+ import plotly.express as px
10
+ import plotly.graph_objects as go
11
 
12
+ # Import BRScraper
13
+ from BRScraper import nba
14
 
15
  # Page configuration
16
  st.set_page_config(
 
35
  st.session_state.chat_history = []
36
 
37
  # —————————————————————————————————————————————————————————————————————————————
38
+ # BRScraper Data Fetching Utilities
39
  # —————————————————————————————————————————————————————————————————————————————
40
 
41
+ # Helper for dynamic season generation
42
def get_available_seasons(num_seasons=6, today=None):
    """Return recent NBA season labels, most recent first.

    Each label is ``"<start year>–<end year>"`` with an en-dash and a
    four-digit end year (for example ``"2024–2025"``), so
    ``int(label.split('–')[1])`` yields the season's end year.

    Args:
        num_seasons: Number of seasons to return. Zero or a negative value
            yields an empty list.
        today: Optional ``date``/``datetime`` to treat as "now". Defaults to
            the current local time. Passing it makes the result deterministic.

    Returns:
        A list of season label strings in descending order.
    """
    if today is None:
        # Read the clock once so the year and month always belong together.
        today = datetime.now()

    # Before July the season that ends in the current calendar year is the
    # latest one. From July onward (the season typically ends in June) the
    # upcoming season, ending next calendar year, is treated as the latest.
    latest_season_end_year = today.year
    if today.month >= 7:
        latest_season_end_year += 1

    # Counting down from the latest end year already gives newest-first order.
    return [
        f"{end_year - 1}–{end_year}"
        for end_year in range(
            latest_season_end_year,
            latest_season_end_year - num_seasons,
            -1,
        )
    ]
 
 
 
 
 
 
 
 
61
 
 
 
 
62
  @st.cache_data(ttl=3600)
63
def get_player_index_brscraper():
    """
    Build the player index used by the player select boxes.

    Fetches BRScraper per-game stats for the latest season and, if that
    yields no usable names, for the season before it. From July onward the
    "latest" season has not started yet, so its table can be empty.

    Returns:
        A DataFrame with a single 'name' column of unique player names.
        The frame is empty (but still has the 'name' column) when no season
        could be fetched; an error is shown in the Streamlit UI in that case.
    """
    latest_season_end_year = int(get_available_seasons(1)[0].split('–')[1])
    last_error = None

    for season_end_year in (latest_season_end_year, latest_season_end_year - 1):
        try:
            df = nba.get_stats(season_end_year, info='per_game', rename=False)
        except Exception as e:
            # Remember the failure but still try the fallback season.
            last_error = e
            continue

        if df is None or df.empty or 'Player' not in df.columns:
            continue

        names = df['Player'].dropna().astype(str).str.strip()
        # Drop blanks and any repeated header row that leaked into the data.
        # NOTE(review): presumably BRScraper already strips header rows — confirm.
        names = names[(names != '') & (names != 'Player')]
        player_names = names.unique().tolist()
        if player_names:
            return pd.DataFrame({'name': player_names})

    if last_error is not None:
        st.error(f"Error fetching player index with BRScraper: {last_error}")
    else:
        st.error(f"Could not fetch player list for {latest_season_end_year} from BRScraper.")
    return pd.DataFrame(columns=['name'])  # Empty, but with the expected column
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
 
 
83
  @st.cache_data(ttl=300)
84
def get_player_career_stats_brscraper(player_name):
    """
    Fetch a player's career stats via BRScraper and normalize them.

    Columns are renamed to the app's naming scheme, the 'Season' column is
    converted to strings that use an en-dash, every column outside a small
    set of text columns is coerced to numeric (unparseable values become
    NaN), and a 'Player' column holding ``player_name`` is added.

    Args:
        player_name: Player name passed through to BRScraper.

    Returns:
        The normalized DataFrame, or an empty DataFrame when nothing was
        returned or an error occurred (the error is shown in the UI).
    """
    try:
        # NOTE(review): confirm `nba.get_player_stats` exists in the installed
        # BRScraper version; any failure here is reported by the handler below.
        df = nba.get_player_stats(player_name)
        if df is None or df.empty:
            return pd.DataFrame()

        # Standardize column names
        column_mapping = {
            'Season': 'Season',
            'G': 'GP', 'GS': 'GS', 'MP': 'MIN',
            'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
            'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
            'PF': 'PF', 'PTS': 'PTS',
            'Age': 'AGE', 'Tm': 'TEAM_ABBREVIATION', 'Lg': 'LEAGUE_ID', 'Pos': 'POSITION',
            'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
            '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
            'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB',
        }
        # One rename call; keys missing from the frame are ignored by pandas.
        df = df.rename(columns=column_mapping)

        # Ensure 'Season' is a string that uses an en-dash. regex=False keeps
        # the replacement literal regardless of the pandas version's default.
        if 'Season' in df.columns:
            df['Season'] = df['Season'].astype(str).str.replace('-', '–', regex=False)

        # Convert everything except the known text columns to numeric.
        non_numeric_cols = {'Season', 'TEAM_ABBREVIATION', 'LEAGUE_ID', 'POSITION', 'Player'}
        for col in df.columns:
            if col not in non_numeric_cols:
                df[col] = pd.to_numeric(df[col], errors="coerce")

        # Add 'Player' column for consistency with app logic
        df['Player'] = player_name

        return df
    except Exception as e:
        st.error(f"Error fetching stats for {player_name} with BRScraper: {e}")
        return pd.DataFrame()
129
 
 
130
  @st.cache_data(ttl=300)
131
def get_team_season_stats_brscraper(year):
    """
    Fetch per-game stats for a season via BRScraper and keep the team rows.

    Team rows are identified heuristically: first rows whose 'Player' value
    is missing; if that leaves no usable 'Team' values, rows whose 'Rk' is
    not numeric. Columns are then renamed to the app's naming scheme and
    coerced to numeric, and the 'Team' label is cleaned of '*' markers.

    NOTE(review): presumably the 'per_game' table is player-level; confirm it
    actually contains team rows, otherwise this returns an empty frame.

    Args:
        year: Season end year passed to ``nba.get_stats``.

    Returns:
        A DataFrame with a cleaned 'Team' column plus stat columns, or an
        empty DataFrame when no team rows could be identified or an error
        occurred (warnings/errors are shown in the Streamlit UI).
    """
    try:
        df = nba.get_stats(year, info='per_game', rename=False)
        if df is None or df.empty:
            return pd.DataFrame()

        # Normalize header text so the lookups below are reliable.
        df.columns = [str(col).strip() for col in df.columns]

        # From here on the team identifier is always called 'Team'.
        if 'Tm' in df.columns:
            df = df.rename(columns={'Tm': 'Team'})

        # Primary heuristic: rows with a player name are player stats.
        if 'Player' in df.columns:
            team_df = df[df['Player'].isna()].copy()
        else:
            team_df = df.copy()

        # Fallback heuristic: rows whose rank is not a number.
        if 'Team' not in team_df.columns or team_df['Team'].isna().all():
            if 'Rk' in df.columns:
                team_df = df[pd.to_numeric(df['Rk'], errors='coerce').isna()].copy()
                team_df = team_df.drop(columns=['Player'], errors='ignore')
            else:
                st.warning(f"Could not reliably identify team rows for year {year}. Returning all data.")
                team_df = df.copy()

        if team_df.empty:
            return pd.DataFrame()

        if 'Team' not in team_df.columns:
            st.warning(f"Could not find a 'Team' or 'Tm' column in the processed team data for {year}.")
            return pd.DataFrame()

        # Rows without a team label would otherwise become the literal
        # string "nan" below and show up as a selectable team.
        team_df = team_df[team_df['Team'].notna()].copy()
        if team_df.empty:
            return pd.DataFrame()

        # Standardize column names
        column_mapping = {
            'G': 'GP', 'MP': 'MIN',
            'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
            'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
            'PF': 'PF', 'PTS': 'PTS',
            'Rk': 'RANK', 'W': 'WINS', 'L': 'LOSSES', 'W/L%': 'WIN_LOSS_PCT',
            'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
            '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
            'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB',
        }
        # One rename call; keys missing from the frame are ignored by pandas.
        team_df = team_df.rename(columns=column_mapping)

        # Convert everything except the identifier and rank to numeric.
        non_numeric_cols = {"Team", "RANK"}
        for col in team_df.columns:
            if col not in non_numeric_cols:
                team_df[col] = pd.to_numeric(team_df[col], errors="coerce")

        # Strip '*' markers from team names. regex=False is required because
        # '*' on its own is not a valid regular expression.
        team_df['Team'] = (
            team_df['Team'].astype(str).str.replace('*', '', regex=False).str.strip()
        )

        return team_df
    except Exception as e:
        st.error(f"Error fetching team stats for {year} with BRScraper: {e}")
        return pd.DataFrame()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
220
  # —————————————————————————————————————————————————————————————————————————————
221
  # Perplexity integration
222
  PERP_KEY = os.getenv("PERPLEXITY_API_KEY")
 
228
  return ""
229
  hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
230
  payload = {
231
+ "model":"sonar-medium-online", # Changed to a commonly available online model
232
  "messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
233
  "max_tokens":max_tokens, "temperature":temp
234
  }
 
250
  st.error(f"An unexpected error occurred with Perplexity API: {e}")
251
  return ""
252
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
253
  # —————————————————————————————————————————————————————————————————————————————
254
  # Plotting functions (retained from previous version)
255
  def create_comparison_chart(data, players_names, metric):
 
333
  # —————————————————————————————————————————————————————————————————————————————
334
  def player_vs_player():
335
  st.markdown('<h2 class="section-header">Player vs Player Comparison</h2>', unsafe_allow_html=True)
336
+ idx = get_player_index_brscraper() # Use BRScraper for player index
337
  names = idx['name'].tolist()
338
  selected_players = st.multiselect("Select Players (up to 4)", names, max_selections=4)
339
 
 
347
 
348
  stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
349
  all_player_season_data = [] # To store individual season rows for each player
350
+ players_with_no_data = []
 
351
 
352
  with st.spinner("Fetching player data..."):
353
  for player_name in selected_players:
354
+ df_player_career = get_player_career_stats_brscraper(player_name)
 
 
 
 
 
 
355
 
356
  if not df_player_career.empty:
357
  # Filter for selected seasons. The player_season_stats function
 
359
  filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
360
 
361
  if not filtered_df.empty:
362
+ # 'Player' column is already added by get_player_career_stats_brscraper
363
  all_player_season_data.append(filtered_df)
364
  else:
365
+ players_with_no_data.append(player_name)
 
366
  else:
367
+ players_with_no_data.append(player_name)
 
 
 
 
 
368
 
369
  # Report on players with no data for selected seasons
370
+ if players_with_no_data:
371
+ st.info(f"No data found for the selected seasons ({', '.join(selected_seasons)}) for: {', '.join(players_with_no_data)}. This might be because the season hasn't started or data is not yet available, or the player name was not found by BRScraper.")
372
 
373
  if not all_player_season_data:
374
  st.error("No data available for any of the selected players and seasons to display. Please adjust your selections.")
 
503
  # Extract the end year from the season string (e.g., "2024–25" -> 2025)
504
  year_for_team_stats = int(selected_season_str.split('–')[1])
505
 
506
+ tm_df = get_team_season_stats_brscraper(year_for_team_stats) # Use BRScraper for team stats
507
  if tm_df.empty:
508
+ st.info(f"No team data available for the {selected_season_str} season. This might be because the season hasn't started or data is not yet available, or BRScraper could not fetch it.")
509
  return
510
 
511
+ teams = tm_df['Team'].unique().tolist() # Use 'Team' column from BRScraper output
512
  selected_teams = st.multiselect("Select Teams (up to 4)", teams, max_selections=4)
513
 
514
  if st.button("Run Comparison"):
 
521
 
522
  with st.spinner("Fetching team data..."):
523
  for t in selected_teams:
524
+ df = tm_df[tm_df.Team == t].copy() # Filter by 'Team' column
525
  if not df.empty:
526
  # For team stats, we usually get one row per team per season from team_per_game
527
  # So, no need for .mean() here, just take the row.
 
528
  df['Season'] = selected_season_str # Add 'Season' column
529
  stats.append(df.iloc[0].to_dict()) # Convert the single row to dict
530
  else:
 
632
 
633
  def young_projections():
634
  st.markdown('<h2 class="section-header">Young Player Projections</h2>', unsafe_allow_html=True)
635
+ all_p_df = get_player_index_brscraper()
636
  all_p = all_p_df['name'].tolist()
637
  sp = st.selectbox("Select or enter player", [""]+all_p)
638
  if not sp:
 
664
 
665
  def similar_players():
666
  st.markdown('<h2 class="section-header">Similar Players Finder</h2>', unsafe_allow_html=True)
667
+ all_p_df = get_player_index_brscraper()
668
  all_p = all_p_df['name'].tolist()
669
  tp = st.selectbox("Target Player", all_p)
670
  crit = st.multiselect("Criteria",["Position","Height/Weight","Playing Style","Statistical Profile","Age/Experience"],default=["Playing Style","Statistical Profile"])