rairo committed on
Commit
3016fda
Β·
verified Β·
1 Parent(s): 0cae82f

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +202 -47
src/streamlit_app.py CHANGED
@@ -53,11 +53,30 @@ if 'chat_history' not in st.session_state:
53
  # Basketball-Reference Data Fetching Utilities
54
  # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
55
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  @st.cache_data(ttl=3600)
57
  def fetch_html(url):
58
- """Fetch raw HTML for a URL (with error handling)."""
59
  try:
60
- resp = requests.get(url, timeout=20)
 
 
 
 
 
 
 
61
  resp.raise_for_status()
62
  return resp.text
63
  except requests.exceptions.RequestException as e:
@@ -72,7 +91,10 @@ def parse_table(html, table_id=None):
72
  Given raw HTML and optional table_id, locate that <table>,
73
  handling cases where it's commented out, then parse it with pandas.read_html.
74
  """
75
- soup = BeautifulSoup(html, "lxml") # Using lxml for potentially faster parsing
 
 
 
76
  tbl_html = ""
77
 
78
  if table_id:
@@ -83,7 +105,10 @@ def parse_table(html, table_id=None):
83
  else:
84
  # If not found directly, search for it within HTML comments
85
  # Basketball-Reference often comments out tables
86
- comment_pattern = re.compile(r'<!--.*?<table[^>]*id="%s"[^>]*>.*?</table>.*?-->' % table_id, re.DOTALL)
 
 
 
87
  comment_match = comment_pattern.search(html)
88
  if comment_match:
89
  # Extract the content of the comment
@@ -91,7 +116,7 @@ def parse_table(html, table_id=None):
91
  # Remove the comment tags
92
  comment_content = comment_content.replace('<!--', '').replace('-->', '')
93
  # Parse the comment content as new HTML
94
- comment_soup = BeautifulSoup(comment_content, 'lxml')
95
  tbl = comment_soup.find('table', {'id': table_id})
96
  if tbl:
97
  tbl_html = str(tbl)
@@ -106,8 +131,14 @@ def parse_table(html, table_id=None):
106
 
107
  try:
108
  # pd.read_html returns a list of DataFrames, we want the first one
109
- return pd.read_html(tbl_html)[0]
110
- except ValueError: # No tables found in the provided HTML string
 
 
 
 
 
 
111
  return pd.DataFrame()
112
  except Exception as e:
113
  st.error(f"Error parsing table with pandas: {e}")
@@ -129,13 +160,16 @@ def get_player_index():
129
  if not html:
130
  continue
131
 
132
- soup = BeautifulSoup(html, "lxml")
133
  # The players table is usually directly available, not commented out.
134
  table = soup.find("table", {"id": "players"})
135
  if not table:
136
  continue
137
 
138
- for row in table.select("tbody tr"):
 
 
 
139
  th = row.find("th", {"data-stat": "player"})
140
  if not th:
141
  continue
@@ -144,7 +178,7 @@ def get_player_index():
144
  continue
145
  name = a.text.strip()
146
  href = a["href"].strip()
147
- full_url = f"https://www.basketball-reference.com{href}"
148
  records.append({"name": name, "url": full_url})
149
 
150
  return pd.DataFrame(records)
@@ -153,7 +187,7 @@ def get_player_index():
153
  @st.cache_data(ttl=300)
154
  def player_season_stats(bbr_url):
155
  """
156
- Scrapes a player’s per‑season table (id="per_game") from their BBR page.
157
  Returns cleaned DataFrame.
158
  """
159
  html = fetch_html(bbr_url)
@@ -161,29 +195,52 @@ def player_season_stats(bbr_url):
161
  return pd.DataFrame()
162
 
163
  df = parse_table(html, table_id="per_game")
164
- if df.empty: # Check if df is empty first
165
  return pd.DataFrame()
166
 
167
- # Flatten multi-index columns if they exist (common with pd.read_html)
168
  if isinstance(df.columns, pd.MultiIndex):
169
- df.columns = ['_'.join(col).strip() for col in df.columns.values]
170
- else:
171
- df.columns = [col.strip() for col in df.columns.values]
172
-
173
- # Now check for 'Season' column after flattening
174
- if "Season" not in df.columns:
175
- # This is the critical point. If 'Season' is still not found,
176
- # it means the table either doesn't exist or has a completely different structure.
177
- # st.warning(f"Could not find 'Season' column in the parsed table from {bbr_url}. Columns found: {df.columns.tolist()}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
178
  return pd.DataFrame()
179
 
180
- # drop repeated header rows (e.g., rows where 'Season' is literally 'Season')
181
- df = df[df["Season"] != "Season"].copy()
182
- df["Season"] = df["Season"].astype(str) # Ensure season is string
183
- df['Season'] = df['Season'].str.replace('-', '–') # Ensure en-dash for consistency
 
 
 
 
 
 
 
184
 
185
  # Standardize column names to match previous nba_api output expectations
186
- df = df.rename(columns={
187
  'G': 'GP', 'GS': 'GS', 'MP': 'MIN',
188
  'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
189
  'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
@@ -192,10 +249,14 @@ def player_season_stats(bbr_url):
192
  'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
193
  '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
194
  'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
195
- })
 
 
 
 
 
196
 
197
- # Coerce all numeric columns
198
- # Exclude columns that are definitely not numeric or are identifiers
199
  non_numeric_cols = {'Season', 'TEAM_ABBREVIATION', 'LEAGUE_ID', 'POSITION', 'Player'}
200
  for col in df.columns:
201
  if col not in non_numeric_cols:
@@ -207,7 +268,7 @@ def player_season_stats(bbr_url):
207
  @st.cache_data(ttl=300)
208
  def team_per_game(year):
209
  """
210
- Scrapes the league’s per‑game team stats table from:
211
  https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html
212
  Returns cleaned DataFrame.
213
  """
@@ -216,26 +277,61 @@ def team_per_game(year):
216
  if not html:
217
  return pd.DataFrame()
218
 
219
- df = parse_table(html, table_id="per_game-team") # Correct table ID for team stats
220
- if df.empty: # Check if df is empty first
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
221
  return pd.DataFrame()
222
 
223
- # Flatten multi-index columns if they exist
224
  if isinstance(df.columns, pd.MultiIndex):
225
- df.columns = ['_'.join(col).strip() for col in df.columns.values]
226
- else:
227
- df.columns = [col.strip() for col in df.columns.values]
228
-
229
- # Now check for 'Team' column after flattening
230
- if "Team" not in df.columns:
 
 
 
 
 
 
 
 
 
231
  return pd.DataFrame()
232
 
233
- # drop repeated headers & rename
234
- df = df[df["Team"] != "Team"].copy()
235
- df.rename(columns={"Team": "Tm"}, inplace=True)
 
 
 
 
 
 
 
236
 
237
  # Standardize column names
238
- df = df.rename(columns={
239
  'G': 'GP', 'MP': 'MIN',
240
  'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
241
  'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
@@ -244,9 +340,14 @@ def team_per_game(year):
244
  'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
245
  '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
246
  'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
247
- })
 
 
 
 
 
248
 
249
- # coerce numeric columns
250
  non_numeric_cols = {"Tm", "RANK"}
251
  for col in df.columns:
252
  if col not in non_numeric_cols:
@@ -254,6 +355,60 @@ def team_per_game(year):
254
 
255
  return df
256
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
257
  # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
258
  # Perplexity integration
259
  PERP_KEY = os.getenv("PERPLEXITY_API_KEY")
@@ -265,7 +420,7 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
265
  return ""
266
  hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
267
  payload = {
268
- "model":"sonar-medium-online", # Changed to a commonly available online model
269
  "messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
270
  "max_tokens":max_tokens, "temperature":temp
271
  }
 
53
  # Basketball-Reference Data Fetching Utilities
54
  # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
55
 
56
+ # Basketball-Reference Data Fetching Utilities
57
+ # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
58
+
59
+ import requests
60
+ import pandas as pd
61
+ import streamlit as st
62
+ from bs4 import BeautifulSoup
63
+ import re
64
+ import time
65
+ import random
66
+ from urllib.parse import urljoin
67
+
68
  @st.cache_data(ttl=3600)
69
  def fetch_html(url):
70
+ """Fetch raw HTML for a URL (with error handling and rate limiting)."""
71
  try:
72
+ # Add random delay to be respectful to basketball-reference.com
73
+ time.sleep(random.uniform(0.5, 1.5))
74
+
75
+ headers = {
76
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
77
+ }
78
+
79
+ resp = requests.get(url, timeout=30, headers=headers)
80
  resp.raise_for_status()
81
  return resp.text
82
  except requests.exceptions.RequestException as e:
 
91
  Given raw HTML and optional table_id, locate that <table>,
92
  handling cases where it's commented out, then parse it with pandas.read_html.
93
  """
94
+ if not html:
95
+ return pd.DataFrame()
96
+
97
+ soup = BeautifulSoup(html, "html.parser") # Changed from lxml to html.parser for better compatibility
98
  tbl_html = ""
99
 
100
  if table_id:
 
105
  else:
106
  # If not found directly, search for it within HTML comments
107
  # Basketball-Reference often comments out tables
108
+ comment_pattern = re.compile(
109
+ r'<!--.*?<table[^>]*?id=["\']' + re.escape(table_id) + r'["\'][^>]*?>.*?</table>.*?-->',
110
+ re.DOTALL | re.IGNORECASE
111
+ )
112
  comment_match = comment_pattern.search(html)
113
  if comment_match:
114
  # Extract the content of the comment
 
116
  # Remove the comment tags
117
  comment_content = comment_content.replace('<!--', '').replace('-->', '')
118
  # Parse the comment content as new HTML
119
+ comment_soup = BeautifulSoup(comment_content, 'html.parser')
120
  tbl = comment_soup.find('table', {'id': table_id})
121
  if tbl:
122
  tbl_html = str(tbl)
 
131
 
132
  try:
133
  # pd.read_html returns a list of DataFrames, we want the first one
134
+ dfs = pd.read_html(tbl_html, header=0)
135
+ if dfs:
136
+ return dfs[0]
137
+ else:
138
+ return pd.DataFrame()
139
+ except ValueError as e:
140
+ # No tables found in the provided HTML string
141
+ st.warning(f"No tables found in HTML: {e}")
142
  return pd.DataFrame()
143
  except Exception as e:
144
  st.error(f"Error parsing table with pandas: {e}")
 
160
  if not html:
161
  continue
162
 
163
+ soup = BeautifulSoup(html, "html.parser")
164
  # The players table is usually directly available, not commented out.
165
  table = soup.find("table", {"id": "players"})
166
  if not table:
167
  continue
168
 
169
+ # Look for both tbody and direct tr children
170
+ rows = table.select("tbody tr") if table.select("tbody tr") else table.select("tr")
171
+
172
+ for row in rows:
173
  th = row.find("th", {"data-stat": "player"})
174
  if not th:
175
  continue
 
178
  continue
179
  name = a.text.strip()
180
  href = a["href"].strip()
181
+ full_url = urljoin("https://www.basketball-reference.com", href)
182
  records.append({"name": name, "url": full_url})
183
 
184
  return pd.DataFrame(records)
 
187
  @st.cache_data(ttl=300)
188
  def player_season_stats(bbr_url):
189
  """
190
+ Scrapes a player's per‑season table (id="per_game") from their BBR page.
191
  Returns cleaned DataFrame.
192
  """
193
  html = fetch_html(bbr_url)
 
195
  return pd.DataFrame()
196
 
197
  df = parse_table(html, table_id="per_game")
198
+ if df.empty:
199
  return pd.DataFrame()
200
 
201
+ # Handle potential MultiIndex columns
202
  if isinstance(df.columns, pd.MultiIndex):
203
+ # Flatten MultiIndex columns
204
+ df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
205
+ for cols in df.columns.values]
206
+
207
+ # Clean column names
208
+ df.columns = [str(col).strip() for col in df.columns]
209
+
210
+ # Find season column (could be 'Season' or similar)
211
+ season_col = None
212
+ for col in df.columns:
213
+ if 'season' in col.lower() or col == 'Season':
214
+ season_col = col
215
+ break
216
+
217
+ if season_col is None:
218
+ # Try to find it by looking for columns with year patterns
219
+ for col in df.columns:
220
+ if df[col].dtype == 'object' and not df[col].isna().all():
221
+ sample_val = str(df[col].iloc[0]) if len(df) > 0 else ""
222
+ if re.match(r'\d{4}-\d{2}', sample_val):
223
+ season_col = col
224
+ break
225
+
226
+ if season_col is None:
227
+ st.warning(f"Could not find season column in player stats. Available columns: {df.columns.tolist()}")
228
  return pd.DataFrame()
229
 
230
+ # Rename season column to standard name
231
+ if season_col != 'Season':
232
+ df = df.rename(columns={season_col: 'Season'})
233
+
234
+ # Remove header rows that might have been included
235
+ df = df[df["Season"].astype(str) != "Season"].copy()
236
+ df = df[df["Season"].notna()].copy()
237
+
238
+ # Clean season format
239
+ df["Season"] = df["Season"].astype(str)
240
+ df['Season'] = df['Season'].str.replace('-', '–') # Ensure en-dash for consistency
241
 
242
  # Standardize column names to match previous nba_api output expectations
243
+ column_mapping = {
244
  'G': 'GP', 'GS': 'GS', 'MP': 'MIN',
245
  'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
246
  'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
 
249
  'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
250
  '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
251
  'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
252
+ }
253
+
254
+ # Apply column mapping only for columns that exist
255
+ for old_col, new_col in column_mapping.items():
256
+ if old_col in df.columns:
257
+ df = df.rename(columns={old_col: new_col})
258
 
259
+ # Convert numeric columns
 
260
  non_numeric_cols = {'Season', 'TEAM_ABBREVIATION', 'LEAGUE_ID', 'POSITION', 'Player'}
261
  for col in df.columns:
262
  if col not in non_numeric_cols:
 
268
  @st.cache_data(ttl=300)
269
  def team_per_game(year):
270
  """
271
+ Scrapes the league's per‑game team stats table from:
272
  https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html
273
  Returns cleaned DataFrame.
274
  """
 
277
  if not html:
278
  return pd.DataFrame()
279
 
280
+ # Try multiple possible table IDs for team stats
281
+ possible_table_ids = ["per_game-team", "per_game_team", "team-stats-per_game", "teams_per_game"]
282
+ df = pd.DataFrame()
283
+
284
+ for table_id in possible_table_ids:
285
+ df = parse_table(html, table_id=table_id)
286
+ if not df.empty:
287
+ break
288
+
289
+ # If no specific table found, try to find any table with team data
290
+ if df.empty:
291
+ soup = BeautifulSoup(html, "html.parser")
292
+ tables = soup.find_all("table")
293
+ for table in tables:
294
+ if table.find("th", string=lambda text: text and "team" in text.lower()):
295
+ df = parse_table(str(table))
296
+ if not df.empty:
297
+ break
298
+
299
+ if df.empty:
300
+ st.warning(f"Could not find team stats table for {year}")
301
  return pd.DataFrame()
302
 
303
+ # Handle potential MultiIndex columns
304
  if isinstance(df.columns, pd.MultiIndex):
305
+ df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
306
+ for cols in df.columns.values]
307
+
308
+ # Clean column names
309
+ df.columns = [str(col).strip() for col in df.columns]
310
+
311
+ # Find team column
312
+ team_col = None
313
+ for col in df.columns:
314
+ if 'team' in col.lower() or col in ['Team', 'Tm']:
315
+ team_col = col
316
+ break
317
+
318
+ if team_col is None:
319
+ st.warning(f"Could not find team column in team stats. Available columns: {df.columns.tolist()}")
320
  return pd.DataFrame()
321
 
322
+ # Rename team column to standard name
323
+ if team_col != 'Team':
324
+ df = df.rename(columns={team_col: 'Team'})
325
+
326
+ # Remove header rows
327
+ df = df[df["Team"].astype(str) != "Team"].copy()
328
+ df = df[df["Team"].notna()].copy()
329
+
330
+ # Rename Team to Tm for consistency
331
+ df = df.rename(columns={"Team": "Tm"})
332
 
333
  # Standardize column names
334
+ column_mapping = {
335
  'G': 'GP', 'MP': 'MIN',
336
  'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
337
  'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
 
340
  'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
341
  '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
342
  'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
343
+ }
344
+
345
+ # Apply column mapping only for columns that exist
346
+ for old_col, new_col in column_mapping.items():
347
+ if old_col in df.columns:
348
+ df = df.rename(columns={old_col: new_col})
349
 
350
+ # Convert numeric columns
351
  non_numeric_cols = {"Tm", "RANK"}
352
  for col in df.columns:
353
  if col not in non_numeric_cols:
 
355
 
356
  return df
357
 
358
+ # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
359
+ # Additional utility functions for better error handling and data validation
360
+
361
+ def validate_dataframe(df, required_columns=None):
362
+ """
363
+ Validate that a DataFrame has the expected structure and data.
364
+ """
365
+ if df.empty:
366
+ return False, "DataFrame is empty"
367
+
368
+ if required_columns:
369
+ missing_cols = [col for col in required_columns if col not in df.columns]
370
+ if missing_cols:
371
+ return False, f"Missing required columns: {missing_cols}"
372
+
373
+ return True, "DataFrame is valid"
374
+
375
+ def clean_team_name(team_name):
376
+ """
377
+ Clean and standardize team names from Basketball Reference.
378
+ """
379
+ if pd.isna(team_name):
380
+ return team_name
381
+
382
+ # Remove any asterisks or other symbols
383
+ team_name = str(team_name).strip().replace('*', '')
384
+
385
+ # Handle special cases
386
+ team_mapping = {
387
+ 'TOT': 'TOT', # Total for players who played for multiple teams
388
+ 'NOP': 'NO', # New Orleans Pelicans sometimes shown as NOP
389
+ 'PHX': 'PHO', # Phoenix Suns sometimes shown as PHX
390
+ 'BRK': 'BKN', # Brooklyn Nets sometimes shown as BRK
391
+ }
392
+
393
+ return team_mapping.get(team_name, team_name)
394
+
395
+ def retry_fetch(func, *args, max_retries=3, **kwargs):
396
+ """
397
+ Retry a function call with exponential backoff.
398
+ """
399
+ for attempt in range(max_retries):
400
+ try:
401
+ result = func(*args, **kwargs)
402
+ if not (isinstance(result, pd.DataFrame) and result.empty):
403
+ return result
404
+ except Exception as e:
405
+ if attempt == max_retries - 1:
406
+ st.error(f"Failed after {max_retries} attempts: {e}")
407
+ return pd.DataFrame()
408
+ time.sleep(2 ** attempt) # Exponential backoff
409
+
410
+ return pd.DataFrame()
411
+
412
  # β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”β€”
413
  # Perplexity integration
414
  PERP_KEY = os.getenv("PERPLEXITY_API_KEY")
 
420
  return ""
421
  hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
422
  payload = {
423
+ "model":"sonar-pro", # Changed to a commonly available online model
424
  "messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
425
  "max_tokens":max_tokens, "temperature":temp
426
  }