rairo committed on
Commit
ff13041
·
verified ·
1 Parent(s): 5df17b9

Update src/streamlit_app.py

Browse files
Files changed (1) hide show
  1. src/streamlit_app.py +134 -29
src/streamlit_app.py CHANGED
@@ -6,6 +6,8 @@ import os
6
  from datetime import datetime
7
  from bs4 import BeautifulSoup
8
  import re # New import for regex operations
 
 
9
 
10
  # --- IMPORTANT: Addressing PermissionError in Containerized Environments ---
11
  # The error "PermissionError: [Errno 13] Permission denied: '/.streamlit'"
@@ -19,7 +21,7 @@ import re # New import for regex operations
19
  #
20
  # Option 2: Disable Streamlit's usage statistics gathering.
21
  # In your Dockerfile: ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=False
22
- # Or when running: docker run -e STREAMLIT_BROWSER_GATHER_USAGE_STATS=False your_image_name
23
  #
24
  # Option 1 is generally more robust as it provides a writable home directory
25
  # for any application that might need it.
@@ -159,7 +161,17 @@ def player_season_stats(bbr_url):
159
  return pd.DataFrame()
160
 
161
  df = parse_table(html, table_id="per_game")
162
- if df.empty or "Season" not in df.columns:
 
 
 
 
 
 
 
 
 
 
163
  return pd.DataFrame()
164
 
165
  # drop repeated header rows (e.g., rows where 'Season' is literally 'Season')
@@ -201,7 +213,17 @@ def team_per_game(year):
201
  return pd.DataFrame()
202
 
203
  df = parse_table(html, table_id="per_game-team") # Correct table ID for team stats
204
- if df.empty or "Team" not in df.columns:
 
 
 
 
 
 
 
 
 
 
205
  return pd.DataFrame()
206
 
207
  # drop repeated headers & rename
@@ -221,7 +243,7 @@ def team_per_game(year):
221
  })
222
 
223
  # coerce numeric columns
224
- non_numeric_cols = {"Tm", "RANK"} # 'RANK' is usually numeric, but 'Tm' is not
225
  for col in df.columns:
226
  if col not in non_numeric_cols:
227
  df[col] = pd.to_numeric(df[col], errors="coerce")
@@ -283,6 +305,62 @@ def get_available_seasons(num_seasons=6):
283
  seasons_list.append(f"{start_year}–{end_year}")
284
  return sorted(seasons_list, reverse=True) # Sort to show most recent first
285
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
286
  # —————————————————————————————————————————————————————————————————————————————
287
  # Main App Structure
288
  # —————————————————————————————————————————————————————————————————————————————
@@ -324,27 +402,44 @@ def player_vs_player():
324
 
325
  stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
326
  all_player_season_data = [] # To store individual season rows for each player
 
 
327
 
328
  with st.spinner("Fetching player data..."):
329
  for player_name in selected_players:
330
- player_url = idx.loc[idx.name == player_name, 'url'].iat[0]
 
 
 
 
 
331
  df_player_career = player_season_stats(player_url)
332
 
333
  if not df_player_career.empty:
334
  # Filter for selected seasons and ensure 'Season' column is consistent
335
- df_player_career['Season'] = df_player_career['Season'].str.replace('-', '–')
336
  filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
337
 
338
  if not filtered_df.empty:
339
  filtered_df['Player'] = player_name # Add player name for identification
340
  all_player_season_data.append(filtered_df)
341
  else:
342
- st.info(f"No data found for {player_name} in selected seasons.")
 
343
  else:
344
- st.info(f"Could not fetch career stats for {player_name}.")
 
 
 
 
 
 
 
 
 
345
 
346
  if not all_player_season_data:
347
- st.info("No data available for the selected players and seasons.")
348
  return
349
 
350
  # Concatenate all collected season data into one DataFrame
@@ -416,7 +511,7 @@ def player_vs_player():
416
  )
417
  else:
418
  # Average over selected seasons for multiple players for bar chart
419
- avg_comparison_df = comparison_df_raw.groupby('Player')[available_metrics].mean().reset_index()
420
  fig = px.bar(
421
  avg_comparison_df,
422
  x='Player',
@@ -469,16 +564,16 @@ def player_vs_player():
469
 
470
  def team_vs_team():
471
  st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
472
-
473
  available_seasons = get_available_seasons()
474
  selected_season_str = st.selectbox("Select Season", available_seasons, index=0)
475
-
476
  # Extract the end year from the season string (e.g., "2024–25" -> 2025)
477
  year_for_team_stats = int(selected_season_str.split('–')[1])
478
 
479
  tm_df = team_per_game(year_for_team_stats)
480
  if tm_df.empty:
481
- st.info(f"No team data available for the {selected_season_str} season.")
482
  return
483
 
484
  teams = tm_df['Tm'].unique().tolist()
@@ -490,19 +585,25 @@ def team_vs_team():
490
  return
491
 
492
  stats = []
493
- for t in selected_teams:
494
- df = tm_df[tm_df.Tm == t].copy() # Use .copy() to avoid SettingWithCopyWarning
495
- if not df.empty:
496
- # For team stats, we usually get one row per team per season from team_per_game
497
- # So, no need for .mean() here, just take the row.
498
- df['Team'] = t # Add 'Team' column for consistency
499
- df['Season'] = selected_season_str # Add 'Season' column
500
- stats.append(df.iloc[0].to_dict()) # Convert the single row to dict
501
- else:
502
- st.info(f"No data found for team {t} in {selected_season_str}.")
 
 
 
 
 
 
503
 
504
  if not stats:
505
- st.info("No data available for the selected teams.")
506
  return
507
 
508
  comp = pd.DataFrame(stats)
@@ -611,10 +712,14 @@ def young_projections():
611
  ppg = st.number_input("PPG",0.0,40.0,15.0)
612
  rpg = st.number_input("RPG",0.0,20.0,5.0)
613
  apg = st.number_input("APG",0.0,15.0,3.0)
614
- if st.button("Project"):
615
  prompt = (
616
- f"Project {sp}'s next 5-year stats based on Age={age}, "
617
- f"Yrs={yrs}, PPG={ppg}, RPG={rpg}, APG={apg}."
 
 
 
 
618
  )
619
  out = ask_perp(prompt, system="You are an NBA projection expert AI.", max_tokens=800)
620
  st.markdown("### Projection Analysis")
@@ -633,13 +738,13 @@ def similar_players():
633
  tp = st.selectbox("Target Player", all_p)
634
  crit = st.multiselect("Criteria",["Position","Height/Weight","Playing Style","Statistical Profile","Age/Experience"],default=["Playing Style","Statistical Profile"])
635
  if tp and crit and st.button("Find Similar"):
636
- prompt = f"Find top 5 current and top 3 historical similar to {tp} by {', '.join(crit)}. Provide detailed reasoning."
637
  st.write(ask_perp(prompt, system="You are a similarity expert AI.", max_tokens=800))
638
  st.subheader("Manual Compare")
639
  p1 = st.selectbox("Player 1", all_p, key="p1")
640
  p2 = st.selectbox("Player 2", all_p, key="p2")
641
  if p1 and p2 and p1!=p2 and st.button("Compare Players"):
642
- prompt = f"Compare {p1} vs {p2} on statistical comparison (current season), playing style similarities and differences, strengths and weaknesses, team impact and role, and overall similarity score (1-10)."
643
  st.write(ask_perp(prompt, system="You are a comparison expert AI.", max_tokens=700))
644
 
645
  def roster_builder():
 
6
  from datetime import datetime
7
  from bs4 import BeautifulSoup
8
  import re # New import for regex operations
9
+ import plotly.express as px # Ensure plotly imports are present
10
+ import plotly.graph_objects as go # Ensure plotly imports are present
11
 
12
  # --- IMPORTANT: Addressing PermissionError in Containerized Environments ---
13
  # The error "PermissionError: [Errno 13] Permission denied: '/.streamlit'"
 
21
  #
22
  # Option 2: Disable Streamlit's usage statistics gathering.
23
  # In your Dockerfile: ENV STREAMLIT_BROWSER_GATHER_USAGE_STATS=False
24
+ # Or when running: docker run -e STREAMLIT_BROWSER_GATHER_USAGE_STATS=False your_image_name
25
  #
26
  # Option 1 is generally more robust as it provides a writable home directory
27
  # for any application that might need it.
 
161
  return pd.DataFrame()
162
 
163
  df = parse_table(html, table_id="per_game")
164
+ if df.empty: # Check if df is empty first
165
+ return pd.DataFrame()
166
+
167
+ # Flatten multi-index columns if they exist (common with pd.read_html)
168
+ if isinstance(df.columns, pd.MultiIndex):
169
+ df.columns = ['_'.join(col).strip() for col in df.columns.values]
170
+ else:
171
+ df.columns = [col.strip() for col in df.columns.values]
172
+
173
+ # Now check for 'Season' column after flattening
174
+ if "Season" not in df.columns:
175
  return pd.DataFrame()
176
 
177
  # drop repeated header rows (e.g., rows where 'Season' is literally 'Season')
 
213
  return pd.DataFrame()
214
 
215
  df = parse_table(html, table_id="per_game-team") # Correct table ID for team stats
216
+ if df.empty: # Check if df is empty first
217
+ return pd.DataFrame()
218
+
219
+ # Flatten multi-index columns if they exist
220
+ if isinstance(df.columns, pd.MultiIndex):
221
+ df.columns = ['_'.join(col).strip() for col in df.columns.values]
222
+ else:
223
+ df.columns = [col.strip() for col in df.columns.values]
224
+
225
+ # Now check for 'Team' column after flattening
226
+ if "Team" not in df.columns:
227
  return pd.DataFrame()
228
 
229
  # drop repeated headers & rename
 
243
  })
244
 
245
  # coerce numeric columns
246
+ non_numeric_cols = {"Tm", "RANK"}
247
  for col in df.columns:
248
  if col not in non_numeric_cols:
249
  df[col] = pd.to_numeric(df[col], errors="coerce")
 
305
  seasons_list.append(f"{start_year}–{end_year}")
306
  return sorted(seasons_list, reverse=True) # Sort to show most recent first
307
 
308
+ # —————————————————————————————————————————————————————————————————————————————
309
+ # Plotting functions (retained from previous version)
310
+ def create_comparison_chart(data, players_names, metric):
311
+ """Create comparison chart for players"""
312
+ fig = go.Figure()
313
+
314
+ for i, player in enumerate(players_names):
315
+ if player in data['Player'].values: # Changed to 'Player' column
316
+ player_data = data[data['Player'] == player]
317
+ fig.add_trace(go.Scatter(
318
+ x=player_data['Season'], # Changed to 'Season' column
319
+ y=player_data[metric],
320
+ mode='lines+markers',
321
+ name=player,
322
+ line=dict(width=3)
323
+ ))
324
+
325
+ fig.update_layout(
326
+ title=f"{metric} Comparison",
327
+ xaxis_title="Season",
328
+ yaxis_title=metric,
329
+ hovermode='x unified',
330
+ height=500
331
+ )
332
+
333
+ return fig
334
+
335
+ def create_radar_chart(player_stats, categories):
336
+ """Create radar chart for player comparison"""
337
+ fig = go.Figure()
338
+
339
+ for player_name, stats in player_stats.items():
340
+ # Ensure all categories are present, default to 0 if not
341
+ r_values = [stats.get(cat, 0) for cat in categories]
342
+
343
+ fig.add_trace(go.Scatterpolar(
344
+ r=r_values,
345
+ theta=categories,
346
+ fill='toself',
347
+ name=player_name,
348
+ opacity=0.7
349
+ ))
350
+
351
+ fig.update_layout(
352
+ polar=dict(
353
+ radialaxis=dict(
354
+ visible=True,
355
+ # The range should be adjusted based on the scaled data (0-100)
356
+ range=[0, 100]
357
+ )),
358
+ showlegend=True,
359
+ title="Player Comparison Radar Chart"
360
+ )
361
+
362
+ return fig
363
+
364
  # —————————————————————————————————————————————————————————————————————————————
365
  # Main App Structure
366
  # —————————————————————————————————————————————————————————————————————————————
 
402
 
403
  stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
404
  all_player_season_data = [] # To store individual season rows for each player
405
+ players_not_found_in_index = []
406
+ players_with_no_season_data = []
407
 
408
  with st.spinner("Fetching player data..."):
409
  for player_name in selected_players:
410
+ player_url_row = idx.loc[idx.name == player_name, 'url']
411
+ if player_url_row.empty:
412
+ players_not_found_in_index.append(player_name)
413
+ continue
414
+
415
+ player_url = player_url_row.iat[0]
416
  df_player_career = player_season_stats(player_url)
417
 
418
  if not df_player_career.empty:
419
  # Filter for selected seasons and ensure 'Season' column is consistent
420
+ # The player_season_stats already handles the hyphen to en-dash replacement
421
  filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
422
 
423
  if not filtered_df.empty:
424
  filtered_df['Player'] = player_name # Add player name for identification
425
  all_player_season_data.append(filtered_df)
426
  else:
427
+ # This means player was found, but no data for selected seasons
428
+ players_with_no_season_data.append(player_name)
429
  else:
430
+ # This means player_season_stats returned an empty DF (fetch/parse failed)
431
+ players_with_no_season_data.append(player_name) # Treat as no data for selected seasons
432
+
433
+ # Report on players not found in index
434
+ if players_not_found_in_index:
435
+ st.error(f"The following players were not found in the Basketball-Reference index: {', '.join(players_not_found_in_index)}. Please check spelling.")
436
+
437
+ # Report on players with no data for selected seasons
438
+ if players_with_no_season_data:
439
+ st.info(f"No data found for the selected seasons ({', '.join(selected_seasons)}) for: {', '.join(players_with_no_season_data)}. This might be because the season hasn't started or data is not yet available.")
440
 
441
  if not all_player_season_data:
442
+ st.error("No data available for any of the selected players and seasons to display. Please adjust your selections.")
443
  return
444
 
445
  # Concatenate all collected season data into one DataFrame
 
511
  )
512
  else:
513
  # Average over selected seasons for multiple players for bar chart
514
+ avg_comparison_df = comparison_df_raw.groupby('Player')[available_metrics].mean(numeric_only=True).reset_index()
515
  fig = px.bar(
516
  avg_comparison_df,
517
  x='Player',
 
564
 
565
  def team_vs_team():
566
  st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
567
+
568
  available_seasons = get_available_seasons()
569
  selected_season_str = st.selectbox("Select Season", available_seasons, index=0)
570
+
571
  # Extract the end year from the season string (e.g., "2024–25" -> 2025)
572
  year_for_team_stats = int(selected_season_str.split('–')[1])
573
 
574
  tm_df = team_per_game(year_for_team_stats)
575
  if tm_df.empty:
576
+ st.info(f"No team data available for the {selected_season_str} season. This might be because the season hasn't started or data is not yet available.")
577
  return
578
 
579
  teams = tm_df['Tm'].unique().tolist()
 
585
  return
586
 
587
  stats = []
588
+ teams_with_no_data = []
589
+
590
+ with st.spinner("Fetching team data..."):
591
+ for t in selected_teams:
592
+ df = tm_df[tm_df.Tm == t].copy() # Use .copy() to avoid SettingWithCopyWarning
593
+ if not df.empty:
594
+ # For team stats, we usually get one row per team per season from team_per_game
595
+ # So, no need for .mean() here, just take the row.
596
+ df['Team'] = t # Add 'Team' column for consistency
597
+ df['Season'] = selected_season_str # Add 'Season' column
598
+ stats.append(df.iloc[0].to_dict()) # Convert the single row to dict
599
+ else:
600
+ teams_with_no_data.append(t)
601
+
602
+ if teams_with_no_data:
603
+ st.info(f"No data found for the selected season ({selected_season_str}) for: {', '.join(teams_with_no_data)}. This might be because the season hasn't started or data is not yet available.")
604
 
605
  if not stats:
606
+ st.error("No data available for the selected teams to display. Please adjust your selections.")
607
  return
608
 
609
  comp = pd.DataFrame(stats)
 
712
  ppg = st.number_input("PPG",0.0,40.0,15.0)
713
  rpg = st.number_input("RPG",0.0,20.0,5.0)
714
  apg = st.number_input("APG",0.0,15.0,3.0)
715
+ if st.button("Generate AI Projection"):
716
  prompt = (
717
+ f"Analyze and project the future potential of NBA player {sp}: "
718
+ f"Current Stats: Age={age}, Years in NBA={yrs}, PPG={ppg}, RPG={rpg}, APG={apg}. "
719
+ "Please provide: 1. 3-year projection of their stats. "
720
+ "2. Peak potential analysis. 3. Areas for improvement. "
721
+ "4. Comparison to similar players at the same age. 5. Career trajectory prediction. "
722
+ "Base your analysis on historical player development patterns and current NBA trends."
723
  )
724
  out = ask_perp(prompt, system="You are an NBA projection expert AI.", max_tokens=800)
725
  st.markdown("### Projection Analysis")
 
738
  tp = st.selectbox("Target Player", all_p)
739
  crit = st.multiselect("Criteria",["Position","Height/Weight","Playing Style","Statistical Profile","Age/Experience"],default=["Playing Style","Statistical Profile"])
740
  if tp and crit and st.button("Find Similar"):
741
+ prompt = f"Find top 5 current and top 3 historical similar to {tp} based on the following criteria: {', '.join(crit)}. Provide detailed reasoning."
742
  st.write(ask_perp(prompt, system="You are a similarity expert AI.", max_tokens=800))
743
  st.subheader("Manual Compare")
744
  p1 = st.selectbox("Player 1", all_p, key="p1")
745
  p2 = st.selectbox("Player 2", all_p, key="p2")
746
  if p1 and p2 and p1!=p2 and st.button("Compare Players"):
747
+ prompt = f"Compare {p1} vs {p2} in detail: 1. Statistical comparison (current season). 2. Playing style similarities and differences. 3. Strengths and weaknesses of each. 4. Team impact and role. 5. Overall similarity score (1-10). Provide a comprehensive comparison with specific examples."
748
  st.write(ask_perp(prompt, system="You are a comparison expert AI.", max_tokens=700))
749
 
750
  def roster_builder():