Spaces:

rairo
/

NBA-Test

Sleeping

App Files Files Community

rairo committed on May 26, 2025

Commit

2965b0b

verified ·

1 Parent(s): f8a8b3d

Update src/streamlit_app.py

Browse files

Files changed (1) hide show

src/streamlit_app.py +117 -27

src/streamlit_app.py CHANGED Viewed

@@ -1,6 +1,6 @@
 import streamlit as st
 import pandas as pd
-import numpy as np # Re-added numpy for general use
 import requests
 import os
 from datetime import datetime
@@ -106,11 +106,32 @@ def parse_table(html, table_id=None):
         st.error(f"Error parsing table with pandas: {e}")
         return pd.DataFrame()
 @st.cache_data(ttl=300)
 def get_team_stats_bs(year):
     """
-    Scrapes the league’s per‑game team stats table from Basketball-Reference
-    using BeautifulSoup.
     Returns cleaned DataFrame.
     """
     url = f"https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html"
@@ -118,25 +139,58 @@ def get_team_stats_bs(year):
     if not html:
         return pd.DataFrame()
-    df = parse_table(html, table_id="per_game-team")
-    if df.empty: # Check if df is empty first
         return pd.DataFrame()
-    # Flatten multi-index columns if they exist
     if isinstance(df.columns, pd.MultiIndex):
-        df.columns = ['_'.join(col).strip() for col in df.columns.values]
-    else:
-        df.columns = [col.strip() for col in df.columns.values]
-    # Now check for 'Team' column after flattening
-    if "Team" not in df.columns:
         return pd.DataFrame()
-    # drop repeated headers (e.g., rows where 'Team' is literally 'Team')
-    df = df[df["Team"] != "Team"].copy()
     # Standardize column names
-    df = df.rename(columns={
         'G': 'GP', 'MP': 'MIN',
         'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
         'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
@@ -145,18 +199,23 @@ def get_team_stats_bs(year):
         'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
         '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
         'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
-    })
-    # coerce numeric columns
     non_numeric_cols = {"Team", "RANK"}
     for col in df.columns:
         if col not in non_numeric_cols:
             df[col] = pd.to_numeric(df[col], errors="coerce")
-    # Clean team names (remove asterisks)
-    if 'Team' in df.columns:
-        df['Team'] = df['Team'].astype(str).str.replace('*', '', regex=False).str.strip()
     return df
 # ——————————————————————————————————————————————
@@ -288,7 +347,7 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
         return ""
     hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
     payload = {
-        "model":"sonar-medium-online",
         "messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
         "max_tokens":max_tokens, "temperature":temp
     }
@@ -300,17 +359,48 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
         st.error(f"Perplexity API error: {e}")
         return ""
-# Plot helpers
 def create_radar_chart(player_stats, categories):
     fig = go.Figure()
-    for name, stats in player_stats.items():
         fig.add_trace(go.Scatterpolar(
-            r=[stats.get(cat,0) for cat in categories],
             theta=categories,
             fill='toself',
-            name=name,
             opacity=0.7
         ))
     fig.update_layout(
         polar=dict(radialaxis=dict(visible=True, range=[0,100])),
         showlegend=True,
@@ -473,7 +563,7 @@ def player_vs_player():
 def team_vs_team():
     st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
-    # No BRSCRAPER_AVAILABLE check here, as it uses custom BS scraper
     seasons = get_available_seasons()
     selected_season_str = st.selectbox("Select Season", seasons, index=0)
@@ -507,7 +597,7 @@ def team_vs_team():
                     teams_with_no_data.append(t)
         if teams_with_no_data:
-            st.info(f"No data found for the selected season ({selected_season_str}) for: {', '.join(teams_with_no_data)}.")
         if not stats:
             st.error("No data available for the selected teams to display. Please adjust your selections.")

 import streamlit as st
 import pandas as pd
+import numpy as np
 import requests
 import os
 from datetime import datetime
         st.error(f"Error parsing table with pandas: {e}")
         return pd.DataFrame()
def clean_team_name(team_name):
    """
    Clean and standardize a team name scraped from Basketball Reference.

    Args:
        team_name: Raw cell value from the team column. May be a missing
            value (None / NaN).

    Returns:
        The input unchanged when ``pd.isna`` reports it as missing.
        Otherwise the value converted to ``str`` with every asterisk removed
        and surrounding whitespace stripped, then passed through the
        abbreviation table below (values not in the table are returned
        as cleaned).
    """
    if pd.isna(team_name):
        return team_name
    # Remove the asterisk marker first and strip afterwards: stripping first
    # leaves a dangling space for inputs such as "Boston Celtics *".
    cleaned = str(team_name).replace('*', '').strip()
    # Abbreviation variants remapped to the spelling used by this function's
    # callers.
    # NOTE(review): some targets (e.g. 'UTH') look non-standard; confirm
    # against whatever consumes these codes before changing them.
    team_mapping = {
        'NOP': 'NO',   # New Orleans Pelicans sometimes shown as NOP
        'PHX': 'PHO',  # Phoenix Suns sometimes shown as PHX
        'BRK': 'BKN',  # Brooklyn Nets sometimes shown as BRK
        'CHA': 'CHO',  # Charlotte sometimes inconsistent
        'UTA': 'UTH',  # Utah Jazz sometimes shown as UTA
    }
    return team_mapping.get(cleaned, cleaned)
 @st.cache_data(ttl=300)
 def get_team_stats_bs(year):
     """
+    Scrapes the league’s per‑game team stats table from:
+      https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html
     Returns cleaned DataFrame.
     """
     url = f"https://www.basketball-reference.com/leagues/NBA_{year}_per_game.html"
     if not html:
         return pd.DataFrame()
+    # Try multiple possible table IDs for team stats
+    possible_table_ids = ["per_game-team", "per_game_team", "team-stats-per_game", "teams_per_game"]
+    df = pd.DataFrame()
+    for table_id in possible_table_ids:
+        df = parse_table(html, table_id=table_id)
+        if not df.empty:
+            break
+    # If no specific table found, try to find any table with team data
+    if df.empty:
+        soup = BeautifulSoup(html, "lxml") # Use lxml for consistency
+        tables = soup.find_all("table")
+        for table in tables:
+            if table.find("th", string=lambda text: text and "team" in text.lower()):
+                df = parse_table(str(table))
+                if not df.empty:
+                    break
+    if df.empty:
+        st.warning(f"Could not find team stats table for {year}")
         return pd.DataFrame()
+    # Handle potential MultiIndex columns
     if isinstance(df.columns, pd.MultiIndex):
+        df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
+                     for cols in df.columns.values]
+    # Clean column names
+    df.columns = [str(col).strip() for col in df.columns]
+    # Find team column
+    team_col = None
+    for col in df.columns:
+        if 'team' in col.lower() or col in ['Team', 'Tm']:
+            team_col = col
+            break
+    if team_col is None:
+        st.warning(f"Could not find team column in team stats. Available columns: {df.columns.tolist()}")
         return pd.DataFrame()
+    # Rename team column to standard name
+    if team_col != 'Team':
+        df = df.rename(columns={team_col: 'Team'})
+    # Remove header rows
+    df = df[df["Team"].astype(str) != "Team"].copy()
+    df = df[df["Team"].notna()].copy()
     # Standardize column names
+    column_mapping = {
         'G': 'GP', 'MP': 'MIN',
         'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
         'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
         'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
         '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
         'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
+    }
+    # Apply column mapping only for columns that exist
+    for old_col, new_col in column_mapping.items():
+        if old_col in df.columns:
+            df = df.rename(columns={old_col: new_col})
+    # Clean team names
+    if 'Team' in df.columns: # Ensure 'Team' column exists before applying
+        df['Team'] = df['Team'].apply(clean_team_name)
+    # Convert numeric columns
     non_numeric_cols = {"Team", "RANK"}
     for col in df.columns:
         if col not in non_numeric_cols:
             df[col] = pd.to_numeric(df[col], errors="coerce")
     return df
 # ——————————————————————————————————————————————
         return ""
     hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
     payload = {
+        "model":"sonar-medium-online", # Using a commonly available online model
         "messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
         "max_tokens":max_tokens, "temperature":temp
     }
         st.error(f"Perplexity API error: {e}")
         return ""
+# ——————————————————————————————————————————————
+# Plotting functions
def create_comparison_chart(data, players_names, metric):
    """Create a line chart comparing one metric across seasons for players.

    Args:
        data: DataFrame that must contain 'Player' and 'Season' columns plus
            the column named by ``metric``.
        players_names: Iterable of player names to plot. A name with no
            matching rows in ``data`` is skipped.
        metric: Column name plotted on the y-axis; also used for the chart
            title and the y-axis label.

    Returns:
        The ``go.Figure`` holding one lines+markers trace per player found.
        (``go`` is presumably ``plotly.graph_objects`` imported at file
        level; the import is outside this view.)
    """
    fig = go.Figure()
    for player in players_names:
        # Filter once; an empty result means the player is absent from data.
        player_data = data[data['Player'] == player]
        if player_data.empty:
            continue
        fig.add_trace(go.Scatter(
            x=player_data['Season'],
            y=player_data[metric],
            mode='lines+markers',
            name=player,
            line=dict(width=3)
        ))
    fig.update_layout(
        title=f"{metric} Comparison",
        xaxis_title="Season",
        yaxis_title=metric,
        hovermode='x unified',
        height=500
    )
    return fig
 def create_radar_chart(player_stats, categories):
+    """Create radar chart for player comparison"""
     fig = go.Figure()
+    for player_name, stats in player_stats.items():
+        r_values = [stats.get(cat,0) for cat in categories]
         fig.add_trace(go.Scatterpolar(
+            r=r_values,
             theta=categories,
             fill='toself',
+            name=player_name,
             opacity=0.7
         ))
     fig.update_layout(
         polar=dict(radialaxis=dict(visible=True, range=[0,100])),
         showlegend=True,
 def team_vs_team():
     st.markdown('<h2 class="section-header">Team vs Team Analysis</h2>', unsafe_allow_html=True)
+    # This page uses the custom BeautifulSoup scraper, so no BRSCRAPER_AVAILABLE check here.
     seasons = get_available_seasons()
     selected_season_str = st.selectbox("Select Season", seasons, index=0)
                     teams_with_no_data.append(t)
         if teams_with_no_data:
+            st.info(f"No data found for the selected season ({selected_season_str}) for: {', '.join(teams_with_no_data)}. This might be because the season hasn't started or data is not yet available.")
         if not stats:
             st.error("No data available for the selected teams to display. Please adjust your selections.")