Update src/streamlit_app.py
Browse files- src/streamlit_app.py +173 -570
src/streamlit_app.py
CHANGED
|
@@ -4,11 +4,13 @@ import numpy as np
|
|
| 4 |
import requests
|
| 5 |
import os
|
| 6 |
from datetime import datetime
|
| 7 |
-
from bs4 import BeautifulSoup, Comment
|
| 8 |
-
import re
|
| 9 |
-
import plotly.express as px
|
| 10 |
-
import plotly.graph_objects as go
|
| 11 |
|
|
|
|
|
|
|
| 12 |
|
| 13 |
# Page configuration
|
| 14 |
st.set_page_config(
|
|
@@ -33,551 +35,188 @@ if 'chat_history' not in st.session_state:
|
|
| 33 |
st.session_state.chat_history = []
|
| 34 |
|
| 35 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 36 |
-
#
|
| 37 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
import time
|
| 45 |
-
import random
|
| 46 |
-
from urllib.parse import urljoin
|
| 47 |
-
|
| 48 |
-
@st.cache_data(ttl=3600)
|
| 49 |
-
def fetch_html(url):
|
| 50 |
-
"""Fetch raw HTML for a URL (with error handling and rate limiting)."""
|
| 51 |
-
try:
|
| 52 |
-
# Add random delay to be respectful to basketball-reference.com
|
| 53 |
-
time.sleep(random.uniform(0.5, 1.5))
|
| 54 |
-
|
| 55 |
-
headers = {
|
| 56 |
-
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
|
| 57 |
-
}
|
| 58 |
-
|
| 59 |
-
resp = requests.get(url, timeout=30, headers=headers)
|
| 60 |
-
resp.raise_for_status()
|
| 61 |
-
return resp.text
|
| 62 |
-
except requests.exceptions.RequestException as e:
|
| 63 |
-
st.error(f"Failed to fetch {url}: {e}")
|
| 64 |
-
return ""
|
| 65 |
-
except Exception as e:
|
| 66 |
-
st.error(f"An unexpected error occurred while fetching {url}: {e}")
|
| 67 |
-
return ""
|
| 68 |
-
|
| 69 |
-
def parse_table(html, table_id=None):
|
| 70 |
-
"""
|
| 71 |
-
Given raw HTML and optional table_id, locate that <table>,
|
| 72 |
-
handling cases where it's commented out, then parse it with pandas.read_html.
|
| 73 |
-
"""
|
| 74 |
-
if not html:
|
| 75 |
-
return pd.DataFrame()
|
| 76 |
-
|
| 77 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 78 |
-
tbl_html = ""
|
| 79 |
-
|
| 80 |
-
if table_id:
|
| 81 |
-
# First, try to find the table directly
|
| 82 |
-
tbl = soup.find("table", {"id": table_id})
|
| 83 |
-
if tbl:
|
| 84 |
-
tbl_html = str(tbl)
|
| 85 |
-
else:
|
| 86 |
-
# If not found directly, search for it within HTML comments
|
| 87 |
-
comments = soup.find_all(string=lambda text: isinstance(text, Comment))
|
| 88 |
-
for comment in comments:
|
| 89 |
-
comment_soup = BeautifulSoup(comment, "html.parser")
|
| 90 |
-
tbl = comment_soup.find("table", {"id": table_id})
|
| 91 |
-
if tbl:
|
| 92 |
-
tbl_html = str(tbl)
|
| 93 |
-
break
|
| 94 |
-
else:
|
| 95 |
-
# fallback: first table on page (only if no table_id specified)
|
| 96 |
-
first = soup.find("table")
|
| 97 |
-
if first:
|
| 98 |
-
tbl_html = str(first)
|
| 99 |
|
| 100 |
-
|
| 101 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
return pd.DataFrame()
|
| 110 |
-
except ValueError as e:
|
| 111 |
-
# No tables found in the provided HTML string
|
| 112 |
-
st.warning(f"No tables found in HTML: {e}")
|
| 113 |
-
return pd.DataFrame()
|
| 114 |
-
except Exception as e:
|
| 115 |
-
st.error(f"Error parsing table with pandas: {e}")
|
| 116 |
-
return pd.DataFrame(
|
| 117 |
|
| 118 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 119 |
-
# Basketball-Reference Data Fetching Utilities
|
| 120 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 121 |
@st.cache_data(ttl=3600)
|
| 122 |
-
def
|
| 123 |
"""
|
| 124 |
-
|
| 125 |
-
|
| 126 |
"""
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
continue
|
| 135 |
-
|
| 136 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 137 |
-
# The players table is usually directly available, not commented out.
|
| 138 |
-
table = soup.find("table", {"id": "players"})
|
| 139 |
-
if not table:
|
| 140 |
-
continue
|
| 141 |
-
|
| 142 |
-
# Look for both tbody and direct tr children
|
| 143 |
-
rows = table.select("tbody tr") if table.select("tbody tr") else table.select("tr")
|
| 144 |
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
continue
|
| 152 |
-
name = a.text.strip()
|
| 153 |
-
href = a["href"].strip()
|
| 154 |
-
full_url = urljoin("https://www.basketball-reference.com", href)
|
| 155 |
-
records.append({"name": name, "url": full_url})
|
| 156 |
-
|
| 157 |
-
return pd.DataFrame(records)
|
| 158 |
-
|
| 159 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 160 |
-
@st.cache_data(ttl=300)
|
| 161 |
-
def player_season_stats(bbr_url):
|
| 162 |
-
"""
|
| 163 |
-
Scrapes a player's perβseason table (id="per_game") from their BBR page.
|
| 164 |
-
Returns cleaned DataFrame with season-by-season averages.
|
| 165 |
-
"""
|
| 166 |
-
html = fetch_html(bbr_url)
|
| 167 |
-
if not html:
|
| 168 |
-
return pd.DataFrame()
|
| 169 |
-
|
| 170 |
-
# Use the parse_table function for consistency
|
| 171 |
-
df = parse_table(html, "per_game")
|
| 172 |
-
|
| 173 |
-
# If per_game table not found, try alternatives
|
| 174 |
-
if df.empty:
|
| 175 |
-
# Try other common table IDs for season stats
|
| 176 |
-
for table_id in ["stats", "per_game_stats", "totals", "advanced"]:
|
| 177 |
-
df = parse_table(html, table_id)
|
| 178 |
-
if not df.empty:
|
| 179 |
-
break
|
| 180 |
-
|
| 181 |
-
# If still empty, try to find any table with Season column
|
| 182 |
-
if df.empty:
|
| 183 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 184 |
-
all_tables = soup.find_all("table")
|
| 185 |
-
for table in all_tables:
|
| 186 |
-
try:
|
| 187 |
-
temp_df = pd.read_html(str(table), header=0)[0]
|
| 188 |
-
# Check if it has Season column and looks like season stats
|
| 189 |
-
if not temp_df.empty and 'Season' in temp_df.columns and 'Date' not in temp_df.columns:
|
| 190 |
-
df = temp_df
|
| 191 |
-
break
|
| 192 |
-
except:
|
| 193 |
-
continue
|
| 194 |
-
|
| 195 |
-
if df.empty:
|
| 196 |
-
st.warning(f"Could not find season stats table at {bbr_url}")
|
| 197 |
-
return pd.DataFrame()
|
| 198 |
-
|
| 199 |
-
# Handle potential MultiIndex columns
|
| 200 |
-
if isinstance(df.columns, pd.MultiIndex):
|
| 201 |
-
df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and 'Unnamed' not in str(col))
|
| 202 |
-
for cols in df.columns.values]
|
| 203 |
-
|
| 204 |
-
# Clean column names
|
| 205 |
-
df.columns = [str(col).strip() for col in df.columns]
|
| 206 |
-
|
| 207 |
-
# Verify we have the right table (should have Season column, not Date)
|
| 208 |
-
if 'Season' not in df.columns:
|
| 209 |
-
st.warning(f"Table found but no Season column. Available columns: {df.columns.tolist()}")
|
| 210 |
-
return pd.DataFrame()
|
| 211 |
-
|
| 212 |
-
if 'Date' in df.columns:
|
| 213 |
-
st.warning("Found game log table instead of season stats. Skipping.")
|
| 214 |
-
return pd.DataFrame()
|
| 215 |
-
|
| 216 |
-
# Clean the data - remove header rows
|
| 217 |
-
df = df[df["Season"].astype(str) != "Season"].copy()
|
| 218 |
-
df = df[df["Season"].notna()].copy()
|
| 219 |
-
|
| 220 |
-
# Remove any completely empty rows
|
| 221 |
-
df = df.dropna(how='all').copy()
|
| 222 |
-
|
| 223 |
-
if df.empty:
|
| 224 |
-
st.warning("No valid season data found after cleaning")
|
| 225 |
-
return pd.DataFrame()
|
| 226 |
-
|
| 227 |
-
# Clean season format
|
| 228 |
-
df["Season"] = df["Season"].astype(str).str.strip()
|
| 229 |
-
df['Season'] = df['Season'].str.replace('-', 'β') # Ensure en-dash for consistency
|
| 230 |
-
|
| 231 |
-
# Standardize column names to match expected format
|
| 232 |
-
column_mapping = {
|
| 233 |
-
'G': 'GP', 'GS': 'GS', 'MP': 'MIN',
|
| 234 |
-
'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
|
| 235 |
-
'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
|
| 236 |
-
'PF': 'PF', 'PTS': 'PTS',
|
| 237 |
-
'Age': 'AGE', 'Tm': 'TEAM_ABBREVIATION', 'Lg': 'LEAGUE_ID', 'Pos': 'POSITION',
|
| 238 |
-
'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
|
| 239 |
-
'2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
|
| 240 |
-
'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
|
| 241 |
-
}
|
| 242 |
-
|
| 243 |
-
# Apply column mapping only for columns that exist
|
| 244 |
-
for old_col, new_col in column_mapping.items():
|
| 245 |
-
if old_col in df.columns:
|
| 246 |
-
df = df.rename(columns={old_col: new_col})
|
| 247 |
-
|
| 248 |
-
# Clean team names if TEAM_ABBREVIATION column exists
|
| 249 |
-
if 'TEAM_ABBREVIATION' in df.columns:
|
| 250 |
-
df['TEAM_ABBREVIATION'] = df['TEAM_ABBREVIATION'].apply(clean_team_name)
|
| 251 |
-
|
| 252 |
-
# Convert numeric columns
|
| 253 |
-
non_numeric_cols = {'Season', 'TEAM_ABBREVIATION', 'LEAGUE_ID', 'POSITION', 'Player'}
|
| 254 |
-
for col in df.columns:
|
| 255 |
-
if col not in non_numeric_cols:
|
| 256 |
-
df[col] = pd.to_numeric(df[col], errors="coerce")
|
| 257 |
-
|
| 258 |
-
return df
|
| 259 |
|
| 260 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 261 |
@st.cache_data(ttl=300)
|
| 262 |
-
def
|
| 263 |
"""
|
| 264 |
-
|
| 265 |
-
|
| 266 |
"""
|
| 267 |
-
if not player_name or not player_name.strip():
|
| 268 |
-
st.warning("Please provide a valid player name")
|
| 269 |
-
return pd.DataFrame()
|
| 270 |
-
|
| 271 |
-
# Get player index
|
| 272 |
try:
|
| 273 |
-
|
| 274 |
-
if
|
| 275 |
-
st.error("Could not load player index. Please try again later.")
|
| 276 |
-
return pd.DataFrame()
|
| 277 |
-
except Exception as e:
|
| 278 |
-
st.error(f"Error loading player index: {e}")
|
| 279 |
-
return pd.DataFrame()
|
| 280 |
-
|
| 281 |
-
# Clean the search term
|
| 282 |
-
search_name = player_name.strip()
|
| 283 |
-
|
| 284 |
-
# Search for player (case insensitive, exact match first)
|
| 285 |
-
exact_matches = player_index[player_index['name'].str.lower() == search_name.lower()]
|
| 286 |
-
|
| 287 |
-
if not exact_matches.empty:
|
| 288 |
-
matches = exact_matches
|
| 289 |
-
else:
|
| 290 |
-
# Try partial matching
|
| 291 |
-
matches = player_index[player_index['name'].str.contains(search_name, case=False, na=False, regex=False)]
|
| 292 |
-
|
| 293 |
-
if matches.empty:
|
| 294 |
-
st.warning(f"No player found matching '{player_name}'. Please check the spelling and try again.")
|
| 295 |
-
return pd.DataFrame()
|
| 296 |
-
|
| 297 |
-
if len(matches) > 1:
|
| 298 |
-
st.info(f"Multiple players found matching '{player_name}':")
|
| 299 |
-
for i, (_, row) in enumerate(matches.head(5).iterrows(), 1):
|
| 300 |
-
st.info(f"{i}. {row['name']}")
|
| 301 |
-
st.info("Using the first match. For more specific results, try using the full player name.")
|
| 302 |
-
|
| 303 |
-
# Get stats for the first match
|
| 304 |
-
try:
|
| 305 |
-
player_url = matches.iloc[0]['url']
|
| 306 |
-
player_stats = player_season_stats(player_url)
|
| 307 |
-
|
| 308 |
-
if player_stats.empty:
|
| 309 |
-
st.warning(f"Could not retrieve stats for {matches.iloc[0]['name']}")
|
| 310 |
return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 311 |
|
| 312 |
-
#
|
| 313 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 314 |
|
| 315 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 316 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 317 |
except Exception as e:
|
| 318 |
-
st.error(f"Error
|
| 319 |
return pd.DataFrame()
|
| 320 |
|
| 321 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 322 |
@st.cache_data(ttl=300)
|
| 323 |
-
def
|
| 324 |
"""
|
| 325 |
-
|
| 326 |
-
|
| 327 |
-
Returns cleaned DataFrame.
|
| 328 |
"""
|
| 329 |
-
|
| 330 |
-
|
| 331 |
-
|
| 332 |
-
|
| 333 |
-
|
| 334 |
-
# Try multiple possible table IDs for team stats
|
| 335 |
-
possible_table_ids = ["per_game-team", "per_game_team", "team-stats-per_game", "teams_per_game"]
|
| 336 |
-
df = pd.DataFrame()
|
| 337 |
-
|
| 338 |
-
for table_id in possible_table_ids:
|
| 339 |
-
df = parse_table(html, table_id=table_id)
|
| 340 |
-
if not df.empty:
|
| 341 |
-
break
|
| 342 |
-
|
| 343 |
-
# If no specific table found, try to find any table with team data
|
| 344 |
-
if df.empty:
|
| 345 |
-
soup = BeautifulSoup(html, "html.parser")
|
| 346 |
-
tables = soup.find_all("table")
|
| 347 |
-
for table in tables:
|
| 348 |
-
if table.find("th", string=lambda text: text and "team" in text.lower()):
|
| 349 |
-
df = parse_table(str(table))
|
| 350 |
-
if not df.empty:
|
| 351 |
-
break
|
| 352 |
-
|
| 353 |
-
if df.empty:
|
| 354 |
-
st.warning(f"Could not find team stats table for {year}")
|
| 355 |
-
return pd.DataFrame()
|
| 356 |
-
|
| 357 |
-
# Handle potential MultiIndex columns
|
| 358 |
-
if isinstance(df.columns, pd.MultiIndex):
|
| 359 |
-
df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
|
| 360 |
-
for cols in df.columns.values]
|
| 361 |
-
|
| 362 |
-
# Clean column names
|
| 363 |
-
df.columns = [str(col).strip() for col in df.columns]
|
| 364 |
-
|
| 365 |
-
# Find team column
|
| 366 |
-
team_col = None
|
| 367 |
-
for col in df.columns:
|
| 368 |
-
if 'team' in col.lower() or col in ['Team', 'Tm']:
|
| 369 |
-
team_col = col
|
| 370 |
-
break
|
| 371 |
-
|
| 372 |
-
if team_col is None:
|
| 373 |
-
st.warning(f"Could not find team column in team stats. Available columns: {df.columns.tolist()}")
|
| 374 |
-
return pd.DataFrame()
|
| 375 |
-
|
| 376 |
-
# Rename team column to standard name
|
| 377 |
-
if team_col != 'Team':
|
| 378 |
-
df = df.rename(columns={team_col: 'Team'})
|
| 379 |
-
|
| 380 |
-
# Remove header rows
|
| 381 |
-
df = df[df["Team"].astype(str) != "Team"].copy()
|
| 382 |
-
df = df[df["Team"].notna()].copy()
|
| 383 |
-
|
| 384 |
-
# Rename Team to Tm for consistency
|
| 385 |
-
df = df.rename(columns={"Team": "Tm"})
|
| 386 |
-
|
| 387 |
-
# Standardize column names
|
| 388 |
-
column_mapping = {
|
| 389 |
-
'G': 'GP', 'MP': 'MIN',
|
| 390 |
-
'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
|
| 391 |
-
'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
|
| 392 |
-
'PF': 'PF', 'PTS': 'PTS',
|
| 393 |
-
'Rk': 'RANK', 'W': 'WINS', 'L': 'LOSSES', 'W/L%': 'WIN_LOSS_PCT',
|
| 394 |
-
'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
|
| 395 |
-
'2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
|
| 396 |
-
'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB'
|
| 397 |
-
}
|
| 398 |
-
|
| 399 |
-
# Apply column mapping only for columns that exist
|
| 400 |
-
for old_col, new_col in column_mapping.items():
|
| 401 |
-
if old_col in df.columns:
|
| 402 |
-
df = df.rename(columns={old_col: new_col})
|
| 403 |
-
|
| 404 |
-
# Clean team names
|
| 405 |
-
df['Tm'] = df['Tm'].apply(clean_team_name)
|
| 406 |
-
|
| 407 |
-
# Convert numeric columns
|
| 408 |
-
non_numeric_cols = {"Tm", "RANK"}
|
| 409 |
-
for col in df.columns:
|
| 410 |
-
if col not in non_numeric_cols:
|
| 411 |
-
df[col] = pd.to_numeric(df[col], errors="coerce")
|
| 412 |
-
|
| 413 |
-
return df
|
| 414 |
-
|
| 415 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 416 |
-
# Additional utility functions for team data processing
|
| 417 |
-
|
| 418 |
-
@st.cache_data(ttl=300)
|
| 419 |
-
def team_opponent_stats(year):
|
| 420 |
-
"""
|
| 421 |
-
Scrapes the league's opponent perβgame team stats table from:
|
| 422 |
-
https://www.basketball-reference.com/leagues/NBA_{year}_opp_per_game.html
|
| 423 |
-
Returns cleaned DataFrame with opponent stats.
|
| 424 |
-
"""
|
| 425 |
-
url = f"https://www.basketball-reference.com/leagues/NBA_{year}_opp_per_game.html"
|
| 426 |
-
html = fetch_html(url)
|
| 427 |
-
if not html:
|
| 428 |
-
return pd.DataFrame()
|
| 429 |
-
|
| 430 |
-
# Try multiple possible table IDs for opponent stats
|
| 431 |
-
possible_table_ids = ["opp-stats-per_game", "opp_per_game", "opponent-stats-per_game"]
|
| 432 |
-
df = pd.DataFrame()
|
| 433 |
-
|
| 434 |
-
for table_id in possible_table_ids:
|
| 435 |
-
df = parse_table(html, table_id=table_id)
|
| 436 |
-
if not df.empty:
|
| 437 |
-
break
|
| 438 |
-
|
| 439 |
-
if df.empty:
|
| 440 |
-
st.warning(f"Could not find opponent stats table for {year}")
|
| 441 |
-
return pd.DataFrame()
|
| 442 |
-
|
| 443 |
-
# Clean and process the same way as regular team stats
|
| 444 |
-
if isinstance(df.columns, pd.MultiIndex):
|
| 445 |
-
df.columns = ['_'.join(str(col).strip() for col in cols if str(col).strip() and str(col).strip() != 'Unnamed: 0_level_0')
|
| 446 |
-
for cols in df.columns.values]
|
| 447 |
-
|
| 448 |
-
df.columns = [str(col).strip() for col in df.columns]
|
| 449 |
-
|
| 450 |
-
# Find team column
|
| 451 |
-
team_col = None
|
| 452 |
-
for col in df.columns:
|
| 453 |
-
if 'team' in col.lower() or col in ['Team', 'Tm']:
|
| 454 |
-
team_col = col
|
| 455 |
-
break
|
| 456 |
-
|
| 457 |
-
if team_col is None:
|
| 458 |
-
return pd.DataFrame()
|
| 459 |
-
|
| 460 |
-
if team_col != 'Team':
|
| 461 |
-
df = df.rename(columns={team_col: 'Team'})
|
| 462 |
-
|
| 463 |
-
df = df[df["Team"].astype(str) != "Team"].copy()
|
| 464 |
-
df = df[df["Team"].notna()].copy()
|
| 465 |
-
df = df.rename(columns={"Team": "Tm"})
|
| 466 |
-
|
| 467 |
-
# Apply team name cleaning
|
| 468 |
-
df['Tm'] = df['Tm'].apply(clean_team_name)
|
| 469 |
-
|
| 470 |
-
# Same column standardization as regular team stats
|
| 471 |
-
column_mapping = {
|
| 472 |
-
'G': 'OPP_GP', 'MP': 'OPP_MIN',
|
| 473 |
-
'FG%': 'OPP_FG_PCT', '3P%': 'OPP_FG3_PCT', 'FT%': 'OPP_FT_PCT',
|
| 474 |
-
'TRB': 'OPP_REB', 'AST': 'OPP_AST', 'STL': 'OPP_STL', 'BLK': 'OPP_BLK', 'TOV': 'OPP_TO',
|
| 475 |
-
'PF': 'OPP_PF', 'PTS': 'OPP_PTS',
|
| 476 |
-
'FG': 'OPP_FGM', 'FGA': 'OPP_FGA', '3P': 'OPP_FG3M', '3PA': 'OPP_FG3A',
|
| 477 |
-
'2P': 'OPP_FGM2', '2PA': 'OPP_FGA2', '2P%': 'OPP_FG2_PCT', 'eFG%': 'OPP_EFG_PCT',
|
| 478 |
-
'FT': 'OPP_FTM', 'FTA': 'OPP_FTA', 'ORB': 'OPP_OREB', 'DRB': 'OPP_DREB'
|
| 479 |
-
}
|
| 480 |
-
|
| 481 |
-
for old_col, new_col in column_mapping.items():
|
| 482 |
-
if old_col in df.columns:
|
| 483 |
-
df = df.rename(columns={old_col: new_col})
|
| 484 |
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 490 |
|
| 491 |
-
|
|
|
|
| 492 |
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
# Look for conference tables
|
| 509 |
-
for conference in ['E', 'W']: # Eastern and Western conference IDs
|
| 510 |
-
table_id = f"standings_{conference}"
|
| 511 |
-
table = soup.find("table", {"id": table_id})
|
| 512 |
-
if table:
|
| 513 |
-
df = parse_table(str(table))
|
| 514 |
-
if not df.empty:
|
| 515 |
-
df['Conference'] = 'Eastern' if conference == 'E' else 'Western'
|
| 516 |
-
standings_data.append(df)
|
| 517 |
-
|
| 518 |
-
if not standings_data:
|
| 519 |
return pd.DataFrame()
|
| 520 |
-
|
| 521 |
-
# Combine conference standings
|
| 522 |
-
df = pd.concat(standings_data, ignore_index=True)
|
| 523 |
-
|
| 524 |
-
# Clean team names if 'Team' column exists
|
| 525 |
-
if 'Team' in df.columns:
|
| 526 |
-
df['Team'] = df['Team'].apply(clean_team_name)
|
| 527 |
-
|
| 528 |
-
return df
|
| 529 |
-
|
| 530 |
-
def validate_dataframe(df, required_columns=None):
|
| 531 |
-
"""
|
| 532 |
-
Validate that a DataFrame has the expected structure and data.
|
| 533 |
-
"""
|
| 534 |
-
if df.empty:
|
| 535 |
-
return False, "DataFrame is empty"
|
| 536 |
-
|
| 537 |
-
if required_columns:
|
| 538 |
-
missing_cols = [col for col in required_columns if col not in df.columns]
|
| 539 |
-
if missing_cols:
|
| 540 |
-
return False, f"Missing required columns: {missing_cols}"
|
| 541 |
-
|
| 542 |
-
return True, "DataFrame is valid"
|
| 543 |
-
|
| 544 |
-
def clean_team_name(team_name):
|
| 545 |
-
"""
|
| 546 |
-
Clean and standardize team names from Basketball Reference.
|
| 547 |
-
"""
|
| 548 |
-
if pd.isna(team_name):
|
| 549 |
-
return team_name
|
| 550 |
-
|
| 551 |
-
# Remove any asterisks or other symbols
|
| 552 |
-
team_name = str(team_name).strip().replace('*', '')
|
| 553 |
-
|
| 554 |
-
# Handle special cases for team name variations
|
| 555 |
-
team_mapping = {
|
| 556 |
-
'NOP': 'NO', # New Orleans Pelicans sometimes shown as NOP
|
| 557 |
-
'PHX': 'PHO', # Phoenix Suns sometimes shown as PHX
|
| 558 |
-
'BRK': 'BKN', # Brooklyn Nets sometimes shown as BRK
|
| 559 |
-
'CHA': 'CHO', # Charlotte sometimes inconsistent
|
| 560 |
-
'UTA': 'UTH' # Utah Jazz sometimes shown as UTA
|
| 561 |
-
}
|
| 562 |
-
|
| 563 |
-
return team_mapping.get(team_name, team_name)
|
| 564 |
|
| 565 |
-
def retry_fetch(func, *args, max_retries=3, **kwargs):
|
| 566 |
-
"""
|
| 567 |
-
Retry a function call with exponential backoff.
|
| 568 |
-
"""
|
| 569 |
-
for attempt in range(max_retries):
|
| 570 |
-
try:
|
| 571 |
-
result = func(*args, **kwargs)
|
| 572 |
-
if not (isinstance(result, pd.DataFrame) and result.empty):
|
| 573 |
-
return result
|
| 574 |
-
except Exception as e:
|
| 575 |
-
if attempt == max_retries - 1:
|
| 576 |
-
st.error(f"Failed after {max_retries} attempts: {e}")
|
| 577 |
-
return pd.DataFrame()
|
| 578 |
-
time.sleep(2 ** attempt) # Exponential backoff
|
| 579 |
-
|
| 580 |
-
return pd.DataFrame()
|
| 581 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 582 |
# Perplexity integration
|
| 583 |
PERP_KEY = os.getenv("PERPLEXITY_API_KEY")
|
|
@@ -589,7 +228,7 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
|
|
| 589 |
return ""
|
| 590 |
hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
|
| 591 |
payload = {
|
| 592 |
-
"model":"sonar-
|
| 593 |
"messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
|
| 594 |
"max_tokens":max_tokens, "temperature":temp
|
| 595 |
}
|
|
@@ -611,28 +250,6 @@ def ask_perp(prompt, system="You are a helpful NBA analyst AI.", max_tokens=500,
|
|
| 611 |
st.error(f"An unexpected error occurred with Perplexity API: {e}")
|
| 612 |
return ""
|
| 613 |
|
| 614 |
-
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 615 |
-
# Helper for dynamic season generation
|
| 616 |
-
def get_available_seasons(num_seasons=6):
|
| 617 |
-
"""Generates a list of recent NBA seasons in 'YYYYβYY' format."""
|
| 618 |
-
current_year = datetime.now().year
|
| 619 |
-
current_month = datetime.now().month
|
| 620 |
-
|
| 621 |
-
# Determine the latest season end year.
|
| 622 |
-
# If it's before July (e.g., May 2025), the current season is 2024-25 (ends 2025).
|
| 623 |
-
# If it's July or later (e.g., July 2025), the 2024-25 season just finished,
|
| 624 |
-
# and the next season (2025-26) is considered the "current" one for future projections.
|
| 625 |
-
latest_season_end_year = current_year
|
| 626 |
-
if current_month >= 7: # NBA season typically ends in June
|
| 627 |
-
latest_season_end_year += 1
|
| 628 |
-
|
| 629 |
-
seasons_list = []
|
| 630 |
-
for i in range(num_seasons):
|
| 631 |
-
end_year = latest_season_end_year - i
|
| 632 |
-
start_year = end_year - 1
|
| 633 |
-
seasons_list.append(f"{start_year}β{end_year}")
|
| 634 |
-
return sorted(seasons_list, reverse=True) # Sort to show most recent first
|
| 635 |
-
|
| 636 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 637 |
# Plotting functions (retained from previous version)
|
| 638 |
def create_comparison_chart(data, players_names, metric):
|
|
@@ -716,7 +333,7 @@ def main():
|
|
| 716 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 717 |
def player_vs_player():
|
| 718 |
st.markdown('<h2 class="section-header">Player vs Player Comparison</h2>', unsafe_allow_html=True)
|
| 719 |
-
idx =
|
| 720 |
names = idx['name'].tolist()
|
| 721 |
selected_players = st.multiselect("Select Players (up to 4)", names, max_selections=4)
|
| 722 |
|
|
@@ -730,18 +347,11 @@ def player_vs_player():
|
|
| 730 |
|
| 731 |
stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
|
| 732 |
all_player_season_data = [] # To store individual season rows for each player
|
| 733 |
-
|
| 734 |
-
players_with_no_season_data = []
|
| 735 |
|
| 736 |
with st.spinner("Fetching player data..."):
|
| 737 |
for player_name in selected_players:
|
| 738 |
-
|
| 739 |
-
if player_url_row.empty:
|
| 740 |
-
players_not_found_in_index.append(player_name)
|
| 741 |
-
continue
|
| 742 |
-
|
| 743 |
-
player_url = player_url_row.iat[0]
|
| 744 |
-
df_player_career = player_season_stats(player_url)
|
| 745 |
|
| 746 |
if not df_player_career.empty:
|
| 747 |
# Filter for selected seasons. The player_season_stats function
|
|
@@ -749,22 +359,16 @@ def player_vs_player():
|
|
| 749 |
filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
|
| 750 |
|
| 751 |
if not filtered_df.empty:
|
| 752 |
-
|
| 753 |
all_player_season_data.append(filtered_df)
|
| 754 |
else:
|
| 755 |
-
|
| 756 |
-
players_with_no_season_data.append(player_name)
|
| 757 |
else:
|
| 758 |
-
|
| 759 |
-
players_with_no_season_data.append(player_name) # Treat as no data for selected seasons
|
| 760 |
-
|
| 761 |
-
# Report on players not found in index
|
| 762 |
-
if players_not_found_in_index:
|
| 763 |
-
st.error(f"The following players were not found in the Basketball-Reference index: {', '.join(players_not_found_in_index)}. Please check spelling.")
|
| 764 |
|
| 765 |
# Report on players with no data for selected seasons
|
| 766 |
-
if
|
| 767 |
-
st.info(f"No data found for the selected seasons ({', '.join(selected_seasons)}) for: {', '.join(
|
| 768 |
|
| 769 |
if not all_player_season_data:
|
| 770 |
st.error("No data available for any of the selected players and seasons to display. Please adjust your selections.")
|
|
@@ -899,12 +503,12 @@ def team_vs_team():
|
|
| 899 |
# Extract the end year from the season string (e.g., "2024β25" -> 2025)
|
| 900 |
year_for_team_stats = int(selected_season_str.split('β')[1])
|
| 901 |
|
| 902 |
-
tm_df =
|
| 903 |
if tm_df.empty:
|
| 904 |
-
st.info(f"No team data available for the {selected_season_str} season. This might be because the season hasn't started or data is not yet available.")
|
| 905 |
return
|
| 906 |
|
| 907 |
-
teams = tm_df['
|
| 908 |
selected_teams = st.multiselect("Select Teams (up to 4)", teams, max_selections=4)
|
| 909 |
|
| 910 |
if st.button("Run Comparison"):
|
|
@@ -917,11 +521,10 @@ def team_vs_team():
|
|
| 917 |
|
| 918 |
with st.spinner("Fetching team data..."):
|
| 919 |
for t in selected_teams:
|
| 920 |
-
df = tm_df[tm_df.
|
| 921 |
if not df.empty:
|
| 922 |
# For team stats, we usually get one row per team per season from team_per_game
|
| 923 |
# So, no need for .mean() here, just take the row.
|
| 924 |
-
df['Team'] = t # Add 'Team' column for consistency
|
| 925 |
df['Season'] = selected_season_str # Add 'Season' column
|
| 926 |
stats.append(df.iloc[0].to_dict()) # Convert the single row to dict
|
| 927 |
else:
|
|
@@ -1029,7 +632,7 @@ def ai_chat():
|
|
| 1029 |
|
| 1030 |
def young_projections():
|
| 1031 |
st.markdown('<h2 class="section-header">Young Player Projections</h2>', unsafe_allow_html=True)
|
| 1032 |
-
all_p_df =
|
| 1033 |
all_p = all_p_df['name'].tolist()
|
| 1034 |
sp = st.selectbox("Select or enter player", [""]+all_p)
|
| 1035 |
if not sp:
|
|
@@ -1061,7 +664,7 @@ def young_projections():
|
|
| 1061 |
|
| 1062 |
def similar_players():
|
| 1063 |
st.markdown('<h2 class="section-header">Similar Players Finder</h2>', unsafe_allow_html=True)
|
| 1064 |
-
all_p_df =
|
| 1065 |
all_p = all_p_df['name'].tolist()
|
| 1066 |
tp = st.selectbox("Target Player", all_p)
|
| 1067 |
crit = st.multiselect("Criteria",["Position","Height/Weight","Playing Style","Statistical Profile","Age/Experience"],default=["Playing Style","Statistical Profile"])
|
|
|
|
| 4 |
import requests
|
| 5 |
import os
|
| 6 |
from datetime import datetime
|
| 7 |
+
from bs4 import BeautifulSoup, Comment # Keep Comment for parse_table if needed, though BRScraper handles it
|
| 8 |
+
import re
|
| 9 |
+
import plotly.express as px
|
| 10 |
+
import plotly.graph_objects as go
|
| 11 |
|
| 12 |
+
# Import BRScraper
|
| 13 |
+
from BRScraper import nba
|
| 14 |
|
| 15 |
# Page configuration
|
| 16 |
st.set_page_config(
|
|
|
|
| 35 |
st.session_state.chat_history = []
|
| 36 |
|
| 37 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 38 |
+
# BRScraper Data Fetching Utilities
|
| 39 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 40 |
|
| 41 |
+
# Helper for dynamic season generation
|
| 42 |
+
def get_available_seasons(num_seasons=6):
|
| 43 |
+
"""Generates a list of recent NBA seasons in 'YYYYβYY' format."""
|
| 44 |
+
current_year = datetime.now().year
|
| 45 |
+
current_month = datetime.now().month
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
|
| 47 |
+
# Determine the latest season end year.
|
| 48 |
+
# If it's before July (e.g., May 2025), the current season is 2024-25 (ends 2025).
|
| 49 |
+
# If it's July or later (e.g., July 2025), the 2024-25 season just finished,
|
| 50 |
+
# and the next season (2025-26) is considered the "current" one for future projections.
|
| 51 |
+
latest_season_end_year = current_year
|
| 52 |
+
if current_month >= 7: # NBA season typically ends in June
|
| 53 |
+
latest_season_end_year += 1
|
| 54 |
|
| 55 |
+
seasons_list = []
|
| 56 |
+
for i in range(num_seasons):
|
| 57 |
+
end_year = latest_season_end_year - i
|
| 58 |
+
start_year = end_year - 1
|
| 59 |
+
seasons_list.append(f"{start_year}β{end_year}")
|
| 60 |
+
return sorted(seasons_list, reverse=True) # Sort to show most recent first
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 61 |
|
|
|
|
|
|
|
|
|
|
| 62 |
@st.cache_data(ttl=3600)
def get_player_index_brscraper():
    """Build the player index used by the app's multiselect widgets.

    Pulls per-game stats for the latest season via BRScraper and collects
    the distinct player names into a single-column DataFrame. On any
    failure an empty DataFrame with a 'name' column is returned so callers
    can always do ``idx['name'].tolist()``.
    """
    season_end = int(get_available_seasons(1)[0].split('–')[1])
    try:
        stats = nba.get_stats(season_end, info='per_game', rename=False)
        if not stats.empty:
            # Distinct player names become the index rows.
            return pd.DataFrame({'name': stats['Player'].unique().tolist()})
        st.error(f"Could not fetch player list for {season_end} from BRScraper.")
    except Exception as exc:
        st.error(f"Error fetching player index with BRScraper: {exc}")
    # Shared failure path: empty frame, but with the expected column.
    return pd.DataFrame(columns=['name'])
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 82 |
|
|
|
|
| 83 |
@st.cache_data(ttl=300)
def get_player_career_stats_brscraper(player_name):
    """Fetch a player's season-by-season career stats via BRScraper.

    Renames Basketball-Reference column headers to the app's canonical
    names, normalizes 'Season' labels to the 'YYYY–YYYY' (en-dash) format
    produced by get_available_seasons(), and coerces stat columns to
    numeric. Returns an empty DataFrame on any failure.

    Args:
        player_name: Player name as understood by BRScraper.

    Returns:
        pd.DataFrame: Career stats (one row per season) with a 'Player'
        column added, or an empty DataFrame if nothing could be fetched.
    """

    def _normalize_season(label):
        # Expand 'YYYY-YY' / 'YYYY–YY' to 'YYYY–YYYY' so labels match
        # get_available_seasons(); other labels just get the en dash.
        label = str(label).replace('-', '–')
        parts = label.split('–')
        if (len(parts) == 2 and parts[0].isdigit() and len(parts[0]) == 4
                and parts[1].isdigit() and len(parts[1]) == 2):
            start = int(parts[0])
            end = start - start % 100 + int(parts[1])
            if end <= start:  # century rollover, e.g. '1999-00' -> 1999–2000
                end += 100
            return f"{start}–{end}"
        return label

    try:
        df = nba.get_player_stats(player_name)
        if df.empty:
            return pd.DataFrame()

        # Map Basketball-Reference headers to the app's canonical names.
        # pandas' rename() silently ignores absent keys, so the full
        # mapping can be applied in a single call.
        column_mapping = {
            'G': 'GP', 'GS': 'GS', 'MP': 'MIN',
            'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
            'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
            'PF': 'PF', 'PTS': 'PTS',
            'Age': 'AGE', 'Tm': 'TEAM_ABBREVIATION', 'Lg': 'LEAGUE_ID', 'Pos': 'POSITION',
            'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
            '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
            'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB',
        }
        df = df.rename(columns=column_mapping)

        # BRScraper seasons look like '2023-24' (assumed — Basketball-
        # Reference convention; confirm against the library's output). The
        # rest of the app filters with 'YYYY–YYYY' labels from
        # get_available_seasons(), so the two-digit end year must be
        # expanded — the previous plain '-'→'–' swap produced '2023–24'
        # and the isin() season filter never matched.
        if 'Season' in df.columns:
            df['Season'] = df['Season'].map(_normalize_season)

        # Coerce every stat column to numeric; identifier columns stay as
        # strings.
        non_numeric_cols = {'Season', 'TEAM_ABBREVIATION', 'LEAGUE_ID', 'POSITION', 'Player'}
        for col in df.columns:
            if col not in non_numeric_cols:
                df[col] = pd.to_numeric(df[col], errors="coerce")

        # Tag every row so multi-player frames can be concatenated later.
        df['Player'] = player_name

        return df
    except Exception as e:
        st.error(f"Error fetching stats for {player_name} with BRScraper: {e}")
        return pd.DataFrame()
|
| 129 |
|
|
|
|
| 130 |
@st.cache_data(ttl=300)
def get_team_season_stats_brscraper(year):
    """Fetch per-game team stats for a season via BRScraper.

    Isolates team rows from BRScraper's combined output, renames
    Basketball-Reference headers to the app's canonical names, coerces
    stat columns to numeric, and strips playoff markers ('*') from team
    names.

    Args:
        year: Season end year (e.g. 2025 for the 2024–25 season).

    Returns:
        pd.DataFrame: One row per team with a clean 'Team' column, or an
        empty DataFrame on failure.
    """
    try:
        df = nba.get_stats(year, info='per_game', rename=False)
        if df.empty:
            return pd.DataFrame()

        # Normalize headers before any filtering.
        df.columns = [str(col).strip() for col in df.columns]
        if 'Tm' in df.columns:
            df = df.rename(columns={'Tm': 'Team'})

        # NOTE(review): assumes BRScraper's get_stats() mixes player and
        # team rows and that team rows have a blank 'Player' cell — confirm
        # against the library's actual output.
        if 'Player' in df.columns:
            team_df = df[df['Player'].isna()].copy()
        else:
            team_df = df.copy()

        # Fallback: identify team rows by a non-numeric 'Rk' cell.
        if 'Team' not in team_df.columns or team_df['Team'].isna().all():
            if 'Rk' in df.columns:
                team_df = df[pd.to_numeric(df['Rk'], errors='coerce').isna()].copy()
                if 'Player' in team_df.columns:
                    team_df = team_df.drop(columns=['Player'])
            else:
                st.warning(f"Could not reliably identify team rows for year {year}. Returning all data.")
                team_df = df.copy()

        if team_df.empty:
            return pd.DataFrame()

        # Map Basketball-Reference headers to the app's canonical names;
        # rename() silently ignores keys that are absent.
        column_mapping = {
            'G': 'GP', 'MP': 'MIN',
            'FG%': 'FG_PCT', '3P%': 'FG3_PCT', 'FT%': 'FT_PCT',
            'TRB': 'REB', 'AST': 'AST', 'STL': 'STL', 'BLK': 'BLK', 'TOV': 'TO',
            'PF': 'PF', 'PTS': 'PTS',
            'Rk': 'RANK', 'W': 'WINS', 'L': 'LOSSES', 'W/L%': 'WIN_LOSS_PCT',
            'FG': 'FGM', 'FGA': 'FGA', '3P': 'FG3M', '3PA': 'FG3A',
            '2P': 'FGM2', '2PA': 'FGA2', '2P%': 'FG2_PCT', 'eFG%': 'EFG_PCT',
            'FT': 'FTM', 'FTA': 'FTA', 'ORB': 'OREB', 'DRB': 'DREB',
        }
        team_df = team_df.rename(columns=column_mapping)

        # Coerce stat columns to numeric; team identifiers stay as strings.
        non_numeric_cols = {"Team", "Tm", "RANK"}
        for col in team_df.columns:
            if col not in non_numeric_cols:
                team_df[col] = pd.to_numeric(team_df[col], errors="coerce")

        # 'Tm' was already renamed to 'Team' above, so only 'Team' can
        # exist at this point (the old 'Tm' fallback branch was dead code).
        if 'Team' not in team_df.columns:
            st.warning(f"Could not find a 'Team' or 'Tm' column in the processed team data for {year}.")
            return pd.DataFrame()

        # regex=False is required: '*' alone is an invalid regex, so the
        # old call could raise under pandas versions where str.replace
        # defaulted to regex=True.
        team_df['Team'] = team_df['Team'].astype(str).str.replace('*', '', regex=False).str.strip()

        return team_df
    except Exception as e:
        st.error(f"Error fetching team stats for {year} with BRScraper: {e}")
        return pd.DataFrame()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 220 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 221 |
# Perplexity integration
|
| 222 |
PERP_KEY = os.getenv("PERPLEXITY_API_KEY")
|
|
|
|
| 228 |
return ""
|
| 229 |
hdr = {'Authorization':f'Bearer {PERP_KEY}','Content-Type':'application/json'}
|
| 230 |
payload = {
|
| 231 |
+
"model":"sonar-medium-online", # Changed to a commonly available online model
|
| 232 |
"messages":[{"role":"system","content":system},{"role":"user","content":prompt}],
|
| 233 |
"max_tokens":max_tokens, "temperature":temp
|
| 234 |
}
|
|
|
|
| 250 |
st.error(f"An unexpected error occurred with Perplexity API: {e}")
|
| 251 |
return ""
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 254 |
# Plotting functions (retained from previous version)
|
| 255 |
def create_comparison_chart(data, players_names, metric):
|
|
|
|
| 333 |
# βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 334 |
def player_vs_player():
|
| 335 |
st.markdown('<h2 class="section-header">Player vs Player Comparison</h2>', unsafe_allow_html=True)
|
| 336 |
+
idx = get_player_index_brscraper() # Use BRScraper for player index
|
| 337 |
names = idx['name'].tolist()
|
| 338 |
selected_players = st.multiselect("Select Players (up to 4)", names, max_selections=4)
|
| 339 |
|
|
|
|
| 347 |
|
| 348 |
stats_tabs = st.tabs(["Basic Stats", "Advanced Stats", "Visualizations"])
|
| 349 |
all_player_season_data = [] # To store individual season rows for each player
|
| 350 |
+
players_with_no_data = []
|
|
|
|
| 351 |
|
| 352 |
with st.spinner("Fetching player data..."):
|
| 353 |
for player_name in selected_players:
|
| 354 |
+
df_player_career = get_player_career_stats_brscraper(player_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
if not df_player_career.empty:
|
| 357 |
# Filter for selected seasons. The player_season_stats function
|
|
|
|
| 359 |
filtered_df = df_player_career[df_player_career['Season'].isin(selected_seasons)].copy()
|
| 360 |
|
| 361 |
if not filtered_df.empty:
|
| 362 |
+
# 'Player' column is already added by get_player_career_stats_brscraper
|
| 363 |
all_player_season_data.append(filtered_df)
|
| 364 |
else:
|
| 365 |
+
players_with_no_data.append(player_name)
|
|
|
|
| 366 |
else:
|
| 367 |
+
players_with_no_data.append(player_name)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 368 |
|
| 369 |
# Report on players with no data for selected seasons
|
| 370 |
+
if players_with_no_data:
|
| 371 |
+
st.info(f"No data found for the selected seasons ({', '.join(selected_seasons)}) for: {', '.join(players_with_no_data)}. This might be because the season hasn't started or data is not yet available, or the player name was not found by BRScraper.")
|
| 372 |
|
| 373 |
if not all_player_season_data:
|
| 374 |
st.error("No data available for any of the selected players and seasons to display. Please adjust your selections.")
|
|
|
|
| 503 |
# Extract the end year from the season string (e.g., "2024β25" -> 2025)
|
| 504 |
year_for_team_stats = int(selected_season_str.split('β')[1])
|
| 505 |
|
| 506 |
+
tm_df = get_team_season_stats_brscraper(year_for_team_stats) # Use BRScraper for team stats
|
| 507 |
if tm_df.empty:
|
| 508 |
+
st.info(f"No team data available for the {selected_season_str} season. This might be because the season hasn't started or data is not yet available, or BRScraper could not fetch it.")
|
| 509 |
return
|
| 510 |
|
| 511 |
+
teams = tm_df['Team'].unique().tolist() # Use 'Team' column from BRScraper output
|
| 512 |
selected_teams = st.multiselect("Select Teams (up to 4)", teams, max_selections=4)
|
| 513 |
|
| 514 |
if st.button("Run Comparison"):
|
|
|
|
| 521 |
|
| 522 |
with st.spinner("Fetching team data..."):
|
| 523 |
for t in selected_teams:
|
| 524 |
+
df = tm_df[tm_df.Team == t].copy() # Filter by 'Team' column
|
| 525 |
if not df.empty:
|
| 526 |
# For team stats, we usually get one row per team per season from team_per_game
|
| 527 |
# So, no need for .mean() here, just take the row.
|
|
|
|
| 528 |
df['Season'] = selected_season_str # Add 'Season' column
|
| 529 |
stats.append(df.iloc[0].to_dict()) # Convert the single row to dict
|
| 530 |
else:
|
|
|
|
| 632 |
|
| 633 |
def young_projections():
|
| 634 |
st.markdown('<h2 class="section-header">Young Player Projections</h2>', unsafe_allow_html=True)
|
| 635 |
+
all_p_df = get_player_index_brscraper()
|
| 636 |
all_p = all_p_df['name'].tolist()
|
| 637 |
sp = st.selectbox("Select or enter player", [""]+all_p)
|
| 638 |
if not sp:
|
|
|
|
| 664 |
|
| 665 |
def similar_players():
|
| 666 |
st.markdown('<h2 class="section-header">Similar Players Finder</h2>', unsafe_allow_html=True)
|
| 667 |
+
all_p_df = get_player_index_brscraper()
|
| 668 |
all_p = all_p_df['name'].tolist()
|
| 669 |
tp = st.selectbox("Target Player", all_p)
|
| 670 |
crit = st.multiselect("Criteria",["Position","Height/Weight","Playing Style","Statistical Profile","Age/Experience"],default=["Playing Style","Statistical Profile"])
|