Spaces:
Sleeping
Sleeping
import re
import time
from datetime import datetime, timezone
from io import StringIO

import pandas as pd
import requests
import streamlit as st
from bs4 import BeautifulSoup
# ---------- Configuration & Constants ----------
# FBRef endpoints per league, keyed by the internal league slug used throughout
# the app. Each entry carries the three pages that get scraped plus the
# human-readable league name shown in the UI.
LEAGUES = {
    "premier_league": dict(
        player_stats_url="https://fbref.com/en/comps/9/stats/Premier-League-Stats",
        squad_stats_url="https://fbref.com/en/comps/9/Premier-League-Stats",
        fixtures_url="https://fbref.com/en/comps/9/schedule/Premier-League-Scores-and-Fixtures",
        name="Premier League",
    ),
    "la_liga": dict(
        player_stats_url="https://fbref.com/en/comps/12/stats/La-Liga-Stats",
        squad_stats_url="https://fbref.com/en/comps/12/La-Liga-Stats",
        fixtures_url="https://fbref.com/en/comps/12/schedule/La-Liga-Scores-and-Fixtures",
        name="La Liga",
    ),
    "serie_a": dict(
        player_stats_url="https://fbref.com/en/comps/11/stats/Serie-A-Stats",
        squad_stats_url="https://fbref.com/en/comps/11/Serie-A-Stats",
        fixtures_url="https://fbref.com/en/comps/11/schedule/Serie-A-Scores-and-Fixtures",
        name="Serie A",
    ),
    "bundesliga": dict(
        player_stats_url="https://fbref.com/en/comps/20/stats/Bundesliga-Stats",
        squad_stats_url="https://fbref.com/en/comps/20/Bundesliga-Stats",
        fixtures_url="https://fbref.com/en/comps/20/schedule/Bundesliga-Scores-and-Fixtures",
        name="Bundesliga",
    ),
    "ligue_1": dict(
        player_stats_url="https://fbref.com/en/comps/13/stats/Ligue-1-Stats",
        squad_stats_url="https://fbref.com/en/comps/13/Ligue-1-Stats",
        fixtures_url="https://fbref.com/en/comps/13/schedule/Ligue-1-Scores-and-Fixtures",
        name="Ligue 1",
    ),
}

# Headers sent with every scraping request (browser-like User-Agent).
SCRAPE_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36",
}

# Perplexity chat-completions endpoint and the model names offered in the
# sidebar selector.
PERPLEXITY_API_URL = "https://api.perplexity.ai/chat/completions"
PERPLEXITY_MODELS = [
    "sonar-deep-research",
    "sonar-reasoning-pro",
    "sonar-reasoning",
    "sonar-pro",
    "sonar",  # the model the session state selects by default
    "r1-1776",
]
# Seed st.session_state with the keys the rest of the script reads. Existing
# values are left alone so scraped data and user input survive Streamlit reruns.
# Factories are used so every mutable default is a fresh object.
for _state_key, _make_default in (
    ("player_stats_data", dict),
    ("squad_stats_data", dict),
    ("fixtures_data", dict),
    ("perplexity_api_key", str),
    ("selected_perplexity_model", lambda: "sonar"),  # Default model
):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = _make_default()
# ---------- Helper Functions (from Flask app) ----------
def _sanitize_column_label(label):
    """Turn one raw column label into an identifier-like string.

    Every character that is neither alphanumeric nor '%' becomes '_', '%' is
    spelled out as 'Pct', runs of '_' collapse to one and leading/trailing
    '_' are stripped.
    """
    text = "".join(ch if ch.isalnum() or ch == '%' else "_" for ch in str(label))
    text = text.replace('%', 'Pct')
    return re.sub(r'_+', '_', text).strip('_')


def clean_fbref_df_columns(df):
    """Return a copy of ``df`` with flat, sanitized and unique column names.

    For a MultiIndex header the top level (the category row) is dropped and
    the remaining label is sanitized. Dropping that level can leave the same
    label more than once (e.g. a stat listed under two header groups); the
    first occurrence keeps the plain name and later ones are qualified with
    their sanitized top-level group, or with a numeric suffix when no usable
    group name exists. Unique names matter because ``df[name]`` on a
    duplicated label yields a DataFrame instead of a Series.

    Args:
        df: DataFrame as returned by ``pd.read_html``.

    Returns:
        A new DataFrame with the cleaned column names; ``df`` is not modified.
    """
    columns = df.columns
    if isinstance(columns, pd.MultiIndex):
        groups = [_sanitize_column_label(group) for group in columns.get_level_values(0)]
        # With more than two header levels the remainder is still a MultiIndex;
        # its tuple labels are stringified and sanitized like any other label.
        columns = columns.droplevel(0)
    else:
        groups = [""] * len(columns)

    seen = set()
    names = []
    for group, raw_label in zip(groups, columns):
        base = _sanitize_column_label(raw_label)
        name = base
        # Prefer a descriptive qualifier over a bare counter when the header
        # group is a real name (pandas labels empty header cells "Unnamed: ...").
        if name in seen and group and not group.startswith("Unnamed"):
            name = f"{base}_{group}"
        counter = 2
        while name in seen:
            name = f"{base}_{counter}"
            counter += 1
        seen.add(name)
        names.append(name)

    cleaned = df.copy()
    cleaned.columns = names
    return cleaned
# ---------- Scraping Functions (modified for Streamlit) ----------
def scrape_player_stats_st(league_keys_to_scrape):
    """Scrape the FBRef standard player-stats table for each selected league.

    For every key the league's ``player_stats_url`` is fetched, the table with
    id ``stats_standard`` is parsed, header-repeat rows are removed, stat
    columns are converted to numbers and the result is stored in
    ``st.session_state.player_stats_data[key]``. Progress and errors are
    reported through Streamlit widgets; a failure in one league does not stop
    the remaining leagues.

    Args:
        league_keys_to_scrape: Iterable of keys into ``LEAGUES``.

    Returns:
        None. Results are written to ``st.session_state``.
    """
    st.write("### Scraping Player Stats...")
    my_bar = st.progress(0)
    total_leagues = len(league_keys_to_scrape)
    # Columns that hold text and must not be coerced to numbers.
    text_columns = {'player', 'nation', 'pos', 'squad', 'comp', 'matches', 'match_report'}

    for i, key in enumerate(league_keys_to_scrape):
        league_name = LEAGUES[key]['name']
        url = LEAGUES[key]['player_stats_url']
        st.info(f"Fetching player stats for: {league_name} from {url}")
        try:
            r = requests.get(url, headers=SCRAPE_HEADERS, timeout=45)  # Increased timeout
            r.raise_for_status()
            table_player_standard = BeautifulSoup(r.text, 'html.parser').find('table', {'id': 'stats_standard'})
            if table_player_standard is None:
                # NOTE(review): FBRef reportedly ships some tables wrapped in
                # HTML comments; retry with the comment markers removed.
                # Confirm against a live page.
                uncommented_html = re.sub(r'<!--|-->', '', r.text)
                table_player_standard = BeautifulSoup(uncommented_html, 'html.parser').find(
                    'table', {'id': 'stats_standard'}
                )
            if table_player_standard is None:
                st.error(f"Could not find player standard stats table (id='stats_standard') for {league_name} at {url}")
                continue

            # StringIO: passing literal HTML to read_html is deprecated.
            df_list = pd.read_html(StringIO(str(table_player_standard)), flavor='lxml')  # Use lxml
            if not df_list:
                st.error(f"Pandas could not read any table from the HTML for player stats: {league_name}.")
                continue
            df = clean_fbref_df_columns(df_list[0])  # Usually the first table

            if 'Player' not in df.columns:
                st.error(f"'Player' column not found after cleaning for {league_name}. Columns: {df.columns}")
                st.dataframe(df.head())  # Show what columns are there
                continue

            # FBRef repeats the header row inside the table body; drop those rows.
            keep = df['Player'].notna() & (df['Player'] != 'Player')
            if 'Rk' in df.columns:
                keep &= df['Rk'].notna() & (df['Rk'] != 'Rk')
            else:
                st.warning(f"'Rk' column not found after cleaning for {league_name}. Filtering might be less effective. Columns: {df.columns}")
            # Copy so the column assignments below act on an independent frame.
            df = df[keep].copy()

            for col in df.columns:
                if col.lower() in text_columns:
                    continue
                try:
                    df[col] = pd.to_numeric(df[col], errors='coerce')
                except (TypeError, ValueError):
                    # Raised when the label is duplicated (df[col] is then a
                    # DataFrame); leave such columns unconverted.
                    continue
            df = df.fillna(0)  # Or use more sophisticated NaN handling for specific columns

            st.session_state.player_stats_data[key] = df
            st.success(f"Successfully scraped and processed player stats for {league_name}.")
            st.dataframe(df.head())  # Show a preview
        except Exception as e:
            st.error(f"Error scraping player stats for {league_name}: {e}")
        finally:
            # Runs on success, on `continue` and on errors alike, so the bar
            # always advances and requests stay spaced out.
            my_bar.progress((i + 1) / total_leagues)
            time.sleep(5)  # Be polite
    st.write("Player stats scraping complete.")
def _squad_table_has_standings_columns(table_tag):
    """Return True if the parsed table exposes Squad/MP/W/D/L/Pts columns."""
    try:
        frames = pd.read_html(StringIO(str(table_tag)), flavor='lxml')
    except ValueError:
        # read_html raises ValueError when it finds no table; treat as no match.
        return False
    if not frames:
        return False
    columns = frames[0].columns
    if isinstance(columns, pd.MultiIndex):
        columns = columns.droplevel(0)
    return all(name in columns for name in ('Squad', 'MP', 'W', 'D', 'L', 'Pts'))


def _is_overall_results_id(id_value):
    """Return True for an id attribute containing both 'overall' and 'results'."""
    if not id_value:
        return False
    lowered = str(id_value).lower()
    return "overall" in lowered and "results" in lowered


def scrape_squad_stats_st(league_keys_to_scrape):
    """Scrape the league table (squad stats) for each selected league.

    The table is located in three steps: a ``<table>`` whose id contains both
    "overall" and "results"; otherwise a table whose caption mentions "league
    table", "regular season" or "overall" (but not "squad") and which has the
    standings columns; otherwise the ``stats_standard`` table if it has the
    standings columns. The cleaned frame is stored in
    ``st.session_state.squad_stats_data[key]``.

    Args:
        league_keys_to_scrape: Iterable of keys into ``LEAGUES``.

    Returns:
        None. Results are written to ``st.session_state``.
    """
    st.write("### Scraping Squad Stats (League Tables)...")
    my_bar = st.progress(0)
    total_leagues = len(league_keys_to_scrape)
    numeric_cols = ['MP', 'W', 'D', 'L', 'GF', 'GA', 'GD', 'Pts', 'xG', 'xGA', 'xGD', 'Attendance']

    for i, key in enumerate(league_keys_to_scrape):
        league_name = LEAGUES[key]['name']
        url = LEAGUES[key]['squad_stats_url']
        st.info(f"Fetching squad stats for: {league_name} from {url}")
        try:
            r = requests.get(url, headers=SCRAPE_HEADERS, timeout=30)
            r.raise_for_status()
            soup = BeautifulSoup(r.text, 'html.parser')

            # Match on <table> elements only: other elements (e.g. wrappers)
            # can carry a similar id and must not shadow the real table.
            league_table = soup.find('table', id=_is_overall_results_id)

            if league_table is None:
                for caption_tag in soup.find_all('caption'):
                    caption_text = caption_tag.get_text().lower()
                    mentions_table = (
                        "league table" in caption_text
                        or "regular season" in caption_text
                        or "overall" in caption_text
                    )
                    if not mentions_table or "squad" in caption_text:
                        continue
                    parent_table = caption_tag.find_parent('table')
                    if parent_table is not None and _squad_table_has_standings_columns(parent_table):
                        league_table = parent_table
                        break

            if league_table is None:  # Fallback to first 'stats_standard' if it's a squad table
                table_squad_standard = soup.find('table', {'id': 'stats_standard'})
                if table_squad_standard is not None and _squad_table_has_standings_columns(table_squad_standard):
                    league_table = table_squad_standard

            if league_table is None:
                st.error(f"Could not find a suitable squad stats/league table for {league_name} at {url}")
                continue

            # StringIO: passing literal HTML to read_html is deprecated.
            df = pd.read_html(StringIO(str(league_table)), flavor='lxml')[0]
            df = clean_fbref_df_columns(df)
            if 'Squad' not in df.columns or 'Rk' not in df.columns:
                st.error(f"Squad or Rk column missing after cleaning for squad stats {league_name}. Columns: {df.columns}")
                st.dataframe(df.head())
                continue

            # Drop empty rows and header rows repeated inside the table body.
            keep = df['Squad'].notna() & (df['Squad'] != 'Squad')
            keep &= df['Rk'].notna() & (df['Rk'] != 'Rk')
            df = df[keep].copy()

            for col in df.columns:
                if col in numeric_cols:
                    df[col] = pd.to_numeric(df[col], errors='coerce')
            df = df.fillna(0)

            st.session_state.squad_stats_data[key] = df
            st.success(f"Successfully scraped squad stats for {league_name}.")
        except Exception as e:
            st.error(f"Error scraping squad stats for {league_name}: {e}")
        finally:
            # Runs on success, on `continue` and on errors alike, so the bar
            # always advances and requests stay spaced out.
            my_bar.progress((i + 1) / total_leagues)
            time.sleep(3)
    st.write("Squad stats scraping complete.")
def _table_has_fixture_columns(table_tag):
    """Return True if the parsed table exposes Wk/Date/Home/Away columns."""
    try:
        frames = pd.read_html(StringIO(str(table_tag)), flavor='lxml')
    except ValueError:
        # read_html raises ValueError when it finds no table; treat as no match.
        return False
    if not frames:
        return False
    columns = frames[0].columns
    if isinstance(columns, pd.MultiIndex):
        columns = columns.droplevel(0)
    return all(name in columns for name in ('Wk', 'Date', 'Home', 'Away'))


def scrape_fixtures_st(league_keys_to_scrape):
    """Scrape the scores-and-fixtures table for each selected league.

    The table is found via a caption containing "scores and fixtures", falling
    back to tables with the ``stats_table`` class that have the fixture
    columns. The stored frame gains ``HomeGoals``/``AwayGoals`` (NaN/NA for
    unplayed matches) and ``Date_parsed`` (datetime), while ``Date`` is
    rewritten as a ``YYYY-MM-DD`` string. Results go to
    ``st.session_state.fixtures_data[key]``.

    Args:
        league_keys_to_scrape: Iterable of keys into ``LEAGUES``.

    Returns:
        None. Results are written to ``st.session_state``.
    """
    st.write("### Scraping Fixtures...")
    my_bar = st.progress(0)
    total_leagues = len(league_keys_to_scrape)

    for i, key in enumerate(league_keys_to_scrape):
        league_name = LEAGUES[key]['name']
        url = LEAGUES[key]['fixtures_url']
        st.info(f"Fetching fixtures for: {league_name} from {url}")
        try:
            r = requests.get(url, headers=SCRAPE_HEADERS, timeout=30)
            r.raise_for_status()
            soup = BeautifulSoup(r.text, 'html.parser')

            fixture_table = None
            # Fixture tables often have a caption containing "Scores and Fixtures"
            for caption_tag in soup.find_all('caption'):
                if "scores and fixtures" in caption_tag.get_text().lower():
                    fixture_table = caption_tag.find_parent('table')
                    if fixture_table is not None:
                        break

            if fixture_table is None:  # Fallback if caption not found
                potential_tables = soup.find_all(
                    'table', class_=lambda x: x and "stats_table" in x and "sched" in x
                )  # More specific class
                if not potential_tables:
                    potential_tables = soup.find_all('table', class_="stats_table")  # Generic fallback
                # Pick the first candidate with typical fixture columns.
                fixture_table = next(
                    (pt for pt in potential_tables if _table_has_fixture_columns(pt)), None
                )

            if fixture_table is None:
                st.error(f"Could not find a suitable fixtures table for {league_name} at {url}")
                continue

            # StringIO: passing literal HTML to read_html is deprecated.
            df = pd.read_html(StringIO(str(fixture_table)), flavor='lxml')[0]
            df = clean_fbref_df_columns(df)
            if 'Wk' not in df.columns or 'Home' not in df.columns:
                st.error(f"Wk or Home column missing after cleaning for fixtures {league_name}. Columns: {df.columns}")
                st.dataframe(df.head())
                continue

            # Keep real fixtures only: a week number and a home team that is
            # not a repeated header cell.
            keep = df['Wk'].notna() & df['Home'].notna() & (df['Home'] != 'Home')
            df = df[keep].copy()

            if 'Score' in df.columns:
                # Extract "<home><dash><away>" per row so one odd cell (extra
                # text, a hyphen instead of an en-dash) cannot blank the goals
                # of every other match.
                goals = df['Score'].astype(str).str.extract(r'(\d+)\s*[–-]\s*(\d+)')
                df['HomeGoals'] = pd.to_numeric(goals[0], errors='coerce')
                df['AwayGoals'] = pd.to_numeric(goals[1], errors='coerce')
            else:
                df['HomeGoals'] = pd.NA  # Use pandas NA for missing numeric
                df['AwayGoals'] = pd.NA

            if 'Date' in df.columns:
                # Unparseable dates become NaT; Date_parsed is kept for sorting.
                df['Date_parsed'] = pd.to_datetime(df['Date'], errors='coerce')
                df['Date'] = df['Date_parsed'].dt.strftime('%Y-%m-%d')

            st.session_state.fixtures_data[key] = df
            st.success(f"Successfully scraped fixtures for {league_name}.")
        except Exception as e:
            st.error(f"Error scraping fixtures for {league_name}: {e}")
        finally:
            # Runs on success, on `continue` and on errors alike, so the bar
            # always advances and requests stay spaced out.
            my_bar.progress((i + 1) / total_leagues)
            time.sleep(3)
    st.write("Fixtures scraping complete.")
# ---------- Perplexity API Functions ----------
def _describe_perplexity_error(exc):
    """Build a user-facing message for a failed Perplexity request.

    Never raises: the error body is inspected defensively because it may be
    missing, not JSON, or JSON of an unexpected shape.
    """
    response = getattr(exc, 'response', None)
    if response is None:
        return f"Error communicating with Perplexity API: {exc}"
    try:
        body = response.json()
    except ValueError:  # Not JSON
        return f"Perplexity API error: {response.status_code} - {response.reason}. Response: {response.text[:200]}"
    error = body.get("error") if isinstance(body, dict) else None
    if isinstance(error, dict):
        error_detail = error.get("message", response.text)
    elif error:
        # "error" may be a plain string rather than an object.
        error_detail = str(error)
    else:
        error_detail = response.text
    return f"Perplexity API error ({response.status_code}): {error_detail}"


def get_perplexity_response(api_key, model_name, prompt, system_message="You are a helpful football analyst AI."):
    """Send one chat-completion request to Perplexity and return the answer text.

    Args:
        api_key: Bearer token for the Perplexity API; if empty, an error is
            shown and no request is made.
        model_name: Model identifier placed in the request payload.
        prompt: User message content.
        system_message: System message content.

    Returns:
        The content of the first choice's message ('' if the response carries
        no content), or None when the key is missing, the request fails or the
        response has an unexpected shape. Failures are reported via
        ``st.error`` rather than raised.
    """
    if not api_key:
        st.error("Perplexity API Key is not set. Please enter it in the sidebar.")
        return None

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json',
        'Accept': 'application/json',
    }
    payload = {
        'model': model_name,
        'messages': [
            {'role': 'system', 'content': system_message},
            {'role': 'user', 'content': prompt},
        ],
    }

    try:
        with st.spinner(f"Querying Perplexity AI with model: {model_name}..."):
            response = requests.post(PERPLEXITY_API_URL, headers=headers, json=payload, timeout=60)  # Increased timeout
        response.raise_for_status()
        data = response.json()
    except requests.exceptions.RequestException as e:
        st.error(_describe_perplexity_error(e))
        return None
    except Exception as e:
        st.error(f"An unexpected error occurred with Perplexity API: {e}")
        return None

    try:
        choices = data.get('choices', [{}])
        return choices[0].get('message', {}).get('content', '')
    except (AttributeError, IndexError, KeyError, TypeError) as e:
        # Covers an empty `choices` list and non-dict payloads.
        st.error(f"An unexpected error occurred with Perplexity API: {e}")
        return None
# ---------- Streamlit UI ----------
st.set_page_config(layout="wide")
st.title("⚽ Football Data Scraper & Perplexity Tester v2")
st.markdown("Test data retrieval from FBRef and Perplexity API integration. No Firebase calls.")

# --- Sidebar ---
st.sidebar.header("API Configuration")
st.session_state.perplexity_api_key = st.sidebar.text_input(
    "Perplexity API Key:",
    type="password",
    value=st.session_state.perplexity_api_key,
    help="Your Perplexity AI API key.",
)

# Resolve the pre-selected model by name. If the stored model is no longer
# offered, fall back to 'sonar' (or the first model) instead of relying on a
# hard-coded list position that breaks when PERPLEXITY_MODELS is reordered.
_preselected_model = st.session_state.selected_perplexity_model
if _preselected_model not in PERPLEXITY_MODELS:
    _preselected_model = "sonar" if "sonar" in PERPLEXITY_MODELS else PERPLEXITY_MODELS[0]
st.session_state.selected_perplexity_model = st.sidebar.selectbox(
    "Select Perplexity Model:",
    options=PERPLEXITY_MODELS,
    index=PERPLEXITY_MODELS.index(_preselected_model),
)

st.sidebar.markdown("---")
st.sidebar.header("Scraping Controls")
selected_league_keys = st.sidebar.multiselect(
    "Select leagues to scrape:",
    options=list(LEAGUES.keys()),
    format_func=lambda key: LEAGUES[key]['name'],
    default=[],
)

# One button per scraper; each needs at least one selected league.
for _btn_label, _btn_key, _scraper, _empty_hint in (
    ("Scrape Player Stats", "scrape_player_btn", scrape_player_stats_st, "Select leagues for player stats."),
    ("Scrape Squad Stats", "scrape_squad_btn", scrape_squad_stats_st, "Select leagues for squad stats."),
    ("Scrape Fixtures", "scrape_fixture_btn", scrape_fixtures_st, "Select leagues for fixtures."),
):
    if st.sidebar.button(_btn_label, key=_btn_key):
        if selected_league_keys:
            _scraper(selected_league_keys)
        else:
            st.sidebar.warning(_empty_hint)

st.sidebar.markdown("---")
st.sidebar.header("View Scraped Data")
# The empty string is the "nothing selected" placeholder option.
display_league_key = st.sidebar.selectbox(
    "Select league to display data for:",
    options=[""] + list(LEAGUES.keys()),
    format_func=lambda key: LEAGUES[key]['name'] if key else "Select a league",
)
# --- Main Content Area ---
# Show the three scraped datasets of the chosen league in tabs, or a hint when
# no league has been picked in the sidebar.
if not display_league_key:
    st.info("Select a league from the sidebar to view its scraped data, or use the feature testers below.")
else:
    _league_label = LEAGUES[display_league_key]['name']
    # (tab title, backing store, message shown when nothing was scraped yet)
    _dataset_views = (
        ("Player Stats", st.session_state.player_stats_data, "No player stats data loaded. Scrape first."),
        ("Squad Stats", st.session_state.squad_stats_data, "No squad stats data loaded. Scrape first."),
        ("Fixtures", st.session_state.fixtures_data, "No fixtures data loaded. Scrape first."),
    )
    tab1, tab2, tab3 = st.tabs([f"{_title} ({_league_label})" for _title, _, _ in _dataset_views])
    for _tab, (_, _store, _empty_message) in zip((tab1, tab2, tab3), _dataset_views):
        with _tab:
            if display_league_key in _store:
                st.dataframe(_store[display_league_key])
            else:
                st.info(_empty_message)
st.markdown("---")
st.header("FBRef Data Feature Testing (Local)")

# --- 1. Player Comparison Tool ---
st.subheader("1. Player Comparison (Local Data)")
col1_pc, col2_pc, col3_pc = st.columns([1, 2, 2])

# Only leagues with non-empty scraped player data can be chosen.
pc_league_options = [""]
pc_league_options += [
    league_key
    for league_key, frame in st.session_state.player_stats_data.items()
    if not frame.empty
]
pc_league = col1_pc.selectbox(
    "League:",
    options=pc_league_options,
    format_func=lambda k: LEAGUES[k]['name'] if k else "Select",
    key="pc_league_select",
)

pc_player1_name = ""
pc_player2_name = ""
if pc_league and pc_league in st.session_state.player_stats_data:
    # With a league chosen, offer its players in dropdowns.
    player_list = sorted(st.session_state.player_stats_data[pc_league]['Player'].unique())
    pc_player1_name = col2_pc.selectbox("Player 1 Name:", options=[""] + player_list, key="pc_p1_select")
    pc_player2_name = col3_pc.selectbox("Player 2 Name:", options=[""] + player_list, key="pc_p2_select")
else:
    pc_player1_name = col2_pc.text_input("Player 1 Name (Type if no league selected):", key="pc_p1_text")
    pc_player2_name = col3_pc.text_input("Player 2 Name (Type if no league selected):", key="pc_p2_text")

if st.button("Compare Players (Local)", key="compare_local_btn"):
    if not (pc_league and pc_player1_name and pc_player2_name):
        st.warning("Please select a league and two player names for comparison.")
    elif pc_league not in st.session_state.player_stats_data:
        st.error(f"Player stats data for {LEAGUES[pc_league]['name']} not loaded or is empty. Please scrape first.")
    else:
        all_players_df = st.session_state.player_stats_data[pc_league]
        # Players are matched by exact name.
        player1_data = all_players_df[all_players_df['Player'] == pc_player1_name]
        player2_data = all_players_df[all_players_df['Player'] == pc_player2_name]
        for _player_name, _player_rows in (
            (pc_player1_name, player1_data),
            (pc_player2_name, player2_data),
        ):
            if _player_rows.empty:
                st.warning(f"Could not find data for player: {_player_name} in {LEAGUES[pc_league]['name']}")
            else:
                st.write(f"**Stats for {_player_name}:**")
                st.dataframe(_player_rows)
# --- 2. Fixture Analysis (Local Data) ---
def get_form_df(team_name_norm, all_fixtures_sorted_df, num_matches=5):
    """Return a team's most recent played matches.

    Args:
        team_name_norm: Team name, already stripped and lower-cased.
        all_fixtures_sorted_df: Fixtures frame sorted newest first, with
            'Home', 'Away' and 'HomeGoals' columns.
        num_matches: Maximum number of rows to return.

    Returns:
        The first ``num_matches`` rows where the team is home or away and
        'HomeGoals' is not missing (i.e. the match has a score).
    """
    home_names = all_fixtures_sorted_df['Home'].str.strip().str.lower()
    away_names = all_fixtures_sorted_df['Away'].str.strip().str.lower()
    team_matches = all_fixtures_sorted_df[(home_names == team_name_norm) | (away_names == team_name_norm)]
    played_matches = team_matches[team_matches['HomeGoals'].notna()]
    return played_matches.head(num_matches)


st.subheader("2. Fixture Analysis (Local Data)")
col1_fa, col2_fa, col3_fa = st.columns([1, 2, 2])

# Only leagues with non-empty scraped fixtures can be chosen.
fa_league_options = [""] + [
    k for k in st.session_state.fixtures_data.keys() if not st.session_state.fixtures_data[k].empty
]
fa_league = col1_fa.selectbox(
    "League:",
    options=fa_league_options,
    format_func=lambda k: LEAGUES[k]['name'] if k else "Select",
    key="fa_league_select",
)

fa_home_team = ""
fa_away_team = ""
if fa_league and fa_league in st.session_state.fixtures_data:
    # Team names from both columns. Missing values are dropped first because
    # sorting a mix of NaN floats and strings raises TypeError.
    home_teams = st.session_state.fixtures_data[fa_league]['Home'].dropna().unique()
    away_teams = st.session_state.fixtures_data[fa_league]['Away'].dropna().unique()
    all_teams = sorted({str(team) for team in list(home_teams) + list(away_teams)})
    fa_home_team = col2_fa.selectbox("Home Team:", options=[""] + all_teams, key="fa_home_select")
    fa_away_team = col3_fa.selectbox("Away Team:", options=[""] + all_teams, key="fa_away_select")
else:
    fa_home_team = col2_fa.text_input("Home Team (Type if no league selected):", key="fa_home_text")
    fa_away_team = col3_fa.text_input("Away Team (Type if no league selected):", key="fa_away_text")

if st.button("Analyze Fixture (Local)", key="analyze_local_btn"):
    if not (fa_league and fa_home_team and fa_away_team):
        st.warning("Please select a league and enter/select home & away team names for analysis.")
    elif fa_league not in st.session_state.fixtures_data:
        st.error(f"Fixtures data for {LEAGUES[fa_league]['name']} not loaded or is empty. Please scrape first.")
    else:
        all_fixtures_df = st.session_state.fixtures_data[fa_league].copy()  # Use a copy
        home_team_norm = fa_home_team.strip().lower()
        away_team_norm = fa_away_team.strip().lower()

        # Newest first. Prefer the datetime column; the 'Date' strings are
        # YYYY-MM-DD and therefore also sort chronologically.
        if 'Date_parsed' in all_fixtures_df.columns:
            all_fixtures_df = all_fixtures_df.sort_values(by='Date_parsed', ascending=False)
        elif 'Date' in all_fixtures_df.columns:
            all_fixtures_df = all_fixtures_df.sort_values(by='Date', ascending=False)

        # Normalise the columns the same way as the user input (strip + lower).
        _home_names = all_fixtures_df['Home'].str.strip().str.lower()
        _away_names = all_fixtures_df['Away'].str.strip().str.lower()
        h2h_matches = all_fixtures_df[
            ((_home_names == home_team_norm) & (_away_names == away_team_norm))
            | ((_home_names == away_team_norm) & (_away_names == home_team_norm))
        ]
        st.write(f"**Head-to-Head between {fa_home_team} and {fa_away_team}:**")
        if not h2h_matches.empty:
            st.dataframe(h2h_matches)  # Already sorted by date
        else:
            st.info("No H2H matches found in the scraped data.")

        home_form_df = get_form_df(home_team_norm, all_fixtures_df)
        away_form_df = get_form_df(away_team_norm, all_fixtures_df)
        for _team_label, _form_df in ((fa_home_team, home_form_df), (fa_away_team, away_form_df)):
            st.write(f"**Recent Form for {_team_label} (last 5 played):**")
            if not _form_df.empty:
                st.dataframe(_form_df)
            else:
                st.info(f"No recent played matches found for {_team_label}.")
# --- 3. Visualization Data (Local Data) ---
st.subheader("3. Visualization Data (Example: Top Scorers - Local Data)")
col1_vd, col2_vd = st.columns(2)
# Only leagues with non-empty scraped player data can be chosen.
vd_league_options = [""] + [
    k for k in st.session_state.player_stats_data.keys() if not st.session_state.player_stats_data[k].empty
]
vd_league = col1_vd.selectbox(
    "League:",
    options=vd_league_options,
    format_func=lambda k: LEAGUES[k]['name'] if k else "Select",
    key="vd_league_select",
)

if st.button("Show Top Scorers (Local)", key="top_scorers_local_btn"):
    if not vd_league:
        st.warning("Please select a league for visualization data.")
    elif vd_league not in st.session_state.player_stats_data:
        st.error(f"Player stats data for {LEAGUES[vd_league]['name']} not loaded or is empty. Please scrape first.")
    else:
        player_df = st.session_state.player_stats_data[vd_league]
        # Keep the first occurrence of any duplicated column label: with
        # duplicates, player_df['Gls'] would be a DataFrame and pd.to_numeric
        # would raise TypeError.
        player_df = player_df.loc[:, ~player_df.columns.duplicated()].copy()

        if 'Gls' not in player_df.columns or 'Ast' not in player_df.columns:
            st.error(f"Required columns 'Gls' or 'Ast' not found in player stats for {LEAGUES[vd_league]['name']}.")
        else:
            player_df['Gls'] = pd.to_numeric(player_df['Gls'], errors='coerce').fillna(0)
            player_df['Ast'] = pd.to_numeric(player_df['Ast'], errors='coerce').fillna(0)
            # Rank by goals, assists as tie-breaker.
            top_scorers = player_df.sort_values(by=['Gls', 'Ast'], ascending=[False, False]).head(10)

            st.write(f"**Top 10 Scorers Data for {LEAGUES[vd_league]['name']}:**")
            # Show only the columns that exist so a missing 'Squad' or
            # 'Player' column does not raise KeyError.
            _display_columns = [c for c in ('Player', 'Squad', 'Gls', 'Ast') if c in top_scorers.columns]
            st.dataframe(top_scorers[_display_columns])

            if not top_scorers.empty and 'Player' in top_scorers.columns:
                st.write("**Chart: Goals & Assists by Top Scorers**")
                chart_data = top_scorers.set_index('Player')[['Gls', 'Ast']]
                st.bar_chart(chart_data)
st.markdown("---")
st.header("Perplexity API Testing")

# --- 4. Fixture Report via Perplexity ---
st.subheader("4. Fixture Report (via Perplexity AI)")
fr_home_team = st.text_input("Home Team (for Perplexity Report):", key="fr_home_pplx")
fr_away_team = st.text_input("Away Team (for Perplexity Report):", key="fr_away_pplx")
fr_match_date = st.text_input(
    "Match Date (e.g., YYYY-MM-DD) (for Perplexity Report):",
    key="fr_date_pplx",
    placeholder="YYYY-MM-DD",
)

if st.button("Get Fixture Report from Perplexity", key="fr_perplexity_btn"):
    # All three inputs and an API key are required before a request is sent.
    if not (fr_home_team and fr_away_team and fr_match_date):
        st.warning("Please enter Home Team, Away Team, and Match Date for the report.")
    elif not st.session_state.perplexity_api_key:
        st.error("Perplexity API Key is not set in the sidebar.")
    else:
        prompt = (
            f"Generate a concise pre-match report for the football match: {fr_home_team} vs {fr_away_team} scheduled for {fr_match_date}.\n"
            "Include the following sections if possible, keeping each brief:\n"
            "1. Recent Form (last 3-5 matches for each team, e.g., WWLDW).\n"
            "2. Head-to-Head (H2H) summary of their last few encounters.\n"
            "3. Key Players to Watch (one or two from each team with brief reason).\n"
            "4. Brief Tactical Outlook or Prediction (optional, if confident).\n"
            "Prioritize information from reputable football sources. Be objective."
        )
        report = get_perplexity_response(
            st.session_state.perplexity_api_key,
            st.session_state.selected_perplexity_model,
            prompt,
            "You are a football analyst providing pre-match reports.",
        )
        # An empty or None report is skipped without further output here.
        if report:
            st.markdown("**Perplexity AI Fixture Report:**")
            st.markdown(report)
# --- 5. Custom Query via Perplexity ---
st.subheader("5. Custom Query (via Perplexity AI)")
custom_query_text = st.text_area("Enter your football-related question:", height=100, key="custom_q_pplx")

if st.button("Ask Perplexity AI", key="custom_q_btn"):
    # A question and an API key are required before a request is sent.
    if not custom_query_text:
        st.warning("Please enter a question to ask Perplexity AI.")
    elif not st.session_state.perplexity_api_key:
        st.error("Perplexity API Key is not set in the sidebar.")
    else:
        answer = get_perplexity_response(
            st.session_state.perplexity_api_key,
            st.session_state.selected_perplexity_model,
            custom_query_text,
        )
        # An empty or None answer is skipped without further output here.
        if answer:
            st.markdown("**Perplexity AI Answer:**")
            st.markdown(answer)

st.markdown("---")
st.caption("Streamlit test app. API keys are not stored after session.")