Spaces:
Sleeping
Sleeping
| from fastapi import FastAPI, HTTPException | |
| from fastapi.middleware.cors import CORSMiddleware | |
| import pandas as pd | |
| import uvicorn | |
| import plotly.graph_objects as go | |
| import logging | |
| import numpy as np | |
| import os | |
| import json | |
| from groq import Groq | |
| from dotenv import load_dotenv | |
# Module-wide logger shared by every helper and endpoint in this file.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load settings from a local .env file (if present) into the environment.
load_dotenv()

GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    logger.error("GROQ_API_KEY not found in environment variables.")
    raise RuntimeError("GROQ_API_KEY not found in environment variables.")

# Proxy URLs can embed credentials (http://user:password@host), so log only
# whether each variable is set instead of echoing its value.
logger.info(f"HTTP_PROXY set: {'HTTP_PROXY' in os.environ}")
logger.info(f"HTTPS_PROXY set: {'HTTPS_PROXY' in os.environ}")
os.environ.pop("HTTP_PROXY", None)
os.environ.pop("HTTPS_PROXY", None)
os.environ.pop("NO_PROXY", None)
logger.info("Proxy environment variables cleared to prevent 'proxies' error.")

try:
    client = Groq(api_key=GROQ_API_KEY)
except Exception as e:
    logger.error(f"Failed to initialize Groq client: {str(e)}")
    raise RuntimeError(f"Groq client initialization failed: {str(e)}") from e
else:
    logger.info("Groq client initialized successfully.")

app = FastAPI()

# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive; restrict allow_origins if cookies or auth headers are ever used.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Load the datasets once at import time. A failure here is a startup failure,
# not a per-request error, so it is reported as RuntimeError rather than
# HTTPException (there is no HTTP request to answer yet).
try:
    matches_df = pd.read_csv('data/results.csv')
    goals_df = pd.read_csv('data/goalscorers.csv')
    with open('data/country_codes.json', 'r', encoding='utf-8') as f:
        COUNTRY_CODE_MAP = json.load(f)
except FileNotFoundError as e:
    logger.error(f"File not found: {e}")
    raise RuntimeError("Data files not found or inaccessible") from e
except pd.errors.EmptyDataError as e:
    logger.error(f"CSV files are empty: {e}")
    raise RuntimeError("Data files are empty or invalid") from e

# Missing or unparseable scores become 0, so such rows are treated as 0-0
# results by the statistics helpers.
matches_df['home_score'] = pd.to_numeric(matches_df['home_score'], errors='coerce').fillna(0)
matches_df['away_score'] = pd.to_numeric(matches_df['away_score'], errors='coerce').fillna(0)

# The goal coordinates are randomly generated placeholders (fixed seed for
# reproducibility), not real shot locations: goals by the home team are
# placed at x in [80, 100], all other goals at x in [0, 20].
np.random.seed(42)
goals_df['x_coord'] = np.where(
    goals_df['team'] == goals_df['home_team'],
    np.random.uniform(80, 100, len(goals_df)).round(),
    np.random.uniform(0, 20, len(goals_df)).round()
)
goals_df['y_coord'] = np.random.uniform(20, 80, len(goals_df)).round()

# Lookup collections used for request validation and the listing endpoints.
teams = set(matches_df['home_team'].unique()).union(set(matches_df['away_team'].unique()))
players = sorted(str(scorer) for scorer in goals_df['scorer'].dropna().unique())

logger.warning("Model loading skipped due to compatibility issues. Prediction endpoint disabled.")
def summarize_with_groq(text):
    """Ask the Groq chat-completions API for a short summary of ``text``.

    Args:
        text (str): The text to summarize.

    Returns:
        str: The content of the first completion choice, or a fixed fallback
            message if the request raises any exception.
    """
    try:
        prompt_messages = [
            {"role": "system", "content": "You are a helpful assistant that provides concise summaries."},
            {"role": "user", "content": f"Summarize the following text:\n\n{text}"},
        ]
        completion = client.chat.completions.create(
            messages=prompt_messages,
            model="llama-3.3-70b-versatile",
            max_tokens=150,
        )
        first_choice = completion.choices[0]
        return first_choice.message.content
    except Exception as err:
        # Summaries are best-effort: log the failure and return a placeholder.
        logger.error(f"Error summarizing with Groq: {err}")
        return "Summary unavailable due to an error."
def get_team_stats(team_name):
    """Compute overall and per-tournament results for one team.

    Args:
        team_name (str): The team to analyze.

    Returns:
        dict: Total matches, wins, losses, draws, home/away match counts, a
            per-tournament breakdown (matches played, wins, losses, draws,
            win percentage) and the team's country code ("unknown" when the
            team has no entry in the country-code map).
    """
    as_home = matches_df[matches_df['home_team'] == team_name]
    as_away = matches_df[matches_df['away_team'] == team_name]

    if as_home.empty and as_away.empty:
        return {
            "total_matches": 0,
            "wins": 0,
            "losses": 0,
            "draws": 0,
            "home_matches_played": 0,
            "away_matches_played": 0,
            "tournament_performance": {},
            "country_code": COUNTRY_CODE_MAP.get(team_name, "unknown")
        }

    def _count(mask):
        # Number of rows selected by a boolean mask, as a plain int.
        return int(mask.sum())

    won_total = (_count(as_home['home_score'] > as_home['away_score'])
                 + _count(as_away['away_score'] > as_away['home_score']))
    lost_total = (_count(as_home['home_score'] < as_home['away_score'])
                  + _count(as_away['away_score'] < as_away['home_score']))
    drawn_total = (_count(as_home['home_score'] == as_home['away_score'])
                   + _count(as_away['away_score'] == as_away['home_score']))

    combined = pd.concat([as_home, as_away])
    per_tournament = {}
    for name in combined['tournament'].unique():
        subset = combined[combined['tournament'] == name]
        is_home_side = subset['home_team'] == team_name
        is_away_side = subset['away_team'] == team_name
        home_ahead = subset['home_score'] > subset['away_score']
        away_ahead = subset['away_score'] > subset['home_score']

        won = _count((is_home_side & home_ahead) | (is_away_side & away_ahead))
        lost = _count((is_home_side & away_ahead) | (is_away_side & home_ahead))
        drawn = _count(subset['home_score'] == subset['away_score'])
        played = won + lost + drawn

        if played > 0:
            win_pct = round((won / played * 100), 2)
        else:
            win_pct = 0.0

        per_tournament[name] = {
            "matches_played": played,
            "wins": won,
            "losses": lost,
            "draws": drawn,
            "win_percentage": win_pct
        }

    return {
        "total_matches": len(as_home) + len(as_away),
        "wins": won_total,
        "losses": lost_total,
        "draws": drawn_total,
        "home_matches_played": len(as_home),
        "away_matches_played": len(as_away),
        "tournament_performance": per_tournament,
        "country_code": COUNTRY_CODE_MAP.get(team_name, "unknown")
    }
def get_match_goalscorers(date, home_team, away_team):
    """Look up the goals recorded for a single match.

    Args:
        date (str): The date of the match.
        home_team (str): The home team name.
        away_team (str): The away team name.

    Returns:
        list: One dict per goal with the keys 'scorer', 'minute', 'team',
            'own_goal' and 'penalty'; empty when no goal rows match.
    """
    same_date = goals_df['date'] == date
    same_home = goals_df['home_team'] == home_team
    same_away = goals_df['away_team'] == away_team
    wanted_columns = ['scorer', 'minute', 'team', 'own_goal', 'penalty']
    selected = goals_df.loc[same_date & same_home & same_away, wanted_columns]
    return selected.to_dict('records')
def get_head_to_head_stats(team1, team2, num_matches=5):
    """Calculate head-to-head statistics between two teams.

    Args:
        team1 (str): The first team name.
        team2 (str): The second team name.
        num_matches (int, optional): How many rows to take from the end of
            the matching fixtures for ``last_matches``. Defaults to 5.
            NOTE(review): these are the most recent meetings only if the
            results file is in chronological order -- confirm.

    Returns:
        dict: Total matches, per-team wins and goals (keyed
            ``"<team>_wins"`` / ``"<team>_goals"``), draws, a goal-difference
            label, the selected last matches with their goalscorers, and a
            Plotly stacked-bar figure serialized as JSON (``None`` when the
            teams have never met).
    """
    team1_hosts = (matches_df['home_team'] == team1) & (matches_df['away_team'] == team2)
    team2_hosts = (matches_df['home_team'] == team2) & (matches_df['away_team'] == team1)
    matches = matches_df[team1_hosts | team2_hosts]
    if matches.empty:
        return {"total_matches": 0, f"{team1}_wins": 0, f"{team2}_wins": 0, "draws": 0,
                f"{team1}_goals": 0, f"{team2}_goals": 0, "goal_difference": "Even",
                "last_matches": [], "chart": None}

    total_matches = len(matches)
    home_won = matches['home_score'] > matches['away_score']
    away_won = matches['away_score'] > matches['home_score']
    team1_wins = int((((matches['home_team'] == team1) & home_won) |
                      ((matches['away_team'] == team1) & away_won)).sum())
    team2_wins = int((((matches['home_team'] == team2) & home_won) |
                      ((matches['away_team'] == team2) & away_won)).sum())
    draws = int((matches['home_score'] == matches['away_score']).sum())

    team1_goals = matches[matches['home_team'] == team1]['home_score'].sum() + \
        matches[matches['away_team'] == team1]['away_score'].sum()
    team2_goals = matches[matches['home_team'] == team2]['home_score'].sum() + \
        matches[matches['away_team'] == team2]['away_score'].sum()

    goal_diff = team1_goals - team2_goals
    if goal_diff > 0:
        goal_difference_str = f"{team1} +{int(goal_diff)}"
    elif goal_diff < 0:
        goal_difference_str = f"{team2} +{int(abs(goal_diff))}"
    else:
        goal_difference_str = "Even"

    last_n_results = []
    for _, match in matches.tail(num_matches).iterrows():
        goalscorers = get_match_goalscorers(match['date'], match['home_team'], match['away_team'])
        last_n_results.append({
            "date": match['date'], "home_team": match['home_team'], "away_team": match['away_team'],
            "home_score": int(match['home_score']), "away_score": int(match['away_score']),
            "tournament": match['tournament'], "goalscorers": goalscorers
        })

    # Shares in [0, 1] for the stacked chart; draws are left out of the win share.
    total_wins = team1_wins + team2_wins
    win_prop_team1 = team1_wins / total_wins if total_wins > 0 else 0
    win_prop_team2 = team2_wins / total_wins if total_wins > 0 else 0
    total_goals = team1_goals + team2_goals
    goal_prop_team1 = team1_goals / total_goals if total_goals > 0 else 0
    goal_prop_team2 = team2_goals / total_goals if total_goals > 0 else 0

    # The larger goal-difference share must go to the team that is actually
    # ahead. Using only the absolute margin would always credit team1, even
    # when team2 leads.
    goal_diff_value = int(abs(goal_diff))
    if goal_diff_value > 0:
        leader_share = goal_diff_value / (goal_diff_value + 1)
        if goal_diff > 0:
            goal_diff_prop_team1, goal_diff_prop_team2 = leader_share, 1 - leader_share
        else:
            goal_diff_prop_team1, goal_diff_prop_team2 = 1 - leader_share, leader_share
    else:
        goal_diff_prop_team1 = goal_diff_prop_team2 = 0.5

    categories = ['Wins', 'Goals', 'Goal Difference']
    fig = go.Figure(data=[
        go.Bar(name=team1, x=[win_prop_team1, goal_prop_team1, goal_diff_prop_team1],
               y=categories, orientation='h', marker_color='teal'),
        go.Bar(name=team2, x=[win_prop_team2, goal_prop_team2, goal_diff_prop_team2],
               y=categories, orientation='h', marker_color='orange')
    ])
    fig.update_layout(barmode='stack', title_text=f'Proportion of {team1} vs {team2}',
                      xaxis_title="Proportion", yaxis_title="Categories", xaxis=dict(range=[0, 1]))

    return {
        "total_matches": total_matches, f"{team1}_wins": team1_wins, f"{team2}_wins": team2_wins, "draws": draws,
        f"{team1}_goals": int(team1_goals), f"{team2}_goals": int(team2_goals), "goal_difference": goal_difference_str,
        "last_matches": last_n_results, "chart": fig.to_json()
    }
def get_player_stats(player_name):
    """Retrieve statistics for a specific player.

    Args:
        player_name (str): The name of the player.

    Returns:
        dict: The player's name, the team they most often scored for
            (reported under "country"), and their goal count excluding rows
            flagged as own goals.

    Raises:
        HTTPException: 404 if the player has no rows in the goals dataset.
    """
    player_goals = goals_df[goals_df['scorer'] == player_name]
    if player_goals.empty:
        raise HTTPException(status_code=404, detail="Player not found")

    total_goals = len(player_goals[player_goals['own_goal'] == False])

    # mode() returns an empty Series when every 'team' value is missing, so
    # check the mode result itself before taking its first element.
    team_modes = player_goals['team'].mode()
    player_team = team_modes.iloc[0] if not team_modes.empty else "Unknown"

    return {"player_name": player_name, "country": player_team, "total_goals": total_goals}
def predict_match_outcome(team1, team2):
    """Placeholder for match prediction; the feature is currently switched off.

    Args:
        team1 (str): The first team name (unused).
        team2 (str): The second team name (unused).

    Raises:
        HTTPException: Always, with status 503.
    """
    disabled_detail = "Prediction functionality is temporarily disabled due to model loading issues."
    raise HTTPException(status_code=503, detail=disabled_detail)
# Register the handler on the application; without a route decorator it is
# never reachable. The root path is assumed for the welcome endpoint.
@app.get("/")
async def home():
    """Return a welcome message and API description.

    Returns:
        dict: A welcome message, a short description, and a mapping of the
            available endpoint paths to what each one does.
    """
    return {
        "message": "Welcome to Football Prediction API",
        "description": "This API provides football statistics, match predictions, and data visualizations. Note: Prediction endpoint is currently disabled.",
        "available_endpoints": {
            "/teams": "List all teams",
            "/players": "List all players",
            "/country-codes": "Get country codes",
            "/team/{team_name}": "Get team statistics",
            "/head-to-head/{team1}/{team2}": "Get head-to-head statistics",
            "/player/{player_name}": "Get player statistics",
            "/predict/{team1}/{team2}": "Predict match outcome (currently disabled)",
            "/goal-spatial-heatmap/{team}": "Get goal distribution heatmap"
        }
    }
# Route path taken from the endpoint list returned by home().
@app.get("/teams")
async def get_teams():
    """Retrieve a list of all unique teams.

    Returns:
        dict: A dictionary with a single key, "teams", holding the team names
            in sorted order.
    """
    return {"teams": sorted(teams)}
# Route path taken from the endpoint list returned by home().
@app.get("/players")
async def get_players():
    """Retrieve a list of all unique players.

    Returns:
        dict: A dictionary with a single key, "players", holding the
            module-level player list (sorted once at startup).
    """
    return {"players": players}
# Route path taken from the endpoint list returned by home().
@app.get("/country-codes")
async def get_country_codes():
    """Retrieve the country code mapping.

    Returns:
        dict: The mapping loaded from the country-codes JSON file at startup.
    """
    return COUNTRY_CODE_MAP
# Route path taken from the endpoint list returned by home().
@app.get("/team/{team_name}")
async def get_team_statistics(team_name: str, summarize: bool = False):
    """Get detailed statistics for a specified team.

    Args:
        team_name (str): The name of the team.
        summarize (bool, optional): Whether to add a generated text summary
            under the "summary" key. Defaults to False.

    Returns:
        dict: The team name, its statistics and, when requested, a summary.

    Raises:
        HTTPException: 404 if the team is unknown; 500 if the statistics
            calculation raises.
    """
    if team_name not in teams:
        raise HTTPException(status_code=404, detail=f"Team {team_name} not found")

    try:
        stats = get_team_stats(team_name)
    except Exception as e:
        logger.error(f"Error calculating stats for {team_name}: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error calculating stats: {str(e)}") from e

    response = {"team": team_name, "statistics": stats}
    if summarize:
        # Flatten the statistics into plain text lines for the summarizer.
        basic_stats_text = "\n".join(
            f"{key}: {value}" for key, value in stats.items() if key != "tournament_performance"
        )
        tournament_lines = [
            f"{tourn}: Matches: {perf['matches_played']}, "
            f"Wins: {perf['wins']}, "
            f"Losses: {perf['losses']}, "
            f"Draws: {perf['draws']}, "
            f"Win%: {perf['win_percentage']}%"
            for tourn, perf in stats['tournament_performance'].items()
        ]
        tournament_text = "\nTournament Performance:\n" + "\n".join(tournament_lines)
        full_text = f"{basic_stats_text}\n{tournament_text}"
        response["summary"] = summarize_with_groq(full_text)
    return response
# Route path taken from the endpoint list returned by home().
@app.get("/head-to-head/{team1}/{team2}")
async def get_head_to_head(team1: str, team2: str, num_matches: int = 5, summarize: bool = False):
    """Get head-to-head statistics between two teams.

    Args:
        team1 (str): The first team name.
        team2 (str): The second team name.
        num_matches (int, optional): Number of recent matches to include.
            Defaults to 5.
        summarize (bool, optional): Whether to add a generated text summary
            under the "summary" key. Defaults to False.

    Returns:
        dict: Both team names, their head-to-head statistics and, when
            requested, a summary.

    Raises:
        HTTPException: 404 if either team is unknown; 400 if num_matches is
            negative.
    """
    if team1 not in teams or team2 not in teams:
        raise HTTPException(status_code=404, detail="One or both teams not found")
    if num_matches < 0:
        raise HTTPException(status_code=400, detail="Number of matches must be non-negative")

    stats = get_head_to_head_stats(team1, team2, num_matches)
    response = {"team1": team1, "team2": team2, "head_to_head_statistics": stats}
    if summarize:
        # The match list and the chart JSON are too bulky for the prompt, so
        # the matches are rendered as one short line each instead.
        stat_lines = [f"{key}: {value}" for key, value in stats.items()
                      if key not in ["last_matches", "chart"]]
        match_lines = [
            f"Last Match: {match['date']} - {match['home_team']} {match['home_score']} vs {match['away_score']} {match['away_team']}"
            for match in stats["last_matches"]
        ]
        text = "\n".join(stat_lines + match_lines)
        response["summary"] = summarize_with_groq(text)
    return response
# Route path taken from the endpoint list returned by home().
@app.get("/player/{player_name}")
async def get_player_statistics(player_name: str, summarize: bool = False):
    """Get statistics for a specified player.

    Args:
        player_name (str): The name of the player.
        summarize (bool, optional): Whether to add a generated text summary
            under the "summary" key. Defaults to False.

    Returns:
        dict: The player's statistics and, when requested, a summary.

    Raises:
        HTTPException: 404 (raised by get_player_stats) if the player is not
            in the dataset.
    """
    response = get_player_stats(player_name)
    if summarize:
        # Build the prompt before adding "summary" so it holds only the stats.
        text = "\n".join(f"{key}: {value}" for key, value in response.items())
        response["summary"] = summarize_with_groq(text)
    return response
# Route path taken from the endpoint list returned by home().
@app.get("/predict/{team1}/{team2}")
async def predict_match(team1: str, team2: str, summarize: bool = False):
    """Predict the outcome of a match between two teams (currently disabled).

    Args:
        team1 (str): The first team name.
        team2 (str): The second team name.
        summarize (bool, optional): Accepted for parity with the other
            endpoints; unused while prediction is disabled.

    Raises:
        HTTPException: Always 503, raised by predict_match_outcome.
    """
    # Delegate so the "disabled" response is defined in exactly one place.
    return predict_match_outcome(team1, team2)
# Route path taken from the endpoint list returned by home().
@app.get("/goal-spatial-heatmap/{team}")
async def get_goal_spatial_heatmap(team: str, start_year: int = 2000, end_year: int = 2023, summarize: bool = False):
    """Generate a spatial heatmap of goal distribution for a team.

    Args:
        team (str): The team name.
        start_year (int, optional): First year to include. Defaults to 2000.
        end_year (int, optional): Last year to include. Defaults to 2023.
        summarize (bool, optional): Whether to add a generated text summary
            under the "summary" key. Defaults to False.

    Returns:
        dict: The team, the year range, the Plotly heatmap serialized as
            JSON, the total goal count and the average goals per match.

    Raises:
        HTTPException: 404 if the team is unknown or has no goals in the
            range; 400 if start_year is after end_year; 500 on any other
            failure while building the heatmap.
    """
    if team not in teams:
        raise HTTPException(status_code=404, detail=f"Team {team} not found")
    if start_year > end_year:
        raise HTTPException(status_code=400, detail="start_year must be less than or equal to end_year")

    try:
        # Parse the dates into local Series. Overwriting the 'date' column of
        # the shared DataFrames would change what every other endpoint sees
        # (and returns) after the first heatmap request.
        match_years = pd.to_datetime(matches_df['date']).dt.year
        goal_years = pd.to_datetime(goals_df['date']).dt.year

        team_matches = matches_df[
            ((matches_df['home_team'] == team) | (matches_df['away_team'] == team)) &
            (match_years >= start_year) & (match_years <= end_year)
        ]
        team_goals = goals_df[
            (goals_df['team'] == team) &
            (goal_years >= start_year) & (goal_years <= end_year)
        ].dropna(subset=['x_coord', 'y_coord'])

        if team_goals.empty:
            raise HTTPException(status_code=404, detail=f"No goal data found for {team} in the specified year range")

        # 50x50 grid over the 0-100 pitch coordinates, scaled so the densest
        # cell equals 1.
        heatmap_data, xedges, yedges = np.histogram2d(
            team_goals['x_coord'],
            team_goals['y_coord'],
            bins=50,
            range=[[0, 100], [0, 100]]
        )
        peak = heatmap_data.max()
        if peak > 0:
            heatmap_data = heatmap_data / peak

        fig = go.Figure(data=go.Heatmap(
            z=heatmap_data.T,
            x=xedges,
            y=yedges,
            colorscale='Viridis',
            colorbar=dict(title='Goal Density'),
            zmin=0,
            zmax=1
        ))

        # Pitch markings: outline, the two larger and two smaller boxes at
        # each end, the centre circle and the halfway line.
        white_line = dict(color="white", width=2)
        pitch_rectangles = [
            (0, 0, 100, 100),
            (0, 20, 16, 80),
            (84, 20, 100, 80),
            (0, 40, 5, 60),
            (95, 40, 100, 60),
        ]
        for x0, y0, x1, y1 in pitch_rectangles:
            fig.add_shape(type="rect", x0=x0, y0=y0, x1=x1, y1=y1, line=white_line)
        fig.add_shape(type="circle", x0=45, y0=45, x1=55, y1=55, line=white_line)
        fig.add_shape(type="line", x0=50, y0=0, x1=50, y1=100, line=white_line)

        fig.update_layout(
            title=f'Goal Distribution Heatmap for {team} ({start_year}-{end_year})',
            xaxis_title='X Position (Length of Pitch)',
            yaxis_title='Y Position (Width of Pitch)',
            xaxis=dict(range=[0, 100], tickvals=[0, 20, 40, 60, 80, 100], showgrid=False),
            yaxis=dict(range=[0, 100], tickvals=[0, 20, 40, 60, 80, 100], showgrid=False),
            template="plotly_dark",
            width=800,
            height=500,
            plot_bgcolor='rgba(0,128,0,0.3)',
            paper_bgcolor='rgba(0,0,0,0)'
        )

        goal_count = len(team_goals)
        match_count = len(team_matches)
        response = {
            "team": team,
            "start_year": start_year,
            "end_year": end_year,
            "heatmap": fig.to_json(),
            "total_goals": goal_count,
            "average_goals_per_match": round(goal_count / match_count if match_count > 0 else 0, 2)
        }
        if summarize:
            text = (f"Goal Distribution for {team} ({start_year}-{end_year})\n"
                    f"Total Goals: {goal_count}\n"
                    f"Average Goals per Match: {response['average_goals_per_match']:.2f}")
            response["summary"] = summarize_with_groq(text)
        return response
    except HTTPException:
        # Let deliberate HTTP errors (the 404 above) through unchanged rather
        # than rewrapping them as a 500.
        raise
    except Exception as e:
        logger.error(f"Error generating spatial heatmap for {team}: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error generating heatmap: {str(e)}") from e
if __name__ == "__main__":
    # Serve the app directly when this file is executed as a script,
    # listening on all interfaces on port 8000.
    server_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **server_options)