Spaces:

Kabila22
/

Backend_Caps

Sleeping

File size: 23,089 Bytes

from fastapi import FastAPI, HTTPException
from fastapi.middleware.cors import CORSMiddleware
import pandas as pd
import uvicorn
import plotly.graph_objects as go
import logging
import numpy as np
import os
import json
from groq import Groq
from dotenv import load_dotenv

# --- Logging ---------------------------------------------------------------
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# --- Credentials -----------------------------------------------------------
# Values from a local .env file are merged into the environment before the
# API key is looked up; the service refuses to start without a key.
load_dotenv()
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    _missing_key_message = "GROQ_API_KEY not found in environment variables."
    logger.error(_missing_key_message)
    raise Exception(_missing_key_message)

# --- Proxy settings --------------------------------------------------------
# The two HTTP(S) proxy values are logged for diagnosis, then all three proxy
# variables are removed from the process environment before the Groq client
# is constructed.
for _proxy_var in ("HTTP_PROXY", "HTTPS_PROXY"):
    logger.info(f"{_proxy_var}: {os.environ.get(_proxy_var)}")
for _proxy_var in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"):
    os.environ.pop(_proxy_var, None)
logger.info("Proxy environment variables cleared to prevent 'proxies' error.")

# --- Groq client -----------------------------------------------------------
try:
    client = Groq(api_key=GROQ_API_KEY)
    logger.info("Groq client initialized successfully.")
except Exception as e:
    _init_failure = str(e)
    logger.error(f"Failed to initialize Groq client: {_init_failure}")
    raise Exception(f"Groq client initialization failed: {_init_failure}")

# --- Web application -------------------------------------------------------
app = FastAPI()

# NOTE(review): every origin, method and header is allowed while credentials
# are enabled as well -- confirm this is intended for the deployment.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
    allow_credentials=True,
)

# Load the datasets once at import time; the endpoints below read these
# module-level objects directly.
try:
    matches_df = pd.read_csv('data/results.csv')
    goals_df = pd.read_csv('data/goalscorers.csv')
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # default text encoding.
    with open('data/country_codes.json', 'r', encoding='utf-8') as f:
        COUNTRY_CODE_MAP = json.load(f)
except FileNotFoundError as e:
    logger.error(f"File not found: {e}")
    # No request is being served during import, so an HTTP error response
    # has no meaning here; fail startup with an ordinary exception instead.
    raise RuntimeError("Data files not found or inaccessible") from e
except pd.errors.EmptyDataError as e:
    logger.error(f"CSV files are empty: {e}")
    raise RuntimeError("Data files are empty or invalid") from e

# Scores that are missing or not parseable as numbers are replaced by 0 so
# that the comparisons and sums performed by the statistics helpers never
# encounter NaN.
matches_df['home_score'] = pd.to_numeric(matches_df['home_score'], errors='coerce').fillna(0)
matches_df['away_score'] = pd.to_numeric(matches_df['away_score'], errors='coerce').fillna(0)

# The goal coordinates below are generated randomly, not read from the data
# files. The seed is fixed so that every start of the service produces the
# same values. The order of the three random draws matters for
# reproducibility and must not be changed.
np.random.seed(42)
_goal_count = len(goals_df)
_x_when_home_scores = np.random.uniform(80, 100, _goal_count).round()
_x_when_away_scores = np.random.uniform(0, 20, _goal_count).round()
_scored_by_home_side = goals_df['team'] == goals_df['home_team']
goals_df['x_coord'] = np.where(_scored_by_home_side, _x_when_home_scores, _x_when_away_scores)
goals_df['y_coord'] = np.random.uniform(20, 80, _goal_count).round()

# Every name that appears as either the home or the away side of a match.
_home_side_names = set(matches_df['home_team'].unique())
_away_side_names = set(matches_df['away_team'].unique())
teams = _home_side_names | _away_side_names

# Alphabetical list of scorer names; rows without a scorer are ignored.
players = sorted(str(name) for name in goals_df['scorer'].dropna().unique())

logger.warning("Model loading skipped due to compatibility issues. Prediction endpoint disabled.")

def summarize_with_groq(text):
    """Ask the Groq chat-completion API for a short summary of ``text``.

    Args:
        text (str): Content to be summarized.

    Returns:
        str: The content of the first completion choice, or the fixed string
            "Summary unavailable due to an error." if anything fails.
    """
    try:
        conversation = [
            {"role": "system", "content": "You are a helpful assistant that provides concise summaries."},
            {"role": "user", "content": f"Summarize the following text:\n\n{text}"},
        ]
        reply = client.chat.completions.create(
            model="llama-3.3-70b-versatile",
            max_tokens=150,
            messages=conversation,
        )
        first_choice = reply.choices[0]
        return first_choice.message.content
    except Exception as e:
        # Best effort: a failed summary is logged and replaced by a
        # placeholder instead of failing the calling endpoint.
        logger.error(f"Error summarizing with Groq: {e}")
        return "Summary unavailable due to an error."

def get_team_stats(team_name):
    """Build the win/loss/draw record of one team, overall and per tournament.

    Args:
        team_name (str): Team to look up in the match table.

    Returns:
        dict: Keys ``total_matches``, ``wins``, ``losses``, ``draws``,
            ``home_matches_played``, ``away_matches_played``,
            ``tournament_performance`` (tournament name -> record with
            ``matches_played``, ``wins``, ``losses``, ``draws`` and
            ``win_percentage``) and ``country_code`` ("unknown" when the team
            has no entry in the country-code map). A team with no matches
            gets zero counts and an empty ``tournament_performance``.
    """
    hosted = matches_df[matches_df['home_team'] == team_name]
    visited = matches_df[matches_df['away_team'] == team_name]
    country_code = COUNTRY_CODE_MAP.get(team_name, "unknown")

    if hosted.empty and visited.empty:
        return {
            "total_matches": 0,
            "wins": 0,
            "losses": 0,
            "draws": 0,
            "home_matches_played": 0,
            "away_matches_played": 0,
            "tournament_performance": {},
            "country_code": country_code,
        }

    def count(mask):
        # Number of rows selected by a boolean mask, as a plain int.
        return int(mask.sum())

    # Overall record: home rows are judged from the home side's perspective,
    # away rows from the away side's perspective.
    wins = (count(hosted['home_score'] > hosted['away_score'])
            + count(visited['away_score'] > visited['home_score']))
    losses = (count(hosted['home_score'] < hosted['away_score'])
              + count(visited['away_score'] < visited['home_score']))
    draws = (count(hosted['home_score'] == hosted['away_score'])
             + count(visited['away_score'] == visited['home_score']))

    played = pd.concat([hosted, visited])
    per_tournament = {}
    for name in played['tournament'].unique():
        subset = played[played['tournament'] == name]
        at_home = subset['home_team'] == team_name
        on_road = subset['away_team'] == team_name
        home_goals = subset['home_score']
        away_goals = subset['away_score']

        won = count((at_home & (home_goals > away_goals)) | (on_road & (away_goals > home_goals)))
        lost = count((at_home & (home_goals < away_goals)) | (on_road & (away_goals < home_goals)))
        drawn = count(home_goals == away_goals)
        played_here = won + lost + drawn

        if played_here > 0:
            win_rate = round((won / played_here * 100), 2)
        else:
            win_rate = 0.0

        per_tournament[name] = {
            "matches_played": played_here,
            "wins": won,
            "losses": lost,
            "draws": drawn,
            "win_percentage": win_rate,
        }

    return {
        "total_matches": len(hosted) + len(visited),
        "wins": wins,
        "losses": losses,
        "draws": draws,
        "home_matches_played": len(hosted),
        "away_matches_played": len(visited),
        "tournament_performance": per_tournament,
        "country_code": country_code,
    }

def get_match_goalscorers(date, home_team, away_team):
    """Retrieve goalscorers for a specific match.

    Args:
        date (str): The date of the match, compared against the ``date``
            column of the goals table.
        home_team (str): The home team name.
        away_team (str): The away team name.

    Returns:
        list: One dictionary per recorded goal with the keys ``scorer``,
            ``minute``, ``team``, ``own_goal`` and ``penalty``. Missing cells
            are returned as ``None``. The list is empty when no goal rows
            match.
    """
    match_goals = goals_df[(goals_df['date'] == date) &
                           (goals_df['home_team'] == home_team) &
                           (goals_df['away_team'] == away_team)]
    records = match_goals[['scorer', 'minute', 'team', 'own_goal', 'penalty']].to_dict('records')
    # Missing cells come out of pandas as NaN, which is not valid JSON and
    # makes response serialization fail; map them to None (JSON null).
    return [
        {column: (None if pd.isna(value) else value) for column, value in record.items()}
        for record in records
    ]

def get_head_to_head_stats(team1, team2, num_matches=5):
    """Calculate head-to-head statistics between two teams.

    Args:
        team1 (str): The first team name.
        team2 (str): The second team name.
        num_matches (int, optional): How many rows from the end of the
            matching fixtures to report in ``last_matches``. Defaults to 5.

    Returns:
        dict: ``total_matches``, ``<team1>_wins``, ``<team2>_wins``,
            ``draws``, ``<team1>_goals``, ``<team2>_goals``,
            ``goal_difference`` (e.g. ``"<team> +3"`` or ``"Even"``),
            ``last_matches`` (list of match dictionaries including their
            goalscorers) and ``chart`` (a Plotly figure serialized to JSON,
            or ``None`` when the teams never met).
    """
    matches = matches_df[((matches_df['home_team'] == team1) & (matches_df['away_team'] == team2)) |
                         ((matches_df['home_team'] == team2) & (matches_df['away_team'] == team1))]

    if matches.empty:
        return {"total_matches": 0, f"{team1}_wins": 0, f"{team2}_wins": 0, "draws": 0,
                f"{team1}_goals": 0, f"{team2}_goals": 0, "goal_difference": "Even",
                "last_matches": [], "chart": None}

    total_matches = len(matches)
    home_won = matches['home_score'] > matches['away_score']
    away_won = matches['away_score'] > matches['home_score']

    team1_wins = len(matches[((matches['home_team'] == team1) & home_won) |
                             ((matches['away_team'] == team1) & away_won)])
    team2_wins = len(matches[((matches['home_team'] == team2) & home_won) |
                             ((matches['away_team'] == team2) & away_won)])
    draws = len(matches[matches['home_score'] == matches['away_score']])

    team1_goals = matches[matches['home_team'] == team1]['home_score'].sum() + \
                  matches[matches['away_team'] == team1]['away_score'].sum()
    team2_goals = matches[matches['home_team'] == team2]['home_score'].sum() + \
                  matches[matches['away_team'] == team2]['away_score'].sum()

    goal_diff = team1_goals - team2_goals
    if goal_diff > 0:
        goal_difference_str = f"{team1} +{int(goal_diff)}"
    elif goal_diff < 0:
        goal_difference_str = f"{team2} +{int(abs(goal_diff))}"
    else:
        goal_difference_str = "Even"

    last_n_results = []
    for _, match in matches.tail(num_matches).iterrows():
        goalscorers = get_match_goalscorers(match['date'], match['home_team'], match['away_team'])
        last_n_results.append({
            "date": match['date'], "home_team": match['home_team'], "away_team": match['away_team'],
            "home_score": int(match['home_score']), "away_score": int(match['away_score']),
            "tournament": match['tournament'], "goalscorers": goalscorers
        })

    # Shares used for the stacked bar chart. Draws are excluded from the win
    # share; a category with nothing to split is drawn as 0 for both teams.
    total_wins = team1_wins + team2_wins
    win_prop_team1 = team1_wins / total_wins if total_wins > 0 else 0
    win_prop_team2 = team2_wins / total_wins if total_wins > 0 else 0
    total_goals = team1_goals + team2_goals
    goal_prop_team1 = team1_goals / total_goals if total_goals > 0 else 0
    goal_prop_team2 = team2_goals / total_goals if total_goals > 0 else 0

    # The larger part of the "Goal Difference" bar belongs to whichever team
    # is ahead on goals. Previously team1 always received it, even when
    # team2 was the one leading.
    goal_diff_value = int(abs(goal_diff))
    if goal_diff_value > 0:
        leader_share = goal_diff_value / (goal_diff_value + 1)
        if goal_diff > 0:
            goal_diff_prop_team1, goal_diff_prop_team2 = leader_share, 1 - leader_share
        else:
            goal_diff_prop_team1, goal_diff_prop_team2 = 1 - leader_share, leader_share
    else:
        goal_diff_prop_team1 = goal_diff_prop_team2 = 0.5

    categories = ['Wins', 'Goals', 'Goal Difference']
    fig = go.Figure(data=[
        go.Bar(name=team1, x=[win_prop_team1, goal_prop_team1, goal_diff_prop_team1], y=categories, orientation='h', marker_color='teal'),
        go.Bar(name=team2, x=[win_prop_team2, goal_prop_team2, goal_diff_prop_team2], y=categories, orientation='h', marker_color='orange')
    ])
    fig.update_layout(barmode='stack', title_text=f'Proportion of {team1} vs {team2}', xaxis_title="Proportion", yaxis_title="Categories", xaxis=dict(range=[0, 1]))

    return {
        "total_matches": total_matches, f"{team1}_wins": team1_wins, f"{team2}_wins": team2_wins, "draws": draws,
        f"{team1}_goals": int(team1_goals), f"{team2}_goals": int(team2_goals), "goal_difference": goal_difference_str,
        "last_matches": last_n_results, "chart": fig.to_json()
    }

def get_player_stats(player_name):
    """Retrieve statistics for a specific player.

    Args:
        player_name (str): The name of the player, matched exactly against
            the ``scorer`` column of the goals table.

    Returns:
        dict: ``player_name``, ``country`` (the most frequent ``team`` value
            on the player's goal rows, or "Unknown" when none is recorded)
            and ``total_goals`` (goal rows whose ``own_goal`` flag is False).

    Raises:
        HTTPException: 404 if the player has no rows in the dataset.
    """
    player_goals = goals_df[goals_df['scorer'] == player_name]
    if player_goals.empty:
        raise HTTPException(status_code=404, detail="Player not found")

    total_goals = len(player_goals[player_goals['own_goal'] == False])

    # mode() ignores missing values, so it is empty when every team cell is
    # missing. Test the mode result itself: the team column of a non-empty
    # selection is never empty, so checking that column guards nothing.
    team_modes = player_goals['team'].mode()
    player_team = team_modes.iloc[0] if not team_modes.empty else "Unknown"

    return {"player_name": player_name, "country": player_team, "total_goals": total_goals}

def predict_match_outcome(team1, team2):
    """Placeholder for match prediction; it never returns a result.

    Args:
        team1 (str): The first team name (currently unused).
        team2 (str): The second team name (currently unused).

    Raises:
        HTTPException: Always, with status 503, because prediction is
            disabled.
    """
    service_unavailable = HTTPException(
        status_code=503,
        detail="Prediction functionality is temporarily disabled due to model loading issues.",
    )
    raise service_unavailable

@app.get("/")
async def home():
    """Describe the API: greeting, short description and endpoint catalogue.

    Returns:
        dict: Keys ``message``, ``description`` and ``available_endpoints``
            (route template -> one-line explanation).
    """
    endpoint_catalogue = {
        "/teams": "List all teams",
        "/players": "List all players",
        "/country-codes": "Get country codes",
        "/team/{team_name}": "Get team statistics",
        "/head-to-head/{team1}/{team2}": "Get head-to-head statistics",
        "/player/{player_name}": "Get player statistics",
        "/predict/{team1}/{team2}": "Predict match outcome (currently disabled)",
        "/goal-spatial-heatmap/{team}": "Get goal distribution heatmap",
    }
    greeting = "Welcome to Football Prediction API"
    overview = "This API provides football statistics, match predictions, and data visualizations. Note: Prediction endpoint is currently disabled."
    return {
        "message": greeting,
        "description": overview,
        "available_endpoints": endpoint_catalogue,
    }

@app.get("/teams")
async def get_teams():
    """List every team name known to the service in sorted order.

    Returns:
        dict: ``{"teams": [...]}`` with the names sorted ascending.
    """
    ordered_names = sorted(teams)
    return {"teams": ordered_names}

@app.get("/players")
async def get_players():
    """List every scorer name known to the service.

    Returns:
        dict: ``{"players": [...]}`` using the module-level list, which is
            sorted when it is built at startup.
    """
    payload = {"players": players}
    return payload

@app.get("/country-codes")
async def get_country_codes():
    """Expose the country-code mapping loaded from the JSON data file.

    Returns:
        dict: The mapping object itself, returned without copying.
    """
    country_codes = COUNTRY_CODE_MAP
    return country_codes

@app.get("/team/{team_name}")
async def get_team_statistics(team_name: str, summarize: bool = False):
    """Return the statistics of one team, optionally with a generated summary.

    Args:
        team_name (str): Team to report on; must be one of the known teams.
        summarize (bool, optional): When True, a ``summary`` key produced by
            the Groq helper is added to the response. Defaults to False.

    Returns:
        dict: ``team``, ``statistics`` and, if requested, ``summary``.

    Raises:
        HTTPException: 404 for an unknown team, 500 if computing the
            statistics raises.
    """
    if team_name not in teams:
        raise HTTPException(status_code=404, detail=f"Team {team_name} not found")

    try:
        stats = get_team_stats(team_name)
    except Exception as e:
        failure = str(e)
        logger.error(f"Error calculating stats for {team_name}: {failure}")
        raise HTTPException(status_code=500, detail=f"Error calculating stats: {failure}")

    response = {"team": team_name, "statistics": stats}
    if not summarize:
        return response

    # Flatten the statistics into plain text for the summarizer: first the
    # scalar fields, then one line per tournament.
    headline_lines = [
        f"{key}: {value}"
        for key, value in stats.items()
        if key != "tournament_performance"
    ]
    tournament_lines = [
        f"{tourn}: Matches: {record['matches_played']}, "
        f"Wins: {record['wins']}, "
        f"Losses: {record['losses']}, "
        f"Draws: {record['draws']}, "
        f"Win%: {record['win_percentage']}%"
        for tourn, record in stats['tournament_performance'].items()
    ]
    basic_stats_text = "\n".join(headline_lines)
    tournament_text = "\nTournament Performance:\n" + "\n".join(tournament_lines)

    response["summary"] = summarize_with_groq(f"{basic_stats_text}\n{tournament_text}")
    return response

@app.get("/head-to-head/{team1}/{team2}")
async def get_head_to_head(team1: str, team2: str, num_matches: int = 5, summarize: bool = False):
    """Return the head-to-head record of two teams, optionally summarized.

    Args:
        team1 (str): The first team name.
        team2 (str): The second team name.
        num_matches (int, optional): How many recent meetings to list.
            Defaults to 5.
        summarize (bool, optional): When True, a ``summary`` key produced by
            the Groq helper is added to the response. Defaults to False.

    Returns:
        dict: ``team1``, ``team2``, ``head_to_head_statistics`` and, if
            requested, ``summary``.

    Raises:
        HTTPException: 404 if either team is unknown, 400 if ``num_matches``
            is negative.
    """
    both_known = team1 in teams and team2 in teams
    if not both_known:
        raise HTTPException(status_code=404, detail="One or both teams not found")
    if num_matches < 0:
        raise HTTPException(status_code=400, detail="Number of matches must be non-negative")

    stats = get_head_to_head_stats(team1, team2, num_matches)
    response = {"team1": team1, "team2": team2, "head_to_head_statistics": stats}
    if not summarize:
        return response

    # The summarizer gets the scalar figures followed by one line per listed
    # match; the match list and the chart JSON are left out of the figures.
    figure_lines = [
        f"{key}: {value}"
        for key, value in stats.items()
        if key not in ["last_matches", "chart"]
    ]
    match_lines = [
        f"Last Match: {match['date']} - {match['home_team']} {match['home_score']} vs {match['away_score']} {match['away_team']}"
        for match in stats["last_matches"]
    ]
    response["summary"] = summarize_with_groq("\n".join(figure_lines + match_lines))
    return response

@app.get("/player/{player_name}")
async def get_player_statistics(player_name: str, summarize: bool = False):
    """Return the statistics of one player, optionally with a summary.

    Args:
        player_name (str): The name of the player.
        summarize (bool, optional): When True, a ``summary`` key produced by
            the Groq helper is added to the result. Defaults to False.

    Returns:
        dict: The player statistics dictionary, extended in place with
            ``summary`` when requested.
    """
    player_report = get_player_stats(player_name)
    if summarize:
        # Build the text before the summary key is added, so the summary is
        # based on the statistics only.
        report_lines = [f"{key}: {value}" for key, value in player_report.items()]
        player_report["summary"] = summarize_with_groq("\n".join(report_lines))
    return player_report

@app.get("/predict/{team1}/{team2}")
async def predict_match(team1: str, team2: str, summarize: bool = False):
    """Prediction endpoint; currently answers every request with 503.

    Args:
        team1 (str): The first team name (currently unused).
        team2 (str): The second team name (currently unused).
        summarize (bool, optional): Accepted but currently unused.
            Defaults to False.

    Raises:
        HTTPException: Always, with status 503, because prediction is
            disabled.
    """
    service_unavailable = HTTPException(
        status_code=503,
        detail="Prediction functionality is temporarily disabled due to model loading issues.",
    )
    raise service_unavailable

@app.get("/goal-spatial-heatmap/{team}")
async def get_goal_spatial_heatmap(team: str, start_year: int = 2000, end_year: int = 2023, summarize: bool = False):
    """Generate a spatial heatmap of goal distribution for a team.

    The heatmap is a 50x50 histogram of the ``x_coord``/``y_coord`` columns
    of the team's goal rows, scaled so that the densest cell equals 1.

    Args:
        team (str): The team name.
        start_year (int, optional): First year (inclusive). Defaults to 2000.
        end_year (int, optional): Last year (inclusive). Defaults to 2023.
        summarize (bool, optional): When True, a ``summary`` key produced by
            the Groq helper is added to the response. Defaults to False.

    Returns:
        dict: ``team``, ``start_year``, ``end_year``, ``heatmap`` (Plotly
            figure as JSON), ``total_goals``, ``average_goals_per_match``
            and, if requested, ``summary``.

    Raises:
        HTTPException: 404 if the team is unknown or has no goal rows in the
            range, 400 if ``start_year`` is after ``end_year``, 500 for any
            unexpected failure while building the figure.
    """
    if team not in teams:
        raise HTTPException(status_code=404, detail=f"Team {team} not found")

    if start_year > end_year:
        raise HTTPException(status_code=400, detail="start_year must be less than or equal to end_year")

    try:
        # Parse the dates into local series only. Writing the parsed values
        # back into the shared DataFrames would change the date values that
        # other endpoints return once this endpoint has been called.
        match_years = pd.to_datetime(matches_df['date']).dt.year
        goal_years = pd.to_datetime(goals_df['date']).dt.year

        team_matches = matches_df[
            ((matches_df['home_team'] == team) | (matches_df['away_team'] == team)) &
            (match_years >= start_year) & (match_years <= end_year)
        ]

        team_goals = goals_df[
            (goals_df['team'] == team) &
            (goal_years >= start_year) & (goal_years <= end_year)
        ].dropna(subset=['x_coord', 'y_coord'])

        if team_goals.empty:
            raise HTTPException(status_code=404, detail=f"No goal data found for {team} in the specified year range")

        heatmap_data, xedges, yedges = np.histogram2d(
            team_goals['x_coord'],
            team_goals['y_coord'],
            bins=50,
            range=[[0, 100], [0, 100]]
        )

        # Scale to [0, 1]; an all-zero histogram is left untouched to avoid
        # dividing by zero.
        peak = heatmap_data.max()
        if peak > 0:
            heatmap_data = heatmap_data / peak

        fig = go.Figure(data=go.Heatmap(
            z=heatmap_data.T,
            x=xedges,
            y=yedges,
            colorscale='Viridis',
            colorbar=dict(title='Goal Density'),
            zmin=0,
            zmax=1
        ))

        # White outline shapes drawn on top of the heatmap (pitch markings).
        fig.add_shape(type="rect", x0=0, y0=0, x1=100, y1=100, line=dict(color="white", width=2))
        fig.add_shape(type="rect", x0=0, y0=20, x1=16, y1=80, line=dict(color="white", width=2))
        fig.add_shape(type="rect", x0=84, y0=20, x1=100, y1=80, line=dict(color="white", width=2))
        fig.add_shape(type="rect", x0=0, y0=40, x1=5, y1=60, line=dict(color="white", width=2))
        fig.add_shape(type="rect", x0=95, y0=40, x1=100, y1=60, line=dict(color="white", width=2))
        fig.add_shape(type="circle", x0=45, y0=45, x1=55, y1=55, line=dict(color="white", width=2))
        fig.add_shape(type="line", x0=50, y0=0, x1=50, y1=100, line=dict(color="white", width=2))

        fig.update_layout(
            title=f'Goal Distribution Heatmap for {team} ({start_year}-{end_year})',
            xaxis_title='X Position (Length of Pitch)',
            yaxis_title='Y Position (Width of Pitch)',
            xaxis=dict(range=[0, 100], tickvals=[0, 20, 40, 60, 80, 100], showgrid=False),
            yaxis=dict(range=[0, 100], tickvals=[0, 20, 40, 60, 80, 100], showgrid=False),
            template="plotly_dark",
            width=800,
            height=500,
            plot_bgcolor='rgba(0,128,0,0.3)',
            paper_bgcolor='rgba(0,0,0,0)'
        )

        response = {
            "team": team,
            "start_year": start_year,
            "end_year": end_year,
            "heatmap": fig.to_json(),
            "total_goals": len(team_goals),
            "average_goals_per_match": round(len(team_goals) / len(team_matches) if len(team_matches) > 0 else 0, 2)
        }

        if summarize:
            text = (f"Goal Distribution for {team} ({start_year}-{end_year})\n"
                    f"Total Goals: {len(team_goals)}\n"
                    f"Average Goals per Match: {response['average_goals_per_match']:.2f}")
            summary = summarize_with_groq(text)
            response["summary"] = summary

        return response

    except HTTPException:
        # Deliberate HTTP errors (the 404 above) must reach the client
        # unchanged rather than being rewritten as a 500 by the handler below.
        raise
    except Exception as e:
        logger.error(f"Error generating spatial heatmap for {team}: {str(e)}")
        raise HTTPException(status_code=500, detail=f"Error generating heatmap: {str(e)}")

if __name__ == "__main__":
    uvicorn.run(app, host="0.0.0.0", port=8000)