# Backend_Caps / app / main.py
# Kabila22's picture
# backend commit
# 99e52a8
import json
import logging
import os

import numpy as np
import pandas as pd
import plotly.graph_objects as go
import uvicorn
from dotenv import load_dotenv
from fastapi import FastAPI, HTTPException
from fastapi.concurrency import run_in_threadpool
from fastapi.middleware.cors import CORSMiddleware
from groq import Groq
# --- Logging, environment and Groq client bootstrap --------------------------
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Load variables from a .env file (when one exists) into the process environment.
load_dotenv()

# The API key is mandatory: fail at import time rather than on the first request.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
if not GROQ_API_KEY:
    logger.error("GROQ_API_KEY not found in environment variables.")
    raise RuntimeError("GROQ_API_KEY not found in environment variables.")

# Record the proxy configuration before discarding it, for troubleshooting.
# NOTE(review): proxy URLs can embed credentials; confirm these values are safe to log.
logger.info("HTTP_PROXY: %s", os.environ.get("HTTP_PROXY"))
logger.info("HTTPS_PROXY: %s", os.environ.get("HTTPS_PROXY"))

# Drop the proxy variables so that code running after this point does not see them.
for _proxy_var in ("HTTP_PROXY", "HTTPS_PROXY", "NO_PROXY"):
    os.environ.pop(_proxy_var, None)
logger.info("Proxy environment variables cleared to prevent 'proxies' error.")

# Module-level Groq client used by summarize_with_groq().
try:
    client = Groq(api_key=GROQ_API_KEY)
    logger.info("Groq client initialized successfully.")
except Exception as e:
    logger.error("Failed to initialize Groq client: %s", e)
    # Chain the original error so the root cause stays visible in the traceback.
    raise RuntimeError(f"Groq client initialization failed: {e}") from e
# Application object; every route defined below is registered on it.
app = FastAPI()

# Cross-origin policy: any origin, method and header is accepted, with
# credentials allowed.
# NOTE(review): wildcard origins combined with allow_credentials=True is the
# most permissive configuration possible; confirm it is intended outside
# development.
_cors_settings = {
    "allow_origins": ["*"],
    "allow_credentials": True,
    "allow_methods": ["*"],
    "allow_headers": ["*"],
}
app.add_middleware(CORSMiddleware, **_cors_settings)
# --- Dataset loading ----------------------------------------------------------
# These reads happen once, at import time. No HTTP request exists yet, so a
# failure is reported as a plain RuntimeError (chained to its cause) instead of
# an HTTPException, which is only meaningful inside a request handler.
try:
    matches_df = pd.read_csv('data/results.csv')
    goals_df = pd.read_csv('data/goalscorers.csv')
    # Explicit UTF-8: without it open() falls back to the platform's locale
    # encoding, which can garble non-ASCII team names.
    with open('data/country_codes.json', 'r', encoding='utf-8') as f:
        COUNTRY_CODE_MAP = json.load(f)
except FileNotFoundError as e:
    logger.error("File not found: %s", e)
    raise RuntimeError("Data files not found or inaccessible") from e
except pd.errors.EmptyDataError as e:
    logger.error("CSV files are empty: %s", e)
    raise RuntimeError("Data files are empty or invalid") from e

# Force both score columns to numbers; anything unparsable or missing becomes 0.
# NOTE(review): a match without a recorded score is therefore counted as a
# 0-0 draw by the statistics helpers; confirm that is intended.
matches_df['home_score'] = pd.to_numeric(matches_df['home_score'], errors='coerce').fillna(0)
matches_df['away_score'] = pd.to_numeric(matches_df['away_score'], errors='coerce').fillna(0)

# Goal coordinates are synthetic: they are drawn from a seeded random generator
# (fixed seed, so identical on every start-up), not read from the data files.
# Goals credited to the home team get x in [80, 100], all others x in [0, 20];
# y is drawn from [20, 80]. The order of the three draws is significant for
# reproducibility and must not be changed.
np.random.seed(42)
goals_df['x_coord'] = np.where(
    goals_df['team'] == goals_df['home_team'],
    np.random.uniform(80, 100, len(goals_df)).round(),
    np.random.uniform(0, 20, len(goals_df)).round()
)
goals_df['y_coord'] = np.random.uniform(20, 80, len(goals_df)).round()

# Lookup collections used by the endpoints for validation and listing.
teams = set(matches_df['home_team'].unique()).union(set(matches_df['away_team'].unique()))
# dropna() already removes missing scorers, so no further filtering is needed.
players = sorted(str(scorer) for scorer in goals_df['scorer'].dropna().unique())

logger.warning("Model loading skipped due to compatibility issues. Prediction endpoint disabled.")
def summarize_with_groq(text):
    """Generate a concise summary of the provided text using the Groq API.

    The call is synchronous and goes through the module-level ``client``.
    Failures never propagate: any exception is logged with its traceback and
    a fixed fallback message is returned instead.

    Args:
        text (str): The text to summarize.

    Returns:
        str: The summary produced by the model, or the fallback message
        "Summary unavailable due to an error." when the request fails or the
        model returns no content.
    """
    fallback = "Summary unavailable due to an error."
    try:
        chat_completion = client.chat.completions.create(
            messages=[
                {"role": "system", "content": "You are a helpful assistant that provides concise summaries."},
                {"role": "user", "content": f"Summarize the following text:\n\n{text}"}
            ],
            model="llama-3.3-70b-versatile",
            max_tokens=150
        )
        summary = chat_completion.choices[0].message.content
    except Exception as e:
        # Best-effort feature: keep the endpoint working, but keep the traceback.
        logger.exception("Error summarizing with Groq: %s", e)
        return fallback
    if summary is None:
        # Honour the documented str return type even when no content came back.
        logger.error("Error summarizing with Groq: empty completion content")
        return fallback
    return summary
def get_team_stats(team_name):
    """Build the overall and per-tournament record of one team.

    Args:
        team_name (str): Team to look up in the match results.

    Returns:
        dict: ``total_matches``, ``wins``, ``losses``, ``draws``,
        ``home_matches_played``, ``away_matches_played``,
        ``tournament_performance`` (one entry per tournament with matches
        played, wins, losses, draws and win percentage) and ``country_code``
        ("unknown" when the team has no entry in the code map). All counters
        are zero when the team appears in no match.
    """
    hosted = matches_df.loc[matches_df['home_team'] == team_name]
    visited = matches_df.loc[matches_df['away_team'] == team_name]

    if hosted.empty and visited.empty:
        return {
            "total_matches": 0,
            "wins": 0,
            "losses": 0,
            "draws": 0,
            "home_matches_played": 0,
            "away_matches_played": 0,
            "tournament_performance": {},
            "country_code": COUNTRY_CODE_MAP.get(team_name, "unknown")
        }

    # Overall record: home and away fixtures are judged from the team's side.
    wins = int((hosted['home_score'] > hosted['away_score']).sum()) \
        + int((visited['away_score'] > visited['home_score']).sum())
    losses = int((hosted['home_score'] < hosted['away_score']).sum()) \
        + int((visited['away_score'] < visited['home_score']).sum())
    draws = int((hosted['home_score'] == hosted['away_score']).sum()) \
        + int((visited['away_score'] == visited['home_score']).sum())

    # Per-tournament record, in order of first appearance (home fixtures first).
    combined = pd.concat([hosted, visited])
    per_tournament = {}
    for tournament in combined['tournament'].unique():
        subset = combined.loc[combined['tournament'] == tournament]
        at_home = subset['home_team'] == team_name
        on_road = subset['away_team'] == team_name
        home_ahead = subset['home_score'] > subset['away_score']
        away_ahead = subset['away_score'] > subset['home_score']

        n_won = int(((at_home & home_ahead) | (on_road & away_ahead)).sum())
        n_lost = int(((at_home & away_ahead) | (on_road & home_ahead)).sum())
        n_drawn = int((subset['home_score'] == subset['away_score']).sum())
        n_played = n_won + n_lost + n_drawn

        if n_played > 0:
            win_percentage = round((n_won / n_played * 100), 2)
        else:
            win_percentage = 0.0

        per_tournament[tournament] = {
            "matches_played": n_played,
            "wins": n_won,
            "losses": n_lost,
            "draws": n_drawn,
            "win_percentage": win_percentage
        }

    return {
        "total_matches": len(hosted) + len(visited),
        "wins": wins,
        "losses": losses,
        "draws": draws,
        "home_matches_played": len(hosted),
        "away_matches_played": len(visited),
        "tournament_performance": per_tournament,
        "country_code": COUNTRY_CODE_MAP.get(team_name, "unknown")
    }
def get_match_goalscorers(date, home_team, away_team):
    """Retrieve goalscorers for a specific match.

    A match is identified by its date together with both team names. Missing
    values in the goal data (for example an unknown minute) are returned as
    ``None`` rather than NaN, because NaN cannot be represented in JSON.

    Args:
        date: The date of the match, in the same representation as the
            ``date`` column of the goals data.
        home_team (str): The home team name.
        away_team (str): The away team name.

    Returns:
        list: One dictionary per goal with the keys ``scorer``, ``minute``,
        ``team``, ``own_goal`` and ``penalty``; empty when no goal is recorded
        for the match.
    """
    columns = ['scorer', 'minute', 'team', 'own_goal', 'penalty']
    same_match = (
        (goals_df['date'] == date)
        & (goals_df['home_team'] == home_team)
        & (goals_df['away_team'] == away_team)
    )
    records = goals_df.loc[same_match, columns].to_dict('records')
    # Replace every missing value with None so the result is JSON-serializable.
    return [
        {field: (None if pd.isna(value) else value) for field, value in record.items()}
        for record in records
    ]
def get_head_to_head_stats(team1, team2, num_matches=5):
    """Calculate head-to-head statistics between two teams.

    Only matches in which the two teams faced each other (either one at home)
    are considered.

    Args:
        team1 (str): The first team name.
        team2 (str): The second team name.
        num_matches (int, optional): How many of the last rows of the matching
            fixtures to list in ``last_matches``. Defaults to 5.

    Returns:
        dict: ``total_matches``, ``<team1>_wins``, ``<team2>_wins``, ``draws``,
        ``<team1>_goals``, ``<team2>_goals``, ``goal_difference`` (for example
        "Brazil +3" or "Even"), ``last_matches`` (each with its goalscorers)
        and ``chart`` (a Plotly figure serialized to JSON, or ``None`` when the
        teams never met).
    """
    team1_hosts = (matches_df['home_team'] == team1) & (matches_df['away_team'] == team2)
    team2_hosts = (matches_df['home_team'] == team2) & (matches_df['away_team'] == team1)
    matches = matches_df[team1_hosts | team2_hosts]
    if matches.empty:
        return {"total_matches": 0, f"{team1}_wins": 0, f"{team2}_wins": 0, "draws": 0,
                f"{team1}_goals": 0, f"{team2}_goals": 0, "goal_difference": "Even",
                "last_matches": [], "chart": None}

    total_matches = len(matches)

    # Outcome masks, shared by both teams' tallies.
    home_won = matches['home_score'] > matches['away_score']
    away_won = matches['away_score'] > matches['home_score']
    team1_home = matches['home_team'] == team1
    team1_away = matches['away_team'] == team1
    team2_home = matches['home_team'] == team2
    team2_away = matches['away_team'] == team2

    team1_wins = int(((team1_home & home_won) | (team1_away & away_won)).sum())
    team2_wins = int(((team2_home & home_won) | (team2_away & away_won)).sum())
    draws = int((matches['home_score'] == matches['away_score']).sum())

    team1_goals = matches.loc[team1_home, 'home_score'].sum() + \
        matches.loc[team1_away, 'away_score'].sum()
    team2_goals = matches.loc[team2_home, 'home_score'].sum() + \
        matches.loc[team2_away, 'away_score'].sum()

    goal_diff = team1_goals - team2_goals
    if goal_diff > 0:
        goal_difference_str = f"{team1} +{int(goal_diff)}"
    elif goal_diff < 0:
        goal_difference_str = f"{team2} +{int(abs(goal_diff))}"
    else:
        goal_difference_str = "Even"

    # The last rows of the filtered frame, each enriched with its goalscorers.
    last_n_results = []
    for _, match in matches.tail(num_matches).iterrows():
        goalscorers = get_match_goalscorers(match['date'], match['home_team'], match['away_team'])
        last_n_results.append({
            "date": match['date'], "home_team": match['home_team'], "away_team": match['away_team'],
            "home_score": int(match['home_score']), "away_score": int(match['away_score']),
            "tournament": match['tournament'], "goalscorers": goalscorers
        })

    # Shares for the stacked chart; each pair sums to 1 (or is 0/0 when there
    # is nothing to split).
    total_wins = team1_wins + team2_wins
    win_prop_team1 = team1_wins / total_wins if total_wins > 0 else 0
    win_prop_team2 = team2_wins / total_wins if total_wins > 0 else 0
    total_goals = team1_goals + team2_goals
    goal_prop_team1 = team1_goals / total_goals if total_goals > 0 else 0
    goal_prop_team2 = team2_goals / total_goals if total_goals > 0 else 0

    # The larger "Goal Difference" share belongs to whichever team actually
    # leads on goals; previously it was always given to team1.
    goal_diff_value = int(abs(goal_diff))
    leader_share = goal_diff_value / (goal_diff_value + 1)
    if goal_diff > 0:
        goal_diff_prop_team1, goal_diff_prop_team2 = leader_share, 1 - leader_share
    elif goal_diff < 0:
        goal_diff_prop_team1, goal_diff_prop_team2 = 1 - leader_share, leader_share
    else:
        goal_diff_prop_team1 = goal_diff_prop_team2 = 0.5

    categories = ['Wins', 'Goals', 'Goal Difference']
    fig = go.Figure(data=[
        go.Bar(name=team1, x=[win_prop_team1, goal_prop_team1, goal_diff_prop_team1],
               y=categories, orientation='h', marker_color='teal'),
        go.Bar(name=team2, x=[win_prop_team2, goal_prop_team2, goal_diff_prop_team2],
               y=categories, orientation='h', marker_color='orange')
    ])
    fig.update_layout(barmode='stack', title_text=f'Proportion of {team1} vs {team2}',
                      xaxis_title="Proportion", yaxis_title="Categories", xaxis=dict(range=[0, 1]))

    return {
        "total_matches": total_matches, f"{team1}_wins": team1_wins, f"{team2}_wins": team2_wins, "draws": draws,
        f"{team1}_goals": int(team1_goals), f"{team2}_goals": int(team2_goals), "goal_difference": goal_difference_str,
        "last_matches": last_n_results, "chart": fig.to_json()
    }
def get_player_stats(player_name):
    """Retrieve statistics for a specific player.

    Args:
        player_name (str): The name of the player, matched exactly against the
            ``scorer`` column of the goals data.

    Returns:
        dict: ``player_name``, ``country`` (the most frequent ``team`` value on
        the player's rows, or "Unknown" when none is recorded) and
        ``total_goals`` (rows whose ``own_goal`` flag is False).

    Raises:
        HTTPException: 404 if the player is not found in the dataset.
    """
    player_goals = goals_df[goals_df['scorer'] == player_name]
    if player_goals.empty:
        raise HTTPException(status_code=404, detail="Player not found")

    # Own goals are excluded from the tally.
    total_goals = int(player_goals['own_goal'].eq(False).sum())

    # mode() ignores missing values, so it can be empty even though rows exist;
    # test the result itself before indexing into it.
    team_modes = player_goals['team'].mode()
    player_team = team_modes.iloc[0] if not team_modes.empty else "Unknown"

    return {"player_name": player_name, "country": player_team, "total_goals": total_goals}
def predict_match_outcome(team1, team2):
    """Placeholder for match prediction; it never returns a value.

    Args:
        team1 (str): The first team name (currently unused).
        team2 (str): The second team name (currently unused).

    Raises:
        HTTPException: Always, with status 503, because prediction is disabled.
    """
    unavailable = HTTPException(
        status_code=503,
        detail="Prediction functionality is temporarily disabled due to model loading issues.",
    )
    raise unavailable
@app.get("/")
async def home():
    """Describe the API at its root path.

    Returns:
        dict: A welcome message, a short description and a mapping of the
        available endpoint paths to what each one does.
    """
    endpoint_guide = {
        "/teams": "List all teams",
        "/players": "List all players",
        "/country-codes": "Get country codes",
        "/team/{team_name}": "Get team statistics",
        "/head-to-head/{team1}/{team2}": "Get head-to-head statistics",
        "/player/{player_name}": "Get player statistics",
        "/predict/{team1}/{team2}": "Predict match outcome (currently disabled)",
        "/goal-spatial-heatmap/{team}": "Get goal distribution heatmap",
    }
    description = (
        "This API provides football statistics, match predictions, and data visualizations. "
        "Note: Prediction endpoint is currently disabled."
    )
    return {
        "message": "Welcome to Football Prediction API",
        "description": description,
        "available_endpoints": endpoint_guide,
    }
@app.get("/teams")
async def get_teams():
    """List every team that appears in the match results.

    Returns:
        dict: ``{"teams": [...]}`` with the team names in sorted order.
    """
    ordered_names = sorted(teams)
    return {"teams": ordered_names}
@app.get("/players")
async def get_players():
    """List every known goalscorer.

    Returns:
        dict: ``{"players": [...]}`` using the module-level list, which is
        sorted when it is built at start-up.
    """
    return dict(players=players)
@app.get("/country-codes")
async def get_country_codes():
    """Expose the country code lookup loaded from the JSON data file.

    Returns:
        dict: The mapping exactly as loaded at start-up (team name to code).
    """
    code_map = COUNTRY_CODE_MAP
    return code_map
@app.get("/team/{team_name}")
async def get_team_statistics(team_name: str, summarize: bool = False):
    """Get detailed statistics for a specified team.

    Args:
        team_name (str): The name of the team.
        summarize (bool, optional): Whether to include a generated text
            summary of the statistics. Defaults to False.

    Returns:
        dict: ``team`` and ``statistics``, plus ``summary`` when requested.

    Raises:
        HTTPException: 404 if the team is not found, 500 if the statistics
            calculation fails.
    """
    if team_name not in teams:
        raise HTTPException(status_code=404, detail=f"Team {team_name} not found")

    try:
        stats = get_team_stats(team_name)
    except Exception as e:
        logger.error("Error calculating stats for %s: %s", team_name, e)
        raise HTTPException(status_code=500, detail=f"Error calculating stats: {str(e)}") from e

    response = {"team": team_name, "statistics": stats}

    if summarize:
        # Flat "key: value" lines for the scalar statistics...
        basic_stats_text = "\n".join(
            f"{key}: {value}" for key, value in stats.items() if key != "tournament_performance"
        )
        # ...followed by one line per tournament.
        tournament_lines = [
            f"{tourn}: Matches: {record['matches_played']}, "
            f"Wins: {record['wins']}, "
            f"Losses: {record['losses']}, "
            f"Draws: {record['draws']}, "
            f"Win%: {record['win_percentage']}%"
            for tourn, record in stats['tournament_performance'].items()
        ]
        tournament_text = "\nTournament Performance:\n" + "\n".join(tournament_lines)
        full_text = f"{basic_stats_text}\n{tournament_text}"
        # The summarizer performs a blocking network call; run it in the thread
        # pool so the event loop keeps serving other requests meanwhile.
        response["summary"] = await run_in_threadpool(summarize_with_groq, full_text)

    return response
@app.get("/head-to-head/{team1}/{team2}")
async def get_head_to_head(team1: str, team2: str, num_matches: int = 5, summarize: bool = False):
    """Get head-to-head statistics between two teams.

    Args:
        team1 (str): The first team name.
        team2 (str): The second team name.
        num_matches (int, optional): Number of recent matches to include.
            Defaults to 5.
        summarize (bool, optional): Whether to include a generated text
            summary. Defaults to False.

    Returns:
        dict: ``team1``, ``team2`` and ``head_to_head_statistics``, plus
        ``summary`` when requested.

    Raises:
        HTTPException: 404 if either team is not found, 400 if
            ``num_matches`` is negative.
    """
    if team1 not in teams or team2 not in teams:
        raise HTTPException(status_code=404, detail="One or both teams not found")
    if num_matches < 0:
        raise HTTPException(status_code=400, detail="Number of matches must be non-negative")

    stats = get_head_to_head_stats(team1, team2, num_matches)
    response = {"team1": team1, "team2": team2, "head_to_head_statistics": stats}

    if summarize:
        # Scalar statistics first; the match list and chart JSON are left out
        # of the prompt and the matches are rendered as one line each instead.
        stat_lines = [
            f"{key}: {value}" for key, value in stats.items() if key not in ["last_matches", "chart"]
        ]
        match_lines = [
            f"Last Match: {match['date']} - {match['home_team']} {match['home_score']} vs {match['away_score']} {match['away_team']}"
            for match in stats["last_matches"]
        ]
        text = "\n".join(stat_lines + match_lines)
        # The summarizer performs a blocking network call; run it in the thread
        # pool so the event loop keeps serving other requests meanwhile.
        response["summary"] = await run_in_threadpool(summarize_with_groq, text)

    return response
@app.get("/player/{player_name}")
async def get_player_statistics(player_name: str, summarize: bool = False):
    """Get statistics for a specified player.

    Args:
        player_name (str): The name of the player.
        summarize (bool, optional): Whether to include a generated text
            summary. Defaults to False.

    Returns:
        dict: The player's statistics, plus ``summary`` when requested.

    Raises:
        HTTPException: 404 (raised by the statistics helper) if the player is
            not found.
    """
    stats = get_player_stats(player_name)
    # Work on a copy so the helper's result is not modified when a summary is added.
    response = dict(stats)

    if summarize:
        text = "\n".join(f"{key}: {value}" for key, value in stats.items())
        # The summarizer performs a blocking network call; run it in the thread
        # pool so the event loop keeps serving other requests meanwhile.
        response["summary"] = await run_in_threadpool(summarize_with_groq, text)

    return response
@app.get("/predict/{team1}/{team2}")
async def predict_match(team1: str, team2: str, summarize: bool = False):
    """Predict the outcome of a match between two teams (currently disabled).

    The work is delegated to ``predict_match_outcome`` so the "disabled"
    response is defined in a single place.

    Args:
        team1 (str): The first team name.
        team2 (str): The second team name.
        summarize (bool, optional): Accepted for consistency with the other
            endpoints; currently unused. Defaults to False.

    Raises:
        HTTPException: Always, with status 503, while prediction is disabled.
    """
    return predict_match_outcome(team1, team2)
@app.get("/goal-spatial-heatmap/{team}")
async def get_goal_spatial_heatmap(team: str, start_year: int = 2000, end_year: int = 2023, summarize: bool = False):
    """Generate a spatial heatmap of goal distribution for a team.

    Goals credited to the team within the inclusive year range are binned on a
    50 x 50 grid over a 100 x 100 pitch, normalized to the densest cell and
    drawn over simple pitch markings.

    Args:
        team (str): The team name.
        start_year (int, optional): First year to include. Defaults to 2000.
        end_year (int, optional): Last year to include. Defaults to 2023.
        summarize (bool, optional): Whether to include a generated text
            summary. Defaults to False.

    Returns:
        dict: ``team``, ``start_year``, ``end_year``, ``heatmap`` (Plotly figure
        as JSON), ``total_goals`` and ``average_goals_per_match``, plus
        ``summary`` when requested.

    Raises:
        HTTPException: 404 if the team is unknown or has no goal data in the
            range, 400 if ``start_year`` is after ``end_year``, 500 on any
            unexpected failure.
    """
    if team not in teams:
        raise HTTPException(status_code=404, detail=f"Team {team} not found")
    if start_year > end_year:
        raise HTTPException(status_code=400, detail="start_year must be less than or equal to end_year")

    try:
        # Derive the years locally. Writing parsed dates back into the shared
        # DataFrames would change the type of their 'date' column for every
        # other endpoint after the first heatmap request.
        match_years = pd.to_datetime(matches_df['date']).dt.year
        goal_years = pd.to_datetime(goals_df['date']).dt.year

        team_matches = matches_df[
            ((matches_df['home_team'] == team) | (matches_df['away_team'] == team)) &
            match_years.between(start_year, end_year)
        ]
        team_goals = goals_df[
            (goals_df['team'] == team) &
            goal_years.between(start_year, end_year)
        ].dropna(subset=['x_coord', 'y_coord'])

        if team_goals.empty:
            raise HTTPException(status_code=404, detail=f"No goal data found for {team} in the specified year range")

        heatmap_data, xedges, yedges = np.histogram2d(
            team_goals['x_coord'],
            team_goals['y_coord'],
            bins=50,
            range=[[0, 100], [0, 100]]
        )
        # Scale to [0, 1] relative to the busiest cell.
        peak = heatmap_data.max()
        if peak > 0:
            heatmap_data = heatmap_data / peak

        fig = go.Figure(data=go.Heatmap(
            z=heatmap_data.T,
            x=xedges,
            y=yedges,
            colorscale='Viridis',
            colorbar=dict(title='Goal Density'),
            zmin=0,
            zmax=1
        ))

        # Pitch markings: outline, the two larger and two smaller boxes at each
        # end, the centre circle and the halfway line.
        pitch_line = dict(color="white", width=2)
        pitch_shapes = (
            ("rect", 0, 0, 100, 100),
            ("rect", 0, 20, 16, 80),
            ("rect", 84, 20, 100, 80),
            ("rect", 0, 40, 5, 60),
            ("rect", 95, 40, 100, 60),
            ("circle", 45, 45, 55, 55),
            ("line", 50, 0, 50, 100),
        )
        for shape_type, x0, y0, x1, y1 in pitch_shapes:
            fig.add_shape(type=shape_type, x0=x0, y0=y0, x1=x1, y1=y1, line=pitch_line)

        axis_ticks = [0, 20, 40, 60, 80, 100]
        fig.update_layout(
            title=f'Goal Distribution Heatmap for {team} ({start_year}-{end_year})',
            xaxis_title='X Position (Length of Pitch)',
            yaxis_title='Y Position (Width of Pitch)',
            xaxis=dict(range=[0, 100], tickvals=axis_ticks, showgrid=False),
            yaxis=dict(range=[0, 100], tickvals=axis_ticks, showgrid=False),
            template="plotly_dark",
            width=800,
            height=500,
            plot_bgcolor='rgba(0,128,0,0.3)',
            paper_bgcolor='rgba(0,0,0,0)'
        )

        response = {
            "team": team,
            "start_year": start_year,
            "end_year": end_year,
            "heatmap": fig.to_json(),
            "total_goals": len(team_goals),
            "average_goals_per_match": round(len(team_goals) / len(team_matches) if len(team_matches) > 0 else 0, 2)
        }

        if summarize:
            text = (f"Goal Distribution for {team} ({start_year}-{end_year})\n"
                    f"Total Goals: {len(team_goals)}\n"
                    f"Average Goals per Match: {response['average_goals_per_match']:.2f}")
            # The summarizer performs a blocking network call; run it in the
            # thread pool so the event loop keeps serving other requests.
            response["summary"] = await run_in_threadpool(summarize_with_groq, text)

        return response
    except HTTPException:
        # Deliberate HTTP errors (such as the 404 above) must reach the client
        # unchanged instead of being rewrapped as a 500.
        raise
    except Exception as e:
        logger.error("Error generating spatial heatmap for %s: %s", team, e)
        raise HTTPException(status_code=500, detail=f"Error generating heatmap: {str(e)}") from e
if __name__ == "__main__":
    # Direct execution: serve the app on every network interface, port 8000.
    bind_options = {"host": "0.0.0.0", "port": 8000}
    uvicorn.run(app, **bind_options)