"""
Graph Neural Network (GNN) for Football Prediction

Uses team relationships and match context as a graph structure.
Based on the blueprint for advanced deep learning models.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Dict, List, Tuple, Optional
import numpy as np
import logging

logger = logging.getLogger(__name__)

# torch_geometric is optional: without it, the graph encoder degrades
# gracefully to an MLP over the node features (see MatchGraphEncoder).
try:
    from torch_geometric.nn import GCNConv, GATConv, SAGEConv
    from torch_geometric.data import Data, Batch
    HAS_TORCH_GEOMETRIC = True
except ImportError:
    HAS_TORCH_GEOMETRIC = False
    logger.warning("torch_geometric not installed. GNN features limited.")


class TeamEmbedding(nn.Module):
    """Learnable team embeddings (a thin wrapper around nn.Embedding)."""

    def __init__(self, num_teams: int, embedding_dim: int = 64):
        super().__init__()
        self.embedding = nn.Embedding(num_teams, embedding_dim)

    def forward(self, team_ids: torch.Tensor) -> torch.Tensor:
        """Map integer team ids (any shape) to (..., embedding_dim) vectors."""
        return self.embedding(team_ids)


class MatchGraphEncoder(nn.Module):
    """
    Encode match context using graph neural networks.

    Nodes: Teams
    Edges: Recent matches between teams
    Node features: Team statistics
    Edge features: Match statistics

    The module always maps (..., node_features) -> (..., output_dim).
    When torch_geometric is unavailable, or forward() receives no
    edge_index, an MLP over the raw node features is used instead.
    """

    def __init__(
        self,
        node_features: int = 64,
        hidden_dim: int = 128,
        output_dim: int = 64,
        num_layers: int = 3,
        dropout: float = 0.2
    ):
        super().__init__()

        # MLP fallback; also used when forward() receives no edge_index.
        # FIX: this layer previously expected node_features * 2 inputs,
        # which mismatched the node_features-wide tensor callers actually
        # pass and crashed whenever torch_geometric was missing.
        self.fallback = nn.Sequential(
            nn.Linear(node_features, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, output_dim)
        )

        self.use_gnn = HAS_TORCH_GEOMETRIC
        if not self.use_gnn:
            return

        # Graph-attention stack: one input layer, (num_layers - 2) hidden
        # layers, one output layer. heads are averaged (concat=False) so
        # every layer keeps its nominal width. num_layers < 2 still
        # produces the minimum two layers (input + output).
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()

        self.convs.append(GATConv(node_features, hidden_dim, heads=4, concat=False))
        self.bns.append(nn.BatchNorm1d(hidden_dim))

        for _ in range(num_layers - 2):
            self.convs.append(GATConv(hidden_dim, hidden_dim, heads=4, concat=False))
            self.bns.append(nn.BatchNorm1d(hidden_dim))

        self.convs.append(GATConv(hidden_dim, output_dim, heads=1, concat=False))
        self.dropout = nn.Dropout(dropout)

    def forward(
        self,
        x: torch.Tensor,
        edge_index: torch.Tensor = None,
        batch: torch.Tensor = None
    ) -> torch.Tensor:
        """
        Encode node features.

        Args:
            x: (num_nodes, node_features) node feature matrix.
            edge_index: (2, num_edges) COO connectivity, or None.
            batch: optional torch_geometric batch vector (currently unused).

        Returns:
            (num_nodes, output_dim) encoded features.
        """
        # FIX: the no-edge path previously returned x unchanged, whose
        # width (node_features) only matched output_dim by coincidence of
        # the default hyper-parameters; route it through the MLP instead.
        if not self.use_gnn or edge_index is None:
            return self.fallback(x)

        for conv, bn in zip(self.convs[:-1], self.bns):
            x = conv(x, edge_index)
            x = bn(x)
            x = F.elu(x)
            x = self.dropout(x)
        return self.convs[-1](x, edge_index)


class GraphFootballPredictor(nn.Module):
    """
    Complete GNN-based football prediction model.

    Architecture:
        1. Team embeddings
        2. Graph encoder for league context
        3. Match predictor head(s): 1X2 result, per-side goal
           distributions (0-7), BTTS, and Over 2.5.
    """

    def __init__(
        self,
        num_teams: int = 1000,
        team_embed_dim: int = 64,
        feature_dim: int = 128,
        hidden_dim: int = 256,
        num_gnn_layers: int = 3,
        dropout: float = 0.3
    ):
        super().__init__()

        # Team embeddings
        self.team_embedding = TeamEmbedding(num_teams, team_embed_dim)

        # Match-feature encoder; its output is fused into the match
        # representation below.
        self.feature_encoder = nn.Sequential(
            nn.Linear(feature_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, team_embed_dim)
        )

        # Graph encoder over the concatenated (home, away) embedding pair.
        self.graph_encoder = MatchGraphEncoder(
            node_features=team_embed_dim * 2,
            hidden_dim=hidden_dim,
            output_dim=hidden_dim // 2,
            num_layers=num_gnn_layers,
            dropout=dropout
        )

        # Match representation = graph output + home + away embeddings
        # + encoded match features.
        # FIX: encoded match features were previously computed in forward()
        # but never used, so match statistics had no effect on predictions.
        match_dim = hidden_dim // 2 + team_embed_dim * 3

        # Prediction heads
        # 1X2 result (home win / draw / away win)
        self.result_head = nn.Sequential(
            nn.Linear(match_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 3)
        )
        # Goal-count distributions over 0-7 goals per side
        self.home_goals_head = nn.Sequential(
            nn.Linear(match_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 8)
        )
        self.away_goals_head = nn.Sequential(
            nn.Linear(match_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 8)
        )
        # Both teams to score (no / yes)
        self.btts_head = nn.Sequential(
            nn.Linear(match_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 2)
        )
        # Over 2.5 total goals (no / yes)
        self.over25_head = nn.Sequential(
            nn.Linear(match_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 2)
        )

    def forward(
        self,
        home_team_id: torch.Tensor,
        away_team_id: torch.Tensor,
        match_features: torch.Tensor,
        edge_index: torch.Tensor = None,
        return_embeddings: bool = False
    ) -> Dict[str, torch.Tensor]:
        """
        Predict match outcome distributions.

        Args:
            home_team_id, away_team_id: (batch,) integer team ids.
            match_features: (batch, feature_dim) match statistics.
            edge_index: optional graph connectivity for league context.
            return_embeddings: also return intermediate representations.

        Returns:
            Dict of softmax probability tensors: 'result' (batch, 3),
            'home_goals'/'away_goals' (batch, 8), 'btts'/'over_25'
            (batch, 2); plus embeddings if requested.
        """
        home_embed = self.team_embedding(home_team_id)
        away_embed = self.team_embedding(away_team_id)

        encoded_features = self.feature_encoder(match_features)

        # Pair embedding feeds the graph encoder as the node feature.
        combined = torch.cat([home_embed, away_embed], dim=-1)
        graph_out = self.graph_encoder(combined, edge_index)

        # FIX: include encoded match features (previously dead code).
        match_repr = torch.cat(
            [graph_out, home_embed, away_embed, encoded_features], dim=-1
        )

        output = {
            'result': F.softmax(self.result_head(match_repr), dim=-1),
            'home_goals': F.softmax(self.home_goals_head(match_repr), dim=-1),
            'away_goals': F.softmax(self.away_goals_head(match_repr), dim=-1),
            'btts': F.softmax(self.btts_head(match_repr), dim=-1),
            'over_25': F.softmax(self.over25_head(match_repr), dim=-1)
        }

        if return_embeddings:
            output['home_embedding'] = home_embed
            output['away_embedding'] = away_embed
            output['match_representation'] = match_repr

        return output

    def predict(self, home_team_id: int, away_team_id: int,
                features: np.ndarray) -> Dict:
        """
        Generate predictions for a single match.

        Args:
            home_team_id, away_team_id: integer team ids.
            features: 1-D array of length feature_dim.

        Returns:
            Dict with '1X2' probabilities, the top-10 correct scores
            (renormalized), 'btts_yes' and 'over_25' probabilities.
        """
        self.eval()
        with torch.no_grad():
            home_id = torch.tensor([home_team_id])
            away_id = torch.tensor([away_team_id])
            feat_tensor = torch.FloatTensor(features).unsqueeze(0)

            output = self.forward(home_id, away_id, feat_tensor)

            # Correct-score grid assumes home/away goal counts are
            # independent given the match representation.
            home_probs = output['home_goals'].squeeze().cpu().numpy()
            away_probs = output['away_goals'].squeeze().cpu().numpy()
            correct_scores = {
                f'{h}-{a}': float(home_probs[h] * away_probs[a])
                for h in range(8) for a in range(8)
            }
            total = sum(correct_scores.values())
            if total > 0:
                correct_scores = {k: v / total for k, v in correct_scores.items()}

            return {
                'result': {
                    'home_win': float(output['result'][0, 0]),
                    'draw': float(output['result'][0, 1]),
                    'away_win': float(output['result'][0, 2])
                },
                'correct_scores': dict(sorted(
                    correct_scores.items(), key=lambda x: x[1], reverse=True
                )[:10]),
                'btts_yes': float(output['btts'][0, 1]),
                'over_25': float(output['over_25'][0, 1])
            }


class TransformerPredictor(nn.Module):
    """
    Transformer-based model for sequence prediction.

    Processes each team's recent match history (up to 50 matches) and
    predicts the 1X2 result plus per-side goal distributions.
    """

    def __init__(
        self,
        feature_dim: int = 128,
        d_model: int = 256,
        nhead: int = 8,
        num_layers: int = 4,
        dropout: float = 0.2
    ):
        super().__init__()

        self.input_proj = nn.Linear(feature_dim, d_model)

        # Learned positional encoding; fixes the maximum sequence length
        # at 50 matches.
        self.pos_encoding = nn.Parameter(torch.randn(1, 50, d_model) * 0.1)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.result_head = nn.Sequential(
            nn.Linear(d_model * 2, d_model),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model, 3)
        )
        self.goals_head = nn.Sequential(
            nn.Linear(d_model * 2, d_model // 2),
            nn.ReLU(),
            nn.Linear(d_model // 2, 16)  # 8 home + 8 away
        )

    def forward(
        self,
        home_sequence: torch.Tensor,  # (batch, seq_len, feature_dim)
        away_sequence: torch.Tensor
    ) -> Dict[str, torch.Tensor]:
        """
        Encode both teams' histories and predict outcome distributions.

        Raises:
            ValueError: if seq_len exceeds the positional-encoding length.
        """
        batch_size, seq_len, _ = home_sequence.shape

        # FIX: longer sequences previously failed with an opaque
        # broadcasting error; fail fast with a clear message instead.
        max_len = self.pos_encoding.size(1)
        if seq_len > max_len:
            raise ValueError(
                f"Sequence length {seq_len} exceeds maximum supported "
                f"length {max_len}"
            )

        home_proj = self.input_proj(home_sequence) + self.pos_encoding[:, :seq_len, :]
        away_proj = self.input_proj(away_sequence) + self.pos_encoding[:, :seq_len, :]

        home_encoded = self.transformer(home_proj)
        away_encoded = self.transformer(away_proj)

        # Mean-pool over the sequence dimension, then fuse both teams.
        combined = torch.cat(
            [home_encoded.mean(dim=1), away_encoded.mean(dim=1)], dim=-1
        )

        result = F.softmax(self.result_head(combined), dim=-1)
        goals = self.goals_head(combined)

        return {
            'result': result,
            'home_goals': F.softmax(goals[:, :8], dim=-1),
            'away_goals': F.softmax(goals[:, 8:], dim=-1)
        }


# Factory functions
def get_gnn_model(num_teams: int = 1000, feature_dim: int = 128) -> GraphFootballPredictor:
    """Get GNN model instance."""
    return GraphFootballPredictor(num_teams=num_teams, feature_dim=feature_dim)


def get_transformer_model(feature_dim: int = 128) -> TransformerPredictor:
    """Get Transformer model instance."""
    return TransformerPredictor(feature_dim=feature_dim)