Spaces:
Runtime error
Runtime error
| """ | |
| Attention Models | |
| Various attention mechanisms for football prediction. | |
| Part of the complete blueprint implementation. | |
| """ | |
| import numpy as np | |
| from typing import Dict, Optional, Tuple | |
| import logging | |
| logger = logging.getLogger(__name__) | |
| try: | |
| import torch | |
| import torch.nn as nn | |
| import torch.nn.functional as F | |
| TORCH_AVAILABLE = True | |
| except ImportError: | |
| TORCH_AVAILABLE = False | |
| if TORCH_AVAILABLE: | |
| class MultiHeadSelfAttention(nn.Module): | |
| """Multi-head self-attention for match features.""" | |
| def __init__( | |
| self, | |
| embed_dim: int = 64, | |
| num_heads: int = 4, | |
| dropout: float = 0.1 | |
| ): | |
| super().__init__() | |
| self.attention = nn.MultiheadAttention( | |
| embed_dim, num_heads, dropout=dropout, batch_first=True | |
| ) | |
| self.norm = nn.LayerNorm(embed_dim) | |
| def forward( | |
| self, | |
| x: torch.Tensor, | |
| mask: torch.Tensor = None | |
| ) -> Tuple[torch.Tensor, torch.Tensor]: | |
| attn_out, attn_weights = self.attention(x, x, x, key_padding_mask=mask) | |
| return self.norm(x + attn_out), attn_weights | |
| class CrossAttention(nn.Module): | |
| """Cross-attention between home and away team features.""" | |
| def __init__(self, embed_dim: int = 64, num_heads: int = 4): | |
| super().__init__() | |
| self.cross_attn = nn.MultiheadAttention( | |
| embed_dim, num_heads, batch_first=True | |
| ) | |
| self.norm = nn.LayerNorm(embed_dim) | |
| def forward( | |
| self, | |
| query: torch.Tensor, | |
| key_value: torch.Tensor | |
| ) -> torch.Tensor: | |
| attn_out, _ = self.cross_attn(query, key_value, key_value) | |
| return self.norm(query + attn_out) | |
| class AttentionModel(nn.Module): | |
| """Full attention-based prediction model.""" | |
| def __init__( | |
| self, | |
| input_dim: int = 32, | |
| embed_dim: int = 64, | |
| num_heads: int = 4, | |
| num_layers: int = 2, | |
| output_dim: int = 3, | |
| dropout: float = 0.2 | |
| ): | |
| super().__init__() | |
| self.input_proj = nn.Linear(input_dim, embed_dim) | |
| # Self-attention layers | |
| self.self_attention = nn.ModuleList([ | |
| MultiHeadSelfAttention(embed_dim, num_heads, dropout) | |
| for _ in range(num_layers) | |
| ]) | |
| # Cross-attention for home vs away | |
| self.cross_attention = CrossAttention(embed_dim, num_heads) | |
| # Output | |
| self.output = nn.Sequential( | |
| nn.Linear(embed_dim * 2, embed_dim), | |
| nn.ReLU(), | |
| nn.Dropout(dropout), | |
| nn.Linear(embed_dim, output_dim) | |
| ) | |
| def forward( | |
| self, | |
| home_features: torch.Tensor, | |
| away_features: torch.Tensor | |
| ) -> Tuple[torch.Tensor, Dict]: | |
| """ | |
| Forward pass with attention. | |
| Args: | |
| home_features: (batch, seq_len, input_dim) | |
| away_features: (batch, seq_len, input_dim) | |
| """ | |
| # Project inputs | |
| home = self.input_proj(home_features) | |
| away = self.input_proj(away_features) | |
| attention_weights = {} | |
| # Self-attention | |
| for i, layer in enumerate(self.self_attention): | |
| home, home_attn = layer(home) | |
| away, away_attn = layer(away) | |
| attention_weights[f'self_layer_{i}'] = { | |
| 'home': home_attn.detach(), | |
| 'away': away_attn.detach() | |
| } | |
| # Cross-attention | |
| home_cross = self.cross_attention(home, away) | |
| away_cross = self.cross_attention(away, home) | |
| # Pool and combine | |
| home_pooled = home_cross.mean(dim=1) | |
| away_pooled = away_cross.mean(dim=1) | |
| combined = torch.cat([home_pooled, away_pooled], dim=-1) | |
| output = self.output(combined) | |
| return output, attention_weights | |
| class AttentionPredictor: | |
| """Wrapper for attention-based prediction.""" | |
| def __init__( | |
| self, | |
| input_dim: int = 32, | |
| embed_dim: int = 64, | |
| seq_len: int = 10 | |
| ): | |
| self.input_dim = input_dim | |
| self.embed_dim = embed_dim | |
| self.seq_len = seq_len | |
| self.model = None | |
| self.device = 'cuda' if TORCH_AVAILABLE and torch.cuda.is_available() else 'cpu' | |
| if TORCH_AVAILABLE: | |
| self.model = AttentionModel(input_dim, embed_dim).to(self.device) | |
| def encode_team_history(self, matches: list) -> np.ndarray: | |
| """Encode team match history.""" | |
| sequence = np.zeros((self.seq_len, self.input_dim)) | |
| for i, match in enumerate(matches[-self.seq_len:]): | |
| idx = self.seq_len - len(matches[-self.seq_len:]) + i | |
| sequence[idx, 0] = match.get('goals_for', 0) | |
| sequence[idx, 1] = match.get('goals_against', 0) | |
| sequence[idx, 2] = match.get('xg', 0) | |
| sequence[idx, 3] = match.get('possession', 50) / 100 | |
| sequence[idx, 4] = match.get('shots', 0) / 20 | |
| return sequence | |
| def predict( | |
| self, | |
| home_history: list, | |
| away_history: list | |
| ) -> Dict: | |
| """Predict match with attention weights.""" | |
| if not TORCH_AVAILABLE or self.model is None: | |
| return {'home': 0.4, 'draw': 0.25, 'away': 0.35} | |
| home_enc = self.encode_team_history(home_history) | |
| away_enc = self.encode_team_history(away_history) | |
| self.model.eval() | |
| with torch.no_grad(): | |
| home_t = torch.tensor(home_enc, dtype=torch.float32).unsqueeze(0).to(self.device) | |
| away_t = torch.tensor(away_enc, dtype=torch.float32).unsqueeze(0).to(self.device) | |
| logits, attn_weights = self.model(home_t, away_t) | |
| probs = torch.softmax(logits, dim=-1).cpu().numpy()[0] | |
| return { | |
| 'home': float(probs[0]), | |
| 'draw': float(probs[1]), | |
| 'away': float(probs[2]), | |
| 'model': 'attention', | |
| 'attention_available': True | |
| } | |
| def get_attention_explanation( | |
| self, | |
| home_history: list, | |
| away_history: list | |
| ) -> Dict: | |
| """Get attention weights for interpretation.""" | |
| if not TORCH_AVAILABLE or self.model is None: | |
| return {} | |
| home_enc = self.encode_team_history(home_history) | |
| away_enc = self.encode_team_history(away_history) | |
| self.model.eval() | |
| with torch.no_grad(): | |
| home_t = torch.tensor(home_enc, dtype=torch.float32).unsqueeze(0).to(self.device) | |
| away_t = torch.tensor(away_enc, dtype=torch.float32).unsqueeze(0).to(self.device) | |
| _, attn_weights = self.model(home_t, away_t) | |
| # Extract attention patterns | |
| explanation = { | |
| 'most_important_home_match': 0, | |
| 'most_important_away_match': 0, | |
| } | |
| if 'self_layer_0' in attn_weights: | |
| home_attn = attn_weights['self_layer_0']['home'].cpu().numpy() | |
| away_attn = attn_weights['self_layer_0']['away'].cpu().numpy() | |
| explanation['most_important_home_match'] = int(np.argmax(home_attn.mean(axis=(0, 1)))) | |
| explanation['most_important_away_match'] = int(np.argmax(away_attn.mean(axis=(0, 1)))) | |
| return explanation | |
| _model: Optional[AttentionPredictor] = None | |
| def get_model() -> AttentionPredictor: | |
| global _model | |
| if _model is None: | |
| _model = AttentionPredictor() | |
| return _model | |