nananie143's picture
feat: Complete blueprint implementation with 66+ modules
90bacf7 verified
"""
Attention Models
Various attention mechanisms for football prediction.
Part of the complete blueprint implementation.
"""
import numpy as np
from typing import Dict, Optional, Tuple
import logging
logger = logging.getLogger(__name__)
try:
import torch
import torch.nn as nn
import torch.nn.functional as F
TORCH_AVAILABLE = True
except ImportError:
TORCH_AVAILABLE = False
if TORCH_AVAILABLE:
class MultiHeadSelfAttention(nn.Module):
    """Self-attention block with a residual connection and layer norm.

    A batch-first ``nn.MultiheadAttention`` lets the input sequence attend
    to itself; the attended values are added back onto the input and the
    sum is passed through ``nn.LayerNorm``.
    """

    def __init__(
        self,
        embed_dim: int = 64,
        num_heads: int = 4,
        dropout: float = 0.1
    ):
        """Create the attention and normalisation sub-modules.

        Args:
            embed_dim: Size of the feature dimension of the input.
            num_heads: Number of attention heads.
            dropout: Dropout probability handed to ``nn.MultiheadAttention``.
        """
        super().__init__()
        self.attention = nn.MultiheadAttention(
            embed_dim=embed_dim,
            num_heads=num_heads,
            dropout=dropout,
            batch_first=True,
        )
        self.norm = nn.LayerNorm(embed_dim)

    def forward(
        self,
        x: torch.Tensor,
        mask: torch.Tensor = None
    ) -> Tuple[torch.Tensor, torch.Tensor]:
        """Attend ``x`` to itself and return the normalised residual.

        Args:
            x: Input sequence; batch-first because the attention module is
                built with ``batch_first=True``.
            mask: Optional tensor forwarded as ``key_padding_mask``.

        Returns:
            A pair ``(normalised_output, attention_weights)`` where the
            weights are exactly what ``nn.MultiheadAttention`` returns.
        """
        attended, weights = self.attention(
            query=x, key=x, value=x, key_padding_mask=mask
        )
        # Residual connection first, normalisation second (post-norm).
        residual = x + attended
        return self.norm(residual), weights
class CrossAttention(nn.Module):
    """Attention from one sequence onto another, with residual + layer norm.

    Used to let one team's features (the query) attend to the other team's
    features (the keys and values).
    """

    def __init__(self, embed_dim: int = 64, num_heads: int = 4):
        """Create the batch-first attention module and the layer norm.

        Args:
            embed_dim: Size of the feature dimension of both sequences.
            num_heads: Number of attention heads.
        """
        super().__init__()
        self.cross_attn = nn.MultiheadAttention(
            embed_dim=embed_dim,
            num_heads=num_heads,
            batch_first=True,
        )
        self.norm = nn.LayerNorm(embed_dim)

    def forward(
        self,
        query: torch.Tensor,
        key_value: torch.Tensor
    ) -> torch.Tensor:
        """Return ``query`` enriched with information from ``key_value``.

        Args:
            query: Sequence that asks the questions; also the residual input.
            key_value: Sequence used as both keys and values.

        Returns:
            Layer-normalised ``query + attention(query, key_value)``; the
            attention weights are discarded.
        """
        attended = self.cross_attn(query, key_value, key_value)[0]
        residual = query + attended
        return self.norm(residual)
class AttentionModel(nn.Module):
    """Attention-based match outcome model.

    Pipeline: a shared linear projection, a stack of shared self-attention
    layers applied to each team's sequence, one shared cross-attention
    module applied in both directions, mean pooling over the sequence axis,
    and a small feed-forward head on the concatenated team vectors.
    """

    def __init__(
        self,
        input_dim: int = 32,
        embed_dim: int = 64,
        num_heads: int = 4,
        num_layers: int = 2,
        output_dim: int = 3,
        dropout: float = 0.2
    ):
        """Build all sub-modules.

        Args:
            input_dim: Number of features per sequence step.
            embed_dim: Width of the internal representation.
            num_heads: Attention heads in every attention module.
            num_layers: Number of stacked self-attention layers.
            output_dim: Number of output logits.
            dropout: Dropout probability for the self-attention layers and
                the output head.
        """
        super().__init__()
        self.input_proj = nn.Linear(input_dim, embed_dim)

        # Stack of self-attention layers, shared between home and away.
        encoder_layers = [
            MultiHeadSelfAttention(embed_dim, num_heads, dropout)
            for _ in range(num_layers)
        ]
        self.self_attention = nn.ModuleList(encoder_layers)

        # One cross-attention module serves both directions.
        self.cross_attention = CrossAttention(embed_dim, num_heads)

        # Head consumes the two pooled team vectors side by side.
        self.output = nn.Sequential(
            nn.Linear(embed_dim * 2, embed_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(embed_dim, output_dim),
        )

    def forward(
        self,
        home_features: torch.Tensor,
        away_features: torch.Tensor
    ) -> Tuple[torch.Tensor, Dict]:
        """Run both team sequences through the model.

        Args:
            home_features: (batch, seq_len, input_dim)
            away_features: (batch, seq_len, input_dim)

        Returns:
            ``(logits, attention_weights)``. ``attention_weights`` maps
            ``'self_layer_<i>'`` to a dict with detached ``'home'`` and
            ``'away'`` self-attention weights of layer ``i``.
        """
        home_seq = self.input_proj(home_features)
        away_seq = self.input_proj(away_features)

        attention_weights = {}
        for depth, block in enumerate(self.self_attention):
            home_seq, home_w = block(home_seq)
            away_seq, away_w = block(away_seq)
            attention_weights[f'self_layer_{depth}'] = dict(
                home=home_w.detach(),
                away=away_w.detach(),
            )

        # Each team queries the other team's sequence.
        home_ctx = self.cross_attention(home_seq, away_seq)
        away_ctx = self.cross_attention(away_seq, home_seq)

        # Average over the sequence axis, then join the two team vectors.
        pooled = [ctx.mean(dim=1) for ctx in (home_ctx, away_ctx)]
        logits = self.output(torch.cat(pooled, dim=-1))
        return logits, attention_weights
class AttentionPredictor:
    """Wrapper for attention-based prediction.

    Builds an ``AttentionModel`` when PyTorch is importable and offers
    dict-in / dict-out helpers around it. Without PyTorch, ``predict``
    returns fixed fallback probabilities and
    ``get_attention_explanation`` returns an empty dict.
    """

    def __init__(
        self,
        input_dim: int = 32,
        embed_dim: int = 64,
        seq_len: int = 10
    ):
        """Store the configuration and build the model if possible.

        Args:
            input_dim: Width of each encoded match row. The encoder writes
                columns 0-4, so this must be at least 5.
            embed_dim: Embedding width passed to ``AttentionModel``.
            seq_len: Number of most recent matches kept per team.
        """
        self.input_dim = input_dim
        self.embed_dim = embed_dim
        self.seq_len = seq_len
        self.model = None
        self.device = 'cuda' if TORCH_AVAILABLE and torch.cuda.is_available() else 'cpu'
        if TORCH_AVAILABLE:
            self.model = AttentionModel(input_dim, embed_dim).to(self.device)

    @staticmethod
    def _stat(match: dict, key: str, default: float) -> float:
        """Return ``match[key]``, or ``default`` when missing or ``None``."""
        value = match.get(key)
        return default if value is None else value

    def encode_team_history(self, matches: list) -> np.ndarray:
        """Encode a team's match history as a fixed-size array.

        The last ``seq_len`` matches are right-aligned in the output, so
        shorter histories leave all-zero rows at the top. Columns 0-4 hold
        goals for, goals against, xG, possession / 100 and shots / 20; the
        remaining columns stay zero.

        Args:
            matches: Match dicts, oldest first. Stats that are missing or
                ``None`` fall back to their defaults (0, or 50 for
                possession) instead of raising.

        Returns:
            Array of shape ``(seq_len, input_dim)``.
        """
        sequence = np.zeros((self.seq_len, self.input_dim))
        # Guard seq_len == 0 explicitly: ``matches[-0:]`` is the WHOLE list,
        # which would then be written into a zero-row array.
        if not matches or self.seq_len <= 0:
            return sequence

        recent = matches[-self.seq_len:]
        offset = self.seq_len - len(recent)
        for row, match in enumerate(recent, start=offset):
            sequence[row, 0] = self._stat(match, 'goals_for', 0)
            sequence[row, 1] = self._stat(match, 'goals_against', 0)
            sequence[row, 2] = self._stat(match, 'xg', 0)
            sequence[row, 3] = self._stat(match, 'possession', 50) / 100
            sequence[row, 4] = self._stat(match, 'shots', 0) / 20
        return sequence

    def _run_model(
        self,
        home_history: list,
        away_history: list
    ) -> "Tuple[torch.Tensor, Dict]":
        """Encode both histories and run the model in eval mode, no grad.

        Callers must have checked that the model exists. The return
        annotation is a string so the class still imports without torch.
        """
        home_enc = self.encode_team_history(home_history)
        away_enc = self.encode_team_history(away_history)
        self.model.eval()
        with torch.no_grad():
            # unsqueeze(0) adds the batch axis the model expects.
            home_t = torch.tensor(home_enc, dtype=torch.float32).unsqueeze(0).to(self.device)
            away_t = torch.tensor(away_enc, dtype=torch.float32).unsqueeze(0).to(self.device)
            return self.model(home_t, away_t)

    def predict(
        self,
        home_history: list,
        away_history: list
    ) -> Dict:
        """Predict home / draw / away probabilities for one match.

        Args:
            home_history: Match dicts for the home team, oldest first.
            away_history: Match dicts for the away team, oldest first.

        Returns:
            Dict with ``'home'``, ``'draw'`` and ``'away'`` probabilities.
            When the model ran it also carries ``'model'`` and
            ``'attention_available'``; the no-torch fallback has only the
            three fixed probabilities.
        """
        if not TORCH_AVAILABLE or self.model is None:
            return {'home': 0.4, 'draw': 0.25, 'away': 0.35}

        logits, _ = self._run_model(home_history, away_history)
        probs = torch.softmax(logits, dim=-1).cpu().numpy()[0]
        return {
            'home': float(probs[0]),
            'draw': float(probs[1]),
            'away': float(probs[2]),
            'model': 'attention',
            'attention_available': True
        }

    def get_attention_explanation(
        self,
        home_history: list,
        away_history: list
    ) -> Dict:
        """Report which sequence slot received the most attention per team.

        Args:
            home_history: Match dicts for the home team, oldest first.
            away_history: Match dicts for the away team, oldest first.

        Returns:
            ``{}`` when no model is available. Otherwise a dict with
            ``'most_important_home_match'`` and
            ``'most_important_away_match'``. Each value is an index into
            the encoded ``seq_len`` window (which is zero-padded at the
            front), not into the original history list.
        """
        if not TORCH_AVAILABLE or self.model is None:
            return {}

        _, attn_weights = self._run_model(home_history, away_history)

        explanation = {
            'most_important_home_match': 0,
            'most_important_away_match': 0,
        }
        first_layer = attn_weights.get('self_layer_0')
        if first_layer is not None:
            for side in ('home', 'away'):
                weights = first_layer[side].cpu().numpy()
                # Average over the first two axes (presumably batch and
                # query position, given nn.MultiheadAttention's default
                # head-averaged weights) to get one score per key slot.
                per_slot = weights.mean(axis=(0, 1))
                explanation[f'most_important_{side}_match'] = int(np.argmax(per_slot))
        return explanation
# Module-wide predictor instance, created lazily by get_model().
_model: Optional[AttentionPredictor] = None


def get_model() -> AttentionPredictor:
    """Return the shared ``AttentionPredictor``, building it on first use."""
    global _model
    if _model is not None:
        return _model
    _model = AttentionPredictor()
    return _model