# Upload metadata (Hugging Face): src/models/deep_learning/graph_transformer.py
# uploaded by nananie143 via huggingface_hub, commit 1ac8cff (verified).
"""
Graph Neural Network (GNN) for Football Prediction
Uses team relationships and match context as a graph structure.
Based on the blueprint for advanced deep learning models.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Dict, List, Tuple, Optional
import numpy as np
import logging
logger = logging.getLogger(__name__)
# Check for torch_geometric
try:
from torch_geometric.nn import GCNConv, GATConv, SAGEConv
from torch_geometric.data import Data, Batch
HAS_TORCH_GEOMETRIC = True
except ImportError:
HAS_TORCH_GEOMETRIC = False
logger.warning("torch_geometric not installed. GNN features limited.")
class TeamEmbedding(nn.Module):
    """Learnable per-team embedding lookup.

    Wraps ``nn.Embedding`` so each team id maps to a trainable vector.
    """

    def __init__(self, num_teams: int, embedding_dim: int = 64):
        super().__init__()
        # One trainable vector of size `embedding_dim` per team id.
        self.embedding = nn.Embedding(num_teams, embedding_dim)

    def forward(self, team_ids: torch.Tensor) -> torch.Tensor:
        """Map integer team ids to their embedding vectors."""
        return self.embedding(team_ids)
class MatchGraphEncoder(nn.Module):
    """
    Encode match context using graph neural networks.

    Nodes: Teams
    Edges: Recent matches between teams
    Node features: Team statistics
    Edge features: Match statistics

    Falls back to a plain MLP over the node features whenever graph
    convolutions cannot run (torch_geometric not installed, or
    ``forward`` called without an ``edge_index``), so the output width
    is always ``output_dim``.
    """

    def __init__(
        self,
        node_features: int = 64,
        hidden_dim: int = 128,
        output_dim: int = 64,
        num_layers: int = 3,
        dropout: float = 0.2
    ):
        """
        Args:
            node_features: Width of the per-node input feature vectors.
            hidden_dim: Width of intermediate GNN / MLP layers.
            output_dim: Width of the returned node encodings.
            num_layers: Total number of GAT layers (first + hidden + output).
            dropout: Dropout probability between layers.
        """
        super().__init__()
        # MLP fallback.
        # BUGFIX: the input width must be `node_features` — the width of
        # `x` in forward() and of the GNN's own first layer — not
        # `node_features * 2`, which raised a shape-mismatch error on the
        # fallback path. The fallback is now always built so that the
        # edge-less path also yields `output_dim`-wide encodings.
        self.fallback = nn.Sequential(
            nn.Linear(node_features, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, output_dim)
        )
        if not HAS_TORCH_GEOMETRIC:
            self.use_gnn = False
            return
        self.use_gnn = True
        # Graph attention layers with batch norm between them.
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()
        # First layer: 4 attention heads, averaged (concat=False) so the
        # output width stays `hidden_dim`.
        self.convs.append(GATConv(node_features, hidden_dim, heads=4, concat=False))
        self.bns.append(nn.BatchNorm1d(hidden_dim))
        # Hidden layers.
        for _ in range(num_layers - 2):
            self.convs.append(GATConv(hidden_dim, hidden_dim, heads=4, concat=False))
            self.bns.append(nn.BatchNorm1d(hidden_dim))
        # Output layer: single head down to `output_dim`.
        self.convs.append(GATConv(hidden_dim, output_dim, heads=1, concat=False))
        self.dropout = nn.Dropout(dropout)

    def forward(
        self,
        x: torch.Tensor,
        edge_index: torch.Tensor = None,
        batch: torch.Tensor = None
    ) -> torch.Tensor:
        """
        Encode node features.

        Args:
            x: (num_nodes, node_features) node feature matrix.
            edge_index: Optional (2, num_edges) graph connectivity.
            batch: Unused; kept for API compatibility with PyG-style calls.

        Returns:
            (num_nodes, output_dim) node encodings.
        """
        if not self.use_gnn or edge_index is None:
            # No graph available: use the MLP so the output width still
            # matches the GNN path (`output_dim`).
            return self.fallback(x)
        for i, conv in enumerate(self.convs[:-1]):
            x = conv(x, edge_index)
            x = self.bns[i](x)
            x = F.elu(x)
            x = self.dropout(x)
        x = self.convs[-1](x, edge_index)
        return x
class GraphFootballPredictor(nn.Module):
    """
    Complete GNN-based football prediction model.

    Architecture:
        1. Team embeddings
        2. MLP encoder for raw match features
        3. Graph encoder for league context
        4. Independent prediction heads (1X2 result, per-team goals,
           BTTS, over 2.5 goals)
    """

    def __init__(
        self,
        num_teams: int = 1000,
        team_embed_dim: int = 64,
        feature_dim: int = 128,
        hidden_dim: int = 256,
        num_gnn_layers: int = 3,
        dropout: float = 0.3
    ):
        """
        Args:
            num_teams: Size of the team-id embedding table.
            team_embed_dim: Width of each team embedding.
            feature_dim: Width of the raw match-feature vector.
            hidden_dim: Width of hidden layers in encoders and heads.
            num_gnn_layers: Depth of the graph encoder.
            dropout: Dropout probability used throughout.
        """
        super().__init__()
        # Team embeddings.
        self.team_embedding = TeamEmbedding(num_teams, team_embed_dim)
        # Encode raw match statistics down to the embedding width.
        self.feature_encoder = nn.Sequential(
            nn.Linear(feature_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, team_embed_dim)
        )
        # Graph encoder over the concatenated (home, away) embeddings.
        self.graph_encoder = MatchGraphEncoder(
            node_features=team_embed_dim * 2,
            hidden_dim=hidden_dim,
            output_dim=hidden_dim // 2,
            num_layers=num_gnn_layers,
            dropout=dropout
        )
        # Match representation: graph output + both team embeddings +
        # encoded match features.
        # BUGFIX: the original model computed the encoded match features
        # in forward() but never used them, so `match_features` had no
        # effect on any prediction. They are now concatenated into the
        # match representation (constructor interface unchanged).
        match_dim = hidden_dim // 2 + team_embed_dim * 2 + team_embed_dim
        # Prediction heads.
        # 1X2 result: [home win, draw, away win].
        self.result_head = nn.Sequential(
            nn.Linear(match_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 3)
        )
        # Goal-count distributions over 0-7 goals per side.
        self.home_goals_head = nn.Sequential(
            nn.Linear(match_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 8)  # 0-7 goals
        )
        self.away_goals_head = nn.Sequential(
            nn.Linear(match_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 8)
        )
        # Both teams to score: [no, yes].
        self.btts_head = nn.Sequential(
            nn.Linear(match_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 2)
        )
        # Over 2.5 goals: [under, over].
        self.over25_head = nn.Sequential(
            nn.Linear(match_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 2)
        )

    def forward(
        self,
        home_team_id: torch.Tensor,
        away_team_id: torch.Tensor,
        match_features: torch.Tensor,
        edge_index: torch.Tensor = None,
        return_embeddings: bool = False
    ) -> Dict[str, torch.Tensor]:
        """
        Predict market probabilities for a batch of matches.

        Args:
            home_team_id: (batch,) integer team ids.
            away_team_id: (batch,) integer team ids.
            match_features: (batch, feature_dim) raw match statistics.
            edge_index: Optional graph connectivity for the GNN encoder.
            return_embeddings: Also return intermediate representations.

        Returns:
            Dict of probability tensors: 'result' (batch, 3),
            'home_goals' / 'away_goals' (batch, 8),
            'btts' / 'over_25' (batch, 2); plus embeddings if requested.
        """
        # Team embeddings.
        home_embed = self.team_embedding(home_team_id)
        away_embed = self.team_embedding(away_team_id)
        # Encode raw match features (now actually used — see __init__).
        encoded_features = self.feature_encoder(match_features)
        # Node features for the graph encoder.
        combined = torch.cat([home_embed, away_embed], dim=-1)
        # Graph encoding (falls back to MLP path when no edge_index).
        if edge_index is not None:
            graph_out = self.graph_encoder(combined, edge_index)
        else:
            graph_out = self.graph_encoder(combined)
        # Match representation fed to every head.
        match_repr = torch.cat([
            graph_out,
            home_embed,
            away_embed,
            encoded_features
        ], dim=-1)
        # Per-market probability distributions.
        result = F.softmax(self.result_head(match_repr), dim=-1)
        home_goals = F.softmax(self.home_goals_head(match_repr), dim=-1)
        away_goals = F.softmax(self.away_goals_head(match_repr), dim=-1)
        btts = F.softmax(self.btts_head(match_repr), dim=-1)
        over25 = F.softmax(self.over25_head(match_repr), dim=-1)
        output = {
            'result': result,
            'home_goals': home_goals,
            'away_goals': away_goals,
            'btts': btts,
            'over_25': over25
        }
        if return_embeddings:
            output['home_embedding'] = home_embed
            output['away_embedding'] = away_embed
            output['match_representation'] = match_repr
        return output

    def predict(self, home_team_id: int, away_team_id: int, features: np.ndarray) -> Dict:
        """
        Generate predictions for a single match.

        Args:
            home_team_id: Integer id of the home team.
            away_team_id: Integer id of the away team.
            features: 1-D array of match features (length feature_dim).

        Returns:
            Dict with 'result' probabilities, top-10 'correct_scores',
            'btts_yes' and 'over_25' probabilities, all as floats.
        """
        self.eval()
        with torch.no_grad():
            # NOTE(review): tensors are built on CPU with no device
            # handling — verify callers run this model on CPU.
            home_id = torch.tensor([home_team_id])
            away_id = torch.tensor([away_team_id])
            feat_tensor = torch.FloatTensor(features).unsqueeze(0)
            output = self.forward(home_id, away_id, feat_tensor)
            # Correct-score grid from the (assumed independent) per-team
            # goal distributions.
            home_probs = output['home_goals'].squeeze().cpu().numpy()
            away_probs = output['away_goals'].squeeze().cpu().numpy()
            correct_scores = {}
            for h in range(8):
                for a in range(8):
                    correct_scores[f'{h}-{a}'] = float(home_probs[h] * away_probs[a])
            # Normalize so the 64 scorelines sum to 1.
            total = sum(correct_scores.values())
            if total > 0:
                correct_scores = {k: v / total for k, v in correct_scores.items()}
            return {
                'result': {
                    'home_win': float(output['result'][0, 0]),
                    'draw': float(output['result'][0, 1]),
                    'away_win': float(output['result'][0, 2])
                },
                'correct_scores': dict(sorted(
                    correct_scores.items(),
                    key=lambda x: x[1],
                    reverse=True
                )[:10]),
                'btts_yes': float(output['btts'][0, 1]),
                'over_25': float(output['over_25'][0, 1])
            }
class TransformerPredictor(nn.Module):
    """
    Transformer-based model for sequence prediction.

    Encodes each team's recent match history with a shared Transformer
    encoder, mean-pools the encoded sequences, and predicts the match
    result and per-team goal distributions from their concatenation.
    The learned positional encoding caps usable sequence length at 50.
    """

    def __init__(
        self,
        feature_dim: int = 128,
        d_model: int = 256,
        nhead: int = 8,
        num_layers: int = 4,
        dropout: float = 0.2
    ):
        """
        Args:
            feature_dim: Width of per-match input feature vectors.
            d_model: Transformer model dimension (divisible by nhead).
            nhead: Number of attention heads.
            num_layers: Number of encoder layers.
            dropout: Dropout probability.
        """
        super().__init__()
        # Project raw match features into the model dimension.
        self.input_proj = nn.Linear(feature_dim, d_model)
        # Learned positional encoding for up to 50 positions.
        self.pos_encoding = nn.Parameter(torch.randn(1, 50, d_model) * 0.1)
        # Shared encoder stack applied to both teams' sequences.
        self.transformer = nn.TransformerEncoder(
            nn.TransformerEncoderLayer(
                d_model=d_model,
                nhead=nhead,
                dim_feedforward=d_model * 4,
                dropout=dropout,
                batch_first=True
            ),
            num_layers=num_layers
        )
        # 1X2 result head.
        self.result_head = nn.Sequential(
            nn.Linear(d_model * 2, d_model),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model, 3)
        )
        # Joint goals head: first 8 logits = home goals, last 8 = away.
        self.goals_head = nn.Sequential(
            nn.Linear(d_model * 2, d_model // 2),
            nn.ReLU(),
            nn.Linear(d_model // 2, 16)  # 8 home + 8 away
        )

    def forward(
        self,
        home_sequence: torch.Tensor,  # (batch, seq_len, feature_dim)
        away_sequence: torch.Tensor
    ) -> Dict[str, torch.Tensor]:
        """
        Args:
            home_sequence: (batch, seq_len, feature_dim) home history.
            away_sequence: (batch, seq_len, feature_dim) away history.

        Returns:
            Dict with 'result' (batch, 3), 'home_goals' and
            'away_goals' (batch, 8) probability tensors.
        """
        seq_len = home_sequence.size(1)
        pos = self.pos_encoding[:, :seq_len, :]
        # Shared projection + positional encoding, then encode each team.
        home_encoded = self.transformer(self.input_proj(home_sequence) + pos)
        away_encoded = self.transformer(self.input_proj(away_sequence) + pos)
        # Mean-pool over the time dimension and fuse the two teams.
        fused = torch.cat(
            [home_encoded.mean(dim=1), away_encoded.mean(dim=1)], dim=-1
        )
        goal_logits = self.goals_head(fused)
        return {
            'result': F.softmax(self.result_head(fused), dim=-1),
            'home_goals': F.softmax(goal_logits[:, :8], dim=-1),
            'away_goals': F.softmax(goal_logits[:, 8:], dim=-1)
        }
# Factory functions
def get_gnn_model(num_teams: int = 1000, feature_dim: int = 128) -> GraphFootballPredictor:
    """Build a GraphFootballPredictor with default hyperparameters."""
    return GraphFootballPredictor(num_teams=num_teams, feature_dim=feature_dim)
def get_transformer_model(feature_dim: int = 128) -> TransformerPredictor:
    """Build a TransformerPredictor with default hyperparameters."""
    return TransformerPredictor(feature_dim=feature_dim)