"""
Graph Neural Network (GNN) for Football Prediction

Uses team relationships and match context as a graph structure.
Based on the blueprint for advanced deep learning models.
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from typing import Dict, List, Tuple, Optional
import numpy as np
import logging

logger = logging.getLogger(__name__)

# torch_geometric is optional: without it, the graph encoder degrades
# gracefully to an MLP over the node features (see MatchGraphEncoder).
try:
    from torch_geometric.nn import GCNConv, GATConv, SAGEConv
    from torch_geometric.data import Data, Batch
    HAS_TORCH_GEOMETRIC = True
except ImportError:
    HAS_TORCH_GEOMETRIC = False
    logger.warning("torch_geometric not installed. GNN features limited.")


class TeamEmbedding(nn.Module):
    """Learnable team embeddings (a thin wrapper around nn.Embedding)."""

    def __init__(self, num_teams: int, embedding_dim: int = 64):
        super().__init__()
        self.embedding = nn.Embedding(num_teams, embedding_dim)

    def forward(self, team_ids: torch.Tensor) -> torch.Tensor:
        """Map integer team ids (any shape) to (..., embedding_dim) vectors."""
        return self.embedding(team_ids)


class MatchGraphEncoder(nn.Module):
    """
    Encode match context using graph neural networks.

    Nodes: Teams
    Edges: Recent matches between teams
    Node features: Team statistics
    Edge features: Match statistics

    The module always maps (..., node_features) -> (..., output_dim).
    When torch_geometric is unavailable, or forward() receives no
    edge_index, an MLP over the raw node features is used instead.
    """

    def __init__(
        self,
        node_features: int = 64,
        hidden_dim: int = 128,
        output_dim: int = 64,
        num_layers: int = 3,
        dropout: float = 0.2
    ):
        super().__init__()

        # MLP fallback; also used when forward() receives no edge_index.
        # FIX: this layer previously expected node_features * 2 inputs,
        # which mismatched the node_features-wide tensor callers actually
        # pass and crashed whenever torch_geometric was missing.
        self.fallback = nn.Sequential(
            nn.Linear(node_features, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, output_dim)
        )

        self.use_gnn = HAS_TORCH_GEOMETRIC
        if not self.use_gnn:
            return

        # Graph-attention stack: one input layer, (num_layers - 2) hidden
        # layers, one output layer. heads are averaged (concat=False) so
        # every layer keeps its nominal width. num_layers < 2 still
        # produces the minimum two layers (input + output).
        self.convs = nn.ModuleList()
        self.bns = nn.ModuleList()

        self.convs.append(GATConv(node_features, hidden_dim, heads=4, concat=False))
        self.bns.append(nn.BatchNorm1d(hidden_dim))

        for _ in range(num_layers - 2):
            self.convs.append(GATConv(hidden_dim, hidden_dim, heads=4, concat=False))
            self.bns.append(nn.BatchNorm1d(hidden_dim))

        self.convs.append(GATConv(hidden_dim, output_dim, heads=1, concat=False))
        self.dropout = nn.Dropout(dropout)

    def forward(
        self,
        x: torch.Tensor,
        edge_index: torch.Tensor = None,
        batch: torch.Tensor = None
    ) -> torch.Tensor:
        """
        Encode node features.

        Args:
            x: (num_nodes, node_features) node feature matrix.
            edge_index: (2, num_edges) COO connectivity, or None.
            batch: optional torch_geometric batch vector (currently unused).

        Returns:
            (num_nodes, output_dim) encoded features.
        """
        # FIX: the no-edge path previously returned x unchanged, whose
        # width (node_features) only matched output_dim by coincidence of
        # the default hyper-parameters; route it through the MLP instead.
        if not self.use_gnn or edge_index is None:
            return self.fallback(x)

        for conv, bn in zip(self.convs[:-1], self.bns):
            x = conv(x, edge_index)
            x = bn(x)
            x = F.elu(x)
            x = self.dropout(x)
        return self.convs[-1](x, edge_index)


class GraphFootballPredictor(nn.Module):
    """
    Complete GNN-based football prediction model.

    Architecture:
        1. Team embeddings
        2. Graph encoder for league context
        3. Match predictor head(s): 1X2 result, per-side goal
           distributions (0-7), BTTS, and Over 2.5.
    """

    def __init__(
        self,
        num_teams: int = 1000,
        team_embed_dim: int = 64,
        feature_dim: int = 128,
        hidden_dim: int = 256,
        num_gnn_layers: int = 3,
        dropout: float = 0.3
    ):
        super().__init__()

        # Team embeddings
        self.team_embedding = TeamEmbedding(num_teams, team_embed_dim)

        # Match-feature encoder; its output is fused into the match
        # representation below.
        self.feature_encoder = nn.Sequential(
            nn.Linear(feature_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, team_embed_dim)
        )

        # Graph encoder over the concatenated (home, away) embedding pair.
        self.graph_encoder = MatchGraphEncoder(
            node_features=team_embed_dim * 2,
            hidden_dim=hidden_dim,
            output_dim=hidden_dim // 2,
            num_layers=num_gnn_layers,
            dropout=dropout
        )

        # Match representation = graph output + home + away embeddings
        # + encoded match features.
        # FIX: encoded match features were previously computed in forward()
        # but never used, so match statistics had no effect on predictions.
        match_dim = hidden_dim // 2 + team_embed_dim * 3

        # Prediction heads
        # 1X2 result (home win / draw / away win)
        self.result_head = nn.Sequential(
            nn.Linear(match_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 3)
        )
        # Goal-count distributions over 0-7 goals per side
        self.home_goals_head = nn.Sequential(
            nn.Linear(match_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 8)
        )
        self.away_goals_head = nn.Sequential(
            nn.Linear(match_dim, 64),
            nn.ReLU(),
            nn.Linear(64, 8)
        )
        # Both teams to score (no / yes)
        self.btts_head = nn.Sequential(
            nn.Linear(match_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 2)
        )
        # Over 2.5 total goals (no / yes)
        self.over25_head = nn.Sequential(
            nn.Linear(match_dim, 32),
            nn.ReLU(),
            nn.Linear(32, 2)
        )

    def forward(
        self,
        home_team_id: torch.Tensor,
        away_team_id: torch.Tensor,
        match_features: torch.Tensor,
        edge_index: torch.Tensor = None,
        return_embeddings: bool = False
    ) -> Dict[str, torch.Tensor]:
        """
        Predict match outcome distributions.

        Args:
            home_team_id, away_team_id: (batch,) integer team ids.
            match_features: (batch, feature_dim) match statistics.
            edge_index: optional graph connectivity for league context.
            return_embeddings: also return intermediate representations.

        Returns:
            Dict of softmax probability tensors: 'result' (batch, 3),
            'home_goals'/'away_goals' (batch, 8), 'btts'/'over_25'
            (batch, 2); plus embeddings if requested.
        """
        home_embed = self.team_embedding(home_team_id)
        away_embed = self.team_embedding(away_team_id)

        encoded_features = self.feature_encoder(match_features)

        # Pair embedding feeds the graph encoder as the node feature.
        combined = torch.cat([home_embed, away_embed], dim=-1)
        graph_out = self.graph_encoder(combined, edge_index)

        # FIX: include encoded match features (previously dead code).
        match_repr = torch.cat(
            [graph_out, home_embed, away_embed, encoded_features], dim=-1
        )

        output = {
            'result': F.softmax(self.result_head(match_repr), dim=-1),
            'home_goals': F.softmax(self.home_goals_head(match_repr), dim=-1),
            'away_goals': F.softmax(self.away_goals_head(match_repr), dim=-1),
            'btts': F.softmax(self.btts_head(match_repr), dim=-1),
            'over_25': F.softmax(self.over25_head(match_repr), dim=-1)
        }

        if return_embeddings:
            output['home_embedding'] = home_embed
            output['away_embedding'] = away_embed
            output['match_representation'] = match_repr

        return output

    def predict(self, home_team_id: int, away_team_id: int,
                features: np.ndarray) -> Dict:
        """
        Generate predictions for a single match.

        Args:
            home_team_id, away_team_id: integer team ids.
            features: 1-D array of length feature_dim.

        Returns:
            Dict with '1X2' probabilities, the top-10 correct scores
            (renormalized), 'btts_yes' and 'over_25' probabilities.
        """
        self.eval()
        with torch.no_grad():
            home_id = torch.tensor([home_team_id])
            away_id = torch.tensor([away_team_id])
            feat_tensor = torch.FloatTensor(features).unsqueeze(0)

            output = self.forward(home_id, away_id, feat_tensor)

            # Correct-score grid assumes home/away goal counts are
            # independent given the match representation.
            home_probs = output['home_goals'].squeeze().cpu().numpy()
            away_probs = output['away_goals'].squeeze().cpu().numpy()
            correct_scores = {
                f'{h}-{a}': float(home_probs[h] * away_probs[a])
                for h in range(8) for a in range(8)
            }
            total = sum(correct_scores.values())
            if total > 0:
                correct_scores = {k: v / total for k, v in correct_scores.items()}

            return {
                'result': {
                    'home_win': float(output['result'][0, 0]),
                    'draw': float(output['result'][0, 1]),
                    'away_win': float(output['result'][0, 2])
                },
                'correct_scores': dict(sorted(
                    correct_scores.items(), key=lambda x: x[1], reverse=True
                )[:10]),
                'btts_yes': float(output['btts'][0, 1]),
                'over_25': float(output['over_25'][0, 1])
            }


class TransformerPredictor(nn.Module):
    """
    Transformer-based model for sequence prediction.

    Processes each team's recent match history (up to 50 matches) and
    predicts the 1X2 result plus per-side goal distributions.
    """

    def __init__(
        self,
        feature_dim: int = 128,
        d_model: int = 256,
        nhead: int = 8,
        num_layers: int = 4,
        dropout: float = 0.2
    ):
        super().__init__()

        self.input_proj = nn.Linear(feature_dim, d_model)

        # Learned positional encoding; fixes the maximum sequence length
        # at 50 matches.
        self.pos_encoding = nn.Parameter(torch.randn(1, 50, d_model) * 0.1)

        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=d_model * 4,
            dropout=dropout,
            batch_first=True
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers=num_layers)

        self.result_head = nn.Sequential(
            nn.Linear(d_model * 2, d_model),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(d_model, 3)
        )
        self.goals_head = nn.Sequential(
            nn.Linear(d_model * 2, d_model // 2),
            nn.ReLU(),
            nn.Linear(d_model // 2, 16)  # 8 home + 8 away
        )

    def forward(
        self,
        home_sequence: torch.Tensor,  # (batch, seq_len, feature_dim)
        away_sequence: torch.Tensor
    ) -> Dict[str, torch.Tensor]:
        """
        Encode both teams' histories and predict outcome distributions.

        Raises:
            ValueError: if seq_len exceeds the positional-encoding length.
        """
        batch_size, seq_len, _ = home_sequence.shape

        # FIX: longer sequences previously failed with an opaque
        # broadcasting error; fail fast with a clear message instead.
        max_len = self.pos_encoding.size(1)
        if seq_len > max_len:
            raise ValueError(
                f"Sequence length {seq_len} exceeds maximum supported "
                f"length {max_len}"
            )

        home_proj = self.input_proj(home_sequence) + self.pos_encoding[:, :seq_len, :]
        away_proj = self.input_proj(away_sequence) + self.pos_encoding[:, :seq_len, :]

        home_encoded = self.transformer(home_proj)
        away_encoded = self.transformer(away_proj)

        # Mean-pool over the sequence dimension, then fuse both teams.
        combined = torch.cat(
            [home_encoded.mean(dim=1), away_encoded.mean(dim=1)], dim=-1
        )

        result = F.softmax(self.result_head(combined), dim=-1)
        goals = self.goals_head(combined)

        return {
            'result': result,
            'home_goals': F.softmax(goals[:, :8], dim=-1),
            'away_goals': F.softmax(goals[:, 8:], dim=-1)
        }


# Factory functions
def get_gnn_model(num_teams: int = 1000, feature_dim: int = 128) -> GraphFootballPredictor:
    """Get GNN model instance."""
    return GraphFootballPredictor(num_teams=num_teams, feature_dim=feature_dim)


def get_transformer_model(feature_dim: int = 128) -> TransformerPredictor:
    """Get Transformer model instance."""
    return TransformerPredictor(feature_dim=feature_dim)