"""
AIBA BERT-BiLSTM Multi-Task Model Architecture

This file contains the model architecture for the AIBA multi-task model.
"""

import torch
import torch.nn as nn
from transformers import AutoModel


class EnhancedMultiTaskModel(nn.Module):
    """
    Multi-task model for:
    - Named Entity Recognition (NER), predicted per token
    - Intent Classification, predicted per sequence
    - Language Detection, predicted per sequence

    Architecture: BERT encoder -> LayerNorm, then
    - NER: BiLSTM -> dropout -> linear classifier
    - Intent / language: attention pooling -> shared dense layer ->
      one classification head per task

    The sub-module attribute names (and the positions inside each
    nn.Sequential) define the state_dict keys, so they must stay stable for
    existing checkpoints to load.
    """

    def __init__(self, config):
        """
        Build the encoder and the task heads.

        Args:
            config: dict with the required keys "num_ner_labels",
                "num_intent_labels" and "num_lang_labels", and the optional
                keys "base_model" (default
                "google-bert/bert-base-multilingual-uncased"),
                "lstm_hidden" (default 256) and "dropout" (default 0.15).
        """
        super().__init__()

        # Load BERT base model (this resolves the pretrained encoder by name)
        self.bert = AutoModel.from_pretrained(
            config.get("base_model", "google-bert/bert-base-multilingual-uncased")
        )
        hidden_size = self.bert.config.hidden_size

        # Layer normalization applied to the encoder output
        self.layer_norm = nn.LayerNorm(hidden_size)

        # NER head with BiLSTM. Each direction gets half of lstm_hidden, so
        # the concatenated output is 2 * (lstm_hidden // 2) wide.
        lstm_hidden = config.get("lstm_hidden", 256)
        lstm_per_direction = lstm_hidden // 2
        self.ner_lstm = nn.LSTM(
            hidden_size,
            lstm_per_direction,
            num_layers=1,
            bidirectional=True,
            batch_first=True,
            dropout=0,
        )
        dropout = config.get("dropout", 0.15)
        self.ner_dropout = nn.Dropout(dropout)
        # Sized from the real BiLSTM output width: identical to lstm_hidden
        # when it is even, and still consistent when it is odd.
        self.ner_classifier = nn.Linear(
            2 * lstm_per_direction, config["num_ner_labels"]
        )

        # Attention pooling for sequence-level tasks: one score per token,
        # normalised with a softmax over dim 1 (the sequence dimension).
        self.attention_pool = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 4),
            nn.Tanh(),
            nn.Linear(hidden_size // 4, 1),
            nn.Softmax(dim=1),
        )

        # Shared representation layer feeding both sequence-level heads
        self.shared_dense = nn.Sequential(
            nn.Linear(hidden_size, hidden_size // 2),
            nn.GELU(),
            nn.LayerNorm(hidden_size // 2),
            nn.Dropout(dropout),
        )

        # Intent classification head
        self.intent_classifier = nn.Sequential(
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 4, hidden_size // 8),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 8, config["num_intent_labels"]),
        )

        # Language detection head
        self.lang_classifier = nn.Sequential(
            nn.Linear(hidden_size // 2, hidden_size // 4),
            nn.GELU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_size // 4, config["num_lang_labels"]),
        )

        # Store label counts for callers
        self.num_ner_labels = config["num_ner_labels"]
        self.num_intent_labels = config["num_intent_labels"]
        self.num_lang_labels = config["num_lang_labels"]

    @staticmethod
    def _compute_loss(logits_and_labels):
        """Sum the cross-entropy of every (logits, labels) pair that has labels.

        Returns None when no pair carries labels.
        """
        total = None
        for logits, labels in logits_and_labels:
            if labels is None:
                continue
            # Flatten so token-level (3-D) and sequence-level (2-D) logits go
            # through the same call. cross_entropy skips targets equal to
            # -100 by default (its default ignore_index).
            task_loss = nn.functional.cross_entropy(
                logits.reshape(-1, logits.size(-1)), labels.reshape(-1)
            )
            total = task_loss if total is None else total + task_loss
        return total

    def forward(self, input_ids, attention_mask=None, token_type_ids=None,
                labels_ner=None, labels_intent=None, labels_lang=None):
        """
        Forward pass

        Args:
            input_ids: Input token IDs
            attention_mask: Attention mask, forwarded to the encoder
            token_type_ids: Token type IDs (optional); only forwarded to the
                encoder when given
            labels_ner: NER labels (optional, for training); entries equal to
                -100 are ignored by the loss
            labels_intent: Intent labels (optional, for training)
            labels_lang: Language labels (optional, for training)

        Returns:
            dict with keys 'ner_logits', 'intent_logits', 'lang_logits'.
            When at least one labels argument is given, the dict also has
            'loss': the unweighted sum of the cross-entropy losses of the
            tasks whose labels were provided.
        """
        # BERT encoding. token_type_ids is passed only when supplied so that
        # encoders whose forward() has no such parameter still work.
        encoder_inputs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
        }
        if token_type_ids is not None:
            encoder_inputs["token_type_ids"] = token_type_ids
        outputs = self.bert(**encoder_inputs)

        sequence_output = self.layer_norm(outputs.last_hidden_state)

        # NER predictions (token-level)
        ner_lstm_out, _ = self.ner_lstm(sequence_output)
        ner_logits = self.ner_classifier(self.ner_dropout(ner_lstm_out))

        # Attention pooling for sequence-level tasks.
        # NOTE(review): the softmax runs over every position, including
        # padding; attention_mask is not applied here. Left unchanged so the
        # outputs of existing checkpoints stay the same - confirm whether
        # masking is wanted.
        attention_weights = self.attention_pool(sequence_output)
        pooled_output = torch.sum(sequence_output * attention_weights, dim=1)

        # Shared representation
        shared_repr = self.shared_dense(pooled_output)

        # Intent and language predictions
        intent_logits = self.intent_classifier(shared_repr)
        lang_logits = self.lang_classifier(shared_repr)

        result = {
            'ner_logits': ner_logits,
            'intent_logits': intent_logits,
            'lang_logits': lang_logits,
        }

        loss = self._compute_loss((
            (ner_logits, labels_ner),
            (intent_logits, labels_intent),
            (lang_logits, labels_lang),
        ))
        if loss is not None:
            result['loss'] = loss
        return result

    @staticmethod
    def _resolve_file(model_path, filename):
        """Return a local path to `filename` for a local directory or Hub repo."""
        from pathlib import Path

        local_dir = Path(model_path)
        if local_dir.is_dir():
            candidate = local_dir / filename
            if not candidate.is_file():
                raise FileNotFoundError(f"{filename} not found in {local_dir}")
            return str(candidate)

        # Imported lazily so a purely local load does not need the Hub client.
        from huggingface_hub import hf_hub_download
        return hf_hub_download(repo_id=model_path, filename=filename)

    @classmethod
    def from_pretrained(cls, model_path):
        """
        Load a pretrained model from a local directory or the Hugging Face Hub

        Args:
            model_path: Local directory, or Hub repo ID
                (e.g., 'username/aiba-bert-bilstm'). An existing local
                directory takes precedence over a repo ID of the same name.
                It must provide config.json and either model.safetensors or
                pytorch_model.bin.

        Returns:
            Loaded model (the train/eval mode is not changed here)
        """
        import json

        # Read the config as UTF-8 regardless of the platform default.
        with open(cls._resolve_file(model_path, "config.json"), encoding="utf-8") as f:
            config = json.load(f)

        # Initialize model
        model = cls(config)

        # Load weights
        try:
            # Try safetensors first
            weights_path = cls._resolve_file(model_path, "model.safetensors")
            from safetensors.torch import load_file
            state_dict = load_file(weights_path)
        except Exception:
            # Deliberate best-effort fallback to pytorch_model.bin (missing
            # file, missing safetensors package, ...). `Exception` rather
            # than a bare except so Ctrl-C / SystemExit still propagate.
            weights_path = cls._resolve_file(model_path, "pytorch_model.bin")
            # weights_only=True restricts unpickling to tensors and plain
            # containers, since the file may come from an untrusted repo.
            state_dict = torch.load(
                weights_path, map_location='cpu', weights_only=True
            )

        model.load_state_dict(state_dict)
        return model


def load_model_and_tokenizer(model_path):
    """
    Convenience function to load model, tokenizer, and label mappings

    config.json is read from `model_path` when that is an existing local
    directory, and downloaded from the Hugging Face Hub otherwise. The
    tokenizer is loaded with AutoTokenizer.from_pretrained and the model with
    EnhancedMultiTaskModel.from_pretrained, so `model_path` must also be
    accepted by both of those loaders.

    Args:
        model_path: Path or repo ID (e.g., 'username/aiba-bert-bilstm')

    Returns:
        tuple: (model, tokenizer, config_dict), with the model switched to
        eval mode
    """
    import json
    from pathlib import Path

    from transformers import AutoTokenizer

    # Load config
    local_dir = Path(model_path)
    if local_dir.is_dir():
        config_path = local_dir / "config.json"
    else:
        from huggingface_hub import hf_hub_download
        config_path = hf_hub_download(repo_id=model_path, filename="config.json")

    # Explicit UTF-8: the platform default encoding could otherwise corrupt
    # non-ASCII text in the config.
    with open(config_path, encoding="utf-8") as f:
        config = json.load(f)

    # Load tokenizer
    tokenizer = AutoTokenizer.from_pretrained(model_path)

    # Load model and put it in inference mode
    model = EnhancedMultiTaskModel.from_pretrained(model_path)
    model.eval()

    return model, tokenizer, config