# - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
#                                                           #
#   This file was created by: Alberto Palomo Alonso         #
#   Universidad de Alcalá - Escuela Politécnica Superior    #
#                                                           #
# - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
import torch
from transformers import PreTrainedModel, PretrainedConfig

from src.model import SegmentationNetwork
from src.model.config import ModelConfig, TransformerConfig, CoSeNetConfig


class SentenceCoseNetConfig(PretrainedConfig):
    """
    Configuration class for SentenceCoseNet.

    This class stores all hyperparameters needed to initialize a
    `SentenceCoseNet` model. It follows Hugging Face's `PretrainedConfig`
    interface so the model can be saved, loaded, and shared via the Hub.

    Attributes:
        model_type (str): Identifier used by Hugging Face to register the model.
        vocab_size (int): Size of the tokenizer vocabulary.
        emb_dim (int): Dimensionality of token embeddings.
        seq_len (int): Maximum input sequence length supported by the model.
        dropout (float): Dropout probability applied in Transformer blocks.
        valid_padding (bool): Whether padding tokens are treated as valid positions.
        cosenet (dict): Configuration of the cosine-similarity network head.
        transformers (list[dict]): List of Transformer encoder block configurations.
    """
    model_type = "sentence_cosenet"

    def __init__(
            self,
            vocab_size: int = 32768,
            emb_dim: int = 256,
            seq_len: int = 382,
            dropout: float = 0.0,
            valid_padding: bool = True,
            cosenet: dict | None = None,
            transformers: list | None = None,
            **kwargs,
    ):
        """
        Initialize SentenceCoseNet configuration.

        Args:
            vocab_size: Size of the tokenizer vocabulary.
            emb_dim: Dimension of token embeddings.
            seq_len: Maximum number of tokens per input sequence.
            dropout: Dropout probability used throughout the network.
            valid_padding: Whether padded tokens should be considered valid.
            cosenet: Optional configuration dictionary for the cosine
                similarity network head.
            transformers: Optional list of dictionaries describing each
                Transformer encoder block.
            **kwargs: Additional keyword arguments passed to `PretrainedConfig`.
        """
        super().__init__(**kwargs)
        self.vocab_size = vocab_size
        self.emb_dim = emb_dim
        self.seq_len = seq_len
        self.dropout = dropout
        self.valid_padding = valid_padding
        # Default head / encoder-stack hyperparameters when not provided.
        self.cosenet = cosenet or {
            "trainable": True,
            "init_scale": 5.0,
        }
        self.transformers = transformers or [
            {
                "attention_heads": 16,
                "feed_forward_multiplier": 8,
                "dropout": 0.0,
                "pre_normalize": True,
            },
            {
                "attention_heads": 16,
                "feed_forward_multiplier": 8,
                "dropout": 0.0,
                "pre_normalize": True,
            },
        ]
        # Standard HF aliases so downstream tooling can introspect the model.
        self.hidden_size = emb_dim
        self.max_position_embeddings = seq_len


class SentenceCoseNet(PreTrainedModel):
    """
    Sentence-level encoder model based on CoseNet.

    This class wraps a custom PyTorch segmentation network and exposes it
    as a Hugging Face `PreTrainedModel`, enabling interoperability with the
    Transformers ecosystem.

    The model is intended for:
        - Sentence embeddings
        - Semantic search
        - Information retrieval
        - Similarity learning
    """
    config_class = SentenceCoseNetConfig
    base_model_prefix = "cosenet"

    def __init__(self, config: SentenceCoseNetConfig):
        """
        Initialize the SentenceCoseNet model.

        Args:
            config: Instance of `SentenceCoseNetConfig` containing model
                hyperparameters.
        """
        super().__init__(config)
        # Core PyTorch model.
        self.model = SegmentationNetwork(self.to_model_config(config))
        # Initialize weights following HF conventions.
        self.post_init()
        # Set evaluation mode by default.
        self.model.eval()

    def encode(
            self,
            input_ids: torch.Tensor,
            attention_mask=None,
    ) -> torch.Tensor:
        """
        Encode input token sequences into contextualized embeddings.

        This method performs embedding lookup, positional encoding, and
        Transformer-based contextualization, returning token-level
        representations.

        Args:
            input_ids: Tensor of token IDs with shape
                `(batch_size, sequence_length)` or
                `(batch_size, sentences, sequence_length)`.
            attention_mask: Optional attention mask indicating valid (1) and
                padded (0) positions, with the same leading shape as
                `input_ids`.

        Returns:
            torch.Tensor: Contextualized token embeddings with shape
                `(batch_size, sequence_length, emb_dim)`.

        Raises:
            ValueError: If `input_ids` is neither 2-D nor 3-D.
        """
        # Set the model task, then reuse the shared shape-dispatch helper
        # (previously this body duplicated `call` verbatim).
        self.model.task = 'token_encoding'
        return self.call(input_ids, attention_mask)

    def get_sentence_embedding(
            self,
            input_ids: torch.Tensor,
            attention_mask=None,
            normalize: bool = False,
    ) -> torch.Tensor:
        """
        Compute sentence embeddings for zero-shot transfer and information
        retrieval.

        Args:
            input_ids (torch.Tensor): Tensor of shape (B, T).
            attention_mask (torch.Tensor, optional): Boolean or binary mask of
                shape (B, T).
            normalize (bool, optional): Whether to L2-normalize the output
                embeddings.

        Returns:
            torch.Tensor: Sentence embeddings of shape (B, D).
        """
        # Set the model task:
        self.model.task = 'sentence_encoding'
        output = self.call(input_ids, attention_mask)
        if normalize:
            output = torch.nn.functional.normalize(output, p=2, dim=-1)
        return output

    def similarity(self, embeddings_1: torch.Tensor, embeddings_2: torch.Tensor) -> torch.Tensor:
        """
        Compute cosine similarity scores between two sets of embeddings.

        Args:
            embeddings_1 (torch.Tensor): Tensor of shape (B, S, D) containing
                the first set of embeddings concatenated along the first
                dimension.
            embeddings_2 (torch.Tensor): Tensor of shape (B, S, D) containing
                the second set of embeddings concatenated along the first
                dimension.

        Returns:
            torch.Tensor: Similarity scores of shape (B, S).
        """
        # Stack embeddings pairwise -> (B, S, 2, D).
        embeddings = torch.stack([embeddings_1, embeddings_2], dim=-2)
        # Compute pairwise distances -> (B, S, 2, 2).
        embeddings = self.model.distance_layer(embeddings)
        # Symmetrize the off-diagonal scores -> (B, S).
        return (embeddings[..., 0, 1] + embeddings[..., 1, 0]) / 2

    def forward(
            self,
            input_ids: torch.Tensor,
            attention_mask=None,
            candidate_mask=None,
            **kwargs,
    ):
        """
        Forward pass of the SentenceCoseNet model.

        This method delegates execution to the underlying
        `SegmentationNetwork` with the 'segmentation' task selected.

        Args:
            input_ids: Tensor of token IDs with shape
                `(batch_size, sequence_length)`.
            attention_mask: Optional attention mask tensor.
            candidate_mask: Optional mask indicating candidate segments or
                spans.
            **kwargs: Additional arguments forwarded to the core model.

        Returns:
            Model-specific output as produced by `SegmentationNetwork`.
        """
        self.model.task = 'segmentation'
        return self.model(
            x=input_ids,
            mask=attention_mask,
            candidate_mask=candidate_mask,
            **kwargs,
        )

    def call(self, input_ids: torch.Tensor, attention_mask=None) -> torch.Tensor:
        """
        Internal method to handle different input shapes (task already
        selected).

        2-D inputs `(Batch, Tokens)` are lifted to a singleton sentence
        dimension before the core model call and squeezed back afterwards;
        3-D inputs `(Batch, Sentences, Tokens)` are passed through as-is.

        Args:
            input_ids: Tensor of token IDs with shape `(batch_size,
                sequence_length)` or `(batch_size, sentences, sequence_length)`.
            attention_mask: Optional attention mask tensor with the same
                leading shape as `input_ids`.

        Returns:
            torch.Tensor: Output of the core model with the sentence
            dimension squeezed out for 2-D inputs.

        Raises:
            ValueError: If `input_ids` is neither 2-D nor 3-D.
        """
        if input_ids.dim() == 2:
            x = input_ids.int().unsqueeze(1)
            mask = attention_mask.unsqueeze(1) if attention_mask is not None else None
            return self.model(x=x, mask=mask).squeeze(1)
        if input_ids.dim() == 3:
            # Mask passes through unchanged (None stays None).
            return self.model(x=input_ids.int(), mask=attention_mask)
        raise ValueError("Input tensor must be of shape (Batch, Tokens) or (Batch, Sentences, Tokens).")

    @staticmethod
    def to_model_config(config: SentenceCoseNetConfig) -> ModelConfig:
        """
        Convert Hugging Face config to internal ModelConfig.

        Args:
            config: Hugging Face configuration to translate.

        Returns:
            ModelConfig: Internal configuration consumed by
            `SegmentationNetwork`.
        """
        mc = ModelConfig()
        # Core dimensions.
        mc.vocab_size = config.vocab_size
        mc.model_dim = config.emb_dim
        mc.valid_padding = config.valid_padding
        # NOTE(review): config.seq_len and config.dropout are not forwarded
        # here — verify that ModelConfig's defaults match them, or that the
        # core model derives them elsewhere.
        # CoSeNet head config.
        mc.cosenet = CoSeNetConfig(**config.cosenet)
        # Transformer stack.
        mc.transformers = [
            TransformerConfig(**cfg) for cfg in config.transformers
        ]
        return mc

# - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #
#                        END OF FILE                        #
# - x - x - x - x - x - x - x - x - x - x - x - x - x - x - #