# model.py
# ProsodyBoundaryModel — custom DistilBERT multi-task token classifier.
# The libri+peoples+sbc checkpoint was trained with use_pos_embedding=False.
# The POS embedding path is present in the class but inactive for this model.

import torch
import torch.nn as nn
from transformers import (
    DistilBertModel,
    DistilBertPreTrainedModel,
    AutoTokenizer,
)

# ── POS tag vocabulary (Universal Dependencies / spaCy UPOS) ─────────────────
# Kept so that checkpoints built with POS embeddings still load; the
# libri+peoples+sbc checkpoint does not use it.
#
# Maps each UPOS tag to a short lowercase token string. The insertion order of
# this dict is significant: the integer ids in POS_TAG_TO_ID are derived from
# it, so entries must not be reordered.
UNIVERSAL_TO_TOKEN = {
    "ADJ": "adj",
    "ADP": "adp",
    "ADV": "adv",
    "AUX": "aux",
    "CCONJ": "cc",
    "DET": "det",
    "INTJ": "ij",
    "NOUN": "nn",
    "NUM": "num",
    "PART": "pt",
    "PRON": "pro",
    "PROPN": "np",
    "PUNCT": "pun",
    "SCONJ": "sc",
    "SYM": "sym",
    "VERB": "vb",
    "X": "xx",
    "SPACE": "sp",
}

# Token string for a tag outside UNIVERSAL_TO_TOKEN (it has no entry in
# POS_TAG_TO_ID below).
UNK_POS_TOKEN = "unk"

# Id 0 is the padding tag; the UPOS tags follow in dict order (ids 1..18).
_POS_TAG_NAMES = ["PAD", *UNIVERSAL_TO_TOKEN]
POS_TAG_TO_ID = dict(zip(_POS_TAG_NAMES, range(len(_POS_TAG_NAMES))))
NUM_POS_TAGS = len(_POS_TAG_NAMES)  # 19 = PAD + 18 UPOS tags


class ProsodyBoundaryModel(DistilBertPreTrainedModel):
    """
    Multi-task token classifier for ToBI prosodic annotation.

    Architecture
    ────────────
    DistilBERT encoder
        └─► dropout (seq_classif_dropout)
             [+ optional projected POS embedding, added after dropout]
             ├─► boundary_head    Linear(H → 2)   boundary / non-boundary
             ├─► intonation_head  Linear(H → 3)   H% / L% / !H%
             └─► break_idx_head   Linear(H → 2)   index-3 / index-4

    H is ``config.hidden_size`` (768 for the released checkpoint).

    The POS path is only built when ``config.use_pos_embedding`` is truthy;
    the libri+peoples+sbc checkpoint is set to use_pos_embedding=False.
    All three heads are applied to every token; intonation and break index
    predictions are only meaningful at boundary positions.

    Submodule attribute names and their construction order are part of the
    checkpoint contract (state_dict keys), so they must not be renamed or
    reordered.
    """

    def __init__(self, config):
        """
        Build the encoder, the optional POS embedding path and the three heads.

        Parameters
        ----------
        config : DistilBERT-style config. Reads ``seq_classif_dropout`` and
                 ``hidden_size``, plus the optional attributes
                 ``use_pos_embedding`` (default False), ``pos_emb_dim``
                 (default 64) and ``num_pos_tags`` (default NUM_POS_TAGS).
        """
        super().__init__(config)
        self.distilbert = DistilBertModel(config)
        self.dropout = nn.Dropout(config.seq_classif_dropout)

        # Optional config fields are read with getattr so that configs saved
        # without them still load.
        self.use_pos_embedding = getattr(config, "use_pos_embedding", False)
        if self.use_pos_embedding:
            pos_emb_dim = getattr(config, "pos_emb_dim", 64)
            num_pos_tags = getattr(config, "num_pos_tags", NUM_POS_TAGS)
            # Index 0 is the PAD tag in POS_TAG_TO_ID.
            self.pos_embedding = nn.Embedding(
                num_pos_tags, pos_emb_dim, padding_idx=0
            )
            # Bias-free projection up to the encoder width so the result can
            # be summed with the token representations.
            self.pos_proj = nn.Linear(pos_emb_dim, config.hidden_size, bias=False)

        self.boundary_head = nn.Linear(config.hidden_size, 2)
        self.intonation_head = nn.Linear(config.hidden_size, 3)
        self.break_idx_head = nn.Linear(config.hidden_size, 2)
        self.post_init()

    def forward(self, input_ids, attention_mask=None, pos_ids=None, **kwargs):
        """
        Run the encoder and all three token-level heads.

        Parameters
        ----------
        input_ids      : (B, T)
        attention_mask : (B, T) | None  — passed to the encoder unchanged;
                         with None the encoder applies its own default
                         (no positions masked), so only omit it for
                         unpadded input
        pos_ids        : (B, T) LongTensor | None  — only used when
                         use_pos_embedding=True; if None the POS term is
                         skipped silently even when the POS path exists
        **kwargs       : accepted and ignored, so callers may pass extra
                         batch fields without error

        Returns
        -------
        dict with keys:
            boundary_logits    : (B, T, 2)
            intonation_logits  : (B, T, 3)
            break_idx_logits   : (B, T, 2)
        """
        encoder_out = self.distilbert(
            input_ids=input_ids,
            attention_mask=attention_mask,
        )
        seq_out = self.dropout(encoder_out.last_hidden_state)   # (B, T, H)

        # The POS term is added after dropout, so it is not itself dropped.
        if self.use_pos_embedding and pos_ids is not None:
            pos_emb = self.pos_proj(self.pos_embedding(pos_ids))   # (B, T, H)
            seq_out = seq_out + pos_emb

        return {
            "boundary_logits":   self.boundary_head(seq_out),    # (B, T, 2)
            "intonation_logits": self.intonation_head(seq_out),  # (B, T, 3)
            "break_idx_logits":  self.break_idx_head(seq_out),   # (B, T, 2)
        }

    @classmethod
    def _can_set_experts_implementation(cls):
        """
        Always return False.

        NOTE(review): presumably overrides a transformers hook that probes
        the class for a mixture-of-experts implementation, which this model
        does not have — confirm against the installed transformers version.
        """
        return False