# modeling_kbert_mtl.py
"""K-BERT multi-task model: one shared transformer encoder with four task heads."""

import torch
import torch.nn as nn
from transformers import PreTrainedModel, AutoModel, AutoConfig, BertConfig


def _config_from_base_dict(base_cfg_dict: dict):
    """Rebuild the base-encoder config from a plain dict for fully offline loading.

    Args:
        base_cfg_dict: serialized config dict, typically stored on the wrapper
            config as ``base_model_config``. May contain a ``model_type`` key.

    Returns:
        A ``PretrainedConfig`` built via ``AutoConfig.for_model``; falls back to
        a plain ``BertConfig`` when the dict cannot be routed through AutoConfig.

    Raises:
        ValueError: if ``base_cfg_dict`` is None (nothing to rebuild from).
    """
    if base_cfg_dict is None:
        raise ValueError("config.base_model_config is required for offline load.")
    # Honor the serialized model_type when present; default to "bert" so older
    # checkpoints that omitted it keep working exactly as before.
    model_type = base_cfg_dict.get("model_type", "bert")
    kwargs = {k: v for k, v in base_cfg_dict.items() if k != "model_type"}
    try:
        return AutoConfig.for_model(model_type, **kwargs)
    except (ValueError, TypeError):
        # ValueError: unknown model_type; TypeError: incompatible kwargs.
        # Best-effort fallback to a vanilla BertConfig.
        return BertConfig(**kwargs)


class KbertMTL(PreTrainedModel):
    """Multi-task classification/regression wrapper around a transformer encoder.

    Heads (names suggest dialogue tasks — sentiment, act, emotion — TODO confirm
    against the training code):
        head_senti: 5-way logits
        head_act:   6-way logits
        head_emo:   7-way logits
        head_reg:   3-dim regression output
    """

    config_class = BertConfig

    def __init__(self, config):
        super().__init__(config)
        base_cfg_dict = getattr(config, "base_model_config", None)
        base_cfg = _config_from_base_dict(base_cfg_dict)
        # from_config builds architecture only (random weights, no download),
        # so the model can be instantiated fully offline.
        self.bert = AutoModel.from_config(base_cfg)
        hidden = self.bert.config.hidden_size
        self.head_senti = nn.Linear(hidden, 5)
        self.head_act = nn.Linear(hidden, 6)
        self.head_emo = nn.Linear(hidden, 7)
        self.head_reg = nn.Linear(hidden, 3)
        # Some encoders (e.g. DistilBERT) lack token_type embeddings, and some
        # expose no `.embeddings` module at all — probe defensively instead of
        # assuming a BERT-shaped module tree.
        embeddings = getattr(self.bert, "embeddings", None)
        self.has_token_type = getattr(embeddings, "token_type_embeddings", None) is not None
        self.post_init()

    def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, **kwargs):
        """Encode the batch and apply all four task heads.

        Args:
            input_ids: token ids for the encoder.
            attention_mask: padding mask forwarded to the encoder.
            token_type_ids: segment ids; forwarded only when the encoder
                actually has token-type embeddings.
            **kwargs: accepted and ignored (e.g. labels passed by a Trainer).

        Returns:
            dict with per-task outputs computed from the first token's hidden
            state ([CLS] pooling), plus the full ``last_hidden_state``.
        """
        encoder_kwargs = dict(input_ids=input_ids, attention_mask=attention_mask)
        # Only pass token_type_ids to encoders that can consume them; passing
        # them to e.g. DistilBERT would raise a TypeError.
        if self.has_token_type and token_type_ids is not None:
            encoder_kwargs["token_type_ids"] = token_type_ids
        out = self.bert(**encoder_kwargs)
        h = out.last_hidden_state[:, 0]  # first-token ([CLS]) representation
        return {
            "logits_senti": self.head_senti(h),
            "logits_act": self.head_act(h),
            "logits_emo": self.head_emo(h),
            "pred_reg": self.head_reg(h),
            "last_hidden_state": out.last_hidden_state,
        }