LangQuant
/

LQ-Kbert-base

@@ -3,6 +3,15 @@ import torch
 import torch.nn as nn
 from transformers import PreTrainedModel, AutoModel, AutoConfig
 class KbertMTL(PreTrainedModel):
     """
     LangQuant KBERT Multi-Task Head (HF-standard, offline-friendly)
@@ -11,19 +20,15 @@ class KbertMTL(PreTrainedModel):
       - logits_senti: (B,5)
       - logits_act:   (B,6)
       - logits_emo:   (B,7)
-      - pred_reg:     (B,3)  # [certainty, relevance, toxicity] in 0~1 (권장)
-      - last_hidden_state: (B, L, H) from base encoder
     """
     def __init__(self, config):
         super().__init__(config)
-        if not hasattr(config, "base_model_config") or config.base_model_config is None:
-            raise ValueError(
-                "config.base_model_config is required for offline load. "
-                "Make sure your config.json contains a serialized base model config."
-            )
-        base_cfg = AutoConfig.from_dict(config.base_model_config)
         self.bert = AutoModel.from_config(base_cfg)
         hidden = self.bert.config.hidden_size
@@ -33,16 +38,14 @@ class KbertMTL(PreTrainedModel):
         self.head_reg   = nn.Linear(hidden, 3)
         self.has_token_type = getattr(self.bert.embeddings, "token_type_embeddings", None) is not None
-        self.post_init()
     def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, **kwargs):
         kw = dict(input_ids=input_ids, attention_mask=attention_mask)
         if self.has_token_type and token_type_ids is not None:
             kw["token_type_ids"] = token_type_ids
         out = self.bert(**kw)
         h = out.last_hidden_state[:, 0]  # [CLS]
         return {
             "logits_senti": self.head_senti(h),
             "logits_act":   self.head_act(h),

 import torch.nn as nn
 from transformers import PreTrainedModel, AutoModel, AutoConfig
+def _config_from_base_dict(base_cfg_dict: dict):
+    """Build the base encoder's config object from a plain dict.
+
+    Args:
+        base_cfg_dict: Config fields for the base model; may contain a
+            "model_type" entry selecting which config class to build.
+
+    Returns:
+        The result of ``AutoConfig.for_model(model_type, **kwargs)``, where
+        ``kwargs`` is every entry of ``base_cfg_dict`` except "model_type".
+
+    Raises:
+        ValueError: If ``base_cfg_dict`` is None.
+    """
+    if base_cfg_dict is None:
+        raise ValueError("config.base_model_config is required for offline load.")
+    # A missing (or explicitly None) "model_type" falls back to "bert".
+    model_type = base_cfg_dict.get("model_type", None)
+    if model_type is None:
+        model_type = "bert"
+    # "model_type" is passed positionally below, so keep it out of the keyword args.
+    kwargs = {k: v for k, v in base_cfg_dict.items() if k != "model_type"}
+    return AutoConfig.for_model(model_type, **kwargs)
 class KbertMTL(PreTrainedModel):
     """
     LangQuant KBERT Multi-Task Head (HF-standard, offline-friendly)
       - logits_senti: (B,5)
       - logits_act:   (B,6)
       - logits_emo:   (B,7)
+      - pred_reg:     (B,3)  # [certainty, relevance, toxicity]
+      - last_hidden_state: (B, L, H)
     """
     def __init__(self, config):
         super().__init__(config)
+        base_cfg_dict = getattr(config, "base_model_config", None)
+        base_cfg = _config_from_base_dict(base_cfg_dict)
         self.bert = AutoModel.from_config(base_cfg)
         hidden = self.bert.config.hidden_size
         self.head_reg   = nn.Linear(hidden, 3)
         self.has_token_type = getattr(self.bert.embeddings, "token_type_embeddings", None) is not None
+        self.post_init()
     def forward(self, input_ids=None, attention_mask=None, token_type_ids=None, **kwargs):
         kw = dict(input_ids=input_ids, attention_mask=attention_mask)
         if self.has_token_type and token_type_ids is not None:
             kw["token_type_ids"] = token_type_ids
         out = self.bert(**kw)
         h = out.last_hidden_state[:, 0]  # [CLS]
         return {
             "logits_senti": self.head_senti(h),
             "logits_act":   self.head_act(h),