# MultiEvalVietSum / modeling_multievalvietsum.py
# phuongntc's picture
# Upload MultiEvalVietSum: weights, tokenizer, config, code, and model card
# 2bcedff verified
import json
from pathlib import Path
import torch
import torch.nn as nn
from transformers import AutoModel, AutoTokenizer
def mean_pool(last_hidden_state: torch.Tensor, attention_mask: torch.Tensor) -> torch.Tensor:
    """Return the attention-mask-weighted mean of ``last_hidden_state`` over dim 1.

    Positions whose mask value is 0 contribute nothing to the sum or to the
    divisor. The divisor is clamped to at least ``1e-6`` so a row whose mask
    is entirely zero yields zeros rather than a division by zero.

    Args:
        last_hidden_state: Token vectors; assumed to be laid out as
            (batch, seq, hidden) -- TODO confirm against callers.
        attention_mask: Per-token mask; assumed to be (batch, seq) with
            1 for real tokens and 0 for padding -- TODO confirm.

    Returns:
        The pooled tensor with dim 1 reduced away.
    """
    # Cast to float32 and add a trailing axis so the mask broadcasts over the
    # last dimension of the token vectors.
    weights = attention_mask.to(torch.float32)[..., None]
    weighted_sum = (last_hidden_state * weights).sum(dim=1)
    token_count = torch.clamp(weights.sum(dim=1), min=1e-6)
    return weighted_sum / token_count
class MultiEvalVietSumModel(nn.Module):
    """Transformer backbone with a shared trunk and three scalar output heads.

    The backbone's first-token vector and the mask-weighted mean of its token
    vectors are concatenated, passed through a small shared trunk
    (Linear -> GELU -> Dropout), and fed to three independent linear heads
    named ``head_faith``, ``head_coh`` and ``head_rel``.
    """

    def __init__(self, backbone_name: str):
        """Load the backbone and create the trunk and output heads.

        Args:
            backbone_name: Model identifier or path handed to
                ``AutoModel.from_pretrained``.
        """
        super().__init__()
        self.backbone_name = backbone_name
        self.model = AutoModel.from_pretrained(backbone_name)
        hidden = self.model.config.hidden_size
        # The trunk input is twice the hidden size because forward()
        # concatenates the first-token vector with the mean-pooled vector.
        self.trunk = nn.Sequential(
            nn.Linear(hidden * 2, 256),
            nn.GELU(),
            nn.Dropout(0.1),
        )
        self.head_faith = nn.Linear(256, 1)
        self.head_coh = nn.Linear(256, 1)
        self.head_rel = nn.Linear(256, 1)

    def forward(self, input_ids, attention_mask, token_type_ids=None):
        """Run the backbone and return the three head outputs side by side.

        Args:
            input_ids: Token ids forwarded to the backbone.
            attention_mask: Mask forwarded to the backbone and also used to
                weight the mean pooling.
            token_type_ids: Optional segment ids; only forwarded to the
                backbone when not ``None``.

        Returns:
            A tensor holding the ``head_faith``, ``head_coh`` and ``head_rel``
            outputs concatenated along dim 1, in that order (presumably
            shape (batch, 3) for 2-D batched inputs -- TODO confirm).
        """
        kwargs = {
            "input_ids": input_ids,
            "attention_mask": attention_mask,
        }
        # Only pass token_type_ids when supplied, so backbones that do not
        # accept that argument still work.
        if token_type_ids is not None:
            kwargs["token_type_ids"] = token_type_ids
        out = self.model(**kwargs)
        cls_vec = out.last_hidden_state[:, 0]
        mean_vec = mean_pool(out.last_hidden_state, attention_mask)
        pooled = torch.cat([cls_vec, mean_vec], dim=-1)
        z = self.trunk(pooled)
        faith = self.head_faith(z)
        coh = self.head_coh(z)
        rel = self.head_rel(z)
        return torch.cat([faith, coh, rel], dim=1)

    @classmethod
    def from_pretrained_local(cls, model_dir: str):
        """Rebuild the model from a local directory and load its weights.

        Reads ``multievalvietsum_config.json`` for the backbone name, builds
        the model (which loads the backbone through
        ``AutoModel.from_pretrained``), then loads ``pytorch_model.bin`` onto
        the CPU with strict key matching and switches the model to eval mode.

        Args:
            model_dir: Directory containing ``multievalvietsum_config.json``
                and ``pytorch_model.bin``.

        Returns:
            A ``(model, cfg)`` tuple: the model in eval mode and the parsed
            config dictionary.

        Raises:
            FileNotFoundError: If the config file is missing.
            KeyError: If the config has no ``"backbone_name"`` entry.
            RuntimeError: If the checkpoint keys do not match the model
                exactly (``strict=True``).
        """
        model_dir = Path(model_dir)
        with open(model_dir / "multievalvietsum_config.json", "r", encoding="utf-8") as f:
            cfg = json.load(f)
        model = cls(backbone_name=cfg["backbone_name"])
        # SECURITY: checkpoints are pickle files, and a full unpickle can run
        # arbitrary code from a downloaded file. The file is only used as a
        # state dict, so restrict loading to tensors and plain containers.
        state_dict = torch.load(
            model_dir / "pytorch_model.bin",
            map_location="cpu",
            weights_only=True,
        )
        model.load_state_dict(state_dict, strict=True)
        model.eval()
        return model, cfg

    @staticmethod
    def load_tokenizer_local(model_dir: str):
        """Load the fast tokenizer stored in ``model_dir``.

        Args:
            model_dir: Directory handed to ``AutoTokenizer.from_pretrained``.

        Returns:
            The tokenizer returned by ``AutoTokenizer.from_pretrained`` with
            ``use_fast=True``.
        """
        return AutoTokenizer.from_pretrained(model_dir, use_fast=True)