# Multi_EvalSum / modeling_summary_evaluator.py
# Uploaded by phuongntc using huggingface_hub (commit f254c87, verified).
import os, json, torch
from torch import nn
from transformers import AutoModel, AutoTokenizer
def mean_pool(last_hidden_state, attention_mask):
mask = attention_mask.unsqueeze(-1).type_as(last_hidden_state)
summed = (last_hidden_state * mask).sum(dim=1)
counts = mask.sum(dim=1).clamp(min=1e-9)
return summed / counts
class SummaryEvaluatorModule(nn.Module):
    """Encoder loaded via ``AutoModel`` plus a two-layer MLP head with 3 outputs.

    The head is ``Linear(in_features, 256) -> ReLU -> Linear(256, 3)``; its
    weights are read from a separate checkpoint file and it is put in eval
    mode at construction time.
    """

    def __init__(self, base_model_name_or_path, head_config_path, regressor_path):
        """Load the encoder, build the head and restore the head weights.

        Args:
            base_model_name_or_path: Passed to ``AutoModel.from_pretrained``.
            head_config_path: Path to a UTF-8 JSON file that must contain the
                key ``"in_features"`` (input width of the head).
            regressor_path: Path to a ``torch.save``-d state dict for the head.

        Raises:
            KeyError: If ``"in_features"`` is missing from the head config.
        """
        super().__init__()
        self.model = AutoModel.from_pretrained(base_model_name_or_path)

        with open(head_config_path, "r", encoding="utf-8") as f:
            head_cfg = json.load(f)
        hidden = head_cfg["in_features"]

        self.regressor = nn.Sequential(
            nn.Linear(hidden, 256),
            nn.ReLU(),
            nn.Linear(256, 3),
        )

        # SECURITY: the checkpoint may come from an untrusted location (e.g. a
        # Hub repo). A state dict only needs tensors, so restrict unpickling
        # with weights_only=True instead of allowing arbitrary objects.
        state_dict = torch.load(
            regressor_path, map_location="cpu", weights_only=True
        )
        self.regressor.load_state_dict(state_dict)
        self.regressor.eval()

    @torch.no_grad()
    def forward(self, input_ids, attention_mask):
        """Encode the inputs, mean-pool the hidden states and apply the head.

        Runs under ``torch.no_grad()``, so no gradients are recorded.

        Args:
            input_ids: Forwarded to the encoder as ``input_ids``.
            attention_mask: Forwarded to the encoder and reused as the
                pooling mask.

        Returns:
            Output of the regression head; its last dimension has size 3.
        """
        out = self.model(input_ids=input_ids, attention_mask=attention_mask)
        pooled = mean_pool(out.last_hidden_state, attention_mask)
        return self.regressor(pooled)
def from_pretrained_custom(repo_dir_or_id, device=None):
    """Build the evaluator model and its tokenizer from a folder or Hub repo.

    Args:
        repo_dir_or_id: Either a local directory or a Hub repo id. It must
            provide the tokenizer/encoder files plus ``head_config.json`` and
            ``regressor.pt``.
        device: Target device for the model. When ``None``, ``"cuda"`` is
            used if available, otherwise ``"cpu"``.

    Returns:
        A ``(model, tokenizer, device)`` tuple; the model has been moved to
        ``device`` and switched to eval mode.
    """
    # Accept either a local folder or a repo id on the Hub.
    base = repo_dir_or_id
    tok = AutoTokenizer.from_pretrained(base, use_fast=True)

    if os.path.isdir(base):
        head_config_path = os.path.join(base, "head_config.json")
        regressor_path = os.path.join(base, "regressor.pt")
    else:
        # A repo id is not a filesystem path, so joining it with a file name
        # would point at a non-existent file. Let transformers download the
        # files (or find them in its cache) and hand back real local paths.
        from transformers.utils import cached_file

        head_config_path = cached_file(base, "head_config.json")
        regressor_path = cached_file(base, "regressor.pt")

    mdl = SummaryEvaluatorModule(
        base_model_name_or_path=base,
        head_config_path=head_config_path,
        regressor_path=regressor_path,
    )

    if device is None:
        device = "cuda" if torch.cuda.is_available() else "cpu"
    mdl.to(device).eval()
    return mdl, tok, device