import os

import torch
import torch.nn as nn
from transformers import AutoModel, AutoConfig


class Model(nn.Module):
    def __init__(
        self,
        model_dir: str,
        metric_names: list[str],
        dropout: float = 0.2,
        hidden_dim: int | None = None,
    ):
        """
        :param model_dir: path or HF identifier for the base Transformer
        :param metric_names: list of your emotion columns,
            e.g. ['Happiness_M', 'Sadness_M', …, 'Arousal_M']
        :param dropout: dropout rate applied after the LayerNorm
        :param hidden_dim: if None, inferred from model.config.hidden_size
        """
        super().__init__()
        self.metric_names = metric_names
        self.bert = AutoModel.from_pretrained(model_dir)

        # infer hidden dim if not provided
        if hidden_dim is None:
            hidden_dim = self.bert.config.hidden_size

        # per-metric heads: one hidden projection and one output unit per metric
        for name in self.metric_names:
            setattr(self, name, nn.Linear(hidden_dim, 1))
            setattr(self, 'l_1_' + name, nn.Linear(hidden_dim, hidden_dim))

        self.layer_norm = nn.LayerNorm(hidden_dim)
        self.relu = nn.ReLU()
        self.dropout_layer = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()

    def forward(self, input_ids: torch.Tensor, attention_mask: torch.Tensor):
        # BERT/RoBERTa-style models return (last_hidden_state, pooled_output)
        # when return_dict=False
        _, pooled = self.bert(
            input_ids=input_ids,
            attention_mask=attention_mask,
            return_dict=False,
        )
        return self.rate_embedding(pooled)

    def rate_embedding(self, x: torch.Tensor):
        outputs = []
        for name in self.metric_names:
            h = getattr(self, 'l_1_' + name)(x)
            # residual + norm + dropout + activation
            h = self.relu(self.dropout_layer(self.layer_norm(h + x)))
            out = self.sigmoid(getattr(self, name)(h))
            outputs.append(out)
        return outputs

    def save_pretrained(self, save_directory: str):
        """
        Saves:
          - config.json (with the custom metric_names)
          - the base model files (via HF save_pretrained)
          - pytorch_model.bin (the full state_dict, including the heads)
        """
        os.makedirs(save_directory, exist_ok=True)

        # extend the HF config with our metric_names
        config = self.bert.config
        config.metric_names = self.metric_names
        config.save_pretrained(save_directory)

        self.bert.save_pretrained(save_directory)
        torch.save(self.state_dict(), os.path.join(save_directory, 'pytorch_model.bin'))

    @classmethod
    def from_pretrained(
        cls,
        bert_model_dir: str,
        state_dict_path: str | None = None,
        dropout: float = 0.2,
        hidden_dim: int | None = None,
        metric_names: list[str] | None = None,
    ):
        """
        Loads your production model in two pieces:
          1) the base Transformer from `bert_model_dir`
             (must be a model name or a directory with a config.json)
          2) your previously trained state dict from `state_dict_path`
             (a plain file, possibly without an extension)

        You must pass exactly the same metric_names you trained with, unless you
        previously called save_pretrained (which writes them into config.json).
        """
        # 1) try to read the config to recover metric_names & hidden_dim
        config = AutoConfig.from_pretrained(bert_model_dir)
        cfg_names = getattr(config, 'metric_names', None)
        cfg_hidden = getattr(config, 'hidden_size', None)

        _metric_names = metric_names or cfg_names
        _hidden_dim = hidden_dim or cfg_hidden

        if _metric_names is None or _hidden_dim is None:
            raise ValueError(
                "Must provide `metric_names` and `hidden_dim` "
                "either as arguments or stored in config.json."
            )

        # instantiate
        model = cls(
            model_dir=bert_model_dir,
            metric_names=_metric_names,
            dropout=dropout,
            hidden_dim=_hidden_dim,
        )

        # 2) load your saved state dict
        if state_dict_path:
            if not os.path.isfile(state_dict_path):
                raise FileNotFoundError(f"State-dict file not found: {state_dict_path}")
            sd = torch.load(state_dict_path, map_location='cpu')
            model.load_state_dict(sd)
        else:
            # fallback: look for pytorch_model.bin in bert_model_dir
            fallback = os.path.join(bert_model_dir, 'pytorch_model.bin')
            if os.path.isfile(fallback):
                sd = torch.load(fallback, map_location='cpu')
                model.load_state_dict(sd)
            else:
                raise FileNotFoundError(
                    "No state dict provided and no pytorch_model.bin in the model_dir."
                )

        return model
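

# ---------------------------------------------------------------------------
# Usage sketch (illustrative only): the base checkpoint name, save directory,
# metric names, and input sentence below are assumptions for demonstration,
# not values taken from the original code. Substitute your own.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    from transformers import AutoTokenizer

    # hypothetical emotion columns and base checkpoint
    metric_names = ['Happiness_M', 'Sadness_M', 'Arousal_M']
    base_model = 'roberta-base'

    # build a fresh model and persist it (writes config.json with metric_names,
    # the base model files, and pytorch_model.bin into ./emotion_model)
    model = Model(model_dir=base_model, metric_names=metric_names)
    model.save_pretrained('./emotion_model')

    # reload it later; metric_names are recovered from the saved config.json,
    # and pytorch_model.bin in the directory is picked up as the state dict
    model = Model.from_pretrained('./emotion_model')
    model.eval()

    # run inference on a toy sentence
    tokenizer = AutoTokenizer.from_pretrained(base_model)
    batch = tokenizer(
        ['I am thrilled about this!'],
        return_tensors='pt',
        padding=True,
        truncation=True,
    )
    with torch.no_grad():
        scores = model(batch['input_ids'], batch['attention_mask'])

    # `scores` is a list of (batch_size, 1) tensors in [0, 1], one per metric
    for name, score in zip(model.metric_names, scores):
        print(f'{name}: {score.item():.3f}')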