from dataclasses import dataclass
from typing import Optional, Tuple

import torch
from torch import nn
from transformers.models.roberta import RobertaModel, RobertaPreTrainedModel

from .configuration_alignscore import AlignscoreConfig


@dataclass
class ModelOutput:
    loss: Optional[torch.FloatTensor] = None
    all_loss: Optional[list] = None
    loss_nums: Optional[list] = None
    prediction_logits: Optional[torch.FloatTensor] = None
    seq_relationship_logits: Optional[torch.FloatTensor] = None
    tri_label_logits: Optional[torch.FloatTensor] = None
    reg_label_logits: Optional[torch.FloatTensor] = None
    hidden_states: Optional[Tuple[torch.FloatTensor]] = None
    attentions: Optional[Tuple[torch.FloatTensor]] = None


class AlignscoreModel(RobertaPreTrainedModel):
    config_class = AlignscoreConfig

    # Adapted from transformers.models.roberta.modeling_roberta.RobertaForSequenceClassification
    def __init__(self, config):
        super().__init__(config)
        # config.num_labels is ignored: the model always exposes three fixed
        # heads (binary, ternary, and regression) on top of the pooled output.
        self.config = config

        self.roberta = RobertaModel(config, add_pooling_layer=True)
        self.bin_layer = nn.Linear(config.hidden_size, 2)  # binary alignment head
        self.tri_layer = nn.Linear(config.hidden_size, 3)  # ternary (NLI-style) head
        self.reg_layer = nn.Linear(config.hidden_size, 1)  # regression head

        if config.hidden_dropout_prob != 0.1:
            print(
                "Warning: hidden_dropout_prob is not set to 0.1, which may affect the model's performance."
            )
        self.dropout = nn.Dropout(config.hidden_dropout_prob)  # expected to be 0.1
        self.softmax = nn.Softmax(dim=-1)

        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.LongTensor] = None,
        attention_mask: Optional[torch.FloatTensor] = None,
        token_type_ids: Optional[torch.LongTensor] = None,
        position_ids: Optional[torch.LongTensor] = None,
        head_mask: Optional[torch.FloatTensor] = None,
        inputs_embeds: Optional[torch.FloatTensor] = None,
        labels: Optional[torch.LongTensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ) -> ModelOutput:
        r"""
        labels (`torch.LongTensor` of shape `(batch_size,)`, *optional*):
            Not supported. This model is inference-only; passing `labels`
            raises `NotImplementedError`.
        """
        # The heads read attributes off the encoder output, so always request
        # the dict-style output from the underlying RoBERTa encoder (with
        # `return_dict=False` it would return a plain tuple and the
        # `pooler_output` attribute access below would fail).
        outputs = self.roberta(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=True,
        )

        # All three heads operate on the pooled [CLS] representation.
        seq_relationship_score = self.bin_layer(self.dropout(outputs.pooler_output))
        tri_label_score = self.tri_layer(self.dropout(outputs.pooler_output))
        reg_label_score = self.reg_layer(outputs.pooler_output)

        if labels is not None:
            raise NotImplementedError(
                "AlignscoreModel does not support labels for training. "
                "Please use the model for inference only."
            )

        return ModelOutput(
            loss=None,
            all_loss=None,
            loss_nums=None,
            prediction_logits=None,
            seq_relationship_logits=seq_relationship_score,
            tri_label_logits=tri_label_score,
            reg_label_logits=reg_label_score,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
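

if __name__ == "__main__":
    # Minimal inference sketch showing how the three heads are consumed.
    # Everything below is illustrative, not part of the published model code:
    # the checkpoint name "roberta-base" is a placeholder (its heads would be
    # randomly initialized), and the mapping of tri-label indices to alignment
    # classes depends on how the actual AlignScore checkpoint was trained.
    from transformers import AutoTokenizer

    checkpoint = "roberta-base"  # placeholder; substitute a trained AlignScore checkpoint
    tokenizer = AutoTokenizer.from_pretrained(checkpoint)
    model = AlignscoreModel.from_pretrained(checkpoint)
    model.eval()

    context = "The cat sat on the mat."
    claim = "A cat is sitting on a mat."

    # Context and claim are encoded as a single sequence pair; the pooled
    # [CLS] representation feeds all three heads.
    inputs = tokenizer(context, claim, truncation=True, return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)

    # Output shapes: seq_relationship_logits (batch, 2),
    # tri_label_logits (batch, 3), reg_label_logits (batch, 1).
    tri_probs = torch.softmax(outputs.tri_label_logits, dim=-1)
    print("ternary probabilities:", tri_probs)
    print("regression score:", outputs.reg_label_logits.squeeze(-1))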