import sys
import warnings

import numpy as np
import pandas as pd
import torch
from sklearn.metrics import accuracy_score, classification_report, f1_score
from sklearn.model_selection import train_test_split
from torch import nn
from torch.optim import Adam
from tqdm import tqdm
from transformers import AutoConfig, AutoTokenizer, XLMRobertaModel
from transformers.modeling_utils import PreTrainedModel

warnings.filterwarnings("ignore")


class XLMRoBERTaClassifier(PreTrainedModel):
    """Binary classifier: an XLM-R encoder followed by a small Conv1d head."""

    def __init__(self, dropout=0.3, model_name="xlm-roberta-large"):
        # Load the config from the same checkpoint as the weights so the
        # two cannot drift apart when model_name is changed.
        config = AutoConfig.from_pretrained(model_name)
        super().__init__(config)
        self.roberta = XLMRobertaModel.from_pretrained(model_name)
        self.dropout = nn.Dropout(dropout)
        self.sigmoid = nn.Sigmoid()
        # Conv1d treats the token axis as channels, so every input must be
        # padded/truncated to exactly 200 tokens.
        self.conv1 = nn.Conv1d(200, 3, 3)
        self.conv2 = nn.Conv1d(3, 3, 3)
        self.pool = nn.MaxPool1d(3)
        self.flatten = nn.Flatten()
        # With hidden_size=1024 (xlm-roberta-large):
        # 1024 -> conv1 -> 1022 -> pool -> 340 -> conv2 -> 338 -> pool -> 112,
        # and 3 channels * 112 = 336 flattened features.
        self.linear = nn.Linear(336, 128)
        self.final_layer = nn.Linear(128, 1)

    def forward(self, input_ids, attention_mask):
        roberta_output = self.roberta(input_ids=input_ids,
                                      attention_mask=attention_mask)
        # (batch, 200, 1024) token embeddings
        last_hidden_state = roberta_output.last_hidden_state

        # Convolve along the hidden dimension, treating tokens as channels:
        # (batch, 200, 1024) -> (batch, 3, 1022) -> (batch, 3, 340)
        conv_output = self.conv1(last_hidden_state)
        pool_output = self.pool(conv_output)
        # (batch, 3, 340) -> (batch, 3, 338) -> (batch, 3, 112)
        conv_output = self.conv2(pool_output)
        pool_output = self.pool(conv_output)
        # (batch, 3, 112) -> (batch, 336)
        flatten_output = self.flatten(pool_output)

        linear_output = self.linear(flatten_output)
        dropout_output = self.dropout(linear_output)
        final_output = self.final_layer(dropout_output)
        # Sigmoid probability per example; squeeze only the last axis so a
        # batch of size 1 still returns a 1-D tensor (pairs with nn.BCELoss).
        sigmoid_output = self.sigmoid(final_output)
        return sigmoid_output.squeeze(-1)
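

# A minimal usage sketch (illustrative; the sentences, max_length pinning,
# and loss pairing below are assumptions, not from the original script).
# The conv head requires each batch to be padded/truncated to exactly 200
# tokens, hence padding="max_length", max_length=200.
if __name__ == "__main__":
    tokenizer = AutoTokenizer.from_pretrained("xlm-roberta-large")
    model = XLMRoBERTaClassifier()
    model.eval()  # disable dropout for the smoke test
    batch = tokenizer(
        ["an example sentence", "otra frase de ejemplo"],
        padding="max_length",
        truncation=True,
        max_length=200,
        return_tensors="pt",
    )
    with torch.no_grad():
        probs = model(batch["input_ids"], batch["attention_mask"])
    print(probs)  # shape (2,): one sigmoid probability per sentence
    # For training, these probabilities pair with nn.BCELoss() and float
    # 0/1 labels: loss = nn.BCELoss()(probs, labels.float()).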