import torch
from torch import nn
from transformers import BertPreTrainedModel


class ParagramSPModel(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        # A single word-embedding lookup table; no transformer layers are used.
        self.word_embeddings = nn.Embedding(
            config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id
        )
        self.post_init()

    def filter_input_ids(self, input_ids):
        """Drop non-positive token ids from each row, then right-pad back
        to the original sequence length with the configured pad token."""
        output = []
        length = input_ids.shape[1]
        for row in input_ids:
            filtered_ids = [int(token_id) for token_id in row if token_id > 0]
            if not filtered_ids:
                # Keep at least one token so the row is never empty.
                filtered_ids = [0]
            filtered_ids += [self.config.pad_token_id] * (length - len(filtered_ids))
            output.append(filtered_ids)
        return torch.tensor(output, dtype=input_ids.dtype, device=input_ids.device)

    def forward(self, input_ids, attention_mask=None):
        # Any attention mask passed in is ignored; it is recomputed after
        # filtering so it matches the filtered ids.
        input_ids = self.filter_input_ids(input_ids)
        attention_mask = input_ids != self.config.pad_token_id
        embeddings = self.word_embeddings(input_ids)
        # Zero out padded positions, then mean-pool over the sequence.
        masked_embeddings = embeddings * attention_mask[:, :, None]
        token_counts = attention_mask[:, :, None].sum(dim=1).clamp(min=1)  # guard against division by zero
        mean_pooled_embeddings = masked_embeddings.sum(dim=1) / token_counts
        # Tuple mirrors (last_hidden_state, pooler_output, hidden_states).
        return (embeddings, mean_pooled_embeddings, embeddings)
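

# --- Usage sketch (illustrative, not from the original source) ---
# A minimal smoke test assuming a BertConfig with pad_token_id=0; the
# vocab_size, hidden_size, and token ids below are arbitrary placeholders.
if __name__ == "__main__":
    from transformers import BertConfig

    config = BertConfig(vocab_size=30522, hidden_size=300, pad_token_id=0)
    model = ParagramSPModel(config)

    # Two right-padded sequences; ids are made-up examples, not a real vocabulary.
    input_ids = torch.tensor([[101, 2023, 2003, 102, 0, 0],
                              [101, 2307, 102, 0, 0, 0]])
    attention_mask = (input_ids != config.pad_token_id).long()

    token_embeddings, sentence_embeddings, _ = model(input_ids, attention_mask)
    print(token_embeddings.shape)     # torch.Size([2, 6, 300])
    print(sentence_embeddings.shape)  # torch.Size([2, 300])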