import torch
from torch import nn
from transformers import BertPreTrainedModel


class ParagramSPModel(BertPreTrainedModel):
    def __init__(self, config):
        super().__init__(config)
        self.config = config
        # A single embedding table stands in for the usual BERT encoder;
        # sentence representations are mean-pooled word embeddings (see forward).
        self.word_embeddings = nn.Embedding(
            config.vocab_size, config.hidden_size, padding_idx=config.pad_token_id
        )
        self.post_init()

    def filter_input_ids(self, input_ids):
        """Drop non-positive token ids from each row, then right-pad every
        row back to the original sequence length with the pad token."""
        length = input_ids.shape[1]
        output = []
        for row in input_ids.tolist():
            filtered_ids = [token_id for token_id in row if token_id > 0]
            if not filtered_ids:
                # Keep at least one token so a row is never entirely empty.
                filtered_ids = [0]
            output.append(filtered_ids + [self.config.pad_token_id] * (length - len(filtered_ids)))
        return torch.tensor(output, device=input_ids.device)

    def forward(self, input_ids, attention_mask=None):
        # Re-pack the ids so padding sits contiguously on the right; any
        # attention_mask passed in is recomputed from the filtered ids.
        input_ids = self.filter_input_ids(input_ids)
        attention_mask = input_ids != self.config.pad_token_id
        embeddings = self.word_embeddings(input_ids)
        # Zero out the padded positions, then mean-pool over real tokens only.
        masked_embeddings = embeddings * attention_mask[:, :, None]
        token_counts = attention_mask[:, :, None].sum(dim=1).clamp(min=1)  # guard against all-pad rows
        mean_pooled_embeddings = masked_embeddings.sum(dim=1) / token_counts
        return (embeddings, mean_pooled_embeddings, embeddings)
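

# A minimal smoke test, as a sketch: BertConfig and the toy sizes below are
# illustrative assumptions, not part of the model definition above.
if __name__ == "__main__":
    from transformers import BertConfig

    config = BertConfig(vocab_size=100, hidden_size=8, pad_token_id=0)
    model = ParagramSPModel(config)

    # Two padded sequences; id 0 is the pad token and gets filtered out.
    input_ids = torch.tensor([[5, 7, 0, 0], [3, 0, 9, 0]])
    attention_mask = (input_ids != 0).long()

    embeddings, mean_pooled, _ = model(input_ids, attention_mask)
    print(embeddings.shape)   # torch.Size([2, 4, 8])
    print(mean_pooled.shape)  # torch.Size([2, 8])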