Spaces:
Running
Running
| from torch.utils.data import Dataset | |
| import torch | |
| class NerDataset(Dataset): | |
| def __init__(self, embeddings, labels): | |
| super().__init__() | |
| self.embeddings = embeddings | |
| self.labels = labels | |
| def __len__(self): | |
| return len(self.embeddings) | |
| def __getitem__(self, idx): | |
| return self.embeddings[idx], self.labels[idx] | |
| def collate_fn(batch): # Batch_size x Seq_length x 768 | |
| embeddings, labels = zip(*batch) | |
| lengths = [e.size(0) for e in embeddings] | |
| max_len = max(lengths) | |
| padded_embs = torch.stack([ | |
| torch.cat([e, torch.zeros(max_len - e.size(0), e.size(1))]) for e in embeddings | |
| ]) | |
| padded_labels = torch.stack([ | |
| torch.cat([l, torch.full((max_len - l.size(0),), -1, dtype=torch.long)]) for l in labels | |
| ]) | |
| return padded_embs, padded_labels, lengths | |