| | |
| | import torch |
| | from mmdet.datasets.builder import PIPELINES |
| |
|
| | from mmocr.models.builder import build_convertor |
| |
|
| |
|
| | @PIPELINES.register_module() |
| | class NerTransform: |
| | """Convert text to ID and entity in ground truth to label ID. The masks and |
| | tokens are generated at the same time. The four parameters will be used as |
| | input to the model. |
| | |
| | Args: |
| | label_convertor: Convert text to ID and entity |
| | in ground truth to label ID. |
| | max_len (int): Limited maximum input length. |
| | """ |
| |
|
| | def __init__(self, label_convertor, max_len): |
| | self.label_convertor = build_convertor(label_convertor) |
| | self.max_len = max_len |
| |
|
| | def __call__(self, results): |
| | texts = results['text'] |
| | input_ids = self.label_convertor.convert_text2id(texts) |
| | labels = self.label_convertor.convert_entity2label( |
| | results['label'], len(texts)) |
| |
|
| | attention_mask = [0] * self.max_len |
| | token_type_ids = [0] * self.max_len |
| | |
| | |
| | for i in range(len(texts) + 2): |
| | attention_mask[i] = 1 |
| | results = dict( |
| | labels=labels, |
| | texts=texts, |
| | input_ids=input_ids, |
| | attention_mask=attention_mask, |
| | token_type_ids=token_type_ids) |
| | return results |
| |
|
| |
|
| | @PIPELINES.register_module() |
| | class ToTensorNER: |
| | """Convert data with ``list`` type to tensor.""" |
| |
|
| | def __call__(self, results): |
| |
|
| | input_ids = torch.tensor(results['input_ids']) |
| | labels = torch.tensor(results['labels']) |
| | attention_masks = torch.tensor(results['attention_mask']) |
| | token_type_ids = torch.tensor(results['token_type_ids']) |
| |
|
| | results = dict( |
| | img=[], |
| | img_metas=dict( |
| | input_ids=input_ids, |
| | attention_masks=attention_masks, |
| | labels=labels, |
| | token_type_ids=token_type_ids)) |
| | return results |
| |
|