# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional

from transformers import BertTokenizer

from mmaction.registry import TOKENIZER


class VindLUTokenizer(BertTokenizer):
    """BERT tokenizer variant used by VindLU.

    Behaves like ``BertTokenizer`` except that a single (unpaired) sequence
    is not terminated with the separator token.
    """

    def build_inputs_with_special_tokens(
            self,
            token_ids_0: List[int],
            token_ids_1: Optional[List[int]] = None) -> List[int]:
        """Wrap one or two ID sequences with the special tokens.

        The resulting layouts are:

        - single sequence: ``[CLS] X``
        - pair of sequences: ``[CLS] A [SEP] B [SEP]``

        Args:
            token_ids_0 (List[int]): IDs of the first sequence.
            token_ids_1 (List[int], optional): IDs of the second sequence
                when encoding a pair. Defaults to None.

        Returns:
            List[int]: The input IDs with the special tokens inserted.
        """
        # Both layouts start with the classification token.
        prefix = [self.cls_token_id]

        # Single sequence: deliberately no trailing separator.
        if token_ids_1 is None:
            return prefix + token_ids_0

        # Pair: the separator closes each of the two segments.
        separator = [self.sep_token_id]
        return prefix + token_ids_0 + separator + token_ids_1 + separator


# Register the ``from_pretrained`` constructor (not the class itself) under
# the name 'VindLUTokenizer'.
TOKENIZER.register_module(
    'VindLUTokenizer', module=VindLUTokenizer.from_pretrained)