File size: 1,559 Bytes
d670799
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Copyright (c) OpenMMLab. All rights reserved.
from typing import List, Optional

from transformers import BertTokenizer

from mmaction.registry import TOKENIZER


class VindLUTokenizer(BertTokenizer):
    """BERT tokenizer variant used by VindLU.

    It behaves like ``BertTokenizer`` except for how special tokens are
    attached to a single sequence: the trailing separator token is omitted,
    so only the leading classification token is added.
    """

    def build_inputs_with_special_tokens(
            self,
            token_ids_0: List[int],
            token_ids_1: Optional[List[int]] = None) -> List[int]:
        """Wrap one or two token-id sequences with special tokens.

        The produced layouts are:

        - single sequence: `[CLS] X` (no closing `[SEP]`)
        - pair of sequences: `[CLS] A [SEP] B [SEP]`

        Args:
            token_ids_0 (`List[int]`): IDs of the first (or only) sequence.
            token_ids_1 (`List[int]`, *optional*): IDs of the second
                sequence when encoding a pair. Defaults to None.

        Returns:
            `List[int]`: A new list holding the input IDs with the special
            token IDs inserted.
        """
        prefix = [self.cls_token_id]
        if token_ids_1 is not None:
            # Pair input keeps the regular BERT layout, with a separator
            # after each of the two segments.
            separator = [self.sep_token_id]
            return prefix + token_ids_0 + separator + token_ids_1 + separator
        # Single input: only the leading [CLS]; the trailing [SEP] is
        # deliberately left out.
        return prefix + token_ids_0


# Register the `from_pretrained` constructor (not the class itself) under the
# key 'VindLUTokenizer' -- presumably so that building this entry from a
# config loads pretrained tokenizer files; confirm against the registry's
# build logic.
TOKENIZER.register_module(
    name='VindLUTokenizer',
    module=VindLUTokenizer.from_pretrained,
)