File size: 2,332 Bytes
cbdd75c
0708f0e
cbdd75c
 
0708f0e
cbdd75c
0708f0e
cbdd75c
 
 
 
8a5546b
0708f0e
cbdd75c
0708f0e
cbdd75c
 
 
 
8a5546b
0708f0e
cbdd75c
0708f0e
cbdd75c
 
 
 
8a5546b
0708f0e
cbdd75c
0708f0e
cbdd75c
 
 
de7baa0
49c0434
0708f0e
 
49c0434
de7baa0
 
 
0708f0e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
cbdd75c
 
0708f0e
cbdd75c
0708f0e
 
 
 
 
 
 
 
 
 
 
 
 
 
cbdd75c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
{
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "0": {
      "content": "<s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "1": {
      "content": "<pad>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "2": {
      "content": "</s>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "3": {
      "content": "<unk>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "4": {
      "content": "<mask>",
      "lstrip": true,
      "normalized": false,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "5": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "6": {
      "content": "<|startoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "7": {
      "content": "<nl>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "8": {
      "content": "<hs>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "9": {
      "content": "<sep>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    },
    "10": {
      "content": "<cls>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<s>",
  "clean_up_tokenization_spaces": true,
  "cls_token": "<s>",
  "eos_token": "</s>",
  "errors": "replace",
  "mask_token": "<mask>",
  "max_length": 512,
  "model_max_length": 512,
  "pad_token": "<pad>",
  "sep_token": "</s>",
  "stride": 0,
  "tokenizer_class": "RobertaTokenizer",
  "trim_offsets": true,
  "truncation_side": "right",
  "truncation_strategy": "longest_first",
  "unk_token": "<unk>"
}