dstilesr committed on
Commit
ee21474
·
verified ·
1 Parent(s): 5299d0c

Pretrained smaller model

Browse files
special_tokens_map.json CHANGED
@@ -34,6 +34,13 @@
34
  "rstrip": false,
35
  "single_word": false
36
  },
 
 
 
 
 
 
 
37
  "unk_token": {
38
  "content": "[UNK]",
39
  "lstrip": false,
 
34
  "rstrip": false,
35
  "single_word": false
36
  },
37
+ "sep_token": {
38
+ "content": "[SEP]",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
  "unk_token": {
45
  "content": "[UNK]",
46
  "lstrip": false,
tokenizer.json CHANGED
@@ -47,6 +47,15 @@
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "normalizer": {
@@ -70,10 +79,28 @@
70
  "use_regex": true
71
  },
72
  "post_processor": {
73
- "type": "ByteLevel",
74
- "add_prefix_space": true,
75
- "trim_offsets": true,
76
- "use_regex": true
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  },
78
  "decoder": {
79
  "type": "ByteLevel",
 
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
50
+ },
51
+ {
52
+ "id": 75001,
53
+ "content": "[SEP]",
54
+ "single_word": false,
55
+ "lstrip": false,
56
+ "rstrip": false,
57
+ "normalized": false,
58
+ "special": true
59
  }
60
  ],
61
  "normalizer": {
 
79
  "use_regex": true
80
  },
81
  "post_processor": {
82
+ "type": "Sequence",
83
+ "processors": [
84
+ {
85
+ "type": "ByteLevel",
86
+ "add_prefix_space": true,
87
+ "trim_offsets": true,
88
+ "use_regex": true
89
+ },
90
+ {
91
+ "type": "RobertaProcessing",
92
+ "sep": [
93
+ "[SEP]",
94
+ 75001
95
+ ],
96
+ "cls": [
97
+ "[CLS]",
98
+ 75000
99
+ ],
100
+ "trim_offsets": true,
101
+ "add_prefix_space": true
102
+ }
103
+ ]
104
  },
105
  "decoder": {
106
  "type": "ByteLevel",
tokenizer_config.json CHANGED
@@ -39,6 +39,14 @@
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
 
 
 
 
 
 
 
 
42
  }
43
  },
44
  "bos_token": "[CLS]",
@@ -49,6 +57,7 @@
49
  "mask_token": "[MASK]",
50
  "model_max_length": 1000000000000000019884624838656,
51
  "pad_token": "[PAD]",
 
52
  "tokenizer_class": "PreTrainedTokenizerFast",
53
  "unk_token": "[UNK]"
54
  }
 
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
+ },
43
+ "75001": {
44
+ "content": "[SEP]",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
  }
51
  },
52
  "bos_token": "[CLS]",
 
57
  "mask_token": "[MASK]",
58
  "model_max_length": 1000000000000000019884624838656,
59
  "pad_token": "[PAD]",
60
+ "sep_token": "[SEP]",
61
  "tokenizer_class": "PreTrainedTokenizerFast",
62
  "unk_token": "[UNK]"
63
  }