hellosindh committed on
Commit
b500d27
·
verified ·
1 Parent(s): ee669ca

Revert: clean tokenizer_config matching training IDs

Browse files
Files changed (1) hide show
  1. tokenizer_config.json +15 -11
tokenizer_config.json CHANGED
@@ -1,4 +1,17 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "added_tokens_decoder": {
3
  "0": {
4
  "content": "<pad>",
@@ -34,20 +47,11 @@
34
  },
35
  "32000": {
36
  "content": "<mask>",
37
- "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
  }
43
- },
44
- "additional_special_tokens": null,
45
- "backend": "custom",
46
- "bos_token": "<s>",
47
- "eos_token": "</s>",
48
- "mask_token": "<mask>",
49
- "model_max_length": 1000000000000000019884624838656,
50
- "pad_token": "<pad>",
51
- "tokenizer_class": "XLMRobertaTokenizer",
52
- "unk_token": "<unk>"
53
  }
 
1
  {
2
+ "add_prefix_space": true,
3
+ "backend": "custom",
4
+ "bos_token": "<s>",
5
+ "cls_token": "<s>",
6
+ "eos_token": "</s>",
7
+ "extra_special_tokens": [],
8
+ "mask_token": "<mask>",
9
+ "model_max_length": 512,
10
+ "pad_token": "<pad>",
11
+ "sep_token": "</s>",
12
+ "unk_token": "<unk>",
13
+ "unk_id": 1,
14
+ "tokenizer_class": "XLMRobertaTokenizer",
15
  "added_tokens_decoder": {
16
  "0": {
17
  "content": "<pad>",
 
47
  },
48
  "32000": {
49
  "content": "<mask>",
50
+ "lstrip": true,
51
  "normalized": false,
52
  "rstrip": false,
53
  "single_word": false,
54
  "special": true
55
  }
56
+ }
 
 
 
 
 
 
 
 
 
57
  }