bencyc1129 committed on
Commit
6955515
·
verified ·
1 Parent(s): 91116ad

Upload tokenizer

Browse files
special_tokens_map.json CHANGED
@@ -1,13 +1,4 @@
1
  {
2
- "additional_special_tokens": [
3
- {
4
- "content": "<N>",
5
- "lstrip": false,
6
- "normalized": false,
7
- "rstrip": false,
8
- "single_word": false
9
- }
10
- ],
11
  "cls_token": "[CLS]",
12
  "mask_token": "[MASK]",
13
  "pad_token": "[PAD]",
 
1
  {
 
 
 
 
 
 
 
 
 
2
  "cls_token": "[CLS]",
3
  "mask_token": "[MASK]",
4
  "pad_token": "[PAD]",
tokenizer.json CHANGED
@@ -47,15 +47,6 @@
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
50
- },
51
- {
52
- "id": 28996,
53
- "content": "<N>",
54
- "single_word": false,
55
- "lstrip": false,
56
- "rstrip": false,
57
- "normalized": false,
58
- "special": true
59
  }
60
  ],
61
  "normalizer": {
 
47
  "rstrip": false,
48
  "normalized": false,
49
  "special": true
 
 
 
 
 
 
 
 
 
50
  }
51
  ],
52
  "normalizer": {
tokenizer_config.json CHANGED
@@ -39,24 +39,13 @@
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
- },
43
- "28996": {
44
- "content": "<N>",
45
- "lstrip": false,
46
- "normalized": false,
47
- "rstrip": false,
48
- "single_word": false,
49
- "special": true
50
  }
51
  },
52
- "additional_special_tokens": [
53
- "<N>"
54
- ],
55
  "clean_up_tokenization_spaces": true,
56
  "cls_token": "[CLS]",
57
  "do_lower_case": false,
58
  "mask_token": "[MASK]",
59
- "model_max_length": 512,
60
  "pad_token": "[PAD]",
61
  "sep_token": "[SEP]",
62
  "strip_accents": null,
 
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
 
 
 
 
 
 
 
 
42
  }
43
  },
 
 
 
44
  "clean_up_tokenization_spaces": true,
45
  "cls_token": "[CLS]",
46
  "do_lower_case": false,
47
  "mask_token": "[MASK]",
48
+ "model_max_length": 256,
49
  "pad_token": "[PAD]",
50
  "sep_token": "[SEP]",
51
  "strip_accents": null,