Sayan01 committed on
Commit
2e432f5
·
1 Parent(s): c788427

Upload tokenizer

Browse files
Files changed (2) hide show
  1. special_tokens_map.json +21 -3
  2. tokenizer.json +4 -2
special_tokens_map.json CHANGED
@@ -101,7 +101,25 @@
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
- "eos_token": "</s>",
105
- "pad_token": "<pad>",
106
- "unk_token": "<unk>"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
107
  }
 
101
  "<extra_id_98>",
102
  "<extra_id_99>"
103
  ],
104
+ "eos_token": {
105
+ "content": "</s>",
106
+ "lstrip": false,
107
+ "normalized": false,
108
+ "rstrip": false,
109
+ "single_word": false
110
+ },
111
+ "pad_token": {
112
+ "content": "<pad>",
113
+ "lstrip": false,
114
+ "normalized": false,
115
+ "rstrip": false,
116
+ "single_word": false
117
+ },
118
+ "unk_token": {
119
+ "content": "<unk>",
120
+ "lstrip": false,
121
+ "normalized": false,
122
+ "rstrip": false,
123
+ "single_word": false
124
+ }
125
  }
tokenizer.json CHANGED
@@ -964,7 +964,8 @@
964
  "pre_tokenizer": {
965
  "type": "Metaspace",
966
  "replacement": "▁",
967
- "add_prefix_space": true
 
968
  },
969
  "post_processor": {
970
  "type": "TemplateProcessing",
@@ -1023,7 +1024,8 @@
1023
  "decoder": {
1024
  "type": "Metaspace",
1025
  "replacement": "▁",
1026
- "add_prefix_space": true
 
1027
  },
1028
  "model": {
1029
  "type": "Unigram",
 
964
  "pre_tokenizer": {
965
  "type": "Metaspace",
966
  "replacement": "▁",
967
+ "add_prefix_space": true,
968
+ "prepend_scheme": "always"
969
  },
970
  "post_processor": {
971
  "type": "TemplateProcessing",
 
1024
  "decoder": {
1025
  "type": "Metaspace",
1026
  "replacement": "▁",
1027
+ "add_prefix_space": true,
1028
+ "prepend_scheme": "always"
1029
  },
1030
  "model": {
1031
  "type": "Unigram",