new proc
Browse files
- added_tokens.json +1 -2
- special_tokens_map.json +2 -2
- tokenizer_config.json +2 -2
- vocab.json +1 -0
added_tokens.json
CHANGED
|
@@ -17,7 +17,6 @@
|
|
| 17 |
"<|da|>": 50285,
|
| 18 |
"<|de|>": 50261,
|
| 19 |
"<|el|>": 50281,
|
| 20 |
-
"<|endoftext|>": 50257,
|
| 21 |
"<|en|>": 50259,
|
| 22 |
"<|es|>": 50262,
|
| 23 |
"<|et|>": 50307,
|
|
@@ -30,6 +29,7 @@
|
|
| 30 |
"<|gu|>": 50333,
|
| 31 |
"<|haw|>": 50352,
|
| 32 |
"<|ha|>": 50354,
|
|
|
|
| 33 |
"<|hi|>": 50276,
|
| 34 |
"<|hr|>": 50291,
|
| 35 |
"<|ht|>": 50339,
|
|
@@ -38,7 +38,6 @@
|
|
| 38 |
"<|id|>": 50275,
|
| 39 |
"<|is|>": 50311,
|
| 40 |
"<|it|>": 50274,
|
| 41 |
-
"<|iw|>": 50279,
|
| 42 |
"<|ja|>": 50266,
|
| 43 |
"<|jw|>": 50356,
|
| 44 |
"<|ka|>": 50329,
|
|
|
|
| 17 |
"<|da|>": 50285,
|
| 18 |
"<|de|>": 50261,
|
| 19 |
"<|el|>": 50281,
|
|
|
|
| 20 |
"<|en|>": 50259,
|
| 21 |
"<|es|>": 50262,
|
| 22 |
"<|et|>": 50307,
|
|
|
|
| 29 |
"<|gu|>": 50333,
|
| 30 |
"<|haw|>": 50352,
|
| 31 |
"<|ha|>": 50354,
|
| 32 |
+
"<|he|>": 50279,
|
| 33 |
"<|hi|>": 50276,
|
| 34 |
"<|hr|>": 50291,
|
| 35 |
"<|ht|>": 50339,
|
|
|
|
| 38 |
"<|id|>": 50275,
|
| 39 |
"<|is|>": 50311,
|
| 40 |
"<|it|>": 50274,
|
|
|
|
| 41 |
"<|ja|>": 50266,
|
| 42 |
"<|jw|>": 50356,
|
| 43 |
"<|ka|>": 50329,
|
special_tokens_map.json
CHANGED
|
@@ -22,7 +22,7 @@
|
|
| 22 |
"<|hi|>",
|
| 23 |
"<|fi|>",
|
| 24 |
"<|vi|>",
|
| 25 |
-
"<|iw|>",
|
| 26 |
"<|uk|>",
|
| 27 |
"<|el|>",
|
| 28 |
"<|ms|>",
|
|
@@ -124,7 +124,7 @@
|
|
| 124 |
},
|
| 125 |
"pad_token": "<|endoftext|>",
|
| 126 |
"unk_token": {
|
| 127 |
-
"content": "",
|
| 128 |
"lstrip": false,
|
| 129 |
"normalized": true,
|
| 130 |
"rstrip": false,
|
|
|
|
| 22 |
"<|hi|>",
|
| 23 |
"<|fi|>",
|
| 24 |
"<|vi|>",
|
| 25 |
+
"<|he|>",
|
| 26 |
"<|uk|>",
|
| 27 |
"<|el|>",
|
| 28 |
"<|ms|>",
|
|
|
|
| 124 |
},
|
| 125 |
"pad_token": "<|endoftext|>",
|
| 126 |
"unk_token": {
|
| 127 |
+
"content": "<|endoftext|>",
|
| 128 |
"lstrip": false,
|
| 129 |
"normalized": true,
|
| 130 |
"rstrip": false,
|
tokenizer_config.json
CHANGED
|
@@ -19,7 +19,7 @@
|
|
| 19 |
"single_word": false
|
| 20 |
},
|
| 21 |
"errors": "replace",
|
| 22 |
-
"model_max_length":
|
| 23 |
"pad_token": null,
|
| 24 |
"processor_class": "WhisperProcessor",
|
| 25 |
"return_attention_mask": false,
|
|
@@ -27,7 +27,7 @@
|
|
| 27 |
"trust_remote_code": false,
|
| 28 |
"unk_token": {
|
| 29 |
"__type": "AddedToken",
|
| 30 |
-
"content": "",
|
| 31 |
"lstrip": false,
|
| 32 |
"normalized": true,
|
| 33 |
"rstrip": false,
|
|
|
|
| 19 |
"single_word": false
|
| 20 |
},
|
| 21 |
"errors": "replace",
|
| 22 |
+
"model_max_length": 1024,
|
| 23 |
"pad_token": null,
|
| 24 |
"processor_class": "WhisperProcessor",
|
| 25 |
"return_attention_mask": false,
|
|
|
|
| 27 |
"trust_remote_code": false,
|
| 28 |
"unk_token": {
|
| 29 |
"__type": "AddedToken",
|
| 30 |
+
"content": "<|endoftext|>",
|
| 31 |
"lstrip": false,
|
| 32 |
"normalized": true,
|
| 33 |
"rstrip": false,
|
vocab.json
CHANGED
|
@@ -314,6 +314,7 @@
|
|
| 314 |
";;": 35746,
|
| 315 |
"<": 27,
|
| 316 |
"</": 3433,
|
|
|
|
| 317 |
"=": 28,
|
| 318 |
"=\"": 13114,
|
| 319 |
"=\"#": 34106,
|
|
|
|
| 314 |
";;": 35746,
|
| 315 |
"<": 27,
|
| 316 |
"</": 3433,
|
| 317 |
+
"<|endoftext|>": 50257,
|
| 318 |
"=": 28,
|
| 319 |
"=\"": 13114,
|
| 320 |
"=\"#": 34106,
|