Upload tokenizer
Browse files
- tokenizer_config.json +4 -32
tokenizer_config.json
CHANGED
|
@@ -12976,43 +12976,15 @@
|
|
| 12976 |
"<|nocaptions|>",
|
| 12977 |
"<|notimestamps|>"
|
| 12978 |
],
|
| 12979 |
-
"bos_token": {
|
| 12980 |
-
"__type": "AddedToken",
|
| 12981 |
-
"content": "<|endoftext|>",
|
| 12982 |
-
"lstrip": false,
|
| 12983 |
-
"normalized": true,
|
| 12984 |
-
"rstrip": false,
|
| 12985 |
-
"single_word": false
|
| 12986 |
-
},
|
| 12987 |
"clean_up_tokenization_spaces": true,
|
| 12988 |
"dropout": 0.2,
|
| 12989 |
-
"eos_token": {
|
| 12990 |
-
"__type": "AddedToken",
|
| 12991 |
-
"content": "<|endoftext|>",
|
| 12992 |
-
"lstrip": false,
|
| 12993 |
-
"normalized": true,
|
| 12994 |
-
"rstrip": false,
|
| 12995 |
-
"single_word": false
|
| 12996 |
-
},
|
| 12997 |
"errors": "replace",
|
| 12998 |
"model_max_length": 1024,
|
| 12999 |
-
"pad_token": {
|
| 13000 |
-
"__type": "AddedToken",
|
| 13001 |
-
"content": "<|endoftext|>",
|
| 13002 |
-
"lstrip": false,
|
| 13003 |
-
"normalized": true,
|
| 13004 |
-
"rstrip": false,
|
| 13005 |
-
"single_word": false
|
| 13006 |
-
},
|
| 13007 |
"processor_class": "WhisperProcessor",
|
| 13008 |
"return_attention_mask": false,
|
| 13009 |
"tokenizer_class": "WhisperTokenizer",
|
| 13010 |
-
"unk_token": {
|
| 13011 |
-
"__type": "AddedToken",
|
| 13012 |
-
"content": "<|endoftext|>",
|
| 13013 |
-
"lstrip": false,
|
| 13014 |
-
"normalized": true,
|
| 13015 |
-
"rstrip": false,
|
| 13016 |
-
"single_word": false
|
| 13017 |
-
}
|
| 13018 |
}
|
|
|
|
| 12976 |
"<|nocaptions|>",
|
| 12977 |
"<|notimestamps|>"
|
| 12978 |
],
|
| 12979 |
+
"bos_token": "<|endoftext|>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12980 |
"clean_up_tokenization_spaces": true,
|
| 12981 |
"dropout": 0.2,
|
| 12982 |
+
"eos_token": "<|endoftext|>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12983 |
"errors": "replace",
|
| 12984 |
"model_max_length": 1024,
|
| 12985 |
+
"pad_token": "<|endoftext|>",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12986 |
"processor_class": "WhisperProcessor",
|
| 12987 |
"return_attention_mask": false,
|
| 12988 |
"tokenizer_class": "WhisperTokenizer",
|
| 12989 |
+
"unk_token": "<|endoftext|>"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12990 |
}
|