Upload tokenizer
Browse files
- tokenizer.json +4 -2
- tokenizer_config.json +2 -0
tokenizer.json
CHANGED
|
@@ -1023,7 +1023,8 @@
|
|
| 1023 |
{
|
| 1024 |
"type": "Metaspace",
|
| 1025 |
"replacement": "▁",
|
| 1026 |
-
"add_prefix_space": true
|
|
|
|
| 1027 |
},
|
| 1028 |
{
|
| 1029 |
"type": "Digits",
|
|
@@ -1080,7 +1081,8 @@
|
|
| 1080 |
"decoder": {
|
| 1081 |
"type": "Metaspace",
|
| 1082 |
"replacement": "▁",
|
| 1083 |
-
"add_prefix_space": true
|
|
|
|
| 1084 |
},
|
| 1085 |
"model": {
|
| 1086 |
"type": "Unigram",
|
|
|
|
| 1023 |
{
|
| 1024 |
"type": "Metaspace",
|
| 1025 |
"replacement": "▁",
|
| 1026 |
+
"prepend_scheme": "always",
|
| 1027 |
+
"split": true
|
| 1028 |
},
|
| 1029 |
{
|
| 1030 |
"type": "Digits",
|
|
|
|
| 1081 |
"decoder": {
|
| 1082 |
"type": "Metaspace",
|
| 1083 |
"replacement": "▁",
|
| 1084 |
+
"prepend_scheme": "always",
|
| 1085 |
+
"split": true
|
| 1086 |
},
|
| 1087 |
"model": {
|
| 1088 |
"type": "Unigram",
|
tokenizer_config.json
CHANGED
|
@@ -1,4 +1,5 @@
|
|
| 1 |
{
|
|
|
|
| 2 |
"added_tokens_decoder": {
|
| 3 |
"0": {
|
| 4 |
"content": "<pad>",
|
|
@@ -987,6 +988,7 @@
|
|
| 987 |
"clean_up_tokenization_spaces": true,
|
| 988 |
"eos_token": "</s>",
|
| 989 |
"extra_ids": 100,
|
|
|
|
| 990 |
"max_length": 1024,
|
| 991 |
"model_max_length": 1000000000000000019884624838656,
|
| 992 |
"pad_to_multiple_of": null,
|
|
|
|
| 1 |
{
|
| 2 |
+
"add_prefix_space": null,
|
| 3 |
"added_tokens_decoder": {
|
| 4 |
"0": {
|
| 5 |
"content": "<pad>",
|
|
|
|
| 988 |
"clean_up_tokenization_spaces": true,
|
| 989 |
"eos_token": "</s>",
|
| 990 |
"extra_ids": 100,
|
| 991 |
+
"extra_special_tokens": {},
|
| 992 |
"max_length": 1024,
|
| 993 |
"model_max_length": 1000000000000000019884624838656,
|
| 994 |
"pad_to_multiple_of": null,
|