Upload tokenizer
Browse files
- tokenizer.json +27 -0
- tokenizer_config.json +2 -1
tokenizer.json
CHANGED
|
@@ -6964,6 +6964,12 @@
|
|
| 6964 |
"id": "A",
|
| 6965 |
"type_id": 0
|
| 6966 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6967 |
}
|
| 6968 |
],
|
| 6969 |
"pair": [
|
|
@@ -6979,6 +6985,12 @@
|
|
| 6979 |
"type_id": 0
|
| 6980 |
}
|
| 6981 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6982 |
{
|
| 6983 |
"SpecialToken": {
|
| 6984 |
"id": "<s>",
|
|
@@ -6990,9 +7002,24 @@
|
|
| 6990 |
"id": "B",
|
| 6991 |
"type_id": 1
|
| 6992 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6993 |
}
|
| 6994 |
],
|
| 6995 |
"special_tokens": {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6996 |
"<s>": {
|
| 6997 |
"id": "<s>",
|
| 6998 |
"ids": [
|
|
|
|
| 6964 |
"id": "A",
|
| 6965 |
"type_id": 0
|
| 6966 |
}
|
| 6967 |
+
},
|
| 6968 |
+
{
|
| 6969 |
+
"SpecialToken": {
|
| 6970 |
+
"id": "</s>",
|
| 6971 |
+
"type_id": 0
|
| 6972 |
+
}
|
| 6973 |
}
|
| 6974 |
],
|
| 6975 |
"pair": [
|
|
|
|
| 6985 |
"type_id": 0
|
| 6986 |
}
|
| 6987 |
},
|
| 6988 |
+
{
|
| 6989 |
+
"SpecialToken": {
|
| 6990 |
+
"id": "</s>",
|
| 6991 |
+
"type_id": 0
|
| 6992 |
+
}
|
| 6993 |
+
},
|
| 6994 |
{
|
| 6995 |
"SpecialToken": {
|
| 6996 |
"id": "<s>",
|
|
|
|
| 7002 |
"id": "B",
|
| 7003 |
"type_id": 1
|
| 7004 |
}
|
| 7005 |
+
},
|
| 7006 |
+
{
|
| 7007 |
+
"SpecialToken": {
|
| 7008 |
+
"id": "</s>",
|
| 7009 |
+
"type_id": 1
|
| 7010 |
+
}
|
| 7011 |
}
|
| 7012 |
],
|
| 7013 |
"special_tokens": {
|
| 7014 |
+
"</s>": {
|
| 7015 |
+
"id": "</s>",
|
| 7016 |
+
"ids": [
|
| 7017 |
+
2
|
| 7018 |
+
],
|
| 7019 |
+
"tokens": [
|
| 7020 |
+
"</s>"
|
| 7021 |
+
]
|
| 7022 |
+
},
|
| 7023 |
"<s>": {
|
| 7024 |
"id": "<s>",
|
| 7025 |
"ids": [
|
tokenizer_config.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"add_bos_token": true,
|
| 3 |
-
"add_eos_token": false,
|
| 4 |
"add_prefix_space": true,
|
| 5 |
"added_tokens_decoder": {
|
| 6 |
"0": {
|
|
@@ -6179,6 +6179,7 @@
|
|
| 6179 |
"legacy": false,
|
| 6180 |
"model_max_length": 1000000000000000019884624838656,
|
| 6181 |
"pad_token": "</s>",
|
|
|
|
| 6182 |
"sp_model_kwargs": {},
|
| 6183 |
"spaces_between_special_tokens": false,
|
| 6184 |
"tokenizer_class": "LlamaTokenizer",
|
|
|
|
| 1 |
{
|
| 2 |
"add_bos_token": true,
|
| 3 |
+
"add_eos_token": true,
|
| 4 |
"add_prefix_space": true,
|
| 5 |
"added_tokens_decoder": {
|
| 6 |
"0": {
|
|
|
|
| 6179 |
"legacy": false,
|
| 6180 |
"model_max_length": 1000000000000000019884624838656,
|
| 6181 |
"pad_token": "</s>",
|
| 6182 |
+
"padding_side": "right",
|
| 6183 |
"sp_model_kwargs": {},
|
| 6184 |
"spaces_between_special_tokens": false,
|
| 6185 |
"tokenizer_class": "LlamaTokenizer",
|