Commit
·
ed665c0
1
Parent(s):
29f2bf6
Upload tokenizer
Browse files- special_tokens_map.json +1 -1
- tokenizer.json +7 -7
- tokenizer_config.json +1 -3
special_tokens_map.json
CHANGED
|
@@ -3,4 +3,4 @@
|
|
| 3 |
"eos_token": "</s>",
|
| 4 |
"pad_token": "</s>",
|
| 5 |
"unk_token": "<unk>"
|
| 6 |
-
}
|
|
|
|
| 3 |
"eos_token": "</s>",
|
| 4 |
"pad_token": "</s>",
|
| 5 |
"unk_token": "<unk>"
|
| 6 |
+
}
|
tokenizer.json
CHANGED
|
@@ -8790,7 +8790,7 @@
|
|
| 8790 |
"\"?": 8652,
|
| 8791 |
"▁>>>": 8653,
|
| 8792 |
"Que": 8654,
|
| 8793 |
-
"
|
| 8794 |
"▁plain": 8656,
|
| 8795 |
"ativa": 8657,
|
| 8796 |
"ocker": 8658,
|
|
@@ -18026,7 +18026,7 @@
|
|
| 18026 |
"▁farm": 17888,
|
| 18027 |
"▁rôle": 17889,
|
| 18028 |
"▁статьи": 17890,
|
| 18029 |
-
"
|
| 18030 |
"subfigure": 17892,
|
| 18031 |
"èces": 17893,
|
| 18032 |
"ziel": 17894,
|
|
@@ -20381,7 +20381,7 @@
|
|
| 20381 |
"▁gcc": 20243,
|
| 20382 |
"▁scène": 20244,
|
| 20383 |
"Navigation": 20245,
|
| 20384 |
-
"▁
|
| 20385 |
"▁кан": 20247,
|
| 20386 |
"▁towns": 20248,
|
| 20387 |
"Username": 20249,
|
|
@@ -30216,7 +30216,7 @@
|
|
| 30216 |
"æ": 30078,
|
| 30217 |
"њ": 30079,
|
| 30218 |
" ": 30080,
|
| 30219 |
-
"
|
| 30220 |
"Э": 30082,
|
| 30221 |
"ë": 30083,
|
| 30222 |
"õ": 30084,
|
|
@@ -51054,7 +51054,7 @@
|
|
| 51054 |
"▁>> >",
|
| 51055 |
"Qu e",
|
| 51056 |
"Q ue",
|
| 51057 |
-
"
|
| 51058 |
"▁p lain",
|
| 51059 |
"▁pl ain",
|
| 51060 |
"▁pla in",
|
|
@@ -70279,7 +70279,7 @@
|
|
| 70279 |
"▁fa rm",
|
| 70280 |
"▁r ôle",
|
| 70281 |
"▁стать и",
|
| 70282 |
-
"
|
| 70283 |
"sub figure",
|
| 70284 |
"èce s",
|
| 70285 |
"è ces",
|
|
@@ -74942,7 +74942,7 @@
|
|
| 74942 |
"▁ gcc",
|
| 74943 |
"▁sc ène",
|
| 74944 |
"N avigation",
|
| 74945 |
-
"▁
|
| 74946 |
"▁к ан",
|
| 74947 |
"▁ка н",
|
| 74948 |
"▁ кан",
|
|
|
|
| 8790 |
"\"?": 8652,
|
| 8791 |
"▁>>>": 8653,
|
| 8792 |
"Que": 8654,
|
| 8793 |
+
" ": 8655,
|
| 8794 |
"▁plain": 8656,
|
| 8795 |
"ativa": 8657,
|
| 8796 |
"ocker": 8658,
|
|
|
|
| 18026 |
"▁farm": 17888,
|
| 18027 |
"▁rôle": 17889,
|
| 18028 |
"▁статьи": 17890,
|
| 18029 |
+
" ": 17891,
|
| 18030 |
"subfigure": 17892,
|
| 18031 |
"èces": 17893,
|
| 18032 |
"ziel": 17894,
|
|
|
|
| 20381 |
"▁gcc": 20243,
|
| 20382 |
"▁scène": 20244,
|
| 20383 |
"Navigation": 20245,
|
| 20384 |
+
"▁ ": 20246,
|
| 20385 |
"▁кан": 20247,
|
| 20386 |
"▁towns": 20248,
|
| 20387 |
"Username": 20249,
|
|
|
|
| 30216 |
"æ": 30078,
|
| 30217 |
"њ": 30079,
|
| 30218 |
" ": 30080,
|
| 30219 |
+
" ": 30081,
|
| 30220 |
"Э": 30082,
|
| 30221 |
"ë": 30083,
|
| 30222 |
"õ": 30084,
|
|
|
|
| 51054 |
"▁>> >",
|
| 51055 |
"Qu e",
|
| 51056 |
"Q ue",
|
| 51057 |
+
" ",
|
| 51058 |
"▁p lain",
|
| 51059 |
"▁pl ain",
|
| 51060 |
"▁pla in",
|
|
|
|
| 70279 |
"▁fa rm",
|
| 70280 |
"▁r ôle",
|
| 70281 |
"▁стать и",
|
| 70282 |
+
" ",
|
| 70283 |
"sub figure",
|
| 70284 |
"èce s",
|
| 70285 |
"è ces",
|
|
|
|
| 74942 |
"▁ gcc",
|
| 74943 |
"▁sc ène",
|
| 74944 |
"N avigation",
|
| 74945 |
+
"▁ ",
|
| 74946 |
"▁к ан",
|
| 74947 |
"▁ка н",
|
| 74948 |
"▁ кан",
|
tokenizer_config.json
CHANGED
|
@@ -1,6 +1,4 @@
|
|
| 1 |
{
|
| 2 |
-
"add_bos_token": true,
|
| 3 |
-
"add_eos_token": false,
|
| 4 |
"bos_token": {
|
| 5 |
"__type": "AddedToken",
|
| 6 |
"content": "<s>",
|
|
@@ -36,4 +34,4 @@
|
|
| 36 |
},
|
| 37 |
"use_default_system_prompt": true,
|
| 38 |
"use_fast": true
|
| 39 |
-
}
|
|
|
|
| 1 |
{
|
|
|
|
|
|
|
| 2 |
"bos_token": {
|
| 3 |
"__type": "AddedToken",
|
| 4 |
"content": "<s>",
|
|
|
|
| 34 |
},
|
| 35 |
"use_default_system_prompt": true,
|
| 36 |
"use_fast": true
|
| 37 |
+
}
|