Upload tokenizer.json
Browse files- tokenizer.json +27 -0
tokenizer.json
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"tokens": {
|
| 3 |
+
"words": "Hello, World",
|
| 4 |
+
"word_based1": "Hello",
|
| 5 |
+
"word_based2": "World",
|
| 6 |
+
"character_based": {
|
| 7 |
+
"1": "H",
|
| 8 |
+
"2": "e",
|
| 9 |
+
"3": "l",
|
| 10 |
+
"4": "l",
|
| 11 |
+
"5": "o",
|
| 12 |
+
"6": ",",
|
| 13 |
+
"7": "W",
|
| 14 |
+
"8": "o",
|
| 15 |
+
"9": "r",
|
| 16 |
+
"10": "l",
|
| 17 |
+
"11": "d"
|
| 18 |
+
},
|
| 19 |
+
"subword_based": [
|
| 20 |
+
"He",
|
| 21 |
+
"##llo",
|
| 22 |
+
"##,",
|
| 23 |
+
"##Wor",
|
| 24 |
+
"##ld"
|
| 25 |
+
]
|
| 26 |
+
}
|
| 27 |
+
}
|