Upload tokenizer
Browse files- tokenizer.json +8 -8
- vocab.json +1 -1
tokenizer.json
CHANGED
|
@@ -14,7 +14,7 @@
|
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"id": 1,
|
| 17 |
-
"content": "<|
|
| 18 |
"single_word": false,
|
| 19 |
"lstrip": false,
|
| 20 |
"rstrip": false,
|
|
@@ -23,7 +23,7 @@
|
|
| 23 |
},
|
| 24 |
{
|
| 25 |
"id": 2,
|
| 26 |
-
"content": "<|
|
| 27 |
"single_word": false,
|
| 28 |
"lstrip": false,
|
| 29 |
"rstrip": false,
|
|
@@ -32,7 +32,7 @@
|
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"id": 3,
|
| 35 |
-
"content": "<|
|
| 36 |
"single_word": false,
|
| 37 |
"lstrip": false,
|
| 38 |
"rstrip": false,
|
|
@@ -41,7 +41,7 @@
|
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"id": 4,
|
| 44 |
-
"content": "<|
|
| 45 |
"single_word": false,
|
| 46 |
"lstrip": false,
|
| 47 |
"rstrip": false,
|
|
@@ -326,10 +326,10 @@
|
|
| 326 |
"byte_fallback": false,
|
| 327 |
"vocab": {
|
| 328 |
"<|endoftext|>": 0,
|
| 329 |
-
"<|
|
| 330 |
-
"<|
|
| 331 |
-
"<|
|
| 332 |
-
"<|
|
| 333 |
"<|meter_0|>": 5,
|
| 334 |
"<|meter_1|>": 6,
|
| 335 |
"<|meter_2|>": 7,
|
|
|
|
| 14 |
},
|
| 15 |
{
|
| 16 |
"id": 1,
|
| 17 |
+
"content": "<|psep|>",
|
| 18 |
"single_word": false,
|
| 19 |
"lstrip": false,
|
| 20 |
"rstrip": false,
|
|
|
|
| 23 |
},
|
| 24 |
{
|
| 25 |
"id": 2,
|
| 26 |
+
"content": "<|vsep|>",
|
| 27 |
"single_word": false,
|
| 28 |
"lstrip": false,
|
| 29 |
"rstrip": false,
|
|
|
|
| 32 |
},
|
| 33 |
{
|
| 34 |
"id": 3,
|
| 35 |
+
"content": "<|bsep|>",
|
| 36 |
"single_word": false,
|
| 37 |
"lstrip": false,
|
| 38 |
"rstrip": false,
|
|
|
|
| 41 |
},
|
| 42 |
{
|
| 43 |
"id": 4,
|
| 44 |
+
"content": "<|pad|>",
|
| 45 |
"single_word": false,
|
| 46 |
"lstrip": false,
|
| 47 |
"rstrip": false,
|
|
|
|
| 326 |
"byte_fallback": false,
|
| 327 |
"vocab": {
|
| 328 |
"<|endoftext|>": 0,
|
| 329 |
+
"<|psep|>": 1,
|
| 330 |
+
"<|vsep|>": 2,
|
| 331 |
+
"<|bsep|>": 3,
|
| 332 |
+
"<|pad|>": 4,
|
| 333 |
"<|meter_0|>": 5,
|
| 334 |
"<|meter_1|>": 6,
|
| 335 |
"<|meter_2|>": 7,
|
vocab.json
CHANGED
|
@@ -1 +1 @@
|
|
| 1 |
-
{"<|endoftext|>":0,"<|
|
|
|
|
| 1 |
+
{"<|endoftext|>":0,"<|psep|>":1,"<|vsep|>":2,"<|bsep|>":3,"<|pad|>":4,"<|meter_0|>":5,"<|meter_1|>":6,"<|meter_2|>":7,"<|meter_3|>":8,"<|meter_4|>":9,"<|meter_5|>":10,"<|meter_6|>":11,"<|meter_7|>":12,"<|meter_8|>":13,"<|meter_9|>":14,"<|meter_10|>":15,"<|meter_11|>":16,"<|meter_12|>":17,"<|meter_13|>":18,"<|meter_14|>":19,"<|meter_15|>":20,"<|res_0|>":21,"<|res_1|>":22,"<|res_2|>":23,"<|res_3|>":24,"<|res_4|>":25,"<|res_5|>":26,"<|res_6|>":27,"<|res_7|>":28,"<|res_8|>":29,"<|res_9|>":30,"<|res_10|>":31,"<|res_11|>":32,"<|res_12|>":33," ":34,"0":35,"1":36,"2":37,"3":38,"4":39,"5":40,"6":41,"7":42,"8":43,"9":44,"<":45,">":46,"_":47,"b":48,"e":49,"m":50,"p":51,"r":52,"s":53,"t":54,"v":55,"|":56,"~":57,"ء":58,"أ":59,"ؤ":60,"ئ":61,"ا":62,"ب":63,"ة":64,"ت":65,"ث":66,"ج":67,"ح":68,"خ":69,"د":70,"ذ":71,"ر":72,"ز":73,"س":74,"ش":75,"ص":76,"ض":77,"ط":78,"ظ":79,"ع":80,"غ":81,"ف":82,"ق":83,"ك":84,"ل":85,"م":86,"ن":87,"ه":88,"و":89,"ى":90,"ي":91,"ً":92,"ٌ":93,"ٍ":94,"َ":95,"ُ":96,"ِ":97,"ّ":98,"ْ":99}
|