behroz committed on
Commit
3a0c66a
·
1 Parent(s): 285ac66

add tokenizer

Browse files
Files changed (1) hide show
  1. vocab.json +1 -1
vocab.json CHANGED
@@ -1 +1 @@
1
- {"ن": 2030, "ی": 4372, "ل": 1416, "م": 1607, "ے": 1932, "س": 1290, "ا": 4065, "گ": 752, "ر": 2708, "ہ": 1326, "پ": 563, "ڈ": 321, "و": 3166, "ف": 519, "د": 978, "ق": 332, "ش": 556, "ک": 2325, "ت": 1061, "ھ": 1151, "ٹ": 391, "غ": 200, "آ": 163, "ں": 1223, "ح": 259, "ج": 679, "ب": 1091, "خ": 324, "ز": 445, "ئ": 515, "ص": 179, "ع": 412, "چ": 516, "ڑ": 307, "ث": 59, "ً": 6, "ٔ": 55, "ض": 98, "ط": 69, "ُ": 12, "ذ": 63, "ظ": 60, "َ": 6, "ِ": 2, "ّ": 5, "ؤ": 59, "ژ": 18, "‌": 2, "ء": 1, "|": 10104, "[UNK]": 50, "[PAD]": 51}
 
1
+ {"ل": 0, "غ": 1, "ش": 2, "ح": 3, "د": 4, "ہ": 5, "ف": 6, "آ": 8, "ٹ": 9, "ڈ": 10, "م": 11, "ی": 12, "ا": 13, "ھ": 14, "ے": 15, "ت": 16, "ں": 17, "پ": 18, "ق": 19, "گ": 20, "ر": 21, "و": 22, "ک": 23, "ن": 24, "س": 25, "|": 7, "[UNK]": 26, "[PAD]": 27}