add tokenizer
8b564d7 | { |
| "!": 1, |
| "'": 2, |
| ",": 3, |
| "-": 4, |
| ".": 5, |
| ":": 6, |
| ";": 7, |
| "=": 8, |
| "?": 9, |
| "[PAD]": 84, |
| "[UNK]": 83, |
| "|": 0, |
| "।": 10, |
| "ঁ": 11, |
| "ং": 12, |
| "ঃ": 13, |
| "অ": 14, |
| "আ": 15, |
| "ই": 16, |
| "ঈ": 17, |
| "উ": 18, |
| "ঊ": 19, |
| "ঋ": 20, |
| "এ": 21, |
| "ঐ": 22, |
| "ও": 23, |
| "ঔ": 24, |
| "ক": 25, |
| "খ": 26, |
| "গ": 27, |
| "ঘ": 28, |
| "ঙ": 29, |
| "চ": 30, |
| "ছ": 31, |
| "জ": 32, |
| "ঝ": 33, |
| "ঞ": 34, |
| "ট": 35, |
| "ঠ": 36, |
| "ড": 37, |
| "ঢ": 38, |
| "ণ": 39, |
| "ত": 40, |
| "থ": 41, |
| "দ": 42, |
| "ধ": 43, |
| "ন": 44, |
| "প": 45, |
| "ফ": 46, |
| "ব": 47, |
| "ভ": 48, |
| "ম": 49, |
| "য": 50, |
| "র": 51, |
| "ল": 52, |
| "শ": 53, |
| "ষ": 54, |
| "স": 55, |
| "হ": 56, |
| "া": 57, |
| "ি": 58, |
| "ী": 59, |
| "ু": 60, |
| "ূ": 61, |
| "ৃ": 62, |
| "ে": 63, |
| "ৈ": 64, |
| "ো": 65, |
| "ৌ": 66, |
| "্": 67, |
| "ৎ": 68, |
| "ড়": 69, |
| "ঢ়": 70, |
| "য়": 71, |
| "০": 72, |
| "১": 73, |
| "২": 74, |
| "৩": 75, |
| "৪": 76, |
| "৫": 77, |
| "৬": 78, |
| "৭": 79, |
| "৮": 80, |
| "৯": 81, |
| "": 82 |
| } |
|
|