cdactvm's picture
Upload tokenizer
e249878 verified
raw
history blame contribute delete
879 Bytes
{
"[PAD]": 67,
"[UNK]": 66,
"|": 0,
"ঁ": 1,
"ং": 2,
"ঃ": 3,
"অ": 4,
"আ": 5,
"ই": 6,
"ঈ": 7,
"উ": 8,
"ঊ": 9,
"ঋ": 10,
"ঌ": 11,
"এ": 12,
"ঐ": 13,
"ও": 14,
"ঔ": 15,
"ক": 16,
"খ": 17,
"গ": 18,
"ঘ": 19,
"ঙ": 20,
"চ": 21,
"ছ": 22,
"জ": 23,
"ঝ": 24,
"ঞ": 25,
"ট": 26,
"ঠ": 27,
"ড": 28,
"ঢ": 29,
"ণ": 30,
"ত": 31,
"থ": 32,
"দ": 33,
"ধ": 34,
"ন": 35,
"প": 36,
"ফ": 37,
"ব": 38,
"ভ": 39,
"ম": 40,
"য": 41,
"র": 42,
"ল": 43,
"শ": 44,
"ষ": 45,
"স": 46,
"হ": 47,
"়": 48,
"া": 49,
"ি": 50,
"ী": 51,
"ু": 52,
"ূ": 53,
"ৃ": 54,
"ে": 55,
"ৈ": 56,
"ো": 57,
"ৌ": 58,
"্": 59,
"ৎ": 60,
"ৗ": 61,
"ড়": 62,
"ঢ়": 63,
"য়": 64,
"ৰ": 65
}