ahazeemi committed on
Commit
783d4cf
·
1 Parent(s): a883cfa

add tokenizer

Browse files
Files changed (3) hide show
  1. special_tokens_map.json +1 -0
  2. tokenizer_config.json +1 -0
  3. vocab.json +1 -0
special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
vocab.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"स": 0, "थ": 1, "त": 2, "ग़": 3, "ू": 4, "ऋ": 5, "म": 6, "ऐ": 7, "ऊ": 8, "ँ": 9, ":": 10, "घ": 11, "c": 12, "व": 13, "़": 14, "क़": 15, "o": 16, "ट": 17, "ञ": 18, "ठ": 19, "ज": 20, "ख़": 21, "ढ़": 22, "i": 23, "ः": 24, "m": 25, ",": 26, "औ": 27, "ओ": 28, "ध": 29, "भ": 30, "#": 31, "<": 32, "फ़": 33, "े": 34, "य": 35, "ी": 36, "अ": 37, "ु": 38, "t": 39, "ज़": 40, "च": 41, "ख": 42, "ई": 43, "b": 44, "ो": 45, "र": 46, "ढ": 47, "ण": 49, "a": 50, "प": 51, "-": 52, "ै": 53, "ृ": 54, "ा": 55, "श": 56, "ब": 57, "न": 58, "ौ": 59, "e": 60, "p": 61, "्": 62, "ह": 63, "ल": 64, "फ": 65, ">": 66, "ि": 67, "n": 68, "ए": 69, "ष": 70, "ग": 71, "u": 72, "क": 73, "ं": 74, "इ": 75, "छ": 76, "आ": 77, " ": 78, "ऍ": 79, "'": 80, "l": 81, "ड़": 82, "d": 83, "।": 84, "द": 85, "ऑ": 86, "ड": 87, "ॉ": 88, "झ": 89, "उ": 90, "|": 48, "[UNK]": 91, "[PAD]": 92}