kabir5297 committed on
Commit
c43218c
·
1 Parent(s): edf7cd3

commit files to HF hub

Browse files
.gitattributes CHANGED
@@ -32,3 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ language_model/6gram_correct.arpa filter=lfs diff=lfs merge=lfs -text
alphabet.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"labels": [" ", "</s>", "<s>", "\u0981", "\u0982", "\u0983", "\u0985", "\u0986", "\u0987", "\u0988", "\u0989", "\u098a", "\u098b", "\u098f", "\u0990", "\u0993", "\u0994", "\u0995", "\u0996", "\u0997", "\u0998", "\u0999", "\u099a", "\u099b", "\u099c", "\u099d", "\u099e", "\u099f", "\u09a0", "\u09a1", "\u09a2", "\u09a3", "\u09a4", "\u09a5", "\u09a6", "\u09a7", "\u09a8", "\u09aa", "\u09ab", "\u09ac", "\u09ad", "\u09ae", "\u09af", "\u09b0", "\u09b2", "\u09b6", "\u09b7", "\u09b8", "\u09b9", "\u09be", "\u09bf", "\u09c0", "\u09c1", "\u09c2", "\u09c3", "\u09c7", "\u09c8", "\u09cb", "\u09cc", "\u09cd", "\u09ce", "\u09dc", "\u09dd", "\u09df", "\u200d", "\u2047", ""], "is_bpe": false}
language_model/6gram_correct.arpa ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1cbaf1ccf4c3bf75cce856debb28dd306d4c1f59d6fffc246209aff96d0bbae0
3
+ size 49087825
language_model/attrs.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"alpha": 0.5, "beta": 1.5, "unk_score_offset": -10.0, "score_boundary": true}
language_model/unigrams.txt ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json CHANGED
@@ -4,6 +4,7 @@
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
 
7
  "return_attention_mask": true,
8
  "sampling_rate": 16000
9
  }
 
4
  "feature_size": 1,
5
  "padding_side": "right",
6
  "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2ProcessorWithLM",
8
  "return_attention_mask": true,
9
  "sampling_rate": 16000
10
  }
special_tokens_map.json CHANGED
@@ -1 +1,6 @@
1
- {"bos_token": "<s>", "eos_token": "</s>", "unk_token": "[UNK]", "pad_token": "[PAD]"}
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "eos_token": "</s>",
4
+ "pad_token": "[PAD]",
5
+ "unk_token": "[UNK]"
6
+ }
tokenizer_config.json CHANGED
@@ -1 +1,14 @@
1
- {"unk_token": "[UNK]", "bos_token": "<s>", "eos_token": "</s>", "pad_token": "[PAD]", "do_lower_case": false, "word_delimiter_token": "|", "replace_word_delimiter_char": " ", "special_tokens_map_file": null, "name_or_path": "./", "tokenizer_class": "Wav2Vec2CTCTokenizer"}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "do_lower_case": false,
4
+ "eos_token": "</s>",
5
+ "model_max_length": 1000000000000000019884624838656,
6
+ "name_or_path": "kabir5297/BanglaASR-XLSR-SmallLM",
7
+ "pad_token": "[PAD]",
8
+ "processor_class": "Wav2Vec2ProcessorWithLM",
9
+ "replace_word_delimiter_char": " ",
10
+ "special_tokens_map_file": null,
11
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
12
+ "unk_token": "[UNK]",
13
+ "word_delimiter_token": "|"
14
+ }
vocab.json CHANGED
@@ -1 +1,69 @@
1
- {"</s>": 1, "<s>": 2, "ঁ": 3, "ং": 4, "ঃ": 5, "অ": 6, "আ": 7, "ই": 8, "ঈ": 9, "উ": 10, "ঊ": 11, "ঋ": 12, "এ": 13, "ঐ": 14, "ও": 15, "ঔ": 16, "ক": 17, "খ": 18, "গ": 19, "ঘ": 20, "ঙ": 21, "চ": 22, "ছ": 23, "জ": 24, "ঝ": 25, "ঞ": 26, "ট": 27, "ঠ": 28, "ড": 29, "ঢ": 30, "ণ": 31, "ত": 32, "থ": 33, "দ": 34, "ধ": 35, "ন": 36, "প": 37, "ফ": 38, "ব": 39, "ভ": 40, "ম": 41, "য": 42, "র": 43, "ল": 44, "শ": 45, "ষ": 46, "স": 47, "হ": 48, "া": 49, "ি": 50, "ী": 51, "ু": 52, "ূ": 53, "ৃ": 54, "ে": 55, "ৈ": 56, "ো": 57, "ৌ": 58, "্": 59, "ৎ": 60, "ড়": 61, "ঢ়": 62, "য়": 63, "‍": 64, "|": 0, "[UNK]": 65, "[PAD]": 66}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</s>": 1,
3
+ "<s>": 2,
4
+ "[PAD]": 66,
5
+ "[UNK]": 65,
6
+ "|": 0,
7
+ "ঁ": 3,
8
+ "ং": 4,
9
+ "ঃ": 5,
10
+ "অ": 6,
11
+ "আ": 7,
12
+ "ই": 8,
13
+ "ঈ": 9,
14
+ "উ": 10,
15
+ "ঊ": 11,
16
+ "ঋ": 12,
17
+ "এ": 13,
18
+ "ঐ": 14,
19
+ "ও": 15,
20
+ "ঔ": 16,
21
+ "ক": 17,
22
+ "খ": 18,
23
+ "গ": 19,
24
+ "ঘ": 20,
25
+ "ঙ": 21,
26
+ "চ": 22,
27
+ "ছ": 23,
28
+ "জ": 24,
29
+ "ঝ": 25,
30
+ "ঞ": 26,
31
+ "ট": 27,
32
+ "ঠ": 28,
33
+ "ড": 29,
34
+ "ঢ": 30,
35
+ "ণ": 31,
36
+ "ত": 32,
37
+ "থ": 33,
38
+ "দ": 34,
39
+ "ধ": 35,
40
+ "ন": 36,
41
+ "প": 37,
42
+ "ফ": 38,
43
+ "ব": 39,
44
+ "ভ": 40,
45
+ "ম": 41,
46
+ "য": 42,
47
+ "র": 43,
48
+ "ল": 44,
49
+ "শ": 45,
50
+ "ষ": 46,
51
+ "স": 47,
52
+ "হ": 48,
53
+ "া": 49,
54
+ "ি": 50,
55
+ "ী": 51,
56
+ "ু": 52,
57
+ "ূ": 53,
58
+ "ৃ": 54,
59
+ "ে": 55,
60
+ "ৈ": 56,
61
+ "ো": 57,
62
+ "ৌ": 58,
63
+ "্": 59,
64
+ "ৎ": 60,
65
+ "ড়": 61,
66
+ "ঢ়": 62,
67
+ "য়": 63,
68
+ "‍": 64
69
+ }