Automatic Speech Recognition
audio
vosk
kaldi
nextcloud
kyteinsky committed on
Commit
06f2f15
·
verified ·
1 Parent(s): 22028d0

Add "Arabic" and "Arabic Tunisian" models

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +16 -0
  2. vosk-model-ar-mgb2-0.4/README +10 -0
  3. vosk-model-ar-mgb2-0.4/am/final.mdl +3 -0
  4. vosk-model-ar-mgb2-0.4/am/tree +3 -0
  5. vosk-model-ar-mgb2-0.4/conf/mfcc.conf +10 -0
  6. vosk-model-ar-mgb2-0.4/conf/model.conf +10 -0
  7. vosk-model-ar-mgb2-0.4/graph/HCLG.fst +3 -0
  8. vosk-model-ar-mgb2-0.4/graph/disambig_tid.int +2 -0
  9. vosk-model-ar-mgb2-0.4/graph/num_pdfs +1 -0
  10. vosk-model-ar-mgb2-0.4/graph/phones.txt +160 -0
  11. vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.int +3 -0
  12. vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.txt +3 -0
  13. vosk-model-ar-mgb2-0.4/graph/phones/disambig.int +2 -0
  14. vosk-model-ar-mgb2-0.4/graph/phones/disambig.txt +2 -0
  15. vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.csl +1 -0
  16. vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.int +1 -0
  17. vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.txt +1 -0
  18. vosk-model-ar-mgb2-0.4/graph/phones/silence.csl +1 -0
  19. vosk-model-ar-mgb2-0.4/graph/phones/word_boundary.int +157 -0
  20. vosk-model-ar-mgb2-0.4/graph/phones/word_boundary.txt +157 -0
  21. vosk-model-ar-mgb2-0.4/graph/words.txt +3 -0
  22. vosk-model-ar-mgb2-0.4/graph/words_bw.txt +3 -0
  23. vosk-model-ar-mgb2-0.4/graph/words_head.txt +1 -0
  24. vosk-model-ar-mgb2-0.4/graph/words_tail.txt +3 -0
  25. vosk-model-ar-mgb2-0.4/ivector/final.dubm +3 -0
  26. vosk-model-ar-mgb2-0.4/ivector/final.ie +3 -0
  27. vosk-model-ar-mgb2-0.4/ivector/final.ie.id +1 -0
  28. vosk-model-ar-mgb2-0.4/ivector/final.mat +0 -0
  29. vosk-model-ar-mgb2-0.4/ivector/global_cmvn.stats +3 -0
  30. vosk-model-ar-mgb2-0.4/ivector/online_cmvn.conf +1 -0
  31. vosk-model-ar-mgb2-0.4/ivector/splice.conf +2 -0
  32. vosk-model-ar-mgb2-0.4/scripts/buckwalter2unicode.py +454 -0
  33. vosk-model-small-ar-tn-0.1-linto/am/cmvn_opts +1 -0
  34. vosk-model-small-ar-tn-0.1-linto/am/final.ie.id +1 -0
  35. vosk-model-small-ar-tn-0.1-linto/am/final.mdl +3 -0
  36. vosk-model-small-ar-tn-0.1-linto/am/frame_subsampling_factor +1 -0
  37. vosk-model-small-ar-tn-0.1-linto/am/num_jobs +1 -0
  38. vosk-model-small-ar-tn-0.1-linto/am/phones.txt +302 -0
  39. vosk-model-small-ar-tn-0.1-linto/am/tree +3 -0
  40. vosk-model-small-ar-tn-0.1-linto/conf/mfcc.conf +10 -0
  41. vosk-model-small-ar-tn-0.1-linto/conf/model.conf +10 -0
  42. vosk-model-small-ar-tn-0.1-linto/conf/splice.conf +3 -0
  43. vosk-model-small-ar-tn-0.1-linto/graph/Gr.fst +3 -0
  44. vosk-model-small-ar-tn-0.1-linto/graph/HCLr.fst +3 -0
  45. vosk-model-small-ar-tn-0.1-linto/graph/disambig_tid.int +4 -0
  46. vosk-model-small-ar-tn-0.1-linto/graph/phones/align_lexicon.int +0 -0
  47. vosk-model-small-ar-tn-0.1-linto/graph/phones/align_lexicon.txt +3 -0
  48. vosk-model-small-ar-tn-0.1-linto/graph/phones/disambig.int +4 -0
  49. vosk-model-small-ar-tn-0.1-linto/graph/phones/disambig.txt +4 -0
  50. vosk-model-small-ar-tn-0.1-linto/graph/phones/optional_silence.csl +1 -0
.gitattributes CHANGED
@@ -169,3 +169,19 @@ vosk-model-en-us-0.22/rnnlm/final.raw filter=lfs diff=lfs merge=lfs -text
169
  vosk-model-small-ko-0.22/graph/HCLr.fst filter=lfs diff=lfs merge=lfs -text
170
  vosk-model-en-us-0.22/graph/HCLG.fst filter=lfs diff=lfs merge=lfs -text
171
  vosk-model-en-us-0.22/rescore/G.carpa filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
169
  vosk-model-small-ko-0.22/graph/HCLr.fst filter=lfs diff=lfs merge=lfs -text
170
  vosk-model-en-us-0.22/graph/HCLG.fst filter=lfs diff=lfs merge=lfs -text
171
  vosk-model-en-us-0.22/rescore/G.carpa filter=lfs diff=lfs merge=lfs -text
172
+ vosk-model-ar-mgb2-0.4/am/final.mdl filter=lfs diff=lfs merge=lfs -text
173
+ vosk-model-ar-mgb2-0.4/am/tree filter=lfs diff=lfs merge=lfs -text
174
+ vosk-model-ar-mgb2-0.4/graph/HCLG.fst filter=lfs diff=lfs merge=lfs -text
175
+ vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.int filter=lfs diff=lfs merge=lfs -text
176
+ vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.txt filter=lfs diff=lfs merge=lfs -text
177
+ vosk-model-ar-mgb2-0.4/graph/words_bw.txt filter=lfs diff=lfs merge=lfs -text
178
+ vosk-model-ar-mgb2-0.4/graph/words.txt filter=lfs diff=lfs merge=lfs -text
179
+ vosk-model-ar-mgb2-0.4/ivector/final.dubm filter=lfs diff=lfs merge=lfs -text
180
+ vosk-model-ar-mgb2-0.4/ivector/final.ie filter=lfs diff=lfs merge=lfs -text
181
+ vosk-model-small-ar-tn-0.1-linto/am/final.mdl filter=lfs diff=lfs merge=lfs -text
182
+ vosk-model-small-ar-tn-0.1-linto/am/tree filter=lfs diff=lfs merge=lfs -text
183
+ vosk-model-small-ar-tn-0.1-linto/graph/Gr.fst filter=lfs diff=lfs merge=lfs -text
184
+ vosk-model-small-ar-tn-0.1-linto/graph/HCLr.fst filter=lfs diff=lfs merge=lfs -text
185
+ vosk-model-small-ar-tn-0.1-linto/graph/phones/align_lexicon.txt filter=lfs diff=lfs merge=lfs -text
186
+ vosk-model-small-ar-tn-0.1-linto/ivector/final.dubm filter=lfs diff=lfs merge=lfs -text
187
+ vosk-model-small-ar-tn-0.1-linto/ivector/final.ie filter=lfs diff=lfs merge=lfs -text
vosk-model-ar-mgb2-0.4/README ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ Arabic model trained from MGB-2 dataset
2
+
3
+ Get the model here https://kaldi-asr.org/models/m9
4
+
5
+ SIZE 617M
6
+ DATE 2020-02-26
7
+ UPLOADER Dongji Gao
8
+ RECIPE egs/mgb2_arabic/s5
9
+ MODEL TYPE Chain (TDNN and LSTM)
10
+ ERROR RATE 16.40% WER (on dev set)
vosk-model-ar-mgb2-0.4/am/final.mdl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:270d47d60692aedb6b78b913ae24ab636264e67970b0b5ffa8cee666070501b1
3
+ size 147954790
vosk-model-ar-mgb2-0.4/am/tree ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18f4a0198af15f21433b408c25f9adc00ba310e97352256bfce34185028765e4
3
+ size 724536
vosk-model-ar-mgb2-0.4/conf/mfcc.conf ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # config for high-resolution MFCC features, intended for neural network training.
2
+ # Note: we keep all cepstra, so it has the same info as filterbank features,
3
+ # but MFCC is more easily compressible (because less correlated) which is why
4
+ # we prefer this method.
5
+ --use-energy=false # use average of log energy, not energy.
6
+ --sample-frequency=16000
7
+ --num-mel-bins=40
8
+ --num-ceps=40
9
+ --low-freq=40 # low cutoff frequency for mel bins
10
+ --high-freq=-200 # high cutoff frequency, relative to Nyquist of 8000 (=7800)
vosk-model-ar-mgb2-0.4/conf/model.conf ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ --min-active=200
2
+ --max-active=7000
3
+ --beam=13.0
4
+ --lattice-beam=6.0
5
+ --acoustic-scale=1.0
6
+ --frame-subsampling-factor=3
7
+ --endpoint.silence-phones=1:2:3:4:5
8
+ --endpoint.rule2.min-trailing-silence=0.5
9
+ --endpoint.rule3.min-trailing-silence=1.0
10
+ --endpoint.rule4.min-trailing-silence=2.0
vosk-model-ar-mgb2-0.4/graph/HCLG.fst ADDED

Git LFS Details

  • SHA256: 2cf1efb28eb7a88ecd078ab8df8ef3b8cb195aec3dd229fe8d82b86732d7f8e7
  • Pointer size: 134 Bytes
  • Size of remote file: 419 MB
vosk-model-ar-mgb2-0.4/graph/disambig_tid.int ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 22981
2
+ 22982
vosk-model-ar-mgb2-0.4/graph/num_pdfs ADDED
@@ -0,0 +1 @@
 
 
1
+ 6360
vosk-model-ar-mgb2-0.4/graph/phones.txt ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <eps> 0
2
+ SIL 1
3
+ SIL_B 2
4
+ SIL_E 3
5
+ SIL_I 4
6
+ SIL_S 5
7
+ $_B 6
8
+ $_E 7
9
+ $_I 8
10
+ $_S 9
11
+ &_B 10
12
+ &_E 11
13
+ &_I 12
14
+ &_S 13
15
+ '_B 14
16
+ '_E 15
17
+ '_I 16
18
+ '_S 17
19
+ <_B 18
20
+ <_E 19
21
+ <_I 20
22
+ <_S 21
23
+ >_B 22
24
+ >_E 23
25
+ >_I 24
26
+ >_S 25
27
+ A_B 26
28
+ A_E 27
29
+ A_I 28
30
+ A_S 29
31
+ D_B 30
32
+ D_E 31
33
+ D_I 32
34
+ D_S 33
35
+ E_B 34
36
+ E_E 35
37
+ E_I 36
38
+ E_S 37
39
+ H_B 38
40
+ H_E 39
41
+ H_I 40
42
+ H_S 41
43
+ S_B 42
44
+ S_E 43
45
+ S_I 44
46
+ S_S 45
47
+ T_B 46
48
+ T_E 47
49
+ T_I 48
50
+ T_S 49
51
+ V_B 50
52
+ V_E 51
53
+ V_I 52
54
+ V_S 53
55
+ Y_B 54
56
+ Y_E 55
57
+ Y_I 56
58
+ Y_S 57
59
+ Z_B 58
60
+ Z_E 59
61
+ Z_I 60
62
+ Z_S 61
63
+ a_B 62
64
+ a_E 63
65
+ a_I 64
66
+ a_S 65
67
+ b_B 66
68
+ b_E 67
69
+ b_I 68
70
+ b_S 69
71
+ d_B 70
72
+ d_E 71
73
+ d_I 72
74
+ d_S 73
75
+ f_B 74
76
+ f_E 75
77
+ f_I 76
78
+ f_S 77
79
+ g_B 78
80
+ g_E 79
81
+ g_I 80
82
+ g_S 81
83
+ h_B 82
84
+ h_E 83
85
+ h_I 84
86
+ h_S 85
87
+ j_B 86
88
+ j_E 87
89
+ j_I 88
90
+ j_S 89
91
+ k_B 90
92
+ k_E 91
93
+ k_I 92
94
+ k_S 93
95
+ l_B 94
96
+ l_E 95
97
+ l_I 96
98
+ l_S 97
99
+ m_B 98
100
+ m_E 99
101
+ m_I 100
102
+ m_S 101
103
+ n_B 102
104
+ n_E 103
105
+ n_I 104
106
+ n_S 105
107
+ p_B 106
108
+ p_E 107
109
+ p_I 108
110
+ p_S 109
111
+ q_B 110
112
+ q_E 111
113
+ q_I 112
114
+ q_S 113
115
+ r_B 114
116
+ r_E 115
117
+ r_I 116
118
+ r_S 117
119
+ s_B 118
120
+ s_E 119
121
+ s_I 120
122
+ s_S 121
123
+ t_B 122
124
+ t_E 123
125
+ t_I 124
126
+ t_S 125
127
+ v_B 126
128
+ v_E 127
129
+ v_I 128
130
+ v_S 129
131
+ w_B 130
132
+ w_E 131
133
+ w_I 132
134
+ w_S 133
135
+ x_B 134
136
+ x_E 135
137
+ x_I 136
138
+ x_S 137
139
+ y_B 138
140
+ y_E 139
141
+ y_I 140
142
+ y_S 141
143
+ z_B 142
144
+ z_E 143
145
+ z_I 144
146
+ z_S 145
147
+ {_B 146
148
+ {_E 147
149
+ {_I 148
150
+ {_S 149
151
+ |_B 150
152
+ |_E 151
153
+ |_I 152
154
+ |_S 153
155
+ }_B 154
156
+ }_E 155
157
+ }_I 156
158
+ }_S 157
159
+ #0 158
160
+ #1 159
vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.int ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fdf7d5a0049e2722df88323c48434848c93ed1805871c1f04b9360e4c1b38f5
3
+ size 35195793
vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2101700722b6e45bddabaa4918f07e0d2575636c52393008eb29e0ed9d480d1
3
+ size 40057532
vosk-model-ar-mgb2-0.4/graph/phones/disambig.int ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ 158
2
+ 159
vosk-model-ar-mgb2-0.4/graph/phones/disambig.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ #0
2
+ #1
vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.csl ADDED
@@ -0,0 +1 @@
 
 
1
+ 1
vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.int ADDED
@@ -0,0 +1 @@
 
 
1
+ 1
vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ SIL
vosk-model-ar-mgb2-0.4/graph/phones/silence.csl ADDED
@@ -0,0 +1 @@
 
 
1
+ 1:2:3:4:5
vosk-model-ar-mgb2-0.4/graph/phones/word_boundary.int ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 1 nonword
2
+ 2 begin
3
+ 3 end
4
+ 4 internal
5
+ 5 singleton
6
+ 6 begin
7
+ 7 end
8
+ 8 internal
9
+ 9 singleton
10
+ 10 begin
11
+ 11 end
12
+ 12 internal
13
+ 13 singleton
14
+ 14 begin
15
+ 15 end
16
+ 16 internal
17
+ 17 singleton
18
+ 18 begin
19
+ 19 end
20
+ 20 internal
21
+ 21 singleton
22
+ 22 begin
23
+ 23 end
24
+ 24 internal
25
+ 25 singleton
26
+ 26 begin
27
+ 27 end
28
+ 28 internal
29
+ 29 singleton
30
+ 30 begin
31
+ 31 end
32
+ 32 internal
33
+ 33 singleton
34
+ 34 begin
35
+ 35 end
36
+ 36 internal
37
+ 37 singleton
38
+ 38 begin
39
+ 39 end
40
+ 40 internal
41
+ 41 singleton
42
+ 42 begin
43
+ 43 end
44
+ 44 internal
45
+ 45 singleton
46
+ 46 begin
47
+ 47 end
48
+ 48 internal
49
+ 49 singleton
50
+ 50 begin
51
+ 51 end
52
+ 52 internal
53
+ 53 singleton
54
+ 54 begin
55
+ 55 end
56
+ 56 internal
57
+ 57 singleton
58
+ 58 begin
59
+ 59 end
60
+ 60 internal
61
+ 61 singleton
62
+ 62 begin
63
+ 63 end
64
+ 64 internal
65
+ 65 singleton
66
+ 66 begin
67
+ 67 end
68
+ 68 internal
69
+ 69 singleton
70
+ 70 begin
71
+ 71 end
72
+ 72 internal
73
+ 73 singleton
74
+ 74 begin
75
+ 75 end
76
+ 76 internal
77
+ 77 singleton
78
+ 78 begin
79
+ 79 end
80
+ 80 internal
81
+ 81 singleton
82
+ 82 begin
83
+ 83 end
84
+ 84 internal
85
+ 85 singleton
86
+ 86 begin
87
+ 87 end
88
+ 88 internal
89
+ 89 singleton
90
+ 90 begin
91
+ 91 end
92
+ 92 internal
93
+ 93 singleton
94
+ 94 begin
95
+ 95 end
96
+ 96 internal
97
+ 97 singleton
98
+ 98 begin
99
+ 99 end
100
+ 100 internal
101
+ 101 singleton
102
+ 102 begin
103
+ 103 end
104
+ 104 internal
105
+ 105 singleton
106
+ 106 begin
107
+ 107 end
108
+ 108 internal
109
+ 109 singleton
110
+ 110 begin
111
+ 111 end
112
+ 112 internal
113
+ 113 singleton
114
+ 114 begin
115
+ 115 end
116
+ 116 internal
117
+ 117 singleton
118
+ 118 begin
119
+ 119 end
120
+ 120 internal
121
+ 121 singleton
122
+ 122 begin
123
+ 123 end
124
+ 124 internal
125
+ 125 singleton
126
+ 126 begin
127
+ 127 end
128
+ 128 internal
129
+ 129 singleton
130
+ 130 begin
131
+ 131 end
132
+ 132 internal
133
+ 133 singleton
134
+ 134 begin
135
+ 135 end
136
+ 136 internal
137
+ 137 singleton
138
+ 138 begin
139
+ 139 end
140
+ 140 internal
141
+ 141 singleton
142
+ 142 begin
143
+ 143 end
144
+ 144 internal
145
+ 145 singleton
146
+ 146 begin
147
+ 147 end
148
+ 148 internal
149
+ 149 singleton
150
+ 150 begin
151
+ 151 end
152
+ 152 internal
153
+ 153 singleton
154
+ 154 begin
155
+ 155 end
156
+ 156 internal
157
+ 157 singleton
vosk-model-ar-mgb2-0.4/graph/phones/word_boundary.txt ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ SIL nonword
2
+ SIL_B begin
3
+ SIL_E end
4
+ SIL_I internal
5
+ SIL_S singleton
6
+ $_B begin
7
+ $_E end
8
+ $_I internal
9
+ $_S singleton
10
+ &_B begin
11
+ &_E end
12
+ &_I internal
13
+ &_S singleton
14
+ '_B begin
15
+ '_E end
16
+ '_I internal
17
+ '_S singleton
18
+ <_B begin
19
+ <_E end
20
+ <_I internal
21
+ <_S singleton
22
+ >_B begin
23
+ >_E end
24
+ >_I internal
25
+ >_S singleton
26
+ A_B begin
27
+ A_E end
28
+ A_I internal
29
+ A_S singleton
30
+ D_B begin
31
+ D_E end
32
+ D_I internal
33
+ D_S singleton
34
+ E_B begin
35
+ E_E end
36
+ E_I internal
37
+ E_S singleton
38
+ H_B begin
39
+ H_E end
40
+ H_I internal
41
+ H_S singleton
42
+ S_B begin
43
+ S_E end
44
+ S_I internal
45
+ S_S singleton
46
+ T_B begin
47
+ T_E end
48
+ T_I internal
49
+ T_S singleton
50
+ V_B begin
51
+ V_E end
52
+ V_I internal
53
+ V_S singleton
54
+ Y_B begin
55
+ Y_E end
56
+ Y_I internal
57
+ Y_S singleton
58
+ Z_B begin
59
+ Z_E end
60
+ Z_I internal
61
+ Z_S singleton
62
+ a_B begin
63
+ a_E end
64
+ a_I internal
65
+ a_S singleton
66
+ b_B begin
67
+ b_E end
68
+ b_I internal
69
+ b_S singleton
70
+ d_B begin
71
+ d_E end
72
+ d_I internal
73
+ d_S singleton
74
+ f_B begin
75
+ f_E end
76
+ f_I internal
77
+ f_S singleton
78
+ g_B begin
79
+ g_E end
80
+ g_I internal
81
+ g_S singleton
82
+ h_B begin
83
+ h_E end
84
+ h_I internal
85
+ h_S singleton
86
+ j_B begin
87
+ j_E end
88
+ j_I internal
89
+ j_S singleton
90
+ k_B begin
91
+ k_E end
92
+ k_I internal
93
+ k_S singleton
94
+ l_B begin
95
+ l_E end
96
+ l_I internal
97
+ l_S singleton
98
+ m_B begin
99
+ m_E end
100
+ m_I internal
101
+ m_S singleton
102
+ n_B begin
103
+ n_E end
104
+ n_I internal
105
+ n_S singleton
106
+ p_B begin
107
+ p_E end
108
+ p_I internal
109
+ p_S singleton
110
+ q_B begin
111
+ q_E end
112
+ q_I internal
113
+ q_S singleton
114
+ r_B begin
115
+ r_E end
116
+ r_I internal
117
+ r_S singleton
118
+ s_B begin
119
+ s_E end
120
+ s_I internal
121
+ s_S singleton
122
+ t_B begin
123
+ t_E end
124
+ t_I internal
125
+ t_S singleton
126
+ v_B begin
127
+ v_E end
128
+ v_I internal
129
+ v_S singleton
130
+ w_B begin
131
+ w_E end
132
+ w_I internal
133
+ w_S singleton
134
+ x_B begin
135
+ x_E end
136
+ x_I internal
137
+ x_S singleton
138
+ y_B begin
139
+ y_E end
140
+ y_I internal
141
+ y_S singleton
142
+ z_B begin
143
+ z_E end
144
+ z_I internal
145
+ z_S singleton
146
+ {_B begin
147
+ {_E end
148
+ {_I internal
149
+ {_S singleton
150
+ |_B begin
151
+ |_E end
152
+ |_I internal
153
+ |_S singleton
154
+ }_B begin
155
+ }_E end
156
+ }_I internal
157
+ }_S singleton
vosk-model-ar-mgb2-0.4/graph/words.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1da082badb6c7e3cbf5c3caa6cd8b62ab6ffe65a57f4e7b92e75d1c02704553e
3
+ size 21222619
vosk-model-ar-mgb2-0.4/graph/words_bw.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a502ad2fdfd2f4e0a7a3f104738eca331a20ef4c82d32fbb06eba4d09b376ac
3
+ size 13907872
vosk-model-ar-mgb2-0.4/graph/words_head.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ <eps> 0
vosk-model-ar-mgb2-0.4/graph/words_tail.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ #0 957742
2
+ <s> 957743
3
+ </s> 957744
vosk-model-ar-mgb2-0.4/ivector/final.dubm ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:efb7f72fcf10ccb465f8970e83d811f4219fce2468ebb026a628b6894688af50
3
+ size 168048
vosk-model-ar-mgb2-0.4/ivector/final.ie ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3835f479a636ec3730edd5c937a7a046f665f61067892ace1f38eac71617222b
3
+ size 19757687
vosk-model-ar-mgb2-0.4/ivector/final.ie.id ADDED
@@ -0,0 +1 @@
 
 
1
+ 52508e2bd5a8af67fdcd9b272a6e3f77
vosk-model-ar-mgb2-0.4/ivector/final.mat ADDED
Binary file (45 kB). View file
 
vosk-model-ar-mgb2-0.4/ivector/global_cmvn.stats ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [
2
+ 1.18965e+11 1.942701e+10 -3.341223e+10 2.586335e+10 -4.536047e+10 -5.143079e+08 -2.817998e+10 -1.106823e+10 -5.748533e+09 -8.021131e+09 2.444721e+09 -1.185678e+10 -6.421724e+09 -5.475492e+09 -4.132559e+09 -4.982299e+09 -4.688524e+09 -1.968607e+09 -2.401957e+09 -1.374661e+09 -8.737595e+08 -4.833948e+08 -2.02156e+08 6.90596e+07 4.669136e+08 1.042838e+08 8.957011e+08 4292212 6.994046e+08 2.358506e+08 -2.035312e+08 4.944966e+08 -1.953043e+08 8.404543e+08 2.283313e+08 -3.559911e+08 2.555876e+08 -5.979873e+08 2.206491e+08 -2.063428e+08 1.212011e+09
3
+ 1.208634e+13 6.758905e+11 1.543773e+12 1.354227e+12 2.424682e+12 7.559103e+11 1.481059e+12 9.085413e+11 7.526449e+11 7.495913e+11 6.104847e+11 6.774222e+11 4.939513e+11 4.036599e+11 3.202782e+11 2.749835e+11 1.999791e+11 1.271602e+11 8.408587e+10 5.00519e+10 2.502974e+10 8.412481e+09 1.131555e+09 3.216713e+08 4.231433e+09 1.073782e+10 1.879558e+10 2.450884e+10 2.922538e+10 3.173832e+10 3.415615e+10 3.725646e+10 3.959967e+10 3.782093e+10 2.933737e+10 2.61524e+10 2.414329e+10 1.951272e+10 1.488638e+10 9.146851e+09 0 ]
vosk-model-ar-mgb2-0.4/ivector/online_cmvn.conf ADDED
@@ -0,0 +1 @@
 
 
1
+ # configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh
vosk-model-ar-mgb2-0.4/ivector/splice.conf ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ --left-context=3
2
+ --right-context=3
vosk-model-ar-mgb2-0.4/scripts/buckwalter2unicode.py ADDED
@@ -0,0 +1,454 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/python
2
+
3
+ # buckwalter2unicode.py - A script to convert transliterated Arabic
4
+ # (using the Buckwalter system) to Unicode.
5
+ #
6
+ # Version 0.2 - 15th September 2004
7
+ #
8
+ # Andrew Roberts (andyr [at] comp (dot) leeds [dot] ac (dot) uk)
9
+ #
10
+ # Project homepage: http://www.comp.leeds.ac.uk/andyr/software/
11
+ #
12
+ # Now, listen carefully...
13
+ #
14
+ #
15
+ # This program is free software; you can redistribute it and/or modify
16
+ # it under the terms of the GNU General Public License as published by
17
+ # the Free Software Foundation; either version 2 of the License, or
18
+ # (at your option) any later version.
19
+ #
20
+ # This program is distributed in the hope that it will be useful,
21
+ # but WITHOUT ANY WARRANTY; without even the implied warranty of
22
+ # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23
+ # GNU General Public License for more details.
24
+ #
25
+ # You should have received a copy of the GNU General Public License
26
+ # along with this program; if not, write to the Free Software
27
+ # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
28
+ #
29
+
30
+ from __future__ import print_function
31
+ import sys, getopt, codecs, os, re
32
+
33
+ # Declare a dictionary with Buckwalter's ASCII symbols as the keys, and
34
+ # their unicode equivalents as values.
35
+
36
# Forward table: one Buckwalter ASCII symbol -> one Arabic code point.
buck2uni = {
    "'": u"\u0621",  # hamza-on-the-line
    "|": u"\u0622",  # madda
    ">": u"\u0623",  # hamza-on-'alif
    "&": u"\u0624",  # hamza-on-waaw
    "<": u"\u0625",  # hamza-under-'alif
    "}": u"\u0626",  # hamza-on-yaa'
    "A": u"\u0627",  # bare 'alif
    "b": u"\u0628",  # baa'
    "p": u"\u0629",  # taa' marbuuTa
    "t": u"\u062A",  # taa'
    "v": u"\u062B",  # thaa'
    "j": u"\u062C",  # jiim
    "H": u"\u062D",  # Haa'
    "x": u"\u062E",  # khaa'
    "d": u"\u062F",  # daal
    "*": u"\u0630",  # dhaal
    "r": u"\u0631",  # raa'
    "z": u"\u0632",  # zaay
    "s": u"\u0633",  # siin
    "$": u"\u0634",  # shiin
    "S": u"\u0635",  # Saad
    "D": u"\u0636",  # Daad
    "T": u"\u0637",  # Taa'
    "Z": u"\u0638",  # Zaa' (DHaa')
    "E": u"\u0639",  # cayn
    "g": u"\u063A",  # ghayn
    "_": u"\u0640",  # taTwiil
    "f": u"\u0641",  # faa'
    "q": u"\u0642",  # qaaf
    "k": u"\u0643",  # kaaf
    "l": u"\u0644",  # laam
    "m": u"\u0645",  # miim
    "n": u"\u0646",  # nuun
    "h": u"\u0647",  # haa'
    "w": u"\u0648",  # waaw
    "Y": u"\u0649",  # 'alif maqSuura
    "y": u"\u064A",  # yaa'
    "F": u"\u064B",  # fatHatayn
    "N": u"\u064C",  # Dammatayn
    "K": u"\u064D",  # kasratayn
    "a": u"\u064E",  # fatHa
    "u": u"\u064F",  # Damma
    "i": u"\u0650",  # kasra
    "~": u"\u0651",  # shaddah
    "o": u"\u0652",  # sukuun
    "`": u"\u0670",  # dagger 'alif
    "{": u"\u0671",  # waSla
}

# Reverse table (Unicode -> Buckwalter), needed for the reverse
# transliteration. Every value of buck2uni becomes a key here and every
# key becomes the matching value.
uni2buck = {arabic: ascii_symbol for (ascii_symbol, arabic) in buck2uni.items()}
95
+
96
+ # Declare some global variables...
97
+
98
+
99
# Every setting starts out empty (or zero); the option loop further down
# overwrites whichever ones the user supplied on the command line.
inFilename = outFilename = ""  # Paths of the input file and the output file.
inEnc = outEnc = ""            # Text encodings of the input and output files.
ignoreChars = ""               # Lines beginning with these symbols are ignored.
columnRange = ""               # Column numbers to transliterate.
delimiter = ""                 # User-defined column delimiter.
reverse = 0                    # 1 selects the reverse direction, i.e.,
                               # Unicode -> Buckwalter.
108
+
109
+ # A function to print to screen the usage details of this script.
110
+
111
def usage():
    """Print the command-line help text to standard output.

    The script name shown in the synopsis lines is taken from
    sys.argv[0]. Nothing is returned.
    """
    program = sys.argv[0]
    help_lines = (
        "Usage: {} -i INFILE -o OUTFILE [-g CHARS -c RANGE -d CHAR".format(program),
        " -r -e INPUT_ENCODING, -E OUTPUT ENCODING]",
        " {} -l".format(program),
        " {} -h".format(program),
        "",
        " -i INFILE, --input=INFILE:",
        " Path to text file to be transliterated to Unicode.",
        " -o OUTFILE, --output=OUTFILE:",
        " Path of file to output the newly transliterated text.",
        " -e ENC, --input-encoding=ENC:",
        " Specify the text encoding of the source file. Default: latin_1.",
        " -E ENC, --output-encoding=ENC:",
        " Specify the text encoding of the target file. Default: utf_8.",
        " -g CHARS, --ignore-lines=CHARS:",
        " Will not transliterate lines that start with any of the CHARS",
        " given. E.g., -g #; will not alter lines starting with # or ;.",
        # The backslashes are escaped explicitly: a bare backslash before
        # '#' or ';' is an invalid escape sequence that newer Python
        # versions warn about. The printed text is unchanged.
        " (May need to be -g \\#\\; on some platforms. See README.txt.)",
        " -c RANGE, --columns=RANGE:",
        " If in columns, select columns to apply transliteration. Can be",
        " comma separated numbers, or a range. E.g., -c 1, -c 1-3, -c 1,3.",
        " -d CHAR, --delimiter=CHAR:",
        " Specify the delimiter that defines the column if using the -c",
        " option above. Default is ' ' (space).",
        " -r, --reverse:",
        " Reverses the transliteration, i.e., Arabic to Buckwalter.",
        " When used, it will change the default input encoding to utf_8 and",
        " output encoding to latin_1",
        " -l, --list-encodings:",
        " Displays all supported file encodings.",
        " -h, --help:",
        " Displays this page.",
        "",
    )
    for line in help_lines:
        print(line)
144
+
145
+ # A function to print to screen all the available encodings supported by
146
+ # Python.
147
+
148
def displayEncodings():
    """Print a fixed table of codec names, aliases and languages.

    The table is a hard-coded listing (header row first); it is written
    to standard output one row per line. Nothing is returned.
    """
    rows = (
        "Codec Aliases Languages",
        "ascii 646, us-ascii English",
        "cp037 IBM037, IBM039 English",
        "cp424 EBCDIC-CP-HE, IBM424 Hebrew",
        "cp437 437, IBM437 English",
        "cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 Western Europe",
        "cp737 Greek",
        "cp775 IBM775 Baltic languages",
        "cp850 850, IBM850 Western Europe",
        "cp852 852, IBM852 Central and Eastern Europe",
        "cp855 855, IBM855 Bulgarian, Byelorussian, Macedonian, Russian, Serbian",
        "cp856 Hebrew",
        "cp857 857, IBM857 Turkish",
        "cp860 860, IBM860 Portuguese",
        "cp861 861, CP-IS, IBM861 Icelandic",
        "cp862 862, IBM862 Hebrew",
        "cp863 863, IBM863 Canadian",
        "cp864 IBM864 Arabic",
        "cp865 865, IBM865 Danish, Norwegian",
        "cp869 869, CP-GR, IBM869 Greek",
        "cp874 Thai",
        "cp875 Greek",
        "cp1006 Urdu",
        "cp1026 ibm1026 Turkish",
        "cp1140 ibm1140 Western Europe",
        "cp1250 windows-1250 Central and Eastern Europe",
        "cp1251 windows-1251 Bulgarian, Byelorussian, Macedonian, Russian, Serbian",
        "cp1252 windows-1252 Western Europe",
        "cp1253 windows-1253 Greek",
        "cp1254 windows-1254 Turkish",
        "cp1255 windows-1255 Hebrew",
        "cp1256 windows-1256 Arabic",
        "cp1257 windows-1257 Baltic languages",
        "cp1258 windows-1258 Vietnamese",
        "latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 West Europe",
        "iso8859_2 iso-8859-2, latin2, L2 Central and Eastern Europe",
        "iso8859_3 iso-8859-3, latin3, L3 Esperanto, Maltese",
        # Spelling corrected here ("languagues" in the earlier text).
        "iso8859_4 iso-8859-4, latin4, L4 Baltic languages",
        "iso8859_5 iso-8859-5, cyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian",
        "iso8859_6 iso-8859-6, arabic Arabic",
        "iso8859_7 iso-8859-7, greek, greek8 Greek",
        "iso8859_8 iso-8859-8, hebrew Hebrew",
        "iso8859_9 iso-8859-9, latin5, L5 Turkish",
        "iso8859_10 iso-8859-10, latin6, L6 Nordic languages",
        "iso8859_13 iso-8859-13 Baltic languages",
        "iso8859_14 iso-8859-14, latin8, L8 Celtic languages",
        "iso8859_15 iso-8859-15 Western Europe",
        "koi8_r Russian",
        "koi8_u Ukrainian",
        "mac_cyrillic maccyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian",
        "mac_greek macgreek Greek",
        "mac_iceland maciceland Icelandic",
        "mac_latin2 maclatin2, maccentraleurope Central and Eastern Europe",
        "mac_roman macroman Western Europe",
        "mac_turkish macturkish Turkish",
        "utf_16 U16, utf16 all languages",
        "utf_16_be UTF-16BE all languages (BMP only)",
        "utf_16_le UTF-16LE all languages (BMP only)",
        "utf_7 U7 all languages",
        "utf_8 U8, UTF, utf8 all languages",
    )
    for row in rows:
        print(row)
209
+
210
def parseIgnoreString(string):
    """Split a string into a list of its individual characters.

    Parameters:
        string: the characters to split up; may be empty.

    Returns:
        A new list holding each character of `string`, in order
        (an empty list for an empty string).
    """
    # list() performs the character-by-character copy directly.
    return list(string)
218
+
219
+ # Begin parsing the command-line arguments...
220
+
221
# Parse the command line into (option, value) pairs plus leftover
# positional arguments. Each long option is its own list element: without
# the comma between "delimiter=" and "reverse", Python joins the two
# adjacent literals into the single bogus option "delimiter=reverse" and
# neither --delimiter nor --reverse is recognised.
try:
    (options, args) = getopt.getopt(
        sys.argv[1:],
        "i:o:e:E:g:c:d:rlh",
        ["input=", "output=", "input-encoding=", "output-encoding=",
         "ignore-lines=", "columns=", "delimiter=", "reverse",
         "list-encodings", "help"])

except getopt.GetoptError:
    # Unknown option or missing option argument: print help information
    # and exit with a failure status.
    usage()
    sys.exit(1)
231
+
232
+ # Loop over all arguments supplied by the user.
233
for (x, y) in options:
    # x is the option as typed by the user (short or long form), y is its
    # argument (empty for flags).
    if x in ("-h", "--help"):
        usage()
        sys.exit(0)

    if x in ("-l", "--list-encodings"):
        displayEncodings()
        sys.exit(0)

    if x in ("-i", "--input"): inFilename = y
    if x in ("-o", "--output"): outFilename = y
    if x in ("-e", "--input-encoding"): inEnc = y
    if x in ("-E", "--output-encoding"): outEnc = y
    if x in ("-r", "--reverse"): reverse = 1
    if x in ("-g", "--ignore-lines"): ignoreChars = y
    if x in ("-c", "--columns"): columnRange = y
    if x in ("-d", "--delimiter"):
        # A tab typed on the command line arrives as the two characters
        # backslash + "t"; turn that pair into a real tab character
        # before validating.
        delimiter = y.replace("\\t", "\t")

        # Only a single character can act as delimiter; warn and keep
        # just the first one.
        if len(delimiter) > 1:
            print("Delimiter should only be a single character. Using first character: " + delimiter[0], file=sys.stderr)
            delimiter = delimiter[0]

        # A delimiter that is itself a Buckwalter symbol would be
        # indistinguishable from the text being transliterated.
        if buck2uni.get(delimiter):
            print("Invalid delimiter. \"" + delimiter + "\" is part of the Buckwalter character set.", file=sys.stderr)
            print("This will obviously cause much confusion as a delimiter!", file=sys.stderr)
            print("Please try again. Aborting...", file=sys.stderr)
            sys.exit(1)
266
+
267
+ # If no delimiter was set then, set the default to " " (space)
268
delimiter = delimiter or " "

# When the user gave no input encoding, pick one from the direction of
# the transliteration:
#
#   Buckwalter -> Unicode: latin_1
#   Unicode -> Buckwalter: utf_8
inEnc = inEnc or ("utf_8" if reverse else "latin_1")

# Likewise for a missing output encoding, with the two defaults swapped:
#
#   Buckwalter -> Unicode: utf_8
#   Unicode -> Buckwalter: latin_1
outEnc = outEnc or ("latin_1" if reverse else "utf_8")
297
+
298
# Ok, let's get the files open!

# Both file names are mandatory. Check for them before touching the file
# system so that a missing argument can never truncate an existing
# output file.
if not outFilename:
    # Script can not work without somewhere to store the transliteration.
    print("Must specify a file to use store the output! Aborting...", file=sys.stderr)
    sys.exit(1)

if not inFilename:
    # This script requires a file to read from.
    print("Must specify a file to use as input! Aborting...", file=sys.stderr)
    sys.exit(1)

# Open the input first: opening the output in "w" mode creates/truncates
# it, which should only happen once the input is known to be readable.
try:
    # Create a file object in "read" mode using the specified input
    # encoding.
    inFile = codecs.open(inFilename, "r", inEnc)
except (IOError, LookupError) as msg:
    # IOError: the file could not be opened.
    # LookupError: the encoding name is unknown to the codecs registry.
    print(msg, file=sys.stderr)
    sys.exit(1)

try:
    # Create a file object in "write" mode using the specified output
    # encoding.
    outFile = codecs.open(outFilename, "w", outEnc)
except (IOError, LookupError) as msg:
    # Do not leave the already-opened input file dangling.
    inFile.close()
    print(msg, file=sys.stderr)
    sys.exit(1)
336
+
337
def getColsFromRange(cRange):
    """Expand a 1-based column specification into 0-based column indices.

    cRange is a comma-separated list whose elements are either a single
    column number ("4") or an inclusive range ("1-3"). Whitespace around
    an element and empty elements (e.g. a trailing comma) are ignored.

    Returns a list of ints in the order given, e.g. "1-3,5" -> [0, 1, 2, 4].
    Raises ValueError if an element is not a number or a start-end pair.
    """
    columns = []

    for element in cRange.split(","):
        element = element.strip()
        if not element:
            # Tolerate stray commas such as "1,3," instead of crashing on
            # int("").
            continue

        if "-" in element:
            # Inclusive range: "2-4" covers columns 2, 3 and 4, i.e.
            # indices 1..3.
            start, end = element.split("-", 1)
            columns.extend(range(int(start) - 1, int(end)))
        else:
            columns.append(int(element) - 1)

    return columns
353
+
354
+ # This function transliterates a given string. It checks the direction
355
+ # of the transliteration and then uses the appropriate dictionary. A
356
+ # transliterated string is returned.
357
+
358
def transliterate(inString, lineNumber):
    """Transliterate one line, optionally restricted to selected columns.

    When the module-level columnRange is empty, the whole of inString is
    passed to transliterateString. Otherwise the line is split on the
    module-level delimiter, only the columns named by columnRange are
    transliterated, and the pieces are re-joined with the same delimiter.

    lineNumber is accepted for the caller's convenience; the body does not
    use it.
    """
    # No column restriction: convert the line as a single unit.
    if not columnRange:
        return transliterateString(inString)

    selected = getColsFromRange(columnRange)

    # Convert only the selected fields; pass the others through untouched.
    pieces = []
    for index, field in enumerate(inString.split(delimiter)):
        if index in selected:
            pieces.append(transliterateString(field))
        else:
            pieces.append(field)

    # Joining puts the delimiter between fields only, never before the
    # first one.
    return delimiter.join(pieces)
391
+
392
def transliterateString(inString):
    """Map every character of inString through the active lookup table.

    The table is buck2uni for the normal direction and uni2buck when the
    module-level reverse flag is set. Characters with no entry in the
    table (spaces, for instance) are copied through unchanged.
    """
    table = uni2buck if reverse else buck2uni
    return "".join(table.get(symbol, symbol) for symbol in inString)
414
+
415
+ #while 1:
416
+ # line = inFile.readline().strip()
417
+ # line = line.decode(inEnc)
418
+ # if not line:
419
+ # break
420
+
421
+ # process string
422
+ # outFile.write(transliterate(line) + os.linesep)
423
+
424
# Work out the set of "ignore" prefixes once, rather than once per line.
ignorePrefixes = parseIgnoreString(ignoreChars) if ignoreChars else None

currentLineNumber = 1
try:
    # Stream the input line by line; reading happens inside the try block
    # so that decoding errors are reported the same way as encoding errors.
    for line in inFile:
        line = line.strip()

        # A line is copied through untouched when --ignore-lines was given
        # and its first character is one of the ignore prefixes. The
        # explicit "line and" guard keeps blank lines from raising
        # IndexError on line[0].
        if ignoreChars and line and line[0] in ignorePrefixes:
            outFile.write(line + " " + os.linesep)
        else:
            outFile.write(transliterate(line, currentLineNumber) + " " + os.linesep)

        currentLineNumber = currentLineNumber + 1

except UnicodeError as msg:
    # A problem occurred while decoding the input or encoding the output.
    # Report to user...
    print(msg, file=sys.stderr)
    sys.exit(1)

finally:
    # All done (or aborting)! Close the files before terminating so that
    # buffered output is flushed even on the error path.
    inFile.close()
    outFile.close()
453
+
454
+ # ... and relax! :)
vosk-model-small-ar-tn-0.1-linto/am/cmvn_opts ADDED
@@ -0,0 +1 @@
 
 
1
+ --norm-means=false --norm-vars=false
vosk-model-small-ar-tn-0.1-linto/am/final.ie.id ADDED
@@ -0,0 +1 @@
 
 
1
+ 0084a8987dd3b241beabb01efcc32e17
vosk-model-small-ar-tn-0.1-linto/am/final.mdl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9462fea3133f2b3ef672df2cabffb749f5eedf49427bff7bedd3457a9dfd7da3
3
+ size 77422160
vosk-model-small-ar-tn-0.1-linto/am/frame_subsampling_factor ADDED
@@ -0,0 +1 @@
 
 
1
+ 3
vosk-model-small-ar-tn-0.1-linto/am/num_jobs ADDED
@@ -0,0 +1 @@
 
 
1
+ 4
vosk-model-small-ar-tn-0.1-linto/am/phones.txt ADDED
@@ -0,0 +1,302 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ <eps> 0
2
+ SIL 1
3
+ SIL_B 2
4
+ SIL_E 3
5
+ SIL_I 4
6
+ SIL_S 5
7
+ A$_B 6
8
+ A$_E 7
9
+ A$_I 8
10
+ A$_S 9
11
+ A&_B 10
12
+ A&_E 11
13
+ A&_I 12
14
+ A&_S 13
15
+ A'_B 14
16
+ A'_E 15
17
+ A'_I 16
18
+ A'_S 17
19
+ A<_B 18
20
+ A<_E 19
21
+ A<_I 20
22
+ A<_S 21
23
+ A>_B 22
24
+ A>_E 23
25
+ A>_I 24
26
+ A>_S 25
27
+ AA_B 26
28
+ AA_E 27
29
+ AA_I 28
30
+ AA_S 29
31
+ AD_B 30
32
+ AD_E 31
33
+ AD_I 32
34
+ AD_S 33
35
+ AE_B 34
36
+ AE_E 35
37
+ AE_I 36
38
+ AE_S 37
39
+ AH_B 38
40
+ AH_E 39
41
+ AH_I 40
42
+ AH_S 41
43
+ AS_B 42
44
+ AS_E 43
45
+ AS_I 44
46
+ AS_S 45
47
+ AT_B 46
48
+ AT_E 47
49
+ AT_I 48
50
+ AT_S 49
51
+ AV_B 50
52
+ AV_E 51
53
+ AV_I 52
54
+ AV_S 53
55
+ AY_B 54
56
+ AY_E 55
57
+ AY_I 56
58
+ AY_S 57
59
+ AZ_B 58
60
+ AZ_E 59
61
+ AZ_I 60
62
+ AZ_S 61
63
+ Ab_B 62
64
+ Ab_E 63
65
+ Ab_I 64
66
+ Ab_S 65
67
+ Ad_B 66
68
+ Ad_E 67
69
+ Ad_I 68
70
+ Ad_S 69
71
+ Af_B 70
72
+ Af_E 71
73
+ Af_I 72
74
+ Af_S 73
75
+ Ag_B 74
76
+ Ag_E 75
77
+ Ag_I 76
78
+ Ag_S 77
79
+ Ah_B 78
80
+ Ah_E 79
81
+ Ah_I 80
82
+ Ah_S 81
83
+ Aj_B 82
84
+ Aj_E 83
85
+ Aj_I 84
86
+ Aj_S 85
87
+ Ak_B 86
88
+ Ak_E 87
89
+ Ak_I 88
90
+ Ak_S 89
91
+ Al_B 90
92
+ Al_E 91
93
+ Al_I 92
94
+ Al_S 93
95
+ Am_B 94
96
+ Am_E 95
97
+ Am_I 96
98
+ Am_S 97
99
+ An_B 98
100
+ An_E 99
101
+ An_I 100
102
+ An_S 101
103
+ Ap_B 102
104
+ Ap_E 103
105
+ Ap_I 104
106
+ Ap_S 105
107
+ Aq_B 106
108
+ Aq_E 107
109
+ Aq_I 108
110
+ Aq_S 109
111
+ Ar_B 110
112
+ Ar_E 111
113
+ Ar_I 112
114
+ Ar_S 113
115
+ As_B 114
116
+ As_E 115
117
+ As_I 116
118
+ As_S 117
119
+ At_B 118
120
+ At_E 119
121
+ At_I 120
122
+ At_S 121
123
+ Av_B 122
124
+ Av_E 123
125
+ Av_I 124
126
+ Av_S 125
127
+ Aw_B 126
128
+ Aw_E 127
129
+ Aw_I 128
130
+ Aw_S 129
131
+ Ax_B 130
132
+ Ax_E 131
133
+ Ax_I 132
134
+ Ax_S 133
135
+ Ay_B 134
136
+ Ay_E 135
137
+ Ay_I 136
138
+ Ay_S 137
139
+ Az_B 138
140
+ Az_E 139
141
+ Az_I 140
142
+ Az_S 141
143
+ A|_B 142
144
+ A|_E 143
145
+ A|_I 144
146
+ A|_S 145
147
+ A}_B 146
148
+ A}_E 147
149
+ A}_I 148
150
+ A}_S 149
151
+ L'_B 150
152
+ L'_E 151
153
+ L'_I 152
154
+ L'_S 153
155
+ La_B 154
156
+ La_E 155
157
+ La_I 156
158
+ La_S 157
159
+ Lb_B 158
160
+ Lb_E 159
161
+ Lb_I 160
162
+ Lb_S 161
163
+ Lc_B 162
164
+ Lc_E 163
165
+ Lc_I 164
166
+ Lc_S 165
167
+ Ld_B 166
168
+ Ld_E 167
169
+ Ld_I 168
170
+ Ld_S 169
171
+ Le_B 170
172
+ Le_E 171
173
+ Le_I 172
174
+ Le_S 173
175
+ Lf_B 174
176
+ Lf_E 175
177
+ Lf_I 176
178
+ Lf_S 177
179
+ Lg_B 178
180
+ Lg_E 179
181
+ Lg_I 180
182
+ Lg_S 181
183
+ Lh_B 182
184
+ Lh_E 183
185
+ Lh_I 184
186
+ Lh_S 185
187
+ Li_B 186
188
+ Li_E 187
189
+ Li_I 188
190
+ Li_S 189
191
+ Lj_B 190
192
+ Lj_E 191
193
+ Lj_I 192
194
+ Lj_S 193
195
+ Lk_B 194
196
+ Lk_E 195
197
+ Lk_I 196
198
+ Lk_S 197
199
+ Ll_B 198
200
+ Ll_E 199
201
+ Ll_I 200
202
+ Ll_S 201
203
+ Lm_B 202
204
+ Lm_E 203
205
+ Lm_I 204
206
+ Lm_S 205
207
+ Ln_B 206
208
+ Ln_E 207
209
+ Ln_I 208
210
+ Ln_S 209
211
+ Lo_B 210
212
+ Lo_E 211
213
+ Lo_I 212
214
+ Lo_S 213
215
+ Lp_B 214
216
+ Lp_E 215
217
+ Lp_I 216
218
+ Lp_S 217
219
+ Lq_B 218
220
+ Lq_E 219
221
+ Lq_I 220
222
+ Lq_S 221
223
+ Lr_B 222
224
+ Lr_E 223
225
+ Lr_I 224
226
+ Lr_S 225
227
+ Ls_B 226
228
+ Ls_E 227
229
+ Ls_I 228
230
+ Ls_S 229
231
+ Lt_B 230
232
+ Lt_E 231
233
+ Lt_I 232
234
+ Lt_S 233
235
+ Lu_B 234
236
+ Lu_E 235
237
+ Lu_I 236
238
+ Lu_S 237
239
+ Lv_B 238
240
+ Lv_E 239
241
+ Lv_I 240
242
+ Lv_S 241
243
+ Lw_B 242
244
+ Lw_E 243
245
+ Lw_I 244
246
+ Lw_S 245
247
+ Lx_B 246
248
+ Lx_E 247
249
+ Lx_I 248
250
+ Lx_S 249
251
+ Ly_B 250
252
+ Ly_E 251
253
+ Ly_I 252
254
+ Ly_S 253
255
+ Lz_B 254
256
+ Lz_E 255
257
+ Lz_I 256
258
+ Lz_S 257
259
+ ae_B 258
260
+ ae_E 259
261
+ ae_I 260
262
+ ae_S 261
263
+ cc_B 262
264
+ cc_E 263
265
+ cc_I 264
266
+ cc_S 265
267
+ ga_B 266
268
+ ga_E 267
269
+ ga_I 268
270
+ ga_S 269
271
+ ge_B 270
272
+ ge_E 271
273
+ ge_I 272
274
+ ge_S 273
275
+ gu_B 274
276
+ gu_E 275
277
+ gu_I 276
278
+ gu_S 277
279
+ ha_B 278
280
+ ha_E 279
281
+ ha_I 280
282
+ ha_S 281
283
+ he_B 282
284
+ he_E 283
285
+ he_I 284
286
+ he_S 285
287
+ hi_B 286
288
+ hi_E 287
289
+ hi_I 288
290
+ hi_S 289
291
+ ho_B 290
292
+ ho_E 291
293
+ ho_I 292
294
+ ho_S 293
295
+ hu_B 294
296
+ hu_E 295
297
+ hu_I 296
298
+ hu_S 297
299
+ #0 298
300
+ #1 299
301
+ #2 300
302
+ #3 301
vosk-model-small-ar-tn-0.1-linto/am/tree ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841160139eae5a74a5ebb69fa407ff8db25d2e57d7344e929f246ea05b9dfc6c
3
+ size 658228
vosk-model-small-ar-tn-0.1-linto/conf/mfcc.conf ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ # config for high-resolution MFCC features, intended for neural network training.
2
+ # Note: we keep all cepstra, so it has the same info as filterbank features,
3
+ # but MFCC is more easily compressible (because less correlated) which is why
4
+ # we prefer this method.
5
+ --use-energy=false # use average of log energy, not energy.
6
+ --sample-frequency=16000
7
+ --num-mel-bins=40
8
+ --num-ceps=40
9
+ --low-freq=40 # low cutoff frequency for mel bins
10
+ --high-freq=-200 # high cutoff frequency, relative to Nyquist of 8000 (=7800)
vosk-model-small-ar-tn-0.1-linto/conf/model.conf ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ --min-active=200
2
+ --max-active=7000
3
+ --beam=11.0
4
+ --lattice-beam=6.0
5
+ --acoustic-scale=1.0
6
+ --frame-subsampling-factor=3
7
+ --endpoint.silence-phones=1:2:3:4:5
8
+ --endpoint.rule2.min-trailing-silence=0.5
9
+ --endpoint.rule3.min-trailing-silence=1.0
10
+ --endpoint.rule4.min-trailing-silence=2.0
vosk-model-small-ar-tn-0.1-linto/conf/splice.conf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ --left-context=3
2
+ --right-context=3
3
+
vosk-model-small-ar-tn-0.1-linto/graph/Gr.fst ADDED

Git LFS Details

  • SHA256: 7d492f26dadb789bcf9d5f9cf01117f4317fbbd69ba2bab1b9510a25fb050955
  • Pointer size: 134 Bytes
  • Size of remote file: 114 MB
vosk-model-small-ar-tn-0.1-linto/graph/HCLr.fst ADDED

Git LFS Details

  • SHA256: 915ff522cd79cea91dd3cdbac05529672c553cfef50d6a5bb9d6a6f3104ce1ce
  • Pointer size: 133 Bytes
  • Size of remote file: 35.1 MB
vosk-model-small-ar-tn-0.1-linto/graph/disambig_tid.int ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ 14649
2
+ 14650
3
+ 14651
4
+ 14652
vosk-model-small-ar-tn-0.1-linto/graph/phones/align_lexicon.int ADDED
The diff for this file is too large to render. See raw diff
 
vosk-model-small-ar-tn-0.1-linto/graph/phones/align_lexicon.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ae5ebc3c3a557ec303d9922a31906a9110883b1f1d1f6a92e58ba3237905095
3
+ size 16371230
vosk-model-small-ar-tn-0.1-linto/graph/phones/disambig.int ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ 298
2
+ 299
3
+ 300
4
+ 301
vosk-model-small-ar-tn-0.1-linto/graph/phones/disambig.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ #0
2
+ #1
3
+ #2
4
+ #3
vosk-model-small-ar-tn-0.1-linto/graph/phones/optional_silence.csl ADDED
@@ -0,0 +1 @@
 
 
1
+ 1