Add "Arabic" and "Arabic Tunisian" models
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +16 -0
- vosk-model-ar-mgb2-0.4/README +10 -0
- vosk-model-ar-mgb2-0.4/am/final.mdl +3 -0
- vosk-model-ar-mgb2-0.4/am/tree +3 -0
- vosk-model-ar-mgb2-0.4/conf/mfcc.conf +10 -0
- vosk-model-ar-mgb2-0.4/conf/model.conf +10 -0
- vosk-model-ar-mgb2-0.4/graph/HCLG.fst +3 -0
- vosk-model-ar-mgb2-0.4/graph/disambig_tid.int +2 -0
- vosk-model-ar-mgb2-0.4/graph/num_pdfs +1 -0
- vosk-model-ar-mgb2-0.4/graph/phones.txt +160 -0
- vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.int +3 -0
- vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.txt +3 -0
- vosk-model-ar-mgb2-0.4/graph/phones/disambig.int +2 -0
- vosk-model-ar-mgb2-0.4/graph/phones/disambig.txt +2 -0
- vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.csl +1 -0
- vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.int +1 -0
- vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.txt +1 -0
- vosk-model-ar-mgb2-0.4/graph/phones/silence.csl +1 -0
- vosk-model-ar-mgb2-0.4/graph/phones/word_boundary.int +157 -0
- vosk-model-ar-mgb2-0.4/graph/phones/word_boundary.txt +157 -0
- vosk-model-ar-mgb2-0.4/graph/words.txt +3 -0
- vosk-model-ar-mgb2-0.4/graph/words_bw.txt +3 -0
- vosk-model-ar-mgb2-0.4/graph/words_head.txt +1 -0
- vosk-model-ar-mgb2-0.4/graph/words_tail.txt +3 -0
- vosk-model-ar-mgb2-0.4/ivector/final.dubm +3 -0
- vosk-model-ar-mgb2-0.4/ivector/final.ie +3 -0
- vosk-model-ar-mgb2-0.4/ivector/final.ie.id +1 -0
- vosk-model-ar-mgb2-0.4/ivector/final.mat +0 -0
- vosk-model-ar-mgb2-0.4/ivector/global_cmvn.stats +3 -0
- vosk-model-ar-mgb2-0.4/ivector/online_cmvn.conf +1 -0
- vosk-model-ar-mgb2-0.4/ivector/splice.conf +2 -0
- vosk-model-ar-mgb2-0.4/scripts/buckwalter2unicode.py +454 -0
- vosk-model-small-ar-tn-0.1-linto/am/cmvn_opts +1 -0
- vosk-model-small-ar-tn-0.1-linto/am/final.ie.id +1 -0
- vosk-model-small-ar-tn-0.1-linto/am/final.mdl +3 -0
- vosk-model-small-ar-tn-0.1-linto/am/frame_subsampling_factor +1 -0
- vosk-model-small-ar-tn-0.1-linto/am/num_jobs +1 -0
- vosk-model-small-ar-tn-0.1-linto/am/phones.txt +302 -0
- vosk-model-small-ar-tn-0.1-linto/am/tree +3 -0
- vosk-model-small-ar-tn-0.1-linto/conf/mfcc.conf +10 -0
- vosk-model-small-ar-tn-0.1-linto/conf/model.conf +10 -0
- vosk-model-small-ar-tn-0.1-linto/conf/splice.conf +3 -0
- vosk-model-small-ar-tn-0.1-linto/graph/Gr.fst +3 -0
- vosk-model-small-ar-tn-0.1-linto/graph/HCLr.fst +3 -0
- vosk-model-small-ar-tn-0.1-linto/graph/disambig_tid.int +4 -0
- vosk-model-small-ar-tn-0.1-linto/graph/phones/align_lexicon.int +0 -0
- vosk-model-small-ar-tn-0.1-linto/graph/phones/align_lexicon.txt +3 -0
- vosk-model-small-ar-tn-0.1-linto/graph/phones/disambig.int +4 -0
- vosk-model-small-ar-tn-0.1-linto/graph/phones/disambig.txt +4 -0
- vosk-model-small-ar-tn-0.1-linto/graph/phones/optional_silence.csl +1 -0
.gitattributes
CHANGED
|
@@ -169,3 +169,19 @@ vosk-model-en-us-0.22/rnnlm/final.raw filter=lfs diff=lfs merge=lfs -text
|
|
| 169 |
vosk-model-small-ko-0.22/graph/HCLr.fst filter=lfs diff=lfs merge=lfs -text
|
| 170 |
vosk-model-en-us-0.22/graph/HCLG.fst filter=lfs diff=lfs merge=lfs -text
|
| 171 |
vosk-model-en-us-0.22/rescore/G.carpa filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 169 |
vosk-model-small-ko-0.22/graph/HCLr.fst filter=lfs diff=lfs merge=lfs -text
|
| 170 |
vosk-model-en-us-0.22/graph/HCLG.fst filter=lfs diff=lfs merge=lfs -text
|
| 171 |
vosk-model-en-us-0.22/rescore/G.carpa filter=lfs diff=lfs merge=lfs -text
|
| 172 |
+
vosk-model-ar-mgb2-0.4/am/final.mdl filter=lfs diff=lfs merge=lfs -text
|
| 173 |
+
vosk-model-ar-mgb2-0.4/am/tree filter=lfs diff=lfs merge=lfs -text
|
| 174 |
+
vosk-model-ar-mgb2-0.4/graph/HCLG.fst filter=lfs diff=lfs merge=lfs -text
|
| 175 |
+
vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.int filter=lfs diff=lfs merge=lfs -text
|
| 176 |
+
vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.txt filter=lfs diff=lfs merge=lfs -text
|
| 177 |
+
vosk-model-ar-mgb2-0.4/graph/words_bw.txt filter=lfs diff=lfs merge=lfs -text
|
| 178 |
+
vosk-model-ar-mgb2-0.4/graph/words.txt filter=lfs diff=lfs merge=lfs -text
|
| 179 |
+
vosk-model-ar-mgb2-0.4/ivector/final.dubm filter=lfs diff=lfs merge=lfs -text
|
| 180 |
+
vosk-model-ar-mgb2-0.4/ivector/final.ie filter=lfs diff=lfs merge=lfs -text
|
| 181 |
+
vosk-model-small-ar-tn-0.1-linto/am/final.mdl filter=lfs diff=lfs merge=lfs -text
|
| 182 |
+
vosk-model-small-ar-tn-0.1-linto/am/tree filter=lfs diff=lfs merge=lfs -text
|
| 183 |
+
vosk-model-small-ar-tn-0.1-linto/graph/Gr.fst filter=lfs diff=lfs merge=lfs -text
|
| 184 |
+
vosk-model-small-ar-tn-0.1-linto/graph/HCLr.fst filter=lfs diff=lfs merge=lfs -text
|
| 185 |
+
vosk-model-small-ar-tn-0.1-linto/graph/phones/align_lexicon.txt filter=lfs diff=lfs merge=lfs -text
|
| 186 |
+
vosk-model-small-ar-tn-0.1-linto/ivector/final.dubm filter=lfs diff=lfs merge=lfs -text
|
| 187 |
+
vosk-model-small-ar-tn-0.1-linto/ivector/final.ie filter=lfs diff=lfs merge=lfs -text
|
vosk-model-ar-mgb2-0.4/README
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Arabic model trained from MGB-2 dataset
|
| 2 |
+
|
| 3 |
+
Get the model here https://kaldi-asr.org/models/m9
|
| 4 |
+
|
| 5 |
+
SIZE 617M
|
| 6 |
+
DATE 2020-02-26
|
| 7 |
+
UPLOADER Dongji Gao
|
| 8 |
+
RECIPE egs/mgb2_arabic/s5
|
| 9 |
+
MODEL TYPE Chain (TDNN and LSTM)
|
| 10 |
+
ERROR RATE 16.40% WER (on dev set)
|
vosk-model-ar-mgb2-0.4/am/final.mdl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:270d47d60692aedb6b78b913ae24ab636264e67970b0b5ffa8cee666070501b1
|
| 3 |
+
size 147954790
|
vosk-model-ar-mgb2-0.4/am/tree
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:18f4a0198af15f21433b408c25f9adc00ba310e97352256bfce34185028765e4
|
| 3 |
+
size 724536
|
vosk-model-ar-mgb2-0.4/conf/mfcc.conf
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# config for high-resolution MFCC features, intended for neural network training.
|
| 2 |
+
# Note: we keep all cepstra, so it has the same info as filterbank features,
|
| 3 |
+
# but MFCC is more easily compressible (because less correlated) which is why
|
| 4 |
+
# we prefer this method.
|
| 5 |
+
--use-energy=false # use average of log energy, not energy.
|
| 6 |
+
--sample-frequency=16000
|
| 7 |
+
--num-mel-bins=40
|
| 8 |
+
--num-ceps=40
|
| 9 |
+
--low-freq=40 # low cutoff frequency for mel bins
|
| 10 |
+
--high-freq=-200 # high cutoff frequency, relative to Nyquist of 8000 (=7800)
|
vosk-model-ar-mgb2-0.4/conf/model.conf
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--min-active=200
|
| 2 |
+
--max-active=7000
|
| 3 |
+
--beam=13.0
|
| 4 |
+
--lattice-beam=6.0
|
| 5 |
+
--acoustic-scale=1.0
|
| 6 |
+
--frame-subsampling-factor=3
|
| 7 |
+
--endpoint.silence-phones=1:2:3:4:5
|
| 8 |
+
--endpoint.rule2.min-trailing-silence=0.5
|
| 9 |
+
--endpoint.rule3.min-trailing-silence=1.0
|
| 10 |
+
--endpoint.rule4.min-trailing-silence=2.0
|
vosk-model-ar-mgb2-0.4/graph/HCLG.fst
ADDED
|
|
Git LFS Details
|
vosk-model-ar-mgb2-0.4/graph/disambig_tid.int
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
22981
|
| 2 |
+
22982
|
vosk-model-ar-mgb2-0.4/graph/num_pdfs
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
6360
|
vosk-model-ar-mgb2-0.4/graph/phones.txt
ADDED
|
@@ -0,0 +1,160 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<eps> 0
|
| 2 |
+
SIL 1
|
| 3 |
+
SIL_B 2
|
| 4 |
+
SIL_E 3
|
| 5 |
+
SIL_I 4
|
| 6 |
+
SIL_S 5
|
| 7 |
+
$_B 6
|
| 8 |
+
$_E 7
|
| 9 |
+
$_I 8
|
| 10 |
+
$_S 9
|
| 11 |
+
&_B 10
|
| 12 |
+
&_E 11
|
| 13 |
+
&_I 12
|
| 14 |
+
&_S 13
|
| 15 |
+
'_B 14
|
| 16 |
+
'_E 15
|
| 17 |
+
'_I 16
|
| 18 |
+
'_S 17
|
| 19 |
+
<_B 18
|
| 20 |
+
<_E 19
|
| 21 |
+
<_I 20
|
| 22 |
+
<_S 21
|
| 23 |
+
>_B 22
|
| 24 |
+
>_E 23
|
| 25 |
+
>_I 24
|
| 26 |
+
>_S 25
|
| 27 |
+
A_B 26
|
| 28 |
+
A_E 27
|
| 29 |
+
A_I 28
|
| 30 |
+
A_S 29
|
| 31 |
+
D_B 30
|
| 32 |
+
D_E 31
|
| 33 |
+
D_I 32
|
| 34 |
+
D_S 33
|
| 35 |
+
E_B 34
|
| 36 |
+
E_E 35
|
| 37 |
+
E_I 36
|
| 38 |
+
E_S 37
|
| 39 |
+
H_B 38
|
| 40 |
+
H_E 39
|
| 41 |
+
H_I 40
|
| 42 |
+
H_S 41
|
| 43 |
+
S_B 42
|
| 44 |
+
S_E 43
|
| 45 |
+
S_I 44
|
| 46 |
+
S_S 45
|
| 47 |
+
T_B 46
|
| 48 |
+
T_E 47
|
| 49 |
+
T_I 48
|
| 50 |
+
T_S 49
|
| 51 |
+
V_B 50
|
| 52 |
+
V_E 51
|
| 53 |
+
V_I 52
|
| 54 |
+
V_S 53
|
| 55 |
+
Y_B 54
|
| 56 |
+
Y_E 55
|
| 57 |
+
Y_I 56
|
| 58 |
+
Y_S 57
|
| 59 |
+
Z_B 58
|
| 60 |
+
Z_E 59
|
| 61 |
+
Z_I 60
|
| 62 |
+
Z_S 61
|
| 63 |
+
a_B 62
|
| 64 |
+
a_E 63
|
| 65 |
+
a_I 64
|
| 66 |
+
a_S 65
|
| 67 |
+
b_B 66
|
| 68 |
+
b_E 67
|
| 69 |
+
b_I 68
|
| 70 |
+
b_S 69
|
| 71 |
+
d_B 70
|
| 72 |
+
d_E 71
|
| 73 |
+
d_I 72
|
| 74 |
+
d_S 73
|
| 75 |
+
f_B 74
|
| 76 |
+
f_E 75
|
| 77 |
+
f_I 76
|
| 78 |
+
f_S 77
|
| 79 |
+
g_B 78
|
| 80 |
+
g_E 79
|
| 81 |
+
g_I 80
|
| 82 |
+
g_S 81
|
| 83 |
+
h_B 82
|
| 84 |
+
h_E 83
|
| 85 |
+
h_I 84
|
| 86 |
+
h_S 85
|
| 87 |
+
j_B 86
|
| 88 |
+
j_E 87
|
| 89 |
+
j_I 88
|
| 90 |
+
j_S 89
|
| 91 |
+
k_B 90
|
| 92 |
+
k_E 91
|
| 93 |
+
k_I 92
|
| 94 |
+
k_S 93
|
| 95 |
+
l_B 94
|
| 96 |
+
l_E 95
|
| 97 |
+
l_I 96
|
| 98 |
+
l_S 97
|
| 99 |
+
m_B 98
|
| 100 |
+
m_E 99
|
| 101 |
+
m_I 100
|
| 102 |
+
m_S 101
|
| 103 |
+
n_B 102
|
| 104 |
+
n_E 103
|
| 105 |
+
n_I 104
|
| 106 |
+
n_S 105
|
| 107 |
+
p_B 106
|
| 108 |
+
p_E 107
|
| 109 |
+
p_I 108
|
| 110 |
+
p_S 109
|
| 111 |
+
q_B 110
|
| 112 |
+
q_E 111
|
| 113 |
+
q_I 112
|
| 114 |
+
q_S 113
|
| 115 |
+
r_B 114
|
| 116 |
+
r_E 115
|
| 117 |
+
r_I 116
|
| 118 |
+
r_S 117
|
| 119 |
+
s_B 118
|
| 120 |
+
s_E 119
|
| 121 |
+
s_I 120
|
| 122 |
+
s_S 121
|
| 123 |
+
t_B 122
|
| 124 |
+
t_E 123
|
| 125 |
+
t_I 124
|
| 126 |
+
t_S 125
|
| 127 |
+
v_B 126
|
| 128 |
+
v_E 127
|
| 129 |
+
v_I 128
|
| 130 |
+
v_S 129
|
| 131 |
+
w_B 130
|
| 132 |
+
w_E 131
|
| 133 |
+
w_I 132
|
| 134 |
+
w_S 133
|
| 135 |
+
x_B 134
|
| 136 |
+
x_E 135
|
| 137 |
+
x_I 136
|
| 138 |
+
x_S 137
|
| 139 |
+
y_B 138
|
| 140 |
+
y_E 139
|
| 141 |
+
y_I 140
|
| 142 |
+
y_S 141
|
| 143 |
+
z_B 142
|
| 144 |
+
z_E 143
|
| 145 |
+
z_I 144
|
| 146 |
+
z_S 145
|
| 147 |
+
{_B 146
|
| 148 |
+
{_E 147
|
| 149 |
+
{_I 148
|
| 150 |
+
{_S 149
|
| 151 |
+
|_B 150
|
| 152 |
+
|_E 151
|
| 153 |
+
|_I 152
|
| 154 |
+
|_S 153
|
| 155 |
+
}_B 154
|
| 156 |
+
}_E 155
|
| 157 |
+
}_I 156
|
| 158 |
+
}_S 157
|
| 159 |
+
#0 158
|
| 160 |
+
#1 159
|
vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.int
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6fdf7d5a0049e2722df88323c48434848c93ed1805871c1f04b9360e4c1b38f5
|
| 3 |
+
size 35195793
|
vosk-model-ar-mgb2-0.4/graph/phones/align_lexicon.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b2101700722b6e45bddabaa4918f07e0d2575636c52393008eb29e0ed9d480d1
|
| 3 |
+
size 40057532
|
vosk-model-ar-mgb2-0.4/graph/phones/disambig.int
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
158
|
| 2 |
+
159
|
vosk-model-ar-mgb2-0.4/graph/phones/disambig.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#0
|
| 2 |
+
#1
|
vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.csl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1
|
vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.int
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1
|
vosk-model-ar-mgb2-0.4/graph/phones/optional_silence.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
SIL
|
vosk-model-ar-mgb2-0.4/graph/phones/silence.csl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1:2:3:4:5
|
vosk-model-ar-mgb2-0.4/graph/phones/word_boundary.int
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
1 nonword
|
| 2 |
+
2 begin
|
| 3 |
+
3 end
|
| 4 |
+
4 internal
|
| 5 |
+
5 singleton
|
| 6 |
+
6 begin
|
| 7 |
+
7 end
|
| 8 |
+
8 internal
|
| 9 |
+
9 singleton
|
| 10 |
+
10 begin
|
| 11 |
+
11 end
|
| 12 |
+
12 internal
|
| 13 |
+
13 singleton
|
| 14 |
+
14 begin
|
| 15 |
+
15 end
|
| 16 |
+
16 internal
|
| 17 |
+
17 singleton
|
| 18 |
+
18 begin
|
| 19 |
+
19 end
|
| 20 |
+
20 internal
|
| 21 |
+
21 singleton
|
| 22 |
+
22 begin
|
| 23 |
+
23 end
|
| 24 |
+
24 internal
|
| 25 |
+
25 singleton
|
| 26 |
+
26 begin
|
| 27 |
+
27 end
|
| 28 |
+
28 internal
|
| 29 |
+
29 singleton
|
| 30 |
+
30 begin
|
| 31 |
+
31 end
|
| 32 |
+
32 internal
|
| 33 |
+
33 singleton
|
| 34 |
+
34 begin
|
| 35 |
+
35 end
|
| 36 |
+
36 internal
|
| 37 |
+
37 singleton
|
| 38 |
+
38 begin
|
| 39 |
+
39 end
|
| 40 |
+
40 internal
|
| 41 |
+
41 singleton
|
| 42 |
+
42 begin
|
| 43 |
+
43 end
|
| 44 |
+
44 internal
|
| 45 |
+
45 singleton
|
| 46 |
+
46 begin
|
| 47 |
+
47 end
|
| 48 |
+
48 internal
|
| 49 |
+
49 singleton
|
| 50 |
+
50 begin
|
| 51 |
+
51 end
|
| 52 |
+
52 internal
|
| 53 |
+
53 singleton
|
| 54 |
+
54 begin
|
| 55 |
+
55 end
|
| 56 |
+
56 internal
|
| 57 |
+
57 singleton
|
| 58 |
+
58 begin
|
| 59 |
+
59 end
|
| 60 |
+
60 internal
|
| 61 |
+
61 singleton
|
| 62 |
+
62 begin
|
| 63 |
+
63 end
|
| 64 |
+
64 internal
|
| 65 |
+
65 singleton
|
| 66 |
+
66 begin
|
| 67 |
+
67 end
|
| 68 |
+
68 internal
|
| 69 |
+
69 singleton
|
| 70 |
+
70 begin
|
| 71 |
+
71 end
|
| 72 |
+
72 internal
|
| 73 |
+
73 singleton
|
| 74 |
+
74 begin
|
| 75 |
+
75 end
|
| 76 |
+
76 internal
|
| 77 |
+
77 singleton
|
| 78 |
+
78 begin
|
| 79 |
+
79 end
|
| 80 |
+
80 internal
|
| 81 |
+
81 singleton
|
| 82 |
+
82 begin
|
| 83 |
+
83 end
|
| 84 |
+
84 internal
|
| 85 |
+
85 singleton
|
| 86 |
+
86 begin
|
| 87 |
+
87 end
|
| 88 |
+
88 internal
|
| 89 |
+
89 singleton
|
| 90 |
+
90 begin
|
| 91 |
+
91 end
|
| 92 |
+
92 internal
|
| 93 |
+
93 singleton
|
| 94 |
+
94 begin
|
| 95 |
+
95 end
|
| 96 |
+
96 internal
|
| 97 |
+
97 singleton
|
| 98 |
+
98 begin
|
| 99 |
+
99 end
|
| 100 |
+
100 internal
|
| 101 |
+
101 singleton
|
| 102 |
+
102 begin
|
| 103 |
+
103 end
|
| 104 |
+
104 internal
|
| 105 |
+
105 singleton
|
| 106 |
+
106 begin
|
| 107 |
+
107 end
|
| 108 |
+
108 internal
|
| 109 |
+
109 singleton
|
| 110 |
+
110 begin
|
| 111 |
+
111 end
|
| 112 |
+
112 internal
|
| 113 |
+
113 singleton
|
| 114 |
+
114 begin
|
| 115 |
+
115 end
|
| 116 |
+
116 internal
|
| 117 |
+
117 singleton
|
| 118 |
+
118 begin
|
| 119 |
+
119 end
|
| 120 |
+
120 internal
|
| 121 |
+
121 singleton
|
| 122 |
+
122 begin
|
| 123 |
+
123 end
|
| 124 |
+
124 internal
|
| 125 |
+
125 singleton
|
| 126 |
+
126 begin
|
| 127 |
+
127 end
|
| 128 |
+
128 internal
|
| 129 |
+
129 singleton
|
| 130 |
+
130 begin
|
| 131 |
+
131 end
|
| 132 |
+
132 internal
|
| 133 |
+
133 singleton
|
| 134 |
+
134 begin
|
| 135 |
+
135 end
|
| 136 |
+
136 internal
|
| 137 |
+
137 singleton
|
| 138 |
+
138 begin
|
| 139 |
+
139 end
|
| 140 |
+
140 internal
|
| 141 |
+
141 singleton
|
| 142 |
+
142 begin
|
| 143 |
+
143 end
|
| 144 |
+
144 internal
|
| 145 |
+
145 singleton
|
| 146 |
+
146 begin
|
| 147 |
+
147 end
|
| 148 |
+
148 internal
|
| 149 |
+
149 singleton
|
| 150 |
+
150 begin
|
| 151 |
+
151 end
|
| 152 |
+
152 internal
|
| 153 |
+
153 singleton
|
| 154 |
+
154 begin
|
| 155 |
+
155 end
|
| 156 |
+
156 internal
|
| 157 |
+
157 singleton
|
vosk-model-ar-mgb2-0.4/graph/phones/word_boundary.txt
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
SIL nonword
|
| 2 |
+
SIL_B begin
|
| 3 |
+
SIL_E end
|
| 4 |
+
SIL_I internal
|
| 5 |
+
SIL_S singleton
|
| 6 |
+
$_B begin
|
| 7 |
+
$_E end
|
| 8 |
+
$_I internal
|
| 9 |
+
$_S singleton
|
| 10 |
+
&_B begin
|
| 11 |
+
&_E end
|
| 12 |
+
&_I internal
|
| 13 |
+
&_S singleton
|
| 14 |
+
'_B begin
|
| 15 |
+
'_E end
|
| 16 |
+
'_I internal
|
| 17 |
+
'_S singleton
|
| 18 |
+
<_B begin
|
| 19 |
+
<_E end
|
| 20 |
+
<_I internal
|
| 21 |
+
<_S singleton
|
| 22 |
+
>_B begin
|
| 23 |
+
>_E end
|
| 24 |
+
>_I internal
|
| 25 |
+
>_S singleton
|
| 26 |
+
A_B begin
|
| 27 |
+
A_E end
|
| 28 |
+
A_I internal
|
| 29 |
+
A_S singleton
|
| 30 |
+
D_B begin
|
| 31 |
+
D_E end
|
| 32 |
+
D_I internal
|
| 33 |
+
D_S singleton
|
| 34 |
+
E_B begin
|
| 35 |
+
E_E end
|
| 36 |
+
E_I internal
|
| 37 |
+
E_S singleton
|
| 38 |
+
H_B begin
|
| 39 |
+
H_E end
|
| 40 |
+
H_I internal
|
| 41 |
+
H_S singleton
|
| 42 |
+
S_B begin
|
| 43 |
+
S_E end
|
| 44 |
+
S_I internal
|
| 45 |
+
S_S singleton
|
| 46 |
+
T_B begin
|
| 47 |
+
T_E end
|
| 48 |
+
T_I internal
|
| 49 |
+
T_S singleton
|
| 50 |
+
V_B begin
|
| 51 |
+
V_E end
|
| 52 |
+
V_I internal
|
| 53 |
+
V_S singleton
|
| 54 |
+
Y_B begin
|
| 55 |
+
Y_E end
|
| 56 |
+
Y_I internal
|
| 57 |
+
Y_S singleton
|
| 58 |
+
Z_B begin
|
| 59 |
+
Z_E end
|
| 60 |
+
Z_I internal
|
| 61 |
+
Z_S singleton
|
| 62 |
+
a_B begin
|
| 63 |
+
a_E end
|
| 64 |
+
a_I internal
|
| 65 |
+
a_S singleton
|
| 66 |
+
b_B begin
|
| 67 |
+
b_E end
|
| 68 |
+
b_I internal
|
| 69 |
+
b_S singleton
|
| 70 |
+
d_B begin
|
| 71 |
+
d_E end
|
| 72 |
+
d_I internal
|
| 73 |
+
d_S singleton
|
| 74 |
+
f_B begin
|
| 75 |
+
f_E end
|
| 76 |
+
f_I internal
|
| 77 |
+
f_S singleton
|
| 78 |
+
g_B begin
|
| 79 |
+
g_E end
|
| 80 |
+
g_I internal
|
| 81 |
+
g_S singleton
|
| 82 |
+
h_B begin
|
| 83 |
+
h_E end
|
| 84 |
+
h_I internal
|
| 85 |
+
h_S singleton
|
| 86 |
+
j_B begin
|
| 87 |
+
j_E end
|
| 88 |
+
j_I internal
|
| 89 |
+
j_S singleton
|
| 90 |
+
k_B begin
|
| 91 |
+
k_E end
|
| 92 |
+
k_I internal
|
| 93 |
+
k_S singleton
|
| 94 |
+
l_B begin
|
| 95 |
+
l_E end
|
| 96 |
+
l_I internal
|
| 97 |
+
l_S singleton
|
| 98 |
+
m_B begin
|
| 99 |
+
m_E end
|
| 100 |
+
m_I internal
|
| 101 |
+
m_S singleton
|
| 102 |
+
n_B begin
|
| 103 |
+
n_E end
|
| 104 |
+
n_I internal
|
| 105 |
+
n_S singleton
|
| 106 |
+
p_B begin
|
| 107 |
+
p_E end
|
| 108 |
+
p_I internal
|
| 109 |
+
p_S singleton
|
| 110 |
+
q_B begin
|
| 111 |
+
q_E end
|
| 112 |
+
q_I internal
|
| 113 |
+
q_S singleton
|
| 114 |
+
r_B begin
|
| 115 |
+
r_E end
|
| 116 |
+
r_I internal
|
| 117 |
+
r_S singleton
|
| 118 |
+
s_B begin
|
| 119 |
+
s_E end
|
| 120 |
+
s_I internal
|
| 121 |
+
s_S singleton
|
| 122 |
+
t_B begin
|
| 123 |
+
t_E end
|
| 124 |
+
t_I internal
|
| 125 |
+
t_S singleton
|
| 126 |
+
v_B begin
|
| 127 |
+
v_E end
|
| 128 |
+
v_I internal
|
| 129 |
+
v_S singleton
|
| 130 |
+
w_B begin
|
| 131 |
+
w_E end
|
| 132 |
+
w_I internal
|
| 133 |
+
w_S singleton
|
| 134 |
+
x_B begin
|
| 135 |
+
x_E end
|
| 136 |
+
x_I internal
|
| 137 |
+
x_S singleton
|
| 138 |
+
y_B begin
|
| 139 |
+
y_E end
|
| 140 |
+
y_I internal
|
| 141 |
+
y_S singleton
|
| 142 |
+
z_B begin
|
| 143 |
+
z_E end
|
| 144 |
+
z_I internal
|
| 145 |
+
z_S singleton
|
| 146 |
+
{_B begin
|
| 147 |
+
{_E end
|
| 148 |
+
{_I internal
|
| 149 |
+
{_S singleton
|
| 150 |
+
|_B begin
|
| 151 |
+
|_E end
|
| 152 |
+
|_I internal
|
| 153 |
+
|_S singleton
|
| 154 |
+
}_B begin
|
| 155 |
+
}_E end
|
| 156 |
+
}_I internal
|
| 157 |
+
}_S singleton
|
vosk-model-ar-mgb2-0.4/graph/words.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1da082badb6c7e3cbf5c3caa6cd8b62ab6ffe65a57f4e7b92e75d1c02704553e
|
| 3 |
+
size 21222619
|
vosk-model-ar-mgb2-0.4/graph/words_bw.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8a502ad2fdfd2f4e0a7a3f104738eca331a20ef4c82d32fbb06eba4d09b376ac
|
| 3 |
+
size 13907872
|
vosk-model-ar-mgb2-0.4/graph/words_head.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
<eps> 0
|
vosk-model-ar-mgb2-0.4/graph/words_tail.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#0 957742
|
| 2 |
+
<s> 957743
|
| 3 |
+
</s> 957744
|
vosk-model-ar-mgb2-0.4/ivector/final.dubm
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:efb7f72fcf10ccb465f8970e83d811f4219fce2468ebb026a628b6894688af50
|
| 3 |
+
size 168048
|
vosk-model-ar-mgb2-0.4/ivector/final.ie
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3835f479a636ec3730edd5c937a7a046f665f61067892ace1f38eac71617222b
|
| 3 |
+
size 19757687
|
vosk-model-ar-mgb2-0.4/ivector/final.ie.id
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
52508e2bd5a8af67fdcd9b272a6e3f77
|
vosk-model-ar-mgb2-0.4/ivector/final.mat
ADDED
|
Binary file (45 kB). View file
|
|
|
vosk-model-ar-mgb2-0.4/ivector/global_cmvn.stats
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
1.18965e+11 1.942701e+10 -3.341223e+10 2.586335e+10 -4.536047e+10 -5.143079e+08 -2.817998e+10 -1.106823e+10 -5.748533e+09 -8.021131e+09 2.444721e+09 -1.185678e+10 -6.421724e+09 -5.475492e+09 -4.132559e+09 -4.982299e+09 -4.688524e+09 -1.968607e+09 -2.401957e+09 -1.374661e+09 -8.737595e+08 -4.833948e+08 -2.02156e+08 6.90596e+07 4.669136e+08 1.042838e+08 8.957011e+08 4292212 6.994046e+08 2.358506e+08 -2.035312e+08 4.944966e+08 -1.953043e+08 8.404543e+08 2.283313e+08 -3.559911e+08 2.555876e+08 -5.979873e+08 2.206491e+08 -2.063428e+08 1.212011e+09
|
| 3 |
+
1.208634e+13 6.758905e+11 1.543773e+12 1.354227e+12 2.424682e+12 7.559103e+11 1.481059e+12 9.085413e+11 7.526449e+11 7.495913e+11 6.104847e+11 6.774222e+11 4.939513e+11 4.036599e+11 3.202782e+11 2.749835e+11 1.999791e+11 1.271602e+11 8.408587e+10 5.00519e+10 2.502974e+10 8.412481e+09 1.131555e+09 3.216713e+08 4.231433e+09 1.073782e+10 1.879558e+10 2.450884e+10 2.922538e+10 3.173832e+10 3.415615e+10 3.725646e+10 3.959967e+10 3.782093e+10 2.933737e+10 2.61524e+10 2.414329e+10 1.951272e+10 1.488638e+10 9.146851e+09 0 ]
|
vosk-model-ar-mgb2-0.4/ivector/online_cmvn.conf
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
# configuration file for apply-cmvn-online, used in the script ../local/online/run_online_decoding_nnet2.sh
|
vosk-model-ar-mgb2-0.4/ivector/splice.conf
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--left-context=3
|
| 2 |
+
--right-context=3
|
vosk-model-ar-mgb2-0.4/scripts/buckwalter2unicode.py
ADDED
|
@@ -0,0 +1,454 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/python
|
| 2 |
+
|
| 3 |
+
# buckwalter2unicode.py - A script to convert transliterated Arabic
|
| 4 |
+
# (using the Buckwalter system) to Unicode.
|
| 5 |
+
#
|
| 6 |
+
# Version 0.2 - 15th September 2004
|
| 7 |
+
#
|
| 8 |
+
# Andrew Roberts (andyr [at] comp (dot) leeds [dot] ac (dot) uk)
|
| 9 |
+
#
|
| 10 |
+
# Project homepage: http://www.comp.leeds.ac.uk/andyr/software/
|
| 11 |
+
#
|
| 12 |
+
# Now, listen carefully...
|
| 13 |
+
#
|
| 14 |
+
#
|
| 15 |
+
# This program is free software; you can redistribute it and/or modify
|
| 16 |
+
# it under the terms of the GNU General Public License as published by
|
| 17 |
+
# the Free Software Foundation; either version 2 of the License, or
|
| 18 |
+
# (at your option) any later version.
|
| 19 |
+
#
|
| 20 |
+
# This program is distributed in the hope that it will be useful,
|
| 21 |
+
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
| 22 |
+
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
| 23 |
+
# GNU General Public License for more details.
|
| 24 |
+
#
|
| 25 |
+
# You should have received a copy of the GNU General Public License
|
| 26 |
+
# along with this program; if not, write to the Free Software
|
| 27 |
+
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
|
| 28 |
+
#
|
| 29 |
+
|
| 30 |
+
from __future__ import print_function
|
| 31 |
+
import sys, getopt, codecs, os, re
|
| 32 |
+
|
| 33 |
+
# Declare a dictionary with Buckwalter's ASCII symbols as the keys, and
|
| 34 |
+
# their unicode equivalents as values.
|
| 35 |
+
|
| 36 |
+
buck2uni = {"'": u"\u0621", # hamza-on-the-line
|
| 37 |
+
"|": u"\u0622", # madda
|
| 38 |
+
">": u"\u0623", # hamza-on-'alif
|
| 39 |
+
"&": u"\u0624", # hamza-on-waaw
|
| 40 |
+
"<": u"\u0625", # hamza-under-'alif
|
| 41 |
+
"}": u"\u0626", # hamza-on-yaa'
|
| 42 |
+
"A": u"\u0627", # bare 'alif
|
| 43 |
+
"b": u"\u0628", # baa'
|
| 44 |
+
"p": u"\u0629", # taa' marbuuTa
|
| 45 |
+
"t": u"\u062A", # taa'
|
| 46 |
+
"v": u"\u062B", # thaa'
|
| 47 |
+
"j": u"\u062C", # jiim
|
| 48 |
+
"H": u"\u062D", # Haa'
|
| 49 |
+
"x": u"\u062E", # khaa'
|
| 50 |
+
"d": u"\u062F", # daal
|
| 51 |
+
"*": u"\u0630", # dhaal
|
| 52 |
+
"r": u"\u0631", # raa'
|
| 53 |
+
"z": u"\u0632", # zaay
|
| 54 |
+
"s": u"\u0633", # siin
|
| 55 |
+
"$": u"\u0634", # shiin
|
| 56 |
+
"S": u"\u0635", # Saad
|
| 57 |
+
"D": u"\u0636", # Daad
|
| 58 |
+
"T": u"\u0637", # Taa'
|
| 59 |
+
"Z": u"\u0638", # Zaa' (DHaa')
|
| 60 |
+
"E": u"\u0639", # cayn
|
| 61 |
+
"g": u"\u063A", # ghayn
|
| 62 |
+
"_": u"\u0640", # taTwiil
|
| 63 |
+
"f": u"\u0641", # faa'
|
| 64 |
+
"q": u"\u0642", # qaaf
|
| 65 |
+
"k": u"\u0643", # kaaf
|
| 66 |
+
"l": u"\u0644", # laam
|
| 67 |
+
"m": u"\u0645", # miim
|
| 68 |
+
"n": u"\u0646", # nuun
|
| 69 |
+
"h": u"\u0647", # haa'
|
| 70 |
+
"w": u"\u0648", # waaw
|
| 71 |
+
"Y": u"\u0649", # 'alif maqSuura
|
| 72 |
+
"y": u"\u064A", # yaa'
|
| 73 |
+
"F": u"\u064B", # fatHatayn
|
| 74 |
+
"N": u"\u064C", # Dammatayn
|
| 75 |
+
"K": u"\u064D", # kasratayn
|
| 76 |
+
"a": u"\u064E", # fatHa
|
| 77 |
+
"u": u"\u064F", # Damma
|
| 78 |
+
"i": u"\u0650", # kasra
|
| 79 |
+
"~": u"\u0651", # shaddah
|
| 80 |
+
"o": u"\u0652", # sukuun
|
| 81 |
+
"`": u"\u0670", # dagger 'alif
|
| 82 |
+
"{": u"\u0671", # waSla
|
| 83 |
+
}
|
| 84 |
+
|
| 85 |
+
# For a reverse transliteration (Unicode -> Buckwalter), a dictionary
|
| 86 |
+
# which is the reverse of the above buck2uni is essential.
|
| 87 |
+
|
| 88 |
+
uni2buck = {}
|
| 89 |
+
|
| 90 |
+
# Iterate through all the items in the buck2uni dict.
|
| 91 |
+
for (key, value) in buck2uni.items():
|
| 92 |
+
# The value from buck2uni becomes a key in uni2buck, and vice
|
| 93 |
+
# versa for the keys.
|
| 94 |
+
uni2buck[value] = key
|
| 95 |
+
|
| 96 |
+
# Declare some global variables...
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
inFilename = "" # Name of filename containing input.
|
| 100 |
+
outFilename = "" # Name of filename to send the output
|
| 101 |
+
inEnc = "" # The text encoding of the input file
|
| 102 |
+
outEnc = "" # The text encoding for the output file
|
| 103 |
+
ignoreChars = "" # If lines begin with these symbols, ignore.
|
| 104 |
+
columnRange = "" # Holds columns numbers to transliterate.
|
| 105 |
+
delimiter = "" # Holds user-defined column delimiter.
|
| 106 |
+
reverse = 0 # When equal to 1, perform reverse transliteration, i.e.,
|
| 107 |
+
# Unicode -> Buckwalter.
|
| 108 |
+
|
| 109 |
+
# Print the command-line usage details of this script to the screen.

def usage():
    """Print the usage/help text to standard output.

    The program name shown in the synopsis lines is taken from sys.argv[0].
    """
    prog = sys.argv[0]

    print("Usage: {} -i INFILE -o OUTFILE [-g CHARS -c RANGE -d CHAR".format(prog))
    print(" -r -e INPUT_ENCODING, -E OUTPUT ENCODING]")
    print(" {} -l".format(prog))
    print(" {} -h".format(prog))
    print("")
    print(" -i INFILE, --input=INFILE:")
    print(" Path to text file to be transliterated to Unicode.")
    print(" -o OUTFILE, --output=OUTFILE:")
    print(" Path of file to output the newly transliterated text.")
    print(" -e ENC, --input-encoding=ENC:")
    print(" Specify the text encoding of the source file. Default: latin_1.")
    print(" -E ENC, --output-encoding=ENC:")
    print(" Specify the text encoding of the target file. Default: utf_8.")
    print(" -g CHARS, --ignore-lines=CHARS:")
    print(" Will not transliterate lines that start with any of the CHARS")
    print(" given. E.g., -g #; will not alter lines starting with # or ;.")
    # The backslashes are doubled so that they are printed literally
    # without relying on an unrecognised escape sequence.
    print(" (May need to be -g \\#\\; on some platforms. See README.txt.)")
    print(" -c RANGE, --columns=RANGE:")
    print(" If in columns, select columns to apply transliteration. Can be")
    print(" comma separated numbers, or a range. E.g., -c 1, -c 1-3, -c 1,3.")
    print(" -d CHAR, --delimiter=CHAR:")
    print(" Specify the delimiter that defines the column if using the -c")
    print(" option above. Default is ' ' (space).")
    print(" -r, --reverse:")
    print(" Reverses the transliteration, i.e., Arabic to Buckwalter.")
    print(" When used, it will change the default input encoding to utf_8 and")
    print(" output encoding to latin_1")
    print(" -l, --list-encodings:")
    print(" Displays all supported file encodings.")
    print(" -h, --help:")
    print(" Displays this page.")
    print("")
|
| 144 |
+
|
| 145 |
+
# Print a table of the supported file encodings to the screen.

def displayEncodings():
    """Print one line per codec (name, aliases, languages), after a header line."""
    rows = (
        "Codec Aliases Languages",
        "ascii 646, us-ascii English",
        "cp037 IBM037, IBM039 English",
        "cp424 EBCDIC-CP-HE, IBM424 Hebrew",
        "cp437 437, IBM437 English",
        "cp500 EBCDIC-CP-BE, EBCDIC-CP-CH, IBM500 Western Europe",
        "cp737 Greek",
        "cp775 IBM775 Baltic languages",
        "cp850 850, IBM850 Western Europe",
        "cp852 852, IBM852 Central and Eastern Europe",
        "cp855 855, IBM855 Bulgarian, Byelorussian, Macedonian, Russian, Serbian",
        "cp856 Hebrew",
        "cp857 857, IBM857 Turkish",
        "cp860 860, IBM860 Portuguese",
        "cp861 861, CP-IS, IBM861 Icelandic",
        "cp862 862, IBM862 Hebrew",
        "cp863 863, IBM863 Canadian",
        "cp864 IBM864 Arabic",
        "cp865 865, IBM865 Danish, Norwegian",
        "cp869 869, CP-GR, IBM869 Greek",
        "cp874 Thai",
        "cp875 Greek",
        "cp1006 Urdu",
        "cp1026 ibm1026 Turkish",
        "cp1140 ibm1140 Western Europe",
        "cp1250 windows-1250 Central and Eastern Europe",
        "cp1251 windows-1251 Bulgarian, Byelorussian, Macedonian, Russian, Serbian",
        "cp1252 windows-1252 Western Europe",
        "cp1253 windows-1253 Greek",
        "cp1254 windows-1254 Turkish",
        "cp1255 windows-1255 Hebrew",
        "cp1256 windows-1256 Arabic",
        "cp1257 windows-1257 Baltic languages",
        "cp1258 windows-1258 Vietnamese",
        "latin_1 iso-8859-1, iso8859-1, 8859, cp819, latin, latin1, L1 West Europe",
        "iso8859_2 iso-8859-2, latin2, L2 Central and Eastern Europe",
        "iso8859_3 iso-8859-3, latin3, L3 Esperanto, Maltese",
        "iso8859_4 iso-8859-4, latin4, L4 Baltic languages",
        "iso8859_5 iso-8859-5, cyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian",
        "iso8859_6 iso-8859-6, arabic Arabic",
        "iso8859_7 iso-8859-7, greek, greek8 Greek",
        "iso8859_8 iso-8859-8, hebrew Hebrew",
        "iso8859_9 iso-8859-9, latin5, L5 Turkish",
        "iso8859_10 iso-8859-10, latin6, L6 Nordic languages",
        "iso8859_13 iso-8859-13 Baltic languages",
        "iso8859_14 iso-8859-14, latin8, L8 Celtic languages",
        "iso8859_15 iso-8859-15 Western Europe",
        "koi8_r Russian",
        "koi8_u Ukrainian",
        "mac_cyrillic maccyrillic Bulgarian, Byelorussian, Macedonian, Russian, Serbian",
        "mac_greek macgreek Greek",
        "mac_iceland maciceland Icelandic",
        "mac_latin2 maclatin2, maccentraleurope Central and Eastern Europe",
        "mac_roman macroman Western Europe",
        "mac_turkish macturkish Turkish",
        "utf_16 U16, utf16 all languages",
        "utf_16_be UTF-16BE all languages (BMP only)",
        "utf_16_le UTF-16LE all languages (BMP only)",
        "utf_7 U7 all languages",
        "utf_8 U8, UTF, utf8 all languages",
    )

    for row in rows:
        print(row)
|
| 209 |
+
|
| 210 |
+
def parseIgnoreString(string):
    """Return the characters of ``string`` as a list, one element per character.

    An empty string yields an empty list.
    """
    return list(string)
|
| 218 |
+
|
| 219 |
+
# Begin parsing the command-line arguments...

try:
    (options, args) = getopt.getopt(
        sys.argv[1:],
        "i:o:e:E:g:c:d:rlh",
        [
            "input=", "output=", "input-encoding=", "output-encoding=",
            "ignore-lines=", "columns=",
            # "delimiter=" and "reverse" must stay two separate entries:
            # adjacent string literals with no comma between them are
            # concatenated into a single (bogus) long option name.
            "delimiter=", "reverse",
            "list-encodings", "help",
        ],
    )

except getopt.GetoptError:
    # Unknown option or missing argument: print help information and exit.
    usage()
    sys.exit(1)
|
| 231 |
+
|
| 232 |
+
# Loop over all arguments supplied by the user and store each value in the
# matching module-level setting.
for (x, y) in options:
    if x in ("-h", "--help"):
        usage()
        sys.exit(0)

    if x in ("-l", "--list-encodings"):
        displayEncodings()
        sys.exit(0)

    if x in ("-i", "--input"):
        inFilename = y
    elif x in ("-o", "--output"):
        outFilename = y
    elif x in ("-e", "--input-encoding"):
        inEnc = y
    elif x in ("-E", "--output-encoding"):
        outEnc = y
    elif x in ("-r", "--reverse"):
        reverse = 1
    elif x in ("-g", "--ignore-lines"):
        ignoreChars = y
    elif x in ("-c", "--columns"):
        columnRange = y
    elif x in ("-d", "--delimiter"):
        # A tab typed as \t on the command line arrives as the two
        # characters backslash + "t"; turn that pair into a real tab
        # before proceeding.
        delimiter = y.replace("\\t", "\t")

        # Do some error checking: only a single character is accepted.
        if len(delimiter) > 1:
            print("Delimiter should only be a single character. Using first character: " + delimiter[0], file=sys.stderr)
            delimiter = delimiter[0]

        # A delimiter that is itself a Buckwalter symbol would be
        # indistinguishable from the text being transliterated.
        if delimiter in buck2uni:
            print("Invalid delimiter. \"" + delimiter + "\" is part of the Buckwalter character set.", file=sys.stderr)
            print("This will obviously cause much confusion as a delimiter!", file=sys.stderr)
            print("Please try again. Aborting...", file=sys.stderr)
            sys.exit(1)
|
| 266 |
+
|
| 267 |
+
# No delimiter given: fall back to " " (space).
delimiter = delimiter or " "

# Encodings the user did not specify fall back to defaults that depend on
# the direction of transliteration:
#
#   Buckwalter -> Unicode: input = latin_1, output = utf_8
#   Unicode -> Buckwalter: input = utf_8,   output = latin_1

if not inEnc:
    inEnc = "utf_8" if reverse else "latin_1"

if not outEnc:
    outEnc = "latin_1" if reverse else "utf_8"
|
| 297 |
+
|
| 298 |
+
# Ok, let's get the files open!

# The script can not work without somewhere to store the transliteration
# and a file to read from. Check both names before touching the
# filesystem, so that a missing input option does not create or truncate
# the output file.
if not outFilename:
    print("Must specify a file to use store the output! Aborting...", file=sys.stderr)
    sys.exit(1)

if not inFilename:
    print("Must specify a file to use as input! Aborting...", file=sys.stderr)
    sys.exit(1)

try:
    # Create a file object, set it to "read" mode using the specified
    # input encoding.
    inFile = codecs.open(inFilename, "r", inEnc)

except (IOError, LookupError) as msg:
    # The file could not be opened (IOError) or the encoding name is not
    # known to Python (LookupError). Report to user...
    print(msg, file=sys.stderr)
    sys.exit(1)

try:
    # Create a file object, set it to "write" mode using the specified
    # output encoding.
    outFile = codecs.open(outFilename, "w", outEnc)

except (IOError, LookupError) as msg:
    # Same failure modes as above; release the already opened input file
    # before exiting.
    print(msg, file=sys.stderr)
    inFile.close()
    sys.exit(1)
|
| 336 |
+
|
| 337 |
+
def getColsFromRange(cRange):
    """Expand a column specification into a list of zero-based indices.

    ``cRange`` is a comma-separated string whose items are either a single
    1-based column number ("3") or an inclusive 1-based range ("1-3").
    Empty items (for example from a trailing comma) are skipped.

    Returns the indices in the order given, e.g. "1-3,5" -> [0, 1, 2, 4].
    Raises ValueError when an item is neither a number nor a
    "start-end" pair of numbers.
    """
    columns = []

    for item in cRange.split(","):
        if not item.strip():
            continue

        if "-" in item:
            # "start-end" is inclusive and 1-based, hence start - 1 and an
            # unmodified end for the half-open range().
            start, end = item.split("-")
            columns.extend(range(int(start) - 1, int(end)))
        else:
            columns.append(int(item) - 1)

    return columns
|
| 353 |
+
|
| 354 |
+
# Transliterate one line of input. When a column range was requested, only
# the selected columns are converted and the remaining ones are passed
# through unchanged; otherwise the entire line is converted. The
# transliterated line is returned.

def transliterate(inString, lineNumber):
    # lineNumber is accepted but not used in this function.
    if not columnRange:
        return transliterateString(inString)

    wanted = getColsFromRange(columnRange)

    # Split the line on the delimiter, convert only the wanted fields and
    # stitch everything back together with the same delimiter.
    fields = inString.split(delimiter)
    return delimiter.join(
        transliterateString(field) if idx in wanted else field
        for (idx, field) in enumerate(fields)
    )
|
| 391 |
+
|
| 392 |
+
def transliterateString(inString):
    """Return ``inString`` converted character by character.

    The lookup table is buck2uni for the normal direction and uni2buck when
    ``reverse`` is set. Characters without an entry in the table (e.g.
    spaces) are kept as they are.
    """
    table = uni2buck if reverse else buck2uni
    return "".join(table.get(char, char) for char in inString)
|
| 414 |
+
|
| 415 |
+
#while 1:
|
| 416 |
+
# line = inFile.readline().strip()
|
| 417 |
+
# line = line.decode(inEnc)
|
| 418 |
+
# if not line:
|
| 419 |
+
# break
|
| 420 |
+
|
| 421 |
+
# process string
|
| 422 |
+
# outFile.write(transliterate(line) + os.linesep)
|
| 423 |
+
|
| 424 |
+
# Read in the lines of the input file, transliterate each one and write
# the result to the output file. Both files are closed on every exit path.

ignoreSymbols = tuple(ignoreChars)

try:
    for (currentLineNumber, line) in enumerate(inFile.readlines(), 1):
        line = line.strip()

        # Lines that start with one of the ignore symbols are copied
        # through untouched. str.startswith() is simply False for an empty
        # line, so blank lines are safe here.
        if ignoreSymbols and line.startswith(ignoreSymbols):
            outFile.write(line + " " + os.linesep)
        else:
            outFile.write(transliterate(line, currentLineNumber) + " " + os.linesep)

except UnicodeError as msg:
    # A problem occurred when decoding the input or encoding the output.
    # Report to user...
    print(msg, file=sys.stderr)
    sys.exit(1)

finally:
    # All done! Better close the files used before terminating...
    inFile.close()
    outFile.close()

# ... and relax! :)
|
vosk-model-small-ar-tn-0.1-linto/am/cmvn_opts
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
--norm-means=false --norm-vars=false
|
vosk-model-small-ar-tn-0.1-linto/am/final.ie.id
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
0084a8987dd3b241beabb01efcc32e17
|
vosk-model-small-ar-tn-0.1-linto/am/final.mdl
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9462fea3133f2b3ef672df2cabffb749f5eedf49427bff7bedd3457a9dfd7da3
|
| 3 |
+
size 77422160
|
vosk-model-small-ar-tn-0.1-linto/am/frame_subsampling_factor
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
3
|
vosk-model-small-ar-tn-0.1-linto/am/num_jobs
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
4
|
vosk-model-small-ar-tn-0.1-linto/am/phones.txt
ADDED
|
@@ -0,0 +1,302 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<eps> 0
|
| 2 |
+
SIL 1
|
| 3 |
+
SIL_B 2
|
| 4 |
+
SIL_E 3
|
| 5 |
+
SIL_I 4
|
| 6 |
+
SIL_S 5
|
| 7 |
+
A$_B 6
|
| 8 |
+
A$_E 7
|
| 9 |
+
A$_I 8
|
| 10 |
+
A$_S 9
|
| 11 |
+
A&_B 10
|
| 12 |
+
A&_E 11
|
| 13 |
+
A&_I 12
|
| 14 |
+
A&_S 13
|
| 15 |
+
A'_B 14
|
| 16 |
+
A'_E 15
|
| 17 |
+
A'_I 16
|
| 18 |
+
A'_S 17
|
| 19 |
+
A<_B 18
|
| 20 |
+
A<_E 19
|
| 21 |
+
A<_I 20
|
| 22 |
+
A<_S 21
|
| 23 |
+
A>_B 22
|
| 24 |
+
A>_E 23
|
| 25 |
+
A>_I 24
|
| 26 |
+
A>_S 25
|
| 27 |
+
AA_B 26
|
| 28 |
+
AA_E 27
|
| 29 |
+
AA_I 28
|
| 30 |
+
AA_S 29
|
| 31 |
+
AD_B 30
|
| 32 |
+
AD_E 31
|
| 33 |
+
AD_I 32
|
| 34 |
+
AD_S 33
|
| 35 |
+
AE_B 34
|
| 36 |
+
AE_E 35
|
| 37 |
+
AE_I 36
|
| 38 |
+
AE_S 37
|
| 39 |
+
AH_B 38
|
| 40 |
+
AH_E 39
|
| 41 |
+
AH_I 40
|
| 42 |
+
AH_S 41
|
| 43 |
+
AS_B 42
|
| 44 |
+
AS_E 43
|
| 45 |
+
AS_I 44
|
| 46 |
+
AS_S 45
|
| 47 |
+
AT_B 46
|
| 48 |
+
AT_E 47
|
| 49 |
+
AT_I 48
|
| 50 |
+
AT_S 49
|
| 51 |
+
AV_B 50
|
| 52 |
+
AV_E 51
|
| 53 |
+
AV_I 52
|
| 54 |
+
AV_S 53
|
| 55 |
+
AY_B 54
|
| 56 |
+
AY_E 55
|
| 57 |
+
AY_I 56
|
| 58 |
+
AY_S 57
|
| 59 |
+
AZ_B 58
|
| 60 |
+
AZ_E 59
|
| 61 |
+
AZ_I 60
|
| 62 |
+
AZ_S 61
|
| 63 |
+
Ab_B 62
|
| 64 |
+
Ab_E 63
|
| 65 |
+
Ab_I 64
|
| 66 |
+
Ab_S 65
|
| 67 |
+
Ad_B 66
|
| 68 |
+
Ad_E 67
|
| 69 |
+
Ad_I 68
|
| 70 |
+
Ad_S 69
|
| 71 |
+
Af_B 70
|
| 72 |
+
Af_E 71
|
| 73 |
+
Af_I 72
|
| 74 |
+
Af_S 73
|
| 75 |
+
Ag_B 74
|
| 76 |
+
Ag_E 75
|
| 77 |
+
Ag_I 76
|
| 78 |
+
Ag_S 77
|
| 79 |
+
Ah_B 78
|
| 80 |
+
Ah_E 79
|
| 81 |
+
Ah_I 80
|
| 82 |
+
Ah_S 81
|
| 83 |
+
Aj_B 82
|
| 84 |
+
Aj_E 83
|
| 85 |
+
Aj_I 84
|
| 86 |
+
Aj_S 85
|
| 87 |
+
Ak_B 86
|
| 88 |
+
Ak_E 87
|
| 89 |
+
Ak_I 88
|
| 90 |
+
Ak_S 89
|
| 91 |
+
Al_B 90
|
| 92 |
+
Al_E 91
|
| 93 |
+
Al_I 92
|
| 94 |
+
Al_S 93
|
| 95 |
+
Am_B 94
|
| 96 |
+
Am_E 95
|
| 97 |
+
Am_I 96
|
| 98 |
+
Am_S 97
|
| 99 |
+
An_B 98
|
| 100 |
+
An_E 99
|
| 101 |
+
An_I 100
|
| 102 |
+
An_S 101
|
| 103 |
+
Ap_B 102
|
| 104 |
+
Ap_E 103
|
| 105 |
+
Ap_I 104
|
| 106 |
+
Ap_S 105
|
| 107 |
+
Aq_B 106
|
| 108 |
+
Aq_E 107
|
| 109 |
+
Aq_I 108
|
| 110 |
+
Aq_S 109
|
| 111 |
+
Ar_B 110
|
| 112 |
+
Ar_E 111
|
| 113 |
+
Ar_I 112
|
| 114 |
+
Ar_S 113
|
| 115 |
+
As_B 114
|
| 116 |
+
As_E 115
|
| 117 |
+
As_I 116
|
| 118 |
+
As_S 117
|
| 119 |
+
At_B 118
|
| 120 |
+
At_E 119
|
| 121 |
+
At_I 120
|
| 122 |
+
At_S 121
|
| 123 |
+
Av_B 122
|
| 124 |
+
Av_E 123
|
| 125 |
+
Av_I 124
|
| 126 |
+
Av_S 125
|
| 127 |
+
Aw_B 126
|
| 128 |
+
Aw_E 127
|
| 129 |
+
Aw_I 128
|
| 130 |
+
Aw_S 129
|
| 131 |
+
Ax_B 130
|
| 132 |
+
Ax_E 131
|
| 133 |
+
Ax_I 132
|
| 134 |
+
Ax_S 133
|
| 135 |
+
Ay_B 134
|
| 136 |
+
Ay_E 135
|
| 137 |
+
Ay_I 136
|
| 138 |
+
Ay_S 137
|
| 139 |
+
Az_B 138
|
| 140 |
+
Az_E 139
|
| 141 |
+
Az_I 140
|
| 142 |
+
Az_S 141
|
| 143 |
+
A|_B 142
|
| 144 |
+
A|_E 143
|
| 145 |
+
A|_I 144
|
| 146 |
+
A|_S 145
|
| 147 |
+
A}_B 146
|
| 148 |
+
A}_E 147
|
| 149 |
+
A}_I 148
|
| 150 |
+
A}_S 149
|
| 151 |
+
L'_B 150
|
| 152 |
+
L'_E 151
|
| 153 |
+
L'_I 152
|
| 154 |
+
L'_S 153
|
| 155 |
+
La_B 154
|
| 156 |
+
La_E 155
|
| 157 |
+
La_I 156
|
| 158 |
+
La_S 157
|
| 159 |
+
Lb_B 158
|
| 160 |
+
Lb_E 159
|
| 161 |
+
Lb_I 160
|
| 162 |
+
Lb_S 161
|
| 163 |
+
Lc_B 162
|
| 164 |
+
Lc_E 163
|
| 165 |
+
Lc_I 164
|
| 166 |
+
Lc_S 165
|
| 167 |
+
Ld_B 166
|
| 168 |
+
Ld_E 167
|
| 169 |
+
Ld_I 168
|
| 170 |
+
Ld_S 169
|
| 171 |
+
Le_B 170
|
| 172 |
+
Le_E 171
|
| 173 |
+
Le_I 172
|
| 174 |
+
Le_S 173
|
| 175 |
+
Lf_B 174
|
| 176 |
+
Lf_E 175
|
| 177 |
+
Lf_I 176
|
| 178 |
+
Lf_S 177
|
| 179 |
+
Lg_B 178
|
| 180 |
+
Lg_E 179
|
| 181 |
+
Lg_I 180
|
| 182 |
+
Lg_S 181
|
| 183 |
+
Lh_B 182
|
| 184 |
+
Lh_E 183
|
| 185 |
+
Lh_I 184
|
| 186 |
+
Lh_S 185
|
| 187 |
+
Li_B 186
|
| 188 |
+
Li_E 187
|
| 189 |
+
Li_I 188
|
| 190 |
+
Li_S 189
|
| 191 |
+
Lj_B 190
|
| 192 |
+
Lj_E 191
|
| 193 |
+
Lj_I 192
|
| 194 |
+
Lj_S 193
|
| 195 |
+
Lk_B 194
|
| 196 |
+
Lk_E 195
|
| 197 |
+
Lk_I 196
|
| 198 |
+
Lk_S 197
|
| 199 |
+
Ll_B 198
|
| 200 |
+
Ll_E 199
|
| 201 |
+
Ll_I 200
|
| 202 |
+
Ll_S 201
|
| 203 |
+
Lm_B 202
|
| 204 |
+
Lm_E 203
|
| 205 |
+
Lm_I 204
|
| 206 |
+
Lm_S 205
|
| 207 |
+
Ln_B 206
|
| 208 |
+
Ln_E 207
|
| 209 |
+
Ln_I 208
|
| 210 |
+
Ln_S 209
|
| 211 |
+
Lo_B 210
|
| 212 |
+
Lo_E 211
|
| 213 |
+
Lo_I 212
|
| 214 |
+
Lo_S 213
|
| 215 |
+
Lp_B 214
|
| 216 |
+
Lp_E 215
|
| 217 |
+
Lp_I 216
|
| 218 |
+
Lp_S 217
|
| 219 |
+
Lq_B 218
|
| 220 |
+
Lq_E 219
|
| 221 |
+
Lq_I 220
|
| 222 |
+
Lq_S 221
|
| 223 |
+
Lr_B 222
|
| 224 |
+
Lr_E 223
|
| 225 |
+
Lr_I 224
|
| 226 |
+
Lr_S 225
|
| 227 |
+
Ls_B 226
|
| 228 |
+
Ls_E 227
|
| 229 |
+
Ls_I 228
|
| 230 |
+
Ls_S 229
|
| 231 |
+
Lt_B 230
|
| 232 |
+
Lt_E 231
|
| 233 |
+
Lt_I 232
|
| 234 |
+
Lt_S 233
|
| 235 |
+
Lu_B 234
|
| 236 |
+
Lu_E 235
|
| 237 |
+
Lu_I 236
|
| 238 |
+
Lu_S 237
|
| 239 |
+
Lv_B 238
|
| 240 |
+
Lv_E 239
|
| 241 |
+
Lv_I 240
|
| 242 |
+
Lv_S 241
|
| 243 |
+
Lw_B 242
|
| 244 |
+
Lw_E 243
|
| 245 |
+
Lw_I 244
|
| 246 |
+
Lw_S 245
|
| 247 |
+
Lx_B 246
|
| 248 |
+
Lx_E 247
|
| 249 |
+
Lx_I 248
|
| 250 |
+
Lx_S 249
|
| 251 |
+
Ly_B 250
|
| 252 |
+
Ly_E 251
|
| 253 |
+
Ly_I 252
|
| 254 |
+
Ly_S 253
|
| 255 |
+
Lz_B 254
|
| 256 |
+
Lz_E 255
|
| 257 |
+
Lz_I 256
|
| 258 |
+
Lz_S 257
|
| 259 |
+
ae_B 258
|
| 260 |
+
ae_E 259
|
| 261 |
+
ae_I 260
|
| 262 |
+
ae_S 261
|
| 263 |
+
cc_B 262
|
| 264 |
+
cc_E 263
|
| 265 |
+
cc_I 264
|
| 266 |
+
cc_S 265
|
| 267 |
+
ga_B 266
|
| 268 |
+
ga_E 267
|
| 269 |
+
ga_I 268
|
| 270 |
+
ga_S 269
|
| 271 |
+
ge_B 270
|
| 272 |
+
ge_E 271
|
| 273 |
+
ge_I 272
|
| 274 |
+
ge_S 273
|
| 275 |
+
gu_B 274
|
| 276 |
+
gu_E 275
|
| 277 |
+
gu_I 276
|
| 278 |
+
gu_S 277
|
| 279 |
+
ha_B 278
|
| 280 |
+
ha_E 279
|
| 281 |
+
ha_I 280
|
| 282 |
+
ha_S 281
|
| 283 |
+
he_B 282
|
| 284 |
+
he_E 283
|
| 285 |
+
he_I 284
|
| 286 |
+
he_S 285
|
| 287 |
+
hi_B 286
|
| 288 |
+
hi_E 287
|
| 289 |
+
hi_I 288
|
| 290 |
+
hi_S 289
|
| 291 |
+
ho_B 290
|
| 292 |
+
ho_E 291
|
| 293 |
+
ho_I 292
|
| 294 |
+
ho_S 293
|
| 295 |
+
hu_B 294
|
| 296 |
+
hu_E 295
|
| 297 |
+
hu_I 296
|
| 298 |
+
hu_S 297
|
| 299 |
+
#0 298
|
| 300 |
+
#1 299
|
| 301 |
+
#2 300
|
| 302 |
+
#3 301
|
vosk-model-small-ar-tn-0.1-linto/am/tree
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:841160139eae5a74a5ebb69fa407ff8db25d2e57d7344e929f246ea05b9dfc6c
|
| 3 |
+
size 658228
|
vosk-model-small-ar-tn-0.1-linto/conf/mfcc.conf
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# config for high-resolution MFCC features, intended for neural network training.
|
| 2 |
+
# Note: we keep all cepstra, so it has the same info as filterbank features,
|
| 3 |
+
# but MFCC is more easily compressible (because less correlated) which is why
|
| 4 |
+
# we prefer this method.
|
| 5 |
+
--use-energy=false # use average of log energy, not energy.
|
| 6 |
+
--sample-frequency=16000
|
| 7 |
+
--num-mel-bins=40
|
| 8 |
+
--num-ceps=40
|
| 9 |
+
--low-freq=40 # low cutoff frequency for mel bins
|
| 10 |
+
--high-freq=-200 # high cutoff frequency, relative to Nyquist of 8000 (=7800)
|
vosk-model-small-ar-tn-0.1-linto/conf/model.conf
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--min-active=200
|
| 2 |
+
--max-active=7000
|
| 3 |
+
--beam=11.0
|
| 4 |
+
--lattice-beam=6.0
|
| 5 |
+
--acoustic-scale=1.0
|
| 6 |
+
--frame-subsampling-factor=3
|
| 7 |
+
--endpoint.silence-phones=1:2:3:4:5
|
| 8 |
+
--endpoint.rule2.min-trailing-silence=0.5
|
| 9 |
+
--endpoint.rule3.min-trailing-silence=1.0
|
| 10 |
+
--endpoint.rule4.min-trailing-silence=2.0
|
vosk-model-small-ar-tn-0.1-linto/conf/splice.conf
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
--left-context=3
|
| 2 |
+
--right-context=3
|
| 3 |
+
|
vosk-model-small-ar-tn-0.1-linto/graph/Gr.fst
ADDED
|
|
Git LFS Details
|
vosk-model-small-ar-tn-0.1-linto/graph/HCLr.fst
ADDED
|
|
Git LFS Details
|
vosk-model-small-ar-tn-0.1-linto/graph/disambig_tid.int
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
14649
|
| 2 |
+
14650
|
| 3 |
+
14651
|
| 4 |
+
14652
|
vosk-model-small-ar-tn-0.1-linto/graph/phones/align_lexicon.int
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
vosk-model-small-ar-tn-0.1-linto/graph/phones/align_lexicon.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ae5ebc3c3a557ec303d9922a31906a9110883b1f1d1f6a92e58ba3237905095
|
| 3 |
+
size 16371230
|
vosk-model-small-ar-tn-0.1-linto/graph/phones/disambig.int
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
298
|
| 2 |
+
299
|
| 3 |
+
300
|
| 4 |
+
301
|
vosk-model-small-ar-tn-0.1-linto/graph/phones/disambig.txt
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#0
|
| 2 |
+
#1
|
| 3 |
+
#2
|
| 4 |
+
#3
|
vosk-model-small-ar-tn-0.1-linto/graph/phones/optional_silence.csl
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
1
|