odg123 commited on
Commit
d596074
·
verified ·
1 Parent(s): ae46197

Upload icefall experiment results and logs

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .flake8 +35 -0
  2. .git-blame-ignore-revs +3 -0
  3. .gitattributes +22 -0
  4. .github/scripts/.gitignore +1 -0
  5. .github/scripts/aishell/ASR/run.sh +343 -0
  6. .github/scripts/audioset/AT/run.sh +94 -0
  7. .github/scripts/baker_zh/TTS/run-matcha.sh +167 -0
  8. .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh +19 -0
  9. .github/scripts/docker/Dockerfile +75 -0
  10. .github/scripts/docker/generate_build_matrix.py +140 -0
  11. .github/scripts/download-gigaspeech-dev-test-dataset.sh +17 -0
  12. .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh +25 -0
  13. .github/scripts/generate-piper-phonemize-page.py +90 -0
  14. .github/scripts/install-kaldifeat.sh +15 -0
  15. .github/scripts/ksponspeech/ASR/run.sh +132 -0
  16. .github/scripts/librispeech/ASR/run.sh +1644 -0
  17. .github/scripts/librispeech/ASR/run_rknn.sh +275 -0
  18. .github/scripts/ljspeech/TTS/run-matcha.sh +157 -0
  19. .github/scripts/ljspeech/TTS/run.sh +157 -0
  20. .github/scripts/multi_zh-hans/ASR/run.sh +756 -0
  21. .github/scripts/multi_zh-hans/ASR/run_rknn.sh +73 -0
  22. .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh +13 -0
  23. .github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh +62 -0
  24. .github/scripts/run-gigaspeech-zipformer-2023-10-17.sh +172 -0
  25. .github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh +191 -0
  26. .github/scripts/run-multi-corpora-zipformer.sh +135 -0
  27. .github/scripts/run-swbd-conformer-ctc-2023-08-26.sh +44 -0
  28. .github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh +119 -0
  29. .github/scripts/test-ncnn-export.sh +230 -0
  30. .github/scripts/test-onnx-export.sh +466 -0
  31. .github/scripts/wenetspeech/ASR/run_rknn.sh +196 -0
  32. .github/scripts/yesno/ASR/run.sh +86 -0
  33. .github/workflows/aishell.yml +72 -0
  34. .github/workflows/audioset.yml +137 -0
  35. .github/workflows/baker_zh.yml +152 -0
  36. .github/workflows/build-cpu-docker.yml +81 -0
  37. .github/workflows/build-doc.yml +74 -0
  38. .github/workflows/build-docker-image.yml +84 -0
  39. .github/workflows/ksponspeech.yml +167 -0
  40. .github/workflows/librispeech.yml +72 -0
  41. .github/workflows/ljspeech.yml +166 -0
  42. .github/workflows/multi-zh-hans.yml +86 -0
  43. .github/workflows/rknn.yml +134 -0
  44. .github/workflows/run-docker-image.yml +144 -0
  45. .github/workflows/run-gigaspeech-2022-05-13.yml +128 -0
  46. .github/workflows/run-gigaspeech-zipformer-2023-10-17.yml +136 -0
  47. .github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml +165 -0
  48. .github/workflows/run-multi-corpora-zipformer.yml +86 -0
  49. .github/workflows/run-ptb-rnn-lm.yml +73 -0
  50. .github/workflows/run-swbd-conformer-ctc.yml +86 -0
.flake8 ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [flake8]
2
+ show-source=true
3
+ statistics=true
4
+ max-line-length = 88
5
+ per-file-ignores =
6
+ # line too long
7
+ icefall/diagnostics.py: E501,
8
+ egs/*/ASR/*/conformer.py: E501,
9
+ egs/*/ASR/pruned_transducer_stateless*/*.py: E501,
10
+ egs/*/ASR/*/optim.py: E501,
11
+ egs/*/ASR/*/scaling.py: E501,
12
+ egs/librispeech/ASR/lstm_transducer_stateless*/*.py: E501, E203
13
+ egs/librispeech/ASR/conv_emformer_transducer_stateless*/*.py: E501, E203
14
+ egs/librispeech/ASR/conformer_ctc*/*py: E501,
15
+ egs/librispeech/ASR/zipformer_mmi/*.py: E501, E203
16
+ egs/librispeech/ASR/zipformer/*.py: E501, E203
17
+ egs/librispeech/ASR/RESULTS.md: E999,
18
+ egs/ljspeech/TTS/vits/*.py: E501, E203
19
+ # invalid escape sequence (cause by tex formular), W605
20
+ icefall/utils.py: E501, W605
21
+
22
+ exclude =
23
+ .git,
24
+ **/data/**,
25
+ icefall/shared/make_kn_lm.py,
26
+ icefall/__init__.py
27
+ icefall/ctc/__init__.py
28
+
29
+ ignore =
30
+ # E203 white space before ":"
31
+ E203,
32
+ # W503 line break before binary operator
33
+ W503,
34
+ # E226 missing whitespace around arithmetic operator
35
+ E226,
.git-blame-ignore-revs ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Migrate to 88 characters per line (see: https://github.com/lhotse-speech/lhotse/issues/890)
2
+ 107df3b115a58f1b68a6458c3f94a130004be34c
3
+ d31db010371a4128856480382876acdc0d1739ed
.gitattributes CHANGED
@@ -33,3 +33,25 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ docs/source/_static/kaldi-align/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav filter=lfs diff=lfs merge=lfs -text
37
+ docs/source/_static/logo.png filter=lfs diff=lfs merge=lfs -text
38
+ docs/source/contributing/images/doc-contrib.png filter=lfs diff=lfs merge=lfs -text
39
+ docs/source/contributing/images/pre-commit-check-success.png filter=lfs diff=lfs merge=lfs -text
40
+ docs/source/contributing/images/pre-commit-check.png filter=lfs diff=lfs merge=lfs -text
41
+ docs/source/docker/img/docker-hub.png filter=lfs diff=lfs merge=lfs -text
42
+ docs/source/huggingface/pic/hugging-face-sherpa-2.png filter=lfs diff=lfs merge=lfs -text
43
+ docs/source/huggingface/pic/hugging-face-sherpa-3.png filter=lfs diff=lfs merge=lfs -text
44
+ docs/source/huggingface/pic/hugging-face-sherpa.png filter=lfs diff=lfs merge=lfs -text
45
+ docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-conformer-ctc-tensorboard-log.jpg filter=lfs diff=lfs merge=lfs -text
46
+ docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-tdnn-lstm-ctc-tensorboard-log.jpg filter=lfs diff=lfs merge=lfs -text
47
+ docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-transducer_stateless_modified-tensorboard-log.png filter=lfs diff=lfs merge=lfs -text
48
+ docs/source/recipes/Non-streaming-ASR/librispeech/images/librispeech-conformer-ctc-tensorboard-log.png filter=lfs diff=lfs merge=lfs -text
49
+ docs/source/recipes/Non-streaming-ASR/librispeech/images/librispeech-pruned-transducer-tensorboard-log.jpg filter=lfs diff=lfs merge=lfs -text
50
+ docs/source/recipes/Non-streaming-ASR/yesno/images/tdnn-tensorboard-log.png filter=lfs diff=lfs merge=lfs -text
51
+ docs/source/recipes/Streaming-ASR/librispeech/images/librispeech-lstm-transducer-tensorboard-log.png filter=lfs diff=lfs merge=lfs -text
52
+ docs/source/recipes/Streaming-ASR/librispeech/images/streaming-librispeech-pruned-transducer-tensorboard-log.jpg filter=lfs diff=lfs merge=lfs -text
53
+ egs/ami/ASR/xlsr_transducer/tts_2_en.wav filter=lfs diff=lfs merge=lfs -text
54
+ egs/libricss/SURT/heat.png filter=lfs diff=lfs merge=lfs -text
55
+ egs/libricss/SURT/surt.png filter=lfs diff=lfs merge=lfs -text
56
+ egs/librispeech/WSASR/figures/otc_training_graph.drawio.png filter=lfs diff=lfs merge=lfs -text
57
+ egs/speech_llm/ASR_LLM/assets/framework.png filter=lfs diff=lfs merge=lfs -text
.github/scripts/.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ piper_phonemize.html
.github/scripts/aishell/ASR/run.sh ADDED
@@ -0,0 +1,343 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ cd egs/aishell/ASR
12
+
13
+ function download_test_dev_manifests() {
14
+ git lfs install
15
+
16
+ fbank_url=https://huggingface.co/csukuangfj/aishell-test-dev-manifests
17
+ log "Downloading pre-commputed fbank from $fbank_url"
18
+
19
+ git clone https://huggingface.co/csukuangfj/aishell-test-dev-manifests
20
+ ln -s $PWD/aishell-test-dev-manifests/data .
21
+ }
22
+
23
+ function test_transducer_stateless3_2022_06_20() {
24
+ repo_url=https://huggingface.co/csukuangfj/icefall-aishell-pruned-transducer-stateless3-2022-06-20
25
+ log "Downloading pre-trained model from $repo_url"
26
+ git clone $repo_url
27
+ repo=$(basename $repo_url)
28
+
29
+ log "Display test files"
30
+ tree $repo/
31
+ ls -lh $repo/test_wavs/*.wav
32
+
33
+ pushd $repo/exp
34
+ ln -s pretrained-epoch-29-avg-5-torch-1.10.0.pt pretrained.pt
35
+ popd
36
+
37
+ log "test greedy_search with pretrained.py"
38
+
39
+ for sym in 1 2 3; do
40
+ log "Greedy search with --max-sym-per-frame $sym"
41
+
42
+ ./pruned_transducer_stateless3/pretrained.py \
43
+ --method greedy_search \
44
+ --max-sym-per-frame $sym \
45
+ --checkpoint $repo/exp/pretrained.pt \
46
+ --lang-dir $repo/data/lang_char \
47
+ $repo/test_wavs/BAC009S0764W0121.wav \
48
+ $repo/test_wavs/BAC009S0764W0122.wav \
49
+ $repo/test_wavs/BAC009S0764W0123.wav
50
+ done
51
+
52
+ log "test beam search with pretrained.py"
53
+
54
+ for method in modified_beam_search beam_search fast_beam_search; do
55
+ log "$method"
56
+
57
+ ./pruned_transducer_stateless3/pretrained.py \
58
+ --method $method \
59
+ --beam-size 4 \
60
+ --checkpoint $repo/exp/pretrained.pt \
61
+ --lang-dir $repo/data/lang_char \
62
+ $repo/test_wavs/BAC009S0764W0121.wav \
63
+ $repo/test_wavs/BAC009S0764W0122.wav \
64
+ $repo/test_wavs/BAC009S0764W0123.wav
65
+ done
66
+
67
+ echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
68
+ echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
69
+ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
70
+ mkdir -p pruned_transducer_stateless3/exp
71
+ ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless3/exp/epoch-999.pt
72
+ ln -s $PWD/$repo/data/lang_char data/
73
+
74
+ ls -lh data
75
+ ls -lh pruned_transducer_stateless3/exp
76
+
77
+ log "Decoding test and dev"
78
+
79
+ # use a small value for decoding with CPU
80
+ max_duration=100
81
+
82
+ for method in greedy_search fast_beam_search modified_beam_search; do
83
+ log "Decoding with $method"
84
+
85
+ ./pruned_transducer_stateless3/decode.py \
86
+ --decoding-method $method \
87
+ --epoch 999 \
88
+ --avg 1 \
89
+ --max-duration $max_duration \
90
+ --exp-dir pruned_transducer_stateless3/exp
91
+ done
92
+
93
+ rm pruned_transducer_stateless3/exp/*.pt
94
+ fi
95
+
96
+ rm -rf $repo
97
+ }
98
+
99
+ function test_zipformer_large_2023_10_24() {
100
+ log "CI testing large model"
101
+ repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-large-2023-10-24/
102
+ log "Downloading pre-trained model from $repo_url"
103
+ git clone $repo_url
104
+ repo=$(basename $repo_url)
105
+
106
+ log "Display test files"
107
+ tree $repo/
108
+ ls -lh $repo/test_wavs/*.wav
109
+
110
+ for method in modified_beam_search greedy_search fast_beam_search; do
111
+ log "$method"
112
+
113
+ ./zipformer/pretrained.py \
114
+ --method $method \
115
+ --context-size 1 \
116
+ --checkpoint $repo/exp/pretrained.pt \
117
+ --tokens $repo/data/lang_char/tokens.txt \
118
+ --num-encoder-layers 2,2,4,5,4,2 \
119
+ --feedforward-dim 512,768,1536,2048,1536,768 \
120
+ --encoder-dim 192,256,512,768,512,256 \
121
+ --encoder-unmasked-dim 192,192,256,320,256,192 \
122
+ $repo/test_wavs/BAC009S0764W0121.wav \
123
+ $repo/test_wavs/BAC009S0764W0122.wav \
124
+ $repo/test_wavs/BAC009S0764W0123.wav
125
+ done
126
+ rm -rf $repo
127
+ }
128
+
129
+ function test_zipformer_2023_10_24() {
130
+ repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-2023-10-24/
131
+ log "Downloading pre-trained model from $repo_url"
132
+ git clone $repo_url
133
+ repo=$(basename $repo_url)
134
+
135
+ log "Display test files"
136
+ tree $repo/
137
+ ls -lh $repo/test_wavs/*.wav
138
+
139
+
140
+ for method in modified_beam_search greedy_search fast_beam_search; do
141
+ log "$method"
142
+
143
+ ./zipformer/pretrained.py \
144
+ --method $method \
145
+ --context-size 1 \
146
+ --checkpoint $repo/exp/pretrained.pt \
147
+ --tokens $repo/data/lang_char/tokens.txt \
148
+ $repo/test_wavs/BAC009S0764W0121.wav \
149
+ $repo/test_wavs/BAC009S0764W0122.wav \
150
+ $repo/test_wavs/BAC009S0764W0123.wav
151
+ done
152
+ rm -rf $repo
153
+ }
154
+
155
+ function test_zipformer_small_2023_10_24() {
156
+ log "CI testing small model"
157
+ repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-small-2023-10-24/
158
+ log "Downloading pre-trained model from $repo_url"
159
+ git clone $repo_url
160
+ repo=$(basename $repo_url)
161
+
162
+ log "Display test files"
163
+ tree $repo/
164
+ ls -lh $repo/test_wavs/*.wav
165
+
166
+
167
+ for method in modified_beam_search greedy_search fast_beam_search; do
168
+ log "$method"
169
+
170
+ ./zipformer/pretrained.py \
171
+ --method $method \
172
+ --context-size 1 \
173
+ --checkpoint $repo/exp/pretrained.pt \
174
+ --tokens $repo/data/lang_char/tokens.txt \
175
+ --num-encoder-layers 2,2,2,2,2,2 \
176
+ --feedforward-dim 512,768,768,768,768,768 \
177
+ --encoder-dim 192,256,256,256,256,256 \
178
+ --encoder-unmasked-dim 192,192,192,192,192,192 \
179
+ $repo/test_wavs/BAC009S0764W0121.wav \
180
+ $repo/test_wavs/BAC009S0764W0122.wav \
181
+ $repo/test_wavs/BAC009S0764W0123.wav
182
+ done
183
+ rm -rf $repo
184
+ }
185
+
186
+ function test_transducer_stateless_modified_2022_03_01() {
187
+ repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2022-03-01
188
+
189
+ log "Downloading pre-trained model from $repo_url"
190
+ git lfs install
191
+ git clone $repo_url
192
+ repo=$(basename $repo_url)
193
+
194
+ log "Display test files"
195
+ tree $repo/
196
+ ls -lh $repo/test_wavs/*.wav
197
+
198
+ for sym in 1 2 3; do
199
+ log "Greedy search with --max-sym-per-frame $sym"
200
+
201
+ ./transducer_stateless_modified/pretrained.py \
202
+ --method greedy_search \
203
+ --max-sym-per-frame $sym \
204
+ --checkpoint $repo/exp/pretrained.pt \
205
+ --lang-dir $repo/data/lang_char \
206
+ $repo/test_wavs/BAC009S0764W0121.wav \
207
+ $repo/test_wavs/BAC009S0764W0122.wav \
208
+ $repo/test_wavs/BAC009S0764W0123.wav
209
+ done
210
+
211
+ for method in modified_beam_search beam_search; do
212
+ log "$method"
213
+
214
+ ./transducer_stateless_modified/pretrained.py \
215
+ --method $method \
216
+ --beam-size 4 \
217
+ --checkpoint $repo/exp/pretrained.pt \
218
+ --lang-dir $repo/data/lang_char \
219
+ $repo/test_wavs/BAC009S0764W0121.wav \
220
+ $repo/test_wavs/BAC009S0764W0122.wav \
221
+ $repo/test_wavs/BAC009S0764W0123.wav
222
+ done
223
+ rm -rf $repo
224
+ }
225
+
226
+ function test_transducer_stateless_modified_2_2022_03_01() {
227
+ repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2-2022-03-01
228
+
229
+ log "Downloading pre-trained model from $repo_url"
230
+ git lfs install
231
+ git clone $repo_url
232
+ repo=$(basename $repo_url)
233
+
234
+ log "Display test files"
235
+ tree $repo/
236
+ ls -lh $repo/test_wavs/*.wav
237
+
238
+ for sym in 1 2 3; do
239
+ log "Greedy search with --max-sym-per-frame $sym"
240
+
241
+ ./transducer_stateless_modified-2/pretrained.py \
242
+ --method greedy_search \
243
+ --max-sym-per-frame $sym \
244
+ --checkpoint $repo/exp/pretrained.pt \
245
+ --lang-dir $repo/data/lang_char \
246
+ $repo/test_wavs/BAC009S0764W0121.wav \
247
+ $repo/test_wavs/BAC009S0764W0122.wav \
248
+ $repo/test_wavs/BAC009S0764W0123.wav
249
+ done
250
+
251
+ for method in modified_beam_search beam_search; do
252
+ log "$method"
253
+
254
+ ./transducer_stateless_modified-2/pretrained.py \
255
+ --method $method \
256
+ --beam-size 4 \
257
+ --checkpoint $repo/exp/pretrained.pt \
258
+ --lang-dir $repo/data/lang_char \
259
+ $repo/test_wavs/BAC009S0764W0121.wav \
260
+ $repo/test_wavs/BAC009S0764W0122.wav \
261
+ $repo/test_wavs/BAC009S0764W0123.wav
262
+ done
263
+ rm -rf $repo
264
+ }
265
+
266
+ function test_conformer_ctc() {
267
+ repo_url=https://huggingface.co/csukuangfj/icefall_asr_aishell_conformer_ctc
268
+ log "Downloading pre-trained model from $repo_url"
269
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
270
+ repo=$(basename $repo_url)
271
+ pushd $repo
272
+
273
+ git lfs pull --include "exp/pretrained.pt"
274
+ git lfs pull --include "data/lang_char/H.fst"
275
+ git lfs pull --include "data/lang_char/HL.fst"
276
+ git lfs pull --include "data/lang_char/HLG.fst"
277
+
278
+ popd
279
+
280
+ log "Display test files"
281
+ tree $repo/
282
+ ls -lh $repo/test_wavs/*.wav
283
+
284
+ log "CTC decoding"
285
+
286
+ log "Exporting model with torchscript"
287
+
288
+ pushd $repo/exp
289
+ ln -s pretrained.pt epoch-99.pt
290
+ popd
291
+
292
+ ./conformer_ctc/export.py \
293
+ --epoch 99 \
294
+ --avg 1 \
295
+ --exp-dir $repo/exp \
296
+ --tokens $repo/data/lang_char/tokens.txt \
297
+ --jit 1
298
+
299
+ ls -lh $repo/exp
300
+
301
+ ls -lh $repo/data/lang_char
302
+
303
+ log "Decoding with H on CPU with OpenFst"
304
+
305
+ ./conformer_ctc/jit_pretrained_decode_with_H.py \
306
+ --nn-model $repo/exp/cpu_jit.pt \
307
+ --H $repo/data/lang_char/H.fst \
308
+ --tokens $repo/data/lang_char/tokens.txt \
309
+ $repo/test_wavs/0.wav \
310
+ $repo/test_wavs/1.wav \
311
+ $repo/test_wavs/2.wav
312
+
313
+ log "Decoding with HL on CPU with OpenFst"
314
+
315
+ ./conformer_ctc/jit_pretrained_decode_with_HL.py \
316
+ --nn-model $repo/exp/cpu_jit.pt \
317
+ --HL $repo/data/lang_char/HL.fst \
318
+ --words $repo/data/lang_char/words.txt \
319
+ $repo/test_wavs/0.wav \
320
+ $repo/test_wavs/1.wav \
321
+ $repo/test_wavs/2.wav
322
+
323
+ log "Decoding with HLG on CPU with OpenFst"
324
+
325
+ ./conformer_ctc/jit_pretrained_decode_with_HLG.py \
326
+ --nn-model $repo/exp/cpu_jit.pt \
327
+ --HLG $repo/data/lang_char/HLG.fst \
328
+ --words $repo/data/lang_char/words.txt \
329
+ $repo/test_wavs/0.wav \
330
+ $repo/test_wavs/1.wav \
331
+ $repo/test_wavs/2.wav
332
+
333
+ rm -rf $repo
334
+ }
335
+
336
+ download_test_dev_manifests
337
+ test_transducer_stateless3_2022_06_20
338
+ test_zipformer_large_2023_10_24
339
+ test_zipformer_2023_10_24
340
+ test_zipformer_small_2023_10_24
341
+ test_transducer_stateless_modified_2022_03_01
342
+ test_transducer_stateless_modified_2_2022_03_01
343
+ # test_conformer_ctc # fails for torch 1.13.x and torch 2.0.x
.github/scripts/audioset/AT/run.sh ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ python3 -m pip install onnxoptimizer onnxsim
6
+
7
+ log() {
8
+ # This function is from espnet
9
+ local fname=${BASH_SOURCE[1]##*/}
10
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
11
+ }
12
+
13
+ cd egs/audioset/AT
14
+
15
+ function test_pretrained() {
16
+ repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
17
+ repo=$(basename $repo_url)
18
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
19
+ pushd $repo/exp
20
+ git lfs pull --include pretrained.pt
21
+ ln -s pretrained.pt epoch-99.pt
22
+ ls -lh
23
+ popd
24
+
25
+ log "test pretrained.pt"
26
+
27
+ python3 zipformer/pretrained.py \
28
+ --checkpoint $repo/exp/pretrained.pt \
29
+ --label-dict $repo/data/class_labels_indices.csv \
30
+ $repo/test_wavs/1.wav \
31
+ $repo/test_wavs/2.wav \
32
+ $repo/test_wavs/3.wav \
33
+ $repo/test_wavs/4.wav
34
+
35
+ log "test jit export"
36
+ ls -lh $repo/exp/
37
+ python3 zipformer/export.py \
38
+ --exp-dir $repo/exp \
39
+ --epoch 99 \
40
+ --avg 1 \
41
+ --use-averaged-model 0 \
42
+ --jit 1
43
+ ls -lh $repo/exp/
44
+
45
+ log "test jit models"
46
+ python3 zipformer/jit_pretrained.py \
47
+ --nn-model-filename $repo/exp/jit_script.pt \
48
+ --label-dict $repo/data/class_labels_indices.csv \
49
+ $repo/test_wavs/1.wav \
50
+ $repo/test_wavs/2.wav \
51
+ $repo/test_wavs/3.wav \
52
+ $repo/test_wavs/4.wav
53
+
54
+ log "test onnx export"
55
+ ls -lh $repo/exp/
56
+ python3 zipformer/export-onnx.py \
57
+ --exp-dir $repo/exp \
58
+ --epoch 99 \
59
+ --avg 1 \
60
+ --use-averaged-model 0
61
+
62
+ ls -lh $repo/exp/
63
+
64
+ pushd $repo/exp/
65
+ mv model-epoch-99-avg-1.onnx model.onnx
66
+ mv model-epoch-99-avg-1.int8.onnx model.int8.onnx
67
+ popd
68
+
69
+ ls -lh $repo/exp/
70
+
71
+ log "test onnx models"
72
+ for m in model.onnx model.int8.onnx; do
73
+ log "$m"
74
+ python3 zipformer/onnx_pretrained.py \
75
+ --model-filename $repo/exp/model.onnx \
76
+ --label-dict $repo/data/class_labels_indices.csv \
77
+ $repo/test_wavs/1.wav \
78
+ $repo/test_wavs/2.wav \
79
+ $repo/test_wavs/3.wav \
80
+ $repo/test_wavs/4.wav
81
+ done
82
+
83
+ log "prepare data for uploading to huggingface"
84
+ dst=/icefall/model-onnx
85
+ mkdir -p $dst
86
+ cp -v $repo/exp/*.onnx $dst/
87
+ cp -v $repo/data/* $dst/
88
+ cp -av $repo/test_wavs $dst
89
+
90
+ ls -lh $dst
91
+ ls -lh $dst/test_wavs
92
+ }
93
+
94
+ test_pretrained
.github/scripts/baker_zh/TTS/run-matcha.sh ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ apt-get update
6
+ apt-get install -y sox
7
+
8
+ python3 -m pip install numba conformer==0.3.2 diffusers librosa
9
+ python3 -m pip install jieba
10
+
11
+
12
+ log() {
13
+ # This function is from espnet
14
+ local fname=${BASH_SOURCE[1]##*/}
15
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
16
+ }
17
+
18
+ cd egs/baker_zh/TTS
19
+
20
+ sed -i.bak s/600/8/g ./prepare.sh
21
+ sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
22
+ sed -i.bak s/500/5/g ./prepare.sh
23
+ git diff
24
+
25
+ function prepare_data() {
26
+ # We have created a subset of the data for testing
27
+ #
28
+ mkdir -p download
29
+ pushd download
30
+ wget -q https://huggingface.co/csukuangfj/tmp-files/resolve/main/BZNSYP-samples.tar.bz2
31
+ tar xvf BZNSYP-samples.tar.bz2
32
+ mv BZNSYP-samples BZNSYP
33
+ rm BZNSYP-samples.tar.bz2
34
+ popd
35
+
36
+ ./prepare.sh
37
+ tree .
38
+ }
39
+
40
+ function train() {
41
+ pushd ./matcha
42
+ sed -i.bak s/1500/3/g ./train.py
43
+ git diff .
44
+ popd
45
+
46
+ ./matcha/train.py \
47
+ --exp-dir matcha/exp \
48
+ --num-epochs 1 \
49
+ --save-every-n 1 \
50
+ --num-buckets 2 \
51
+ --tokens data/tokens.txt \
52
+ --max-duration 20
53
+
54
+ ls -lh matcha/exp
55
+ }
56
+
57
+ function infer() {
58
+ curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
59
+
60
+ ./matcha/infer.py \
61
+ --num-buckets 2 \
62
+ --epoch 1 \
63
+ --exp-dir ./matcha/exp \
64
+ --tokens data/tokens.txt \
65
+ --cmvn ./data/fbank/cmvn.json \
66
+ --vocoder ./generator_v2 \
67
+ --input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
68
+ --output-wav ./generated.wav
69
+
70
+ ls -lh *.wav
71
+ soxi ./generated.wav
72
+ rm -v ./generated.wav
73
+ rm -v generator_v2
74
+ }
75
+
76
+ function export_onnx() {
77
+ pushd matcha/exp
78
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/epoch-2000.pt
79
+ popd
80
+
81
+ pushd data/fbank
82
+ rm -v *.json
83
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/cmvn.json
84
+ popd
85
+
86
+ ./matcha/export_onnx.py \
87
+ --exp-dir ./matcha/exp \
88
+ --epoch 2000 \
89
+ --tokens ./data/tokens.txt \
90
+ --cmvn ./data/fbank/cmvn.json
91
+
92
+ ls -lh *.onnx
93
+
94
+ if false; then
95
+ # The CI machine does not have enough memory to run it
96
+ #
97
+ curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
98
+ curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
99
+ curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
100
+ python3 ./matcha/export_onnx_hifigan.py
101
+ else
102
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
103
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
104
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v3.onnx
105
+ fi
106
+
107
+ ls -lh *.onnx
108
+
109
+ python3 ./matcha/generate_lexicon.py
110
+
111
+ for v in v1 v2 v3; do
112
+ python3 ./matcha/onnx_pretrained.py \
113
+ --acoustic-model ./model-steps-6.onnx \
114
+ --vocoder ./hifigan_$v.onnx \
115
+ --tokens ./data/tokens.txt \
116
+ --lexicon ./lexicon.txt \
117
+ --input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
118
+ --output-wav /icefall/generated-matcha-tts-steps-6-$v.wav
119
+ done
120
+
121
+ ls -lh /icefall/*.wav
122
+ soxi /icefall/generated-matcha-tts-steps-6-*.wav
123
+ cp ./model-steps-*.onnx /icefall
124
+
125
+ d=matcha-icefall-zh-baker
126
+ mkdir $d
127
+ cp -v data/tokens.txt $d
128
+ cp -v lexicon.txt $d
129
+ cp model-steps-3.onnx $d
130
+ pushd $d
131
+ curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
132
+ tar xvf dict.tar.bz2
133
+ rm dict.tar.bz2
134
+
135
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
136
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
137
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst
138
+
139
+ cat >README.md <<EOF
140
+ # Introduction
141
+
142
+ This model is trained using the dataset from
143
+ https://en.data-baker.com/datasets/freeDatasets/
144
+
145
+ The dataset contains 10000 Chinese sentences of a native Chinese female speaker,
146
+ which is about 12 hours.
147
+
148
+ **Note**: The dataset is for non-commercial use only.
149
+
150
+ You can find the training code at
151
+ https://github.com/k2-fsa/icefall/tree/master/egs/baker_zh/TTS
152
+ EOF
153
+
154
+ ls -lh
155
+ popd
156
+ tar cvjf $d.tar.bz2 $d
157
+ mv $d.tar.bz2 /icefall
158
+ mv $d /icefall
159
+ }
160
+
161
+ prepare_data
162
+ train
163
+ infer
164
+ export_onnx
165
+
166
+ rm -rfv generator_v* matcha/exp
167
+ git checkout .
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ # This script computes fbank features for the test-clean and test-other datasets.
4
+ # The computed features are saved to ~/tmp/fbank-libri and are
5
+ # cached for later runs
6
+
7
+ set -e
8
+
9
+ export PYTHONPATH=$PWD:$PYTHONPATH
10
+ echo $PYTHONPATH
11
+
12
+ mkdir ~/tmp/fbank-libri
13
+ cd egs/librispeech/ASR
14
+ mkdir -p data
15
+ cd data
16
+ [ ! -e fbank ] && ln -s ~/tmp/fbank-libri fbank
17
+ cd ..
18
+ ./local/compute_fbank_librispeech.py --dataset 'test-clean test-other'
19
+ ls -lh data/fbank/
.github/scripts/docker/Dockerfile ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG PYTHON_VERSION=3.8
2
+ FROM python:${PYTHON_VERSION}
3
+
4
+ ARG TORCHAUDIO_VERSION="0.13.0"
5
+ ARG TORCH_VERSION="1.13.0"
6
+ ARG K2_VERSION="1.24.4.dev20231220"
7
+ ARG KALDIFEAT_VERSION="1.25.3.dev20231221"
8
+
9
+ ARG _K2_VERSION="${K2_VERSION}+cpu.torch${TORCH_VERSION}"
10
+ ARG _KALDIFEAT_VERSION="${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}"
11
+
12
+ RUN apt-get update -y && \
13
+ apt-get install -qq -y \
14
+ cmake \
15
+ ffmpeg \
16
+ git \
17
+ git-lfs \
18
+ graphviz \
19
+ less \
20
+ tree \
21
+ vim \
22
+ && \
23
+ apt-get clean && \
24
+ rm -rf /var/cache/apt/archives /var/lib/apt/lists
25
+
26
+
27
+ LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
28
+ LABEL k2_version=${_K2_VERSION}
29
+ LABEL kaldifeat_version=${_KALDIFEAT_VERSION}
30
+ LABEL github_repo="https://github.com/k2-fsa/icefall"
31
+
32
+ # Install dependencies
33
+ RUN pip install --no-cache-dir \
34
+ torch==${TORCH_VERSION}+cpu -f https://download.pytorch.org/whl/torch \
35
+ torchaudio==${TORCHAUDIO_VERSION}+cpu -f https://download.pytorch.org/whl/torchaudio \
36
+ k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
37
+ \
38
+ git+https://github.com/lhotse-speech/lhotse \
39
+ kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
40
+ conformer==0.3.2 \
41
+ cython \
42
+ diffusers \
43
+ dill \
44
+ espnet_tts_frontend \
45
+ graphviz \
46
+ kaldi-decoder \
47
+ kaldi_native_io \
48
+ kaldialign \
49
+ kaldifst \
50
+ kaldilm \
51
+ librosa \
52
+ "matplotlib<=3.9.4" \
53
+ multi_quantization \
54
+ numba \
55
+ "numpy<2.0" \
56
+ onnxoptimizer \
57
+ onnxsim \
58
+ onnx==1.17.0 \
59
+ onnxmltools \
60
+ onnxruntime==1.17.1 \
61
+ piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html \
62
+ pypinyin==0.50.0 \
63
+ pytest \
64
+ sentencepiece>=0.1.96 \
65
+ six \
66
+ tensorboard \
67
+ typeguard
68
+
69
+ # RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
70
+ # cd /workspace/icefall && \
71
+ # pip install --no-cache-dir -r requirements.txt
72
+ #
73
+ # ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
74
+ #
75
+ # WORKDIR /workspace/icefall
.github/scripts/docker/generate_build_matrix.py ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ # Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang)
3
+
4
+
5
+ import argparse
6
+ import json
7
+
8
+
9
+ def get_args():
10
+ parser = argparse.ArgumentParser()
11
+ parser.add_argument(
12
+ "--min-torch-version",
13
+ help="torch version",
14
+ )
15
+
16
+ parser.add_argument(
17
+ "--torch-version",
18
+ help="torch version",
19
+ )
20
+
21
+ parser.add_argument(
22
+ "--python-version",
23
+ help="python version",
24
+ )
25
+ return parser.parse_args()
26
+
27
+
28
+ def version_gt(a, b):
29
+ a_major, a_minor = list(map(int, a.split(".")))[:2]
30
+ b_major, b_minor = list(map(int, b.split(".")))[:2]
31
+ if a_major > b_major:
32
+ return True
33
+
34
+ if a_major == b_major and a_minor > b_minor:
35
+ return True
36
+
37
+ return False
38
+
39
+
40
+ def version_ge(a, b):
41
+ a_major, a_minor = list(map(int, a.split(".")))[:2]
42
+ b_major, b_minor = list(map(int, b.split(".")))[:2]
43
+ if a_major > b_major:
44
+ return True
45
+
46
+ if a_major == b_major and a_minor >= b_minor:
47
+ return True
48
+
49
+ return False
50
+
51
+
52
+ def get_torchaudio_version(torch_version):
53
+ if torch_version == "1.13.0":
54
+ return "0.13.0"
55
+ elif torch_version == "1.13.1":
56
+ return "0.13.1"
57
+ elif torch_version == "2.0.0":
58
+ return "2.0.1"
59
+ elif torch_version == "2.0.1":
60
+ return "2.0.2"
61
+ else:
62
+ return torch_version
63
+
64
+
65
+ def get_matrix(min_torch_version, specified_torch_version, specified_python_version):
66
+ k2_version = "1.24.4.dev20250630"
67
+ kaldifeat_version = "1.25.5.dev20250630"
68
+ version = "20250630"
69
+
70
+ # torchaudio 2.5.0 does not support python 3.13
71
+ python_version = ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
72
+ torch_version = []
73
+ torch_version += ["1.13.0", "1.13.1"]
74
+ torch_version += ["2.0.0", "2.0.1"]
75
+ torch_version += ["2.1.0", "2.1.1", "2.1.2"]
76
+ torch_version += ["2.2.0", "2.2.1", "2.2.2"]
77
+ # Test only torch >= 2.3.0
78
+ torch_version += ["2.3.0", "2.3.1"]
79
+ torch_version += ["2.4.0"]
80
+ torch_version += ["2.4.1"]
81
+ torch_version += ["2.5.0"]
82
+ torch_version += ["2.5.1"]
83
+ torch_version += ["2.6.0", "2.7.0", "2.7.1"]
84
+
85
+ if specified_torch_version:
86
+ torch_version = [specified_torch_version]
87
+
88
+ if specified_python_version:
89
+ python_version = [specified_python_version]
90
+
91
+ matrix = []
92
+ for p in python_version:
93
+ for t in torch_version:
94
+ if min_torch_version and version_gt(min_torch_version, t):
95
+ continue
96
+
97
+ # torchaudio <= 1.13.x supports only python <= 3.10
98
+
99
+ if version_gt(p, "3.10") and not version_gt(t, "2.0"):
100
+ continue
101
+
102
+ # only torch>=2.2.0 supports python 3.12
103
+ if version_gt(p, "3.11") and not version_gt(t, "2.1"):
104
+ continue
105
+
106
+ if version_gt(p, "3.12") and not version_gt(t, "2.4"):
107
+ continue
108
+
109
+ if version_gt(t, "2.4") and version_gt("3.10", p):
110
+ # torch>=2.5 requires python 3.10
111
+ continue
112
+
113
+ k2_version_2 = k2_version
114
+ kaldifeat_version_2 = kaldifeat_version
115
+
116
+ matrix.append(
117
+ {
118
+ "k2-version": k2_version_2,
119
+ "kaldifeat-version": kaldifeat_version_2,
120
+ "version": version,
121
+ "python-version": p,
122
+ "torch-version": t,
123
+ "torchaudio-version": get_torchaudio_version(t),
124
+ }
125
+ )
126
+ return matrix
127
+
128
+
129
+ def main():
130
+ args = get_args()
131
+ matrix = get_matrix(
132
+ min_torch_version=args.min_torch_version,
133
+ specified_torch_version=args.torch_version,
134
+ specified_python_version=args.python_version,
135
+ )
136
+ print(json.dumps({"include": matrix}))
137
+
138
+
139
+ if __name__ == "__main__":
140
+ main()
.github/scripts/download-gigaspeech-dev-test-dataset.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ # This script downloads the pre-computed fbank features for
4
+ # dev and test datasets of GigaSpeech.
5
+ #
6
+ # You will find directories `~/tmp/giga-dev-dataset-fbank` after running
7
+ # this script.
8
+
9
+ set -e
10
+
11
+ mkdir -p ~/tmp
12
+ cd ~/tmp
13
+
14
+ git lfs install
15
+ git clone https://huggingface.co/csukuangfj/giga-dev-dataset-fbank
16
+
17
+ ls -lh giga-dev-dataset-fbank/data/fbank
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ # This script downloads the test-clean and test-other datasets
4
+ # of LibriSpeech and unzip them to the folder ~/tmp/download,
5
+ # which is cached by GitHub actions for later runs.
6
+ #
7
+ # You will find directories ~/tmp/download/LibriSpeech after running
8
+ # this script.
9
+
10
+ set -e
11
+
12
+ mkdir ~/tmp/download
13
+ cd egs/librispeech/ASR
14
+ ln -s ~/tmp/download .
15
+ cd download
16
+ wget -q --no-check-certificate https://www.openslr.org/resources/12/test-clean.tar.gz
17
+ tar xf test-clean.tar.gz
18
+ rm test-clean.tar.gz
19
+
20
+ wget -q --no-check-certificate https://www.openslr.org/resources/12/test-other.tar.gz
21
+ tar xf test-other.tar.gz
22
+ rm test-other.tar.gz
23
+ pwd
24
+ ls -lh
25
+ ls -lh LibriSpeech
.github/scripts/generate-piper-phonemize-page.py ADDED
@@ -0,0 +1,90 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+
3
+
4
+ def get_v1_2_0_files():
5
+ prefix = (
6
+ "https://github.com/csukuangfj/piper-phonemize/releases/download/2023.12.5/"
7
+ )
8
+ files = [
9
+ "piper_phonemize-1.2.0-cp310-cp310-macosx_10_14_x86_64.whl",
10
+ "piper_phonemize-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
11
+ "piper_phonemize-1.2.0-cp311-cp311-macosx_10_14_x86_64.whl",
12
+ "piper_phonemize-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
13
+ "piper_phonemize-1.2.0-cp312-cp312-macosx_10_14_x86_64.whl",
14
+ "piper_phonemize-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
15
+ "piper_phonemize-1.2.0-cp37-cp37m-macosx_10_14_x86_64.whl",
16
+ "piper_phonemize-1.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
17
+ "piper_phonemize-1.2.0-cp38-cp38-macosx_10_14_x86_64.whl",
18
+ "piper_phonemize-1.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
19
+ "piper_phonemize-1.2.0-cp39-cp39-macosx_10_14_x86_64.whl",
20
+ "piper_phonemize-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
21
+ ]
22
+ ans = [prefix + f for f in files]
23
+ ans.sort()
24
+ return ans
25
+
26
+
27
+ def get_v1_3_0_files():
28
+ prefix = (
29
+ "https://github.com/csukuangfj/piper-phonemize/releases/download/2025.06.23/"
30
+ )
31
+ files = [
32
+ "piper_phonemize-1.3.0-cp310-cp310-macosx_10_9_universal2.whl",
33
+ "piper_phonemize-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl",
34
+ "piper_phonemize-1.3.0-cp310-cp310-macosx_11_0_arm64.whl",
35
+ "piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
36
+ "piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl",
37
+ "piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
38
+ "piper_phonemize-1.3.0-cp310-cp310-win_amd64.whl",
39
+ "piper_phonemize-1.3.0-cp311-cp311-macosx_10_9_universal2.whl",
40
+ "piper_phonemize-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl",
41
+ "piper_phonemize-1.3.0-cp311-cp311-macosx_11_0_arm64.whl",
42
+ "piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
43
+ "piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl",
44
+ "piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
45
+ "piper_phonemize-1.3.0-cp311-cp311-win_amd64.whl",
46
+ "piper_phonemize-1.3.0-cp312-cp312-macosx_10_13_universal2.whl",
47
+ "piper_phonemize-1.3.0-cp312-cp312-macosx_10_13_x86_64.whl",
48
+ "piper_phonemize-1.3.0-cp312-cp312-macosx_11_0_arm64.whl",
49
+ "piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
50
+ "piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl",
51
+ "piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
52
+ "piper_phonemize-1.3.0-cp312-cp312-win_amd64.whl",
53
+ "piper_phonemize-1.3.0-cp313-cp313-macosx_10_13_universal2.whl",
54
+ "piper_phonemize-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl",
55
+ "piper_phonemize-1.3.0-cp313-cp313-macosx_11_0_arm64.whl",
56
+ "piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
57
+ "piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl",
58
+ "piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
59
+ "piper_phonemize-1.3.0-cp313-cp313-win_amd64.whl",
60
+ "piper_phonemize-1.3.0-cp38-cp38-macosx_10_9_universal2.whl",
61
+ "piper_phonemize-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl",
62
+ "piper_phonemize-1.3.0-cp38-cp38-macosx_11_0_arm64.whl",
63
+ "piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
64
+ "piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl",
65
+ "piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
66
+ "piper_phonemize-1.3.0-cp38-cp38-win_amd64.whl",
67
+ "piper_phonemize-1.3.0-cp39-cp39-macosx_10_9_universal2.whl",
68
+ "piper_phonemize-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl",
69
+ "piper_phonemize-1.3.0-cp39-cp39-macosx_11_0_arm64.whl",
70
+ "piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
71
+ "piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl",
72
+ "piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
73
+ "piper_phonemize-1.3.0-cp39-cp39-win_amd64.whl",
74
+ ]
75
+ ans = [prefix + f for f in files]
76
+ ans.sort()
77
+ return ans
78
+
79
+
80
+ def main():
81
+ files = get_v1_3_0_files() + get_v1_2_0_files()
82
+
83
+ with open("piper_phonemize.html", "w") as f:
84
+ for url in files:
85
+ file = url.split("/")[-1]
86
+ f.write(f'<a href="{url}">{file}</a><br/>\n')
87
+
88
+
89
+ if __name__ == "__main__":
90
+ main()
.github/scripts/install-kaldifeat.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ # This script installs kaldifeat into the directory ~/tmp/kaldifeat
4
+ # which is cached by GitHub actions for later runs.
5
+
6
+ set -e
7
+
8
+ mkdir -p ~/tmp
9
+ cd ~/tmp
10
+ git clone https://github.com/csukuangfj/kaldifeat
11
+ cd kaldifeat
12
+ mkdir build
13
+ cd build
14
+ cmake -DCMAKE_BUILD_TYPE=Release ..
15
+ make -j2 _kaldifeat
.github/scripts/ksponspeech/ASR/run.sh ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ cd egs/ksponspeech/ASR
12
+
13
+
14
+ function test_pretrained_non_streaming() {
15
+ git lfs install
16
+ git clone https://huggingface.co/johnBamma/icefall-asr-ksponspeech-zipformer-2024-06-24
17
+ repo=icefall-asr-ksponspeech-zipformer-2024-06-24
18
+ pushd $repo
19
+ mkdir test_wavs
20
+ cd test_wavs
21
+ curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/0.wav
22
+ curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/1.wav
23
+ curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/2.wav
24
+ curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/3.wav
25
+ curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/trans.txt
26
+ cd ../exp
27
+ ln -s pretrained.pt epoch-99.pt
28
+ ls -lh
29
+ popd
30
+
31
+ log 'test pretrained.py'
32
+ ./zipformer/pretrained.py \
33
+ --checkpoint $repo/exp/pretrained.pt \
34
+ --tokens $repo/data/lang_bpe_5000/tokens.txt \
35
+ --method greedy_search \
36
+ $repo/test_wavs/0.wav \
37
+ $repo/test_wavs/1.wav \
38
+ $repo/test_wavs/2.wav \
39
+ $repo/test_wavs/3.wav
40
+
41
+ log 'test export-onnx.py'
42
+
43
+ ./zipformer/export-onnx.py \
44
+ --tokens $repo/data/lang_bpe_5000/tokens.txt \
45
+ --use-averaged-model 0 \
46
+ --epoch 99 \
47
+ --avg 1 \
48
+ --exp-dir $repo/exp/
49
+
50
+ ls -lh $repo/exp
51
+
52
+ ls -lh $repo/data/lang_bpe_5000/
53
+
54
+ log 'test exported onnx models'
55
+ ./zipformer/onnx_pretrained.py \
56
+ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
57
+ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
58
+ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
59
+ --tokens $repo/data/lang_bpe_5000/tokens.txt \
60
+ $repo/test_wavs/0.wav
61
+
62
+ dst=/tmp/model-2024-06-24
63
+ mkdir -p $dst
64
+
65
+ cp -av $repo/test_wavs $dst
66
+ cp -v $repo/exp/*.onnx $dst
67
+ cp -v $repo/exp/*.onnx $dst
68
+ cp -v $repo/data/lang_bpe_5000/tokens.txt $dst
69
+ cp -v $repo/data/lang_bpe_5000/bpe.model $dst
70
+ rm -rf $repo
71
+ }
72
+
73
+ function test_pretrained_streaming() {
74
+ git lfs install
75
+ git clone https://huggingface.co/johnBamma/icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
76
+ repo=icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
77
+ pushd $repo
78
+ mkdir test_wavs
79
+ cd test_wavs
80
+ curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/0.wav
81
+ curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/1.wav
82
+ curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/2.wav
83
+ curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/3.wav
84
+ cd ../exp
85
+ ln -s pretrained.pt epoch-99.pt
86
+ ls -lh
87
+ popd
88
+
89
+ log 'test pretrained.py'
90
+ ./pruned_transducer_stateless7_streaming/pretrained.py \
91
+ --checkpoint $repo/exp/pretrained.pt \
92
+ --tokens $repo/data/lang_bpe_5000/tokens.txt \
93
+ --method greedy_search \
94
+ $repo/test_wavs/0.wav \
95
+ $repo/test_wavs/1.wav \
96
+ $repo/test_wavs/2.wav \
97
+ $repo/test_wavs/3.wav
98
+
99
+ log 'test export-onnx.py'
100
+
101
+ ./pruned_transducer_stateless7_streaming/export-onnx.py \
102
+ --tokens $repo/data/lang_bpe_5000/tokens.txt \
103
+ --use-averaged-model 0 \
104
+ --epoch 99 \
105
+ --avg 1 \
106
+ --decode-chunk-len 32 \
107
+ --exp-dir $repo/exp/
108
+
109
+ ls -lh $repo/exp
110
+
111
+ ls -lh $repo/data/lang_bpe_5000/
112
+
113
+ log 'test exported onnx models'
114
+ ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
115
+ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
116
+ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
117
+ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
118
+ --tokens $repo/data/lang_bpe_5000/tokens.txt \
119
+ $repo/test_wavs/0.wav
120
+
121
+ dst=/tmp/model-2024-06-16
122
+ mkdir -p $dst
123
+
124
+ cp -v $repo/exp/*.onnx $dst
125
+ cp -v $repo/exp/*.onnx $dst
126
+ cp -v $repo/data/lang_bpe_5000/tokens.txt $dst
127
+ cp -v $repo/data/lang_bpe_5000/bpe.model $dst
128
+ rm -rf $repo
129
+ }
130
+
131
+ test_pretrained_non_streaming
132
+ test_pretrained_streaming
.github/scripts/librispeech/ASR/run.sh ADDED
@@ -0,0 +1,1644 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ cd egs/librispeech/ASR
12
+
13
+ function prepare_data() {
14
+ # We don't download the LM file since it is so large that it will
15
+ # cause OOM error for CI later.
16
+ mkdir -p download/lm
17
+ pushd download/lm
18
+ wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-lm-norm.txt.gz
19
+ wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-lexicon.txt
20
+ wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-vocab.txt
21
+ ls -lh
22
+ gunzip librispeech-lm-norm.txt.gz
23
+
24
+ ls -lh
25
+ popd
26
+
27
+ pushd download/
28
+ wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/LibriSpeech.tar.bz2
29
+ tar xf LibriSpeech.tar.bz2
30
+ rm LibriSpeech.tar.bz2
31
+
32
+ cd LibriSpeech
33
+ ln -s train-clean-100 train-clean-360
34
+ ln -s train-other-500 train-other-500
35
+ popd
36
+
37
+ mkdir -p data/manifests
38
+
39
+ lhotse prepare librispeech -j 2 -p dev-clean -p dev-other -p test-clean -p test-other -p train-clean-100 download/LibriSpeech data/manifests
40
+ ls -lh data/manifests
41
+
42
+ ./local/compute_fbank_librispeech.py --dataset "dev-clean dev-other test-clean test-other train-clean-100" --perturb-speed False
43
+ ls -lh data/fbank
44
+
45
+ ./prepare.sh --stage 5 --stop-stage 6
46
+ }
47
+
48
+ function run_diagnostics() {
49
+ ./zipformer/train.py \
50
+ --world-size 1 \
51
+ --num-epochs 1 \
52
+ --start-epoch 1 \
53
+ --use-fp16 0 \
54
+ --exp-dir zipformer/exp-small \
55
+ --causal 0 \
56
+ --num-encoder-layers 1,1,1,1,1,1 \
57
+ --feedforward-dim 64,96,96,96,96,96 \
58
+ --encoder-dim 32,64,64,64,64,64 \
59
+ --encoder-unmasked-dim 32,32,32,32,32,32 \
60
+ --base-lr 0.04 \
61
+ --full-libri 0 \
62
+ --enable-musan 0 \
63
+ --max-duration 30 \
64
+ --print-diagnostics 1
65
+ }
66
+
67
+ function test_streaming_zipformer_ctc_hlg() {
68
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18
69
+
70
+ log "Downloading pre-trained model from $repo_url"
71
+ git lfs install
72
+ git clone $repo_url
73
+ repo=$(basename $repo_url)
74
+
75
+ rm $repo/exp-ctc-rnnt-small/*.onnx
76
+ ls -lh $repo/exp-ctc-rnnt-small
77
+
78
+ # export models to onnx
79
+ ./zipformer/export-onnx-streaming-ctc.py \
80
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
81
+ --epoch 30 \
82
+ --avg 3 \
83
+ --exp-dir $repo/exp-ctc-rnnt-small \
84
+ --causal 1 \
85
+ --use-ctc 1 \
86
+ --chunk-size 16 \
87
+ --left-context-frames 128 \
88
+ \
89
+ --num-encoder-layers 2,2,2,2,2,2 \
90
+ --feedforward-dim 512,768,768,768,768,768 \
91
+ --encoder-dim 192,256,256,256,256,256 \
92
+ --encoder-unmasked-dim 192,192,192,192,192,192
93
+
94
+ ls -lh $repo/exp-ctc-rnnt-small
95
+
96
+ for wav in 0.wav 1.wav 8k.wav; do
97
+ python3 ./zipformer/onnx_pretrained_ctc_HLG_streaming.py \
98
+ --nn-model $repo/exp-ctc-rnnt-small/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx \
99
+ --words $repo/data/lang_bpe_500/words.txt \
100
+ --HLG $repo/data/lang_bpe_500/HLG.fst \
101
+ $repo/test_wavs/$wav
102
+ done
103
+
104
+ rm -rf $repo
105
+ }
106
+
107
+ function test_pruned_transducer_stateless_2022_03_12() {
108
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless-2022-03-12
109
+
110
+ log "Downloading pre-trained model from $repo_url"
111
+ git lfs install
112
+ git clone $repo_url
113
+ repo=$(basename $repo_url)
114
+
115
+ log "Display test files"
116
+ tree $repo/
117
+ ls -lh $repo/test_wavs/*.wav
118
+
119
+ for sym in 1 2 3; do
120
+ log "Greedy search with --max-sym-per-frame $sym"
121
+
122
+ ./pruned_transducer_stateless/pretrained.py \
123
+ --method greedy_search \
124
+ --max-sym-per-frame $sym \
125
+ --checkpoint $repo/exp/pretrained.pt \
126
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
127
+ $repo/test_wavs/1089-134686-0001.wav \
128
+ $repo/test_wavs/1221-135766-0001.wav \
129
+ $repo/test_wavs/1221-135766-0002.wav
130
+ done
131
+
132
+ for method in fast_beam_search modified_beam_search beam_search; do
133
+ log "$method"
134
+
135
+ ./pruned_transducer_stateless/pretrained.py \
136
+ --method $method \
137
+ --beam-size 4 \
138
+ --checkpoint $repo/exp/pretrained.pt \
139
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
140
+ $repo/test_wavs/1089-134686-0001.wav \
141
+ $repo/test_wavs/1221-135766-0001.wav \
142
+ $repo/test_wavs/1221-135766-0002.wav
143
+ done
144
+ rm -rf $repo
145
+ }
146
+
147
+ function test_pruned_transducer_stateless2_2022_04_29() {
148
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless2-2022-04-29
149
+
150
+ log "Downloading pre-trained model from $repo_url"
151
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
152
+ repo=$(basename $repo_url)
153
+
154
+ pushd $repo
155
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
156
+ git lfs pull --include "exp/pretrained-epoch-38-avg-10.pt"
157
+ popd
158
+
159
+ log "Display test files"
160
+ tree $repo/
161
+ ls -lh $repo/test_wavs/*.wav
162
+
163
+ pushd $repo/exp
164
+ ln -s pretrained-epoch-38-avg-10.pt pretrained.pt
165
+ popd
166
+
167
+ for sym in 1 2 3; do
168
+ log "Greedy search with --max-sym-per-frame $sym"
169
+
170
+ ./pruned_transducer_stateless2/pretrained.py \
171
+ --method greedy_search \
172
+ --max-sym-per-frame $sym \
173
+ --checkpoint $repo/exp/pretrained.pt \
174
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
175
+ $repo/test_wavs/1089-134686-0001.wav \
176
+ $repo/test_wavs/1221-135766-0001.wav \
177
+ $repo/test_wavs/1221-135766-0002.wav
178
+ done
179
+
180
+ for method in modified_beam_search beam_search fast_beam_search; do
181
+ log "$method"
182
+
183
+ ./pruned_transducer_stateless2/pretrained.py \
184
+ --method $method \
185
+ --beam-size 4 \
186
+ --checkpoint $repo/exp/pretrained.pt \
187
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
188
+ $repo/test_wavs/1089-134686-0001.wav \
189
+ $repo/test_wavs/1221-135766-0001.wav \
190
+ $repo/test_wavs/1221-135766-0002.wav
191
+ done
192
+ rm -rf $repo
193
+ }
194
+
195
+ function test_pruned_transducer_stateless3_2022_04_29() {
196
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-04-29
197
+
198
+ log "Downloading pre-trained model from $repo_url"
199
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
200
+ repo=$(basename $repo_url)
201
+ pushd $repo
202
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
203
+ git lfs pull --include "exp/pretrained-epoch-25-avg-6.pt"
204
+ popd
205
+
206
+ log "Display test files"
207
+ tree $repo/
208
+ ls -lh $repo/test_wavs/*.wav
209
+
210
+ pushd $repo/exp
211
+ ln -s pretrained-epoch-25-avg-6.pt pretrained.pt
212
+ popd
213
+
214
+ for sym in 1 2 3; do
215
+ log "Greedy search with --max-sym-per-frame $sym"
216
+
217
+ ./pruned_transducer_stateless3/pretrained.py \
218
+ --method greedy_search \
219
+ --max-sym-per-frame $sym \
220
+ --checkpoint $repo/exp/pretrained.pt \
221
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
222
+ $repo/test_wavs/1089-134686-0001.wav \
223
+ $repo/test_wavs/1221-135766-0001.wav \
224
+ $repo/test_wavs/1221-135766-0002.wav
225
+ done
226
+
227
+ for method in modified_beam_search beam_search fast_beam_search; do
228
+ log "$method"
229
+
230
+ ./pruned_transducer_stateless3/pretrained.py \
231
+ --method $method \
232
+ --beam-size 4 \
233
+ --checkpoint $repo/exp/pretrained.pt \
234
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
235
+ $repo/test_wavs/1089-134686-0001.wav \
236
+ $repo/test_wavs/1221-135766-0001.wav \
237
+ $repo/test_wavs/1221-135766-0002.wav
238
+ done
239
+ rm -rf $repo
240
+ }
241
+
242
+ function test_pruned_transducer_stateless5_2022_05_13() {
243
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13
244
+
245
+ log "Downloading pre-trained model from $repo_url"
246
+ git lfs install
247
+ git clone $repo_url
248
+ repo=$(basename $repo_url)
249
+
250
+ log "Display test files"
251
+ tree $repo/
252
+ ls -lh $repo/test_wavs/*.wav
253
+
254
+ pushd $repo/exp
255
+ ln -s pretrained-epoch-39-avg-7.pt pretrained.pt
256
+ popd
257
+
258
+ for sym in 1 2 3; do
259
+ log "Greedy search with --max-sym-per-frame $sym"
260
+
261
+ ./pruned_transducer_stateless5/pretrained.py \
262
+ --method greedy_search \
263
+ --max-sym-per-frame $sym \
264
+ --checkpoint $repo/exp/pretrained.pt \
265
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
266
+ --num-encoder-layers 18 \
267
+ --dim-feedforward 2048 \
268
+ --nhead 8 \
269
+ --encoder-dim 512 \
270
+ --decoder-dim 512 \
271
+ --joiner-dim 512 \
272
+ $repo/test_wavs/1089-134686-0001.wav \
273
+ $repo/test_wavs/1221-135766-0001.wav \
274
+ $repo/test_wavs/1221-135766-0002.wav
275
+ done
276
+
277
+ for method in modified_beam_search beam_search fast_beam_search; do
278
+ log "$method"
279
+
280
+ ./pruned_transducer_stateless5/pretrained.py \
281
+ --method $method \
282
+ --beam-size 4 \
283
+ --checkpoint $repo/exp/pretrained.pt \
284
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
285
+ $repo/test_wavs/1089-134686-0001.wav \
286
+ $repo/test_wavs/1221-135766-0001.wav \
287
+ $repo/test_wavs/1221-135766-0002.wav \
288
+ --num-encoder-layers 18 \
289
+ --dim-feedforward 2048 \
290
+ --nhead 8 \
291
+ --encoder-dim 512 \
292
+ --decoder-dim 512 \
293
+ --joiner-dim 512
294
+ done
295
+ rm -rf $repo
296
+ }
297
+
298
+ function test_pruned_transducer_stateless7_2022_11_11() {
299
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11
300
+
301
+ log "Downloading pre-trained model from $repo_url"
302
+ git lfs install
303
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
304
+ repo=$(basename $repo_url)
305
+
306
+ log "Display test files"
307
+ tree $repo/
308
+ ls -lh $repo/test_wavs/*.wav
309
+
310
+ pushd $repo/exp
311
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
312
+ git lfs pull --include "exp/cpu_jit.pt"
313
+ git lfs pull --include "exp/pretrained.pt"
314
+ ln -s pretrained.pt epoch-99.pt
315
+ ls -lh *.pt
316
+ popd
317
+
318
+ log "Export to torchscript model"
319
+ ./pruned_transducer_stateless7/export.py \
320
+ --exp-dir $repo/exp \
321
+ --use-averaged-model false \
322
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
323
+ --epoch 99 \
324
+ --avg 1 \
325
+ --jit 1
326
+
327
+ ls -lh $repo/exp/*.pt
328
+
329
+ log "Decode with models exported by torch.jit.script()"
330
+
331
+ ./pruned_transducer_stateless7/jit_pretrained.py \
332
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
333
+ --nn-model-filename $repo/exp/cpu_jit.pt \
334
+ $repo/test_wavs/1089-134686-0001.wav \
335
+ $repo/test_wavs/1221-135766-0001.wav \
336
+ $repo/test_wavs/1221-135766-0002.wav
337
+
338
+ for sym in 1 2 3; do
339
+ log "Greedy search with --max-sym-per-frame $sym"
340
+
341
+ ./pruned_transducer_stateless7/pretrained.py \
342
+ --method greedy_search \
343
+ --max-sym-per-frame $sym \
344
+ --checkpoint $repo/exp/pretrained.pt \
345
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
346
+ $repo/test_wavs/1089-134686-0001.wav \
347
+ $repo/test_wavs/1221-135766-0001.wav \
348
+ $repo/test_wavs/1221-135766-0002.wav
349
+ done
350
+
351
+ for method in modified_beam_search beam_search fast_beam_search; do
352
+ log "$method"
353
+
354
+ ./pruned_transducer_stateless7/pretrained.py \
355
+ --method $method \
356
+ --beam-size 4 \
357
+ --checkpoint $repo/exp/pretrained.pt \
358
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
359
+ $repo/test_wavs/1089-134686-0001.wav \
360
+ $repo/test_wavs/1221-135766-0001.wav \
361
+ $repo/test_wavs/1221-135766-0002.wav
362
+ done
363
+ rm -rf $repo
364
+ }
365
+
366
+ function test_pruned_transducer_stateless8_2022_11_14() {
367
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14
368
+
369
+ log "Downloading pre-trained model from $repo_url"
370
+ git lfs install
371
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
372
+ repo=$(basename $repo_url)
373
+
374
+ log "Display test files"
375
+ tree $repo/
376
+ ls -lh $repo/test_wavs/*.wav
377
+
378
+ pushd $repo/exp
379
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
380
+ git lfs pull --include "exp/cpu_jit.pt"
381
+ git lfs pull --include "exp/pretrained.pt"
382
+ ln -s pretrained.pt epoch-99.pt
383
+ ls -lh *.pt
384
+ popd
385
+
386
+ log "Decode with models exported by torch.jit.script()"
387
+
388
+ ./pruned_transducer_stateless8/jit_pretrained.py \
389
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
390
+ --nn-model-filename $repo/exp/cpu_jit.pt \
391
+ $repo/test_wavs/1089-134686-0001.wav \
392
+ $repo/test_wavs/1221-135766-0001.wav \
393
+ $repo/test_wavs/1221-135766-0002.wav
394
+
395
+ log "Export to torchscript model"
396
+ ./pruned_transducer_stateless8/export.py \
397
+ --exp-dir $repo/exp \
398
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
399
+ --use-averaged-model false \
400
+ --epoch 99 \
401
+ --avg 1 \
402
+ --jit 1
403
+
404
+ ls -lh $repo/exp/*.pt
405
+
406
+ log "Decode with models exported by torch.jit.script()"
407
+
408
+ ./pruned_transducer_stateless8/jit_pretrained.py \
409
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
410
+ --nn-model-filename $repo/exp/cpu_jit.pt \
411
+ $repo/test_wavs/1089-134686-0001.wav \
412
+ $repo/test_wavs/1221-135766-0001.wav \
413
+ $repo/test_wavs/1221-135766-0002.wav
414
+
415
+ for sym in 1 2 3; do
416
+ log "Greedy search with --max-sym-per-frame $sym"
417
+
418
+ ./pruned_transducer_stateless8/pretrained.py \
419
+ --method greedy_search \
420
+ --max-sym-per-frame $sym \
421
+ --checkpoint $repo/exp/pretrained.pt \
422
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
423
+ $repo/test_wavs/1089-134686-0001.wav \
424
+ $repo/test_wavs/1221-135766-0001.wav \
425
+ $repo/test_wavs/1221-135766-0002.wav
426
+ done
427
+
428
+ for method in modified_beam_search beam_search fast_beam_search; do
429
+ log "$method"
430
+
431
+ ./pruned_transducer_stateless8/pretrained.py \
432
+ --method $method \
433
+ --beam-size 4 \
434
+ --checkpoint $repo/exp/pretrained.pt \
435
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
436
+ $repo/test_wavs/1089-134686-0001.wav \
437
+ $repo/test_wavs/1221-135766-0001.wav \
438
+ $repo/test_wavs/1221-135766-0002.wav
439
+ done
440
+ rm -rf $repo
441
+ }
442
+
443
+ function test_pruned_transducer_stateless7_ctc_2022_12_01() {
444
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-ctc-2022-12-01
445
+
446
+ log "Downloading pre-trained model from $repo_url"
447
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
448
+ repo=$(basename $repo_url)
449
+
450
+ log "Display test files"
451
+ tree $repo/
452
+ ls -lh $repo/test_wavs/*.wav
453
+
454
+ pushd $repo/exp
455
+ git lfs pull --include "data/lang_bpe_500/HLG.pt"
456
+ git lfs pull --include "data/lang_bpe_500/L.pt"
457
+ git lfs pull --include "data/lang_bpe_500/LG.pt"
458
+ git lfs pull --include "data/lang_bpe_500/Linv.pt"
459
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
460
+ git lfs pull --include "data/lm/G_4_gram.pt"
461
+ git lfs pull --include "exp/cpu_jit.pt"
462
+ git lfs pull --include "exp/pretrained.pt"
463
+ ln -s pretrained.pt epoch-99.pt
464
+ ls -lh *.pt
465
+ popd
466
+
467
+ log "Export to torchscript model"
468
+ ./pruned_transducer_stateless7_ctc/export.py \
469
+ --exp-dir $repo/exp \
470
+ --use-averaged-model false \
471
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
472
+ --epoch 99 \
473
+ --avg 1 \
474
+ --jit 1
475
+
476
+ ls -lh $repo/exp/*.pt
477
+
478
+ log "Decode with models exported by torch.jit.script()"
479
+
480
+ ./pruned_transducer_stateless7_ctc/jit_pretrained.py \
481
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
482
+ --nn-model-filename $repo/exp/cpu_jit.pt \
483
+ $repo/test_wavs/1089-134686-0001.wav \
484
+ $repo/test_wavs/1221-135766-0001.wav \
485
+ $repo/test_wavs/1221-135766-0002.wav
486
+
487
+ for m in ctc-decoding 1best; do
488
+ ./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
489
+ --model-filename $repo/exp/cpu_jit.pt \
490
+ --words-file $repo/data/lang_bpe_500/words.txt \
491
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
492
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
493
+ --G $repo/data/lm/G_4_gram.pt \
494
+ --method $m \
495
+ --sample-rate 16000 \
496
+ $repo/test_wavs/1089-134686-0001.wav \
497
+ $repo/test_wavs/1221-135766-0001.wav \
498
+ $repo/test_wavs/1221-135766-0002.wav
499
+ done
500
+
501
+ for sym in 1 2 3; do
502
+ log "Greedy search with --max-sym-per-frame $sym"
503
+
504
+ ./pruned_transducer_stateless7_ctc/pretrained.py \
505
+ --method greedy_search \
506
+ --max-sym-per-frame $sym \
507
+ --checkpoint $repo/exp/pretrained.pt \
508
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
509
+ $repo/test_wavs/1089-134686-0001.wav \
510
+ $repo/test_wavs/1221-135766-0001.wav \
511
+ $repo/test_wavs/1221-135766-0002.wav
512
+ done
513
+
514
+ for method in modified_beam_search beam_search fast_beam_search; do
515
+ log "$method"
516
+
517
+ ./pruned_transducer_stateless7_ctc/pretrained.py \
518
+ --method $method \
519
+ --beam-size 4 \
520
+ --checkpoint $repo/exp/pretrained.pt \
521
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
522
+ $repo/test_wavs/1089-134686-0001.wav \
523
+ $repo/test_wavs/1221-135766-0001.wav \
524
+ $repo/test_wavs/1221-135766-0002.wav
525
+ done
526
+
527
+ for m in ctc-decoding 1best; do
528
+ ./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
529
+ --checkpoint $repo/exp/pretrained.pt \
530
+ --words-file $repo/data/lang_bpe_500/words.txt \
531
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
532
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
533
+ --G $repo/data/lm/G_4_gram.pt \
534
+ --method $m \
535
+ --sample-rate 16000 \
536
+ $repo/test_wavs/1089-134686-0001.wav \
537
+ $repo/test_wavs/1221-135766-0001.wav \
538
+ $repo/test_wavs/1221-135766-0002.wav
539
+ done
540
+ rm -rf $repo
541
+ }
542
+
543
+ function test_zipformer_mmi_2022_12_08() {
544
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-mmi-2022-12-08
545
+
546
+ log "Downloading pre-trained model from $repo_url"
547
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
548
+ repo=$(basename $repo_url)
549
+
550
+ log "Display test files"
551
+ tree $repo/
552
+ ls -lh $repo/test_wavs/*.wav
553
+
554
+ pushd $repo/exp
555
+ git lfs pull --include "data/lang_bpe_500/3gram.pt"
556
+ git lfs pull --include "data/lang_bpe_500/4gram.pt"
557
+ git lfs pull --include "data/lang_bpe_500/L.pt"
558
+ git lfs pull --include "data/lang_bpe_500/LG.pt"
559
+ git lfs pull --include "data/lang_bpe_500/Linv.pt"
560
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
561
+ git lfs pull --include "exp/cpu_jit.pt"
562
+ git lfs pull --include "exp/pretrained.pt"
563
+ ln -s pretrained.pt epoch-99.pt
564
+ ls -lh *.pt
565
+ popd
566
+
567
+ log "Export to torchscript model"
568
+ ./zipformer_mmi/export.py \
569
+ --exp-dir $repo/exp \
570
+ --use-averaged-model false \
571
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
572
+ --epoch 99 \
573
+ --avg 1 \
574
+ --jit 1
575
+
576
+ ls -lh $repo/exp/*.pt
577
+
578
+ log "Decode with models exported by torch.jit.script()"
579
+
580
+ ./zipformer_mmi/jit_pretrained.py \
581
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
582
+ --nn-model-filename $repo/exp/cpu_jit.pt \
583
+ --lang-dir $repo/data/lang_bpe_500 \
584
+ $repo/test_wavs/1089-134686-0001.wav \
585
+ $repo/test_wavs/1221-135766-0001.wav \
586
+ $repo/test_wavs/1221-135766-0002.wav
587
+
588
+ for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do
589
+ log "$method"
590
+
591
+ ./zipformer_mmi/pretrained.py \
592
+ --method $method \
593
+ --checkpoint $repo/exp/pretrained.pt \
594
+ --lang-dir $repo/data/lang_bpe_500 \
595
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
596
+ $repo/test_wavs/1089-134686-0001.wav \
597
+ $repo/test_wavs/1221-135766-0001.wav \
598
+ $repo/test_wavs/1221-135766-0002.wav
599
+ done
600
+ rm -rf $repo
601
+ }
602
+
603
+ function test_pruned_transducer_stateless7_streaming_2022_12_29() {
604
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
605
+
606
+ log "Downloading pre-trained model from $repo_url"
607
+ git lfs install
608
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
609
+ repo=$(basename $repo_url)
610
+
611
+ log "Display test files"
612
+ tree $repo/
613
+ ls -lh $repo/test_wavs/*.wav
614
+
615
+ pushd $repo
616
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
617
+ git lfs pull --include "exp/cpu_jit.pt"
618
+ git lfs pull --include "exp/pretrained.pt"
619
+ git lfs pull --include "exp/encoder_jit_trace.pt"
620
+ git lfs pull --include "exp/decoder_jit_trace.pt"
621
+ git lfs pull --include "exp/joiner_jit_trace.pt"
622
+ cd exp
623
+ ln -s pretrained.pt epoch-99.pt
624
+ ls -lh *.pt
625
+ popd
626
+
627
+ log "Export to torchscript model"
628
+ ./pruned_transducer_stateless7_streaming/export.py \
629
+ --exp-dir $repo/exp \
630
+ --use-averaged-model false \
631
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
632
+ --decode-chunk-len 32 \
633
+ --epoch 99 \
634
+ --avg 1 \
635
+ --jit 1
636
+
637
+ ls -lh $repo/exp/*.pt
638
+
639
+ log "Decode with models exported by torch.jit.script()"
640
+
641
+ ./pruned_transducer_stateless7_streaming/jit_pretrained.py \
642
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
643
+ --nn-model-filename $repo/exp/cpu_jit.pt \
644
+ --decode-chunk-len 32 \
645
+ $repo/test_wavs/1089-134686-0001.wav \
646
+ $repo/test_wavs/1221-135766-0001.wav \
647
+ $repo/test_wavs/1221-135766-0002.wav
648
+
649
+ log "Export to torchscript model by torch.jit.trace()"
650
+ ./pruned_transducer_stateless7_streaming/jit_trace_export.py \
651
+ --exp-dir $repo/exp \
652
+ --use-averaged-model false \
653
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
654
+ --decode-chunk-len 32 \
655
+ --epoch 99 \
656
+ --avg 1
657
+
658
+ log "Decode with models exported by torch.jit.trace()"
659
+
660
+ ./pruned_transducer_stateless7_streaming/jit_trace_pretrained.py \
661
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
662
+ --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
663
+ --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
664
+ --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
665
+ --decode-chunk-len 32 \
666
+ $repo/test_wavs/1089-134686-0001.wav
667
+
668
+ for sym in 1 2 3; do
669
+ log "Greedy search with --max-sym-per-frame $sym"
670
+
671
+ ./pruned_transducer_stateless7_streaming/pretrained.py \
672
+ --method greedy_search \
673
+ --max-sym-per-frame $sym \
674
+ --checkpoint $repo/exp/pretrained.pt \
675
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
676
+ --decode-chunk-len 32 \
677
+ $repo/test_wavs/1089-134686-0001.wav \
678
+ $repo/test_wavs/1221-135766-0001.wav \
679
+ $repo/test_wavs/1221-135766-0002.wav
680
+ done
681
+
682
+ for method in modified_beam_search beam_search fast_beam_search; do
683
+ log "$method"
684
+
685
+ ./pruned_transducer_stateless7_streaming/pretrained.py \
686
+ --method $method \
687
+ --beam-size 4 \
688
+ --checkpoint $repo/exp/pretrained.pt \
689
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
690
+ --decode-chunk-len 32 \
691
+ $repo/test_wavs/1089-134686-0001.wav \
692
+ $repo/test_wavs/1221-135766-0001.wav \
693
+ $repo/test_wavs/1221-135766-0002.wav
694
+ done
695
+
696
+ rm -rf $repo
697
+ }
698
+
699
+ function test_pruned_transducer_stateless7_ctc_bs_2023_01_29() {
700
+ repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29
701
+
702
+ log "Downloading pre-trained model from $repo_url"
703
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
704
+ repo=$(basename $repo_url)
705
+
706
+ log "Display test files"
707
+ tree $repo/
708
+ ls -lh $repo/test_wavs/*.wav
709
+
710
+ pushd $repo/exp
711
+ git lfs pull --include "data/lang_bpe_500/L.pt"
712
+ git lfs pull --include "data/lang_bpe_500/LG.pt"
713
+ git lfs pull --include "data/lang_bpe_500/HLG.pt"
714
+ git lfs pull --include "data/lang_bpe_500/Linv.pt"
715
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
716
+ git lfs pull --include "exp/cpu_jit.pt"
717
+ git lfs pull --include "exp/pretrained.pt"
718
+ ln -s pretrained.pt epoch-99.pt
719
+ ls -lh *.pt
720
+ popd
721
+
722
+ log "Export to torchscript model"
723
+ ./pruned_transducer_stateless7_ctc_bs/export.py \
724
+ --exp-dir $repo/exp \
725
+ --use-averaged-model false \
726
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
727
+ --epoch 99 \
728
+ --avg 1 \
729
+ --jit 1
730
+
731
+ ls -lh $repo/exp/*.pt
732
+
733
+ log "Decode with models exported by torch.jit.script()"
734
+
735
+ ./pruned_transducer_stateless7_ctc_bs/jit_pretrained.py \
736
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
737
+ --nn-model-filename $repo/exp/cpu_jit.pt \
738
+ $repo/test_wavs/1089-134686-0001.wav \
739
+ $repo/test_wavs/1221-135766-0001.wav \
740
+ $repo/test_wavs/1221-135766-0002.wav
741
+
742
+ for m in ctc-decoding 1best; do
743
+ ./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \
744
+ --model-filename $repo/exp/cpu_jit.pt \
745
+ --words-file $repo/data/lang_bpe_500/words.txt \
746
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
747
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
748
+ --method $m \
749
+ --sample-rate 16000 \
750
+ $repo/test_wavs/1089-134686-0001.wav \
751
+ $repo/test_wavs/1221-135766-0001.wav \
752
+ $repo/test_wavs/1221-135766-0002.wav
753
+ done
754
+
755
+ for sym in 1 2 3; do
756
+ log "Greedy search with --max-sym-per-frame $sym"
757
+
758
+ ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
759
+ --method greedy_search \
760
+ --max-sym-per-frame $sym \
761
+ --checkpoint $repo/exp/pretrained.pt \
762
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
763
+ $repo/test_wavs/1089-134686-0001.wav \
764
+ $repo/test_wavs/1221-135766-0001.wav \
765
+ $repo/test_wavs/1221-135766-0002.wav
766
+ done
767
+
768
+ for method in modified_beam_search beam_search fast_beam_search; do
769
+ log "$method"
770
+
771
+ ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
772
+ --method $method \
773
+ --beam-size 4 \
774
+ --checkpoint $repo/exp/pretrained.pt \
775
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
776
+ $repo/test_wavs/1089-134686-0001.wav \
777
+ $repo/test_wavs/1221-135766-0001.wav \
778
+ $repo/test_wavs/1221-135766-0002.wav
779
+ done
780
+
781
+ for m in ctc-decoding 1best; do
782
+ ./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
783
+ --checkpoint $repo/exp/pretrained.pt \
784
+ --words-file $repo/data/lang_bpe_500/words.txt \
785
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
786
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
787
+ --method $m \
788
+ --sample-rate 16000 \
789
+ $repo/test_wavs/1089-134686-0001.wav \
790
+ $repo/test_wavs/1221-135766-0001.wav \
791
+ $repo/test_wavs/1221-135766-0002.wav
792
+ done
793
+ rm -rf $repo
794
+ }
795
+
796
+ function test_conformer_ctc3_2022_11_27() {
797
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conformer-ctc3-2022-11-27
798
+
799
+ log "Downloading pre-trained model from $repo_url"
800
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
801
+ repo=$(basename $repo_url)
802
+
803
+ log "Display test files"
804
+ tree $repo/
805
+ ls -lh $repo/test_wavs/*.wav
806
+
807
+ pushd $repo/exp
808
+ git lfs pull --include "data/lang_bpe_500/HLG.pt"
809
+ git lfs pull --include "data/lang_bpe_500/L.pt"
810
+ git lfs pull --include "data/lang_bpe_500/LG.pt"
811
+ git lfs pull --include "data/lang_bpe_500/Linv.pt"
812
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
813
+ git lfs pull --include "data/lm/G_4_gram.pt"
814
+ git lfs pull --include "exp/jit_trace.pt"
815
+ git lfs pull --include "exp/pretrained.pt"
816
+ ln -s pretrained.pt epoch-99.pt
817
+ ls -lh *.pt
818
+ popd
819
+
820
+ log "Decode with models exported by torch.jit.trace()"
821
+
822
+ for m in ctc-decoding 1best; do
823
+ ./conformer_ctc3/jit_pretrained.py \
824
+ --model-filename $repo/exp/jit_trace.pt \
825
+ --words-file $repo/data/lang_bpe_500/words.txt \
826
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
827
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
828
+ --G $repo/data/lm/G_4_gram.pt \
829
+ --method $m \
830
+ --sample-rate 16000 \
831
+ $repo/test_wavs/1089-134686-0001.wav \
832
+ $repo/test_wavs/1221-135766-0001.wav \
833
+ $repo/test_wavs/1221-135766-0002.wav
834
+ done
835
+
836
+ log "Export to torchscript model"
837
+
838
+ ./conformer_ctc3/export.py \
839
+ --exp-dir $repo/exp \
840
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
841
+ --jit-trace 1 \
842
+ --epoch 99 \
843
+ --avg 1 \
844
+ --use-averaged-model 0
845
+
846
+ ls -lh $repo/exp/*.pt
847
+
848
+ log "Decode with models exported by torch.jit.trace()"
849
+
850
+ for m in ctc-decoding 1best; do
851
+ ./conformer_ctc3/jit_pretrained.py \
852
+ --model-filename $repo/exp/jit_trace.pt \
853
+ --words-file $repo/data/lang_bpe_500/words.txt \
854
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
855
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
856
+ --G $repo/data/lm/G_4_gram.pt \
857
+ --method $m \
858
+ --sample-rate 16000 \
859
+ $repo/test_wavs/1089-134686-0001.wav \
860
+ $repo/test_wavs/1221-135766-0001.wav \
861
+ $repo/test_wavs/1221-135766-0002.wav
862
+ done
863
+
864
+ for m in ctc-decoding 1best; do
865
+ ./conformer_ctc3/pretrained.py \
866
+ --checkpoint $repo/exp/pretrained.pt \
867
+ --words-file $repo/data/lang_bpe_500/words.txt \
868
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
869
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
870
+ --G $repo/data/lm/G_4_gram.pt \
871
+ --method $m \
872
+ --sample-rate 16000 \
873
+ $repo/test_wavs/1089-134686-0001.wav \
874
+ $repo/test_wavs/1221-135766-0001.wav \
875
+ $repo/test_wavs/1221-135766-0002.wav
876
+ done
877
+ rm -rf $repo
878
+ }
879
+
880
+ function test_lstm_transducer_stateless2_2022_09_03() {
881
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
882
+
883
+ log "Downloading pre-trained model from $repo_url"
884
+ git lfs install
885
+ git clone $repo_url
886
+ repo=$(basename $repo_url)
887
+ abs_repo=$(realpath $repo)
888
+
889
+ log "Display test files"
890
+ tree $repo/
891
+ ls -lh $repo/test_wavs/*.wav
892
+
893
+ pushd $repo/exp
894
+ ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
895
+ ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
896
+ popd
897
+
898
+ log "Test exporting with torch.jit.trace()"
899
+
900
+ ./lstm_transducer_stateless2/export.py \
901
+ --exp-dir $repo/exp \
902
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
903
+ --epoch 99 \
904
+ --avg 1 \
905
+ --use-averaged-model 0 \
906
+ --jit-trace 1
907
+
908
+ log "Decode with models exported by torch.jit.trace()"
909
+
910
+ ./lstm_transducer_stateless2/jit_pretrained.py \
911
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
912
+ --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
913
+ --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
914
+ --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
915
+ $repo/test_wavs/1089-134686-0001.wav \
916
+ $repo/test_wavs/1221-135766-0001.wav \
917
+ $repo/test_wavs/1221-135766-0002.wav
918
+
919
+ for sym in 1 2 3; do
920
+ log "Greedy search with --max-sym-per-frame $sym"
921
+
922
+ ./lstm_transducer_stateless2/pretrained.py \
923
+ --method greedy_search \
924
+ --max-sym-per-frame $sym \
925
+ --checkpoint $repo/exp/pretrained.pt \
926
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
927
+ $repo/test_wavs/1089-134686-0001.wav \
928
+ $repo/test_wavs/1221-135766-0001.wav \
929
+ $repo/test_wavs/1221-135766-0002.wav
930
+ done
931
+
932
+ for method in modified_beam_search beam_search fast_beam_search; do
933
+ log "$method"
934
+
935
+ ./lstm_transducer_stateless2/pretrained.py \
936
+ --method $method \
937
+ --beam-size 4 \
938
+ --checkpoint $repo/exp/pretrained.pt \
939
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
940
+ $repo/test_wavs/1089-134686-0001.wav \
941
+ $repo/test_wavs/1221-135766-0001.wav \
942
+ $repo/test_wavs/1221-135766-0002.wav
943
+ done
944
+ rm -rf $repo
945
+ }
946
+
947
+ function test_pruned_transducer_stateless3_2022_05_13() {
948
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
949
+
950
+ log "Downloading pre-trained model from $repo_url"
951
+ git lfs install
952
+ git clone $repo_url
953
+ repo=$(basename $repo_url)
954
+
955
+ log "Display test files"
956
+ tree $repo/
957
+ ls -lh $repo/test_wavs/*.wav
958
+
959
+ pushd $repo/exp
960
+ ln -s pretrained-iter-1224000-avg-14.pt pretrained.pt
961
+ ln -s pretrained-iter-1224000-avg-14.pt epoch-99.pt
962
+ popd
963
+
964
+
965
+ log "Export to torchscript model"
966
+ ./pruned_transducer_stateless3/export.py \
967
+ --exp-dir $repo/exp \
968
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
969
+ --epoch 99 \
970
+ --avg 1 \
971
+ --jit 1
972
+
973
+ ./pruned_transducer_stateless3/export.py \
974
+ --exp-dir $repo/exp \
975
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
976
+ --epoch 99 \
977
+ --avg 1 \
978
+ --jit-trace 1
979
+
980
+ ls -lh $repo/exp/*.pt
981
+
982
+ log "Decode with models exported by torch.jit.trace()"
983
+
984
+ ./pruned_transducer_stateless3/jit_pretrained.py \
985
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
986
+ --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
987
+ --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
988
+ --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
989
+ $repo/test_wavs/1089-134686-0001.wav \
990
+ $repo/test_wavs/1221-135766-0001.wav \
991
+ $repo/test_wavs/1221-135766-0002.wav
992
+
993
+ log "Decode with models exported by torch.jit.script()"
994
+
995
+ ./pruned_transducer_stateless3/jit_pretrained.py \
996
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
997
+ --encoder-model-filename $repo/exp/encoder_jit_script.pt \
998
+ --decoder-model-filename $repo/exp/decoder_jit_script.pt \
999
+ --joiner-model-filename $repo/exp/joiner_jit_script.pt \
1000
+ $repo/test_wavs/1089-134686-0001.wav \
1001
+ $repo/test_wavs/1221-135766-0001.wav \
1002
+ $repo/test_wavs/1221-135766-0002.wav
1003
+
1004
+
1005
+ for sym in 1 2 3; do
1006
+ log "Greedy search with --max-sym-per-frame $sym"
1007
+
1008
+ ./pruned_transducer_stateless3/pretrained.py \
1009
+ --method greedy_search \
1010
+ --max-sym-per-frame $sym \
1011
+ --checkpoint $repo/exp/pretrained.pt \
1012
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1013
+ $repo/test_wavs/1089-134686-0001.wav \
1014
+ $repo/test_wavs/1221-135766-0001.wav \
1015
+ $repo/test_wavs/1221-135766-0002.wav
1016
+ done
1017
+
1018
+ for method in modified_beam_search beam_search fast_beam_search; do
1019
+ log "$method"
1020
+
1021
+ ./pruned_transducer_stateless3/pretrained.py \
1022
+ --method $method \
1023
+ --beam-size 4 \
1024
+ --checkpoint $repo/exp/pretrained.pt \
1025
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1026
+ $repo/test_wavs/1089-134686-0001.wav \
1027
+ $repo/test_wavs/1221-135766-0001.wav \
1028
+ $repo/test_wavs/1221-135766-0002.wav
1029
+ done
1030
+
1031
+ rm -rf $repo
1032
+ }
1033
+
1034
+ function test_streaming_pruned_transducer_stateless2_20220625() {
1035
+ repo_url=https://huggingface.co/pkufool/icefall_librispeech_streaming_pruned_transducer_stateless2_20220625
1036
+
1037
+ log "Downloading pre-trained model from $repo_url"
1038
+ git lfs install
1039
+ git clone $repo_url
1040
+ repo=$(basename $repo_url)
1041
+
1042
+ log "Display test files"
1043
+ tree $repo/
1044
+ ls -lh $repo/test_wavs/*.wav
1045
+
1046
+ pushd $repo/exp
1047
+ ln -s pretrained-epoch-24-avg-10.pt pretrained.pt
1048
+ popd
1049
+
1050
+ for sym in 1 2 3; do
1051
+ log "Greedy search with --max-sym-per-frame $sym"
1052
+
1053
+ ./pruned_transducer_stateless2/pretrained.py \
1054
+ --method greedy_search \
1055
+ --max-sym-per-frame $sym \
1056
+ --checkpoint $repo/exp/pretrained.pt \
1057
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1058
+ --simulate-streaming 1 \
1059
+ --causal-convolution 1 \
1060
+ $repo/test_wavs/1089-134686-0001.wav \
1061
+ $repo/test_wavs/1221-135766-0001.wav \
1062
+ $repo/test_wavs/1221-135766-0002.wav
1063
+ done
1064
+
1065
+ for method in modified_beam_search beam_search fast_beam_search; do
1066
+ log "$method"
1067
+
1068
+ ./pruned_transducer_stateless2/pretrained.py \
1069
+ --method $method \
1070
+ --beam-size 4 \
1071
+ --checkpoint $repo/exp/pretrained.pt \
1072
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1073
+ --simulate-streaming 1 \
1074
+ --causal-convolution 1 \
1075
+ $repo/test_wavs/1089-134686-0001.wav \
1076
+ $repo/test_wavs/1221-135766-0001.wav \
1077
+ $repo/test_wavs/1221-135766-0002.wav
1078
+ done
1079
+ rm -rf $repo
1080
+ }
1081
+
1082
+ function test_streaming_zipformer_2023_05_17() {
1083
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17
1084
+
1085
+ log "Downloading pre-trained model from $repo_url"
1086
+ git lfs install
1087
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
1088
+ repo=$(basename $repo_url)
1089
+
1090
+ log "Display test files"
1091
+ tree $repo/
1092
+ ls -lh $repo/test_wavs/*.wav
1093
+
1094
+ pushd $repo/exp
1095
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
1096
+ git lfs pull --include "data/lang_bpe_500/tokens.txt"
1097
+ git lfs pull --include "exp/jit_script_chunk_16_left_128.pt"
1098
+ git lfs pull --include "exp/pretrained.pt"
1099
+ ln -s pretrained.pt epoch-99.pt
1100
+ ls -lh *.pt
1101
+ popd
1102
+
1103
+ log "Export to torchscript model"
1104
+ ./zipformer/export.py \
1105
+ --exp-dir $repo/exp \
1106
+ --use-averaged-model false \
1107
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1108
+ --causal 1 \
1109
+ --chunk-size 16 \
1110
+ --left-context-frames 128 \
1111
+ --epoch 99 \
1112
+ --avg 1 \
1113
+ --jit 1
1114
+
1115
+ ls -lh $repo/exp/*.pt
1116
+
1117
+ log "Decode with models exported by torch.jit.script()"
1118
+
1119
+ ./zipformer/jit_pretrained_streaming.py \
1120
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1121
+ --nn-model-filename $repo/exp/jit_script_chunk_16_left_128.pt \
1122
+ $repo/test_wavs/1089-134686-0001.wav
1123
+
1124
+ for method in greedy_search modified_beam_search fast_beam_search; do
1125
+ log "$method"
1126
+
1127
+ ./zipformer/pretrained.py \
1128
+ --causal 1 \
1129
+ --chunk-size 16 \
1130
+ --left-context-frames 128 \
1131
+ --method $method \
1132
+ --beam-size 4 \
1133
+ --checkpoint $repo/exp/pretrained.pt \
1134
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1135
+ $repo/test_wavs/1089-134686-0001.wav \
1136
+ $repo/test_wavs/1221-135766-0001.wav \
1137
+ $repo/test_wavs/1221-135766-0002.wav
1138
+ done
1139
+ rm -rf $repo
1140
+ }
1141
+
1142
+ function test_zipformer_2023_05_18() {
1143
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
1144
+
1145
+ log "Downloading pre-trained model from $repo_url"
1146
+ git lfs install
1147
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
1148
+ repo=$(basename $repo_url)
1149
+
1150
+ log "Display test files"
1151
+ tree $repo/
1152
+ ls -lh $repo/test_wavs/*.wav
1153
+
1154
+ pushd $repo/exp
1155
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
1156
+ git lfs pull --include "data/lang_bpe_500/tokens.txt"
1157
+ git lfs pull --include "exp/jit_script.pt"
1158
+ git lfs pull --include "exp/pretrained.pt"
1159
+ ln -s pretrained.pt epoch-99.pt
1160
+ ls -lh *.pt
1161
+ popd
1162
+
1163
+ log "Export to torchscript model"
1164
+ ./zipformer/export.py \
1165
+ --exp-dir $repo/exp \
1166
+ --use-averaged-model false \
1167
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1168
+ --epoch 99 \
1169
+ --avg 1 \
1170
+ --jit 1
1171
+
1172
+ ls -lh $repo/exp/*.pt
1173
+
1174
+ log "Decode with models exported by torch.jit.script()"
1175
+
1176
+ ./zipformer/jit_pretrained.py \
1177
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1178
+ --nn-model-filename $repo/exp/jit_script.pt \
1179
+ $repo/test_wavs/1089-134686-0001.wav \
1180
+ $repo/test_wavs/1221-135766-0001.wav \
1181
+ $repo/test_wavs/1221-135766-0002.wav
1182
+
1183
+ for method in greedy_search modified_beam_search fast_beam_search; do
1184
+ log "$method"
1185
+
1186
+ ./zipformer/pretrained.py \
1187
+ --method $method \
1188
+ --beam-size 4 \
1189
+ --checkpoint $repo/exp/pretrained.pt \
1190
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1191
+ $repo/test_wavs/1089-134686-0001.wav \
1192
+ $repo/test_wavs/1221-135766-0001.wav \
1193
+ $repo/test_wavs/1221-135766-0002.wav
1194
+ done
1195
+ rm -rf $repo
1196
+ }
1197
+
1198
+ function test_transducer_stateless2_torchaudio_2022_04_19() {
1199
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless2-torchaudio-2022-04-19
1200
+
1201
+ log "Downloading pre-trained model from $repo_url"
1202
+ git lfs install
1203
+ git clone $repo_url
1204
+ repo=$(basename $repo_url)
1205
+
1206
+ log "Display test files"
1207
+ tree $repo/
1208
+ ls -lh $repo/test_wavs/*.wav
1209
+
1210
+ for sym in 1 2 3; do
1211
+ log "Greedy search with --max-sym-per-frame $sym"
1212
+
1213
+ ./transducer_stateless2/pretrained.py \
1214
+ --method greedy_search \
1215
+ --max-sym-per-frame $sym \
1216
+ --checkpoint $repo/exp/pretrained.pt \
1217
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1218
+ $repo/test_wavs/1089-134686-0001.wav \
1219
+ $repo/test_wavs/1221-135766-0001.wav \
1220
+ $repo/test_wavs/1221-135766-0002.wav
1221
+ done
1222
+
1223
+ for method in fast_beam_search modified_beam_search beam_search; do
1224
+ log "$method"
1225
+
1226
+ ./transducer_stateless2/pretrained.py \
1227
+ --method $method \
1228
+ --beam-size 4 \
1229
+ --checkpoint $repo/exp/pretrained.pt \
1230
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1231
+ $repo/test_wavs/1089-134686-0001.wav \
1232
+ $repo/test_wavs/1221-135766-0001.wav \
1233
+ $repo/test_wavs/1221-135766-0002.wav
1234
+ done
1235
+ rm -rf $repo
1236
+ }
1237
+
1238
+ function test_zipformer_transducer_ctc_2023_06_13() {
1239
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-transducer-ctc-2023-06-13
1240
+
1241
+ log "Downloading pre-trained model from $repo_url"
1242
+ git lfs install
1243
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
1244
+ repo=$(basename $repo_url)
1245
+
1246
+ log "Display test files"
1247
+ tree $repo/
1248
+ ls -lh $repo/test_wavs/*.wav
1249
+
1250
+ pushd $repo/exp
1251
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
1252
+ git lfs pull --include "data/lang_bpe_500/tokens.txt"
1253
+ git lfs pull --include "data/lang_bpe_500/HLG.pt"
1254
+ git lfs pull --include "data/lang_bpe_500/L.pt"
1255
+ git lfs pull --include "data/lang_bpe_500/LG.pt"
1256
+ git lfs pull --include "data/lang_bpe_500/Linv.pt"
1257
+ git lfs pull --include "data/lm/G_4_gram.pt"
1258
+ git lfs pull --include "exp/jit_script.pt"
1259
+ git lfs pull --include "exp/pretrained.pt"
1260
+ ln -s pretrained.pt epoch-99.pt
1261
+ ls -lh *.pt
1262
+ popd
1263
+
1264
+ log "Export to torchscript model"
1265
+ ./zipformer/export.py \
1266
+ --exp-dir $repo/exp \
1267
+ --use-transducer 1 \
1268
+ --use-ctc 1 \
1269
+ --use-averaged-model false \
1270
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1271
+ --epoch 99 \
1272
+ --avg 1 \
1273
+ --jit 1
1274
+
1275
+ ls -lh $repo/exp/*.pt
1276
+
1277
+ log "Decode with models exported by torch.jit.script()"
1278
+
1279
+ for method in ctc-decoding 1best; do
1280
+ ./zipformer/jit_pretrained_ctc.py \
1281
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1282
+ --model-filename $repo/exp/jit_script.pt \
1283
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
1284
+ --words-file $repo/data/lang_bpe_500/words.txt \
1285
+ --G $repo/data/lm/G_4_gram.pt \
1286
+ --method $method \
1287
+ --sample-rate 16000 \
1288
+ $repo/test_wavs/1089-134686-0001.wav \
1289
+ $repo/test_wavs/1221-135766-0001.wav \
1290
+ $repo/test_wavs/1221-135766-0002.wav
1291
+ done
1292
+
1293
+ for method in ctc-decoding 1best; do
1294
+ log "$method"
1295
+
1296
+ ./zipformer/pretrained_ctc.py \
1297
+ --use-transducer 1 \
1298
+ --use-ctc 1 \
1299
+ --method $method \
1300
+ --checkpoint $repo/exp/pretrained.pt \
1301
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1302
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
1303
+ --G $repo/data/lm/G_4_gram.pt \
1304
+ --words-file $repo/data/lang_bpe_500/words.txt \
1305
+ --sample-rate 16000 \
1306
+ $repo/test_wavs/1089-134686-0001.wav \
1307
+ $repo/test_wavs/1221-135766-0001.wav \
1308
+ $repo/test_wavs/1221-135766-0002.wav
1309
+ done
1310
+ rm -rf $repo
1311
+ }
1312
+
1313
+ function test_100h_transducer_stateless_multi_datasets_bpe_500_2022_02_21() {
1314
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-100h-transducer-stateless-multi-datasets-bpe-500-2022-02-21
1315
+
1316
+ log "Downloading pre-trained model from $repo_url"
1317
+ git lfs install
1318
+ git clone $repo_url
1319
+ repo=$(basename $repo_url)
1320
+
1321
+ log "Display test files"
1322
+ tree $repo/
1323
+ ls -lh $repo/test_wavs/*.wav
1324
+
1325
+ for sym in 1 2 3; do
1326
+ log "Greedy search with --max-sym-per-frame $sym"
1327
+
1328
+ ./transducer_stateless_multi_datasets/pretrained.py \
1329
+ --method greedy_search \
1330
+ --max-sym-per-frame $sym \
1331
+ --checkpoint $repo/exp/pretrained.pt \
1332
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1333
+ $repo/test_wavs/1089-134686-0001.wav \
1334
+ $repo/test_wavs/1221-135766-0001.wav \
1335
+ $repo/test_wavs/1221-135766-0002.wav
1336
+ done
1337
+
1338
+ for method in modified_beam_search beam_search fast_beam_search; do
1339
+ log "$method"
1340
+
1341
+ ./transducer_stateless_multi_datasets/pretrained.py \
1342
+ --method $method \
1343
+ --beam-size 4 \
1344
+ --checkpoint $repo/exp/pretrained.pt \
1345
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1346
+ $repo/test_wavs/1089-134686-0001.wav \
1347
+ $repo/test_wavs/1221-135766-0001.wav \
1348
+ $repo/test_wavs/1221-135766-0002.wav
1349
+ done
1350
+ rm -rf $repo
1351
+ }
1352
+
1353
+ function test_transducer_stateless_multi_datasets_bpe_500_2022_03_01() {
1354
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-multi-datasets-bpe-500-2022-03-01
1355
+
1356
+ log "Downloading pre-trained model from $repo_url"
1357
+ git lfs install
1358
+ git clone $repo_url
1359
+ repo=$(basename $repo_url)
1360
+
1361
+ log "Display test files"
1362
+ tree $repo/
1363
+ ls -lh $repo/test_wavs/*.wav
1364
+
1365
+ for sym in 1 2 3; do
1366
+ log "Greedy search with --max-sym-per-frame $sym"
1367
+
1368
+ ./transducer_stateless_multi_datasets/pretrained.py \
1369
+ --method greedy_search \
1370
+ --max-sym-per-frame $sym \
1371
+ --checkpoint $repo/exp/pretrained.pt \
1372
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1373
+ $repo/test_wavs/1089-134686-0001.wav \
1374
+ $repo/test_wavs/1221-135766-0001.wav \
1375
+ $repo/test_wavs/1221-135766-0002.wav
1376
+ done
1377
+
1378
+ for method in modified_beam_search beam_search fast_beam_search; do
1379
+ log "$method"
1380
+
1381
+ ./transducer_stateless_multi_datasets/pretrained.py \
1382
+ --method $method \
1383
+ --beam-size 4 \
1384
+ --checkpoint $repo/exp/pretrained.pt \
1385
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1386
+ $repo/test_wavs/1089-134686-0001.wav \
1387
+ $repo/test_wavs/1221-135766-0001.wav \
1388
+ $repo/test_wavs/1221-135766-0002.wav
1389
+ done
1390
+ rm -rf $repo
1391
+ }
1392
+
1393
+ function test_transducer_stateless_bpe_500_2022_02_07() {
1394
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07
1395
+
1396
+ log "Downloading pre-trained model from $repo_url"
1397
+ git lfs install
1398
+ git clone $repo_url
1399
+ repo=$(basename $repo_url)
1400
+
1401
+ log "Display test files"
1402
+ tree $repo/
1403
+ ls -lh $repo/test_wavs/*.wav
1404
+
1405
+ for sym in 1 2 3; do
1406
+ log "Greedy search with --max-sym-per-frame $sym"
1407
+
1408
+ ./transducer_stateless/pretrained.py \
1409
+ --method greedy_search \
1410
+ --max-sym-per-frame $sym \
1411
+ --checkpoint $repo/exp/pretrained.pt \
1412
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1413
+ $repo/test_wavs/1089-134686-0001.wav \
1414
+ $repo/test_wavs/1221-135766-0001.wav \
1415
+ $repo/test_wavs/1221-135766-0002.wav
1416
+ done
1417
+
1418
+ for method in fast_beam_search modified_beam_search beam_search; do
1419
+ log "$method"
1420
+
1421
+ ./transducer_stateless/pretrained.py \
1422
+ --method $method \
1423
+ --beam-size 4 \
1424
+ --checkpoint $repo/exp/pretrained.pt \
1425
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1426
+ $repo/test_wavs/1089-134686-0001.wav \
1427
+ $repo/test_wavs/1221-135766-0001.wav \
1428
+ $repo/test_wavs/1221-135766-0002.wav
1429
+ done
1430
+ rm -rf $repo
1431
+ }
1432
+
1433
+ function test_zipformer_ctc_en_2023_10_02() {
1434
+ repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-en-2023-10-02
1435
+ log "Downloading pre-trained model from $repo_url"
1436
+ git lfs install
1437
+ git clone $repo_url
1438
+ repo=$(basename $repo_url)
1439
+
1440
+ log "Display test files"
1441
+ tree $repo/
1442
+ ls -lh $repo/test_wavs/*.wav
1443
+
1444
+ log "CTC greedy search"
1445
+
1446
+ ./zipformer/onnx_pretrained_ctc.py \
1447
+ --nn-model $repo/model.onnx \
1448
+ --tokens $repo/tokens.txt \
1449
+ $repo/test_wavs/0.wav \
1450
+ $repo/test_wavs/1.wav \
1451
+ $repo/test_wavs/2.wav
1452
+
1453
+ log "CTC H decoding"
1454
+
1455
+ ./zipformer/onnx_pretrained_ctc_H.py \
1456
+ --nn-model $repo/model.onnx \
1457
+ --tokens $repo/tokens.txt \
1458
+ --H $repo/H.fst \
1459
+ $repo/test_wavs/0.wav \
1460
+ $repo/test_wavs/1.wav \
1461
+ $repo/test_wavs/2.wav
1462
+
1463
+ log "CTC HL decoding"
1464
+
1465
+ ./zipformer/onnx_pretrained_ctc_HL.py \
1466
+ --nn-model $repo/model.onnx \
1467
+ --words $repo/words.txt \
1468
+ --HL $repo/HL.fst \
1469
+ $repo/test_wavs/0.wav \
1470
+ $repo/test_wavs/1.wav \
1471
+ $repo/test_wavs/2.wav
1472
+
1473
+ log "CTC HLG decoding"
1474
+
1475
+ ./zipformer/onnx_pretrained_ctc_HLG.py \
1476
+ --nn-model $repo/model.onnx \
1477
+ --words $repo/words.txt \
1478
+ --HLG $repo/HLG.fst \
1479
+ $repo/test_wavs/0.wav \
1480
+ $repo/test_wavs/1.wav \
1481
+ $repo/test_wavs/2.wav
1482
+
1483
+ rm -rf $repo
1484
+ }
1485
+
1486
+ function test_conformer_ctc_jit_bpe_500_2021_11_09() {
1487
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
1488
+ log "Downloading pre-trained model from $repo_url"
1489
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
1490
+ repo=$(basename $repo_url)
1491
+ pushd $repo
1492
+
1493
+ git lfs pull --include "exp/pretrained.pt"
1494
+ git lfs pull --include "data/lang_bpe_500/HLG.pt"
1495
+ git lfs pull --include "data/lang_bpe_500/L.pt"
1496
+ git lfs pull --include "data/lang_bpe_500/L_disambig.pt"
1497
+ git lfs pull --include "data/lang_bpe_500/Linv.pt"
1498
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
1499
+ git lfs pull --include "data/lang_bpe_500/lexicon.txt"
1500
+ git lfs pull --include "data/lang_bpe_500/lexicon_disambig.txt"
1501
+ git lfs pull --include "data/lang_bpe_500/tokens.txt"
1502
+ git lfs pull --include "data/lang_bpe_500/words.txt"
1503
+ git lfs pull --include "data/lm/G_3_gram.fst.txt"
1504
+
1505
+ popd
1506
+
1507
+ log "Display test files"
1508
+ tree $repo/
1509
+ ls -lh $repo/test_wavs/*.wav
1510
+
1511
+ log "CTC decoding"
1512
+
1513
+ ./conformer_ctc/pretrained.py \
1514
+ --method ctc-decoding \
1515
+ --num-classes 500 \
1516
+ --checkpoint $repo/exp/pretrained.pt \
1517
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1518
+ $repo/test_wavs/1089-134686-0001.wav \
1519
+ $repo/test_wavs/1221-135766-0001.wav \
1520
+ $repo/test_wavs/1221-135766-0002.wav
1521
+
1522
+ log "HLG decoding"
1523
+
1524
+ ./conformer_ctc/pretrained.py \
1525
+ --method 1best \
1526
+ --num-classes 500 \
1527
+ --checkpoint $repo/exp/pretrained.pt \
1528
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1529
+ --words-file $repo/data/lang_bpe_500/words.txt \
1530
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
1531
+ $repo/test_wavs/1089-134686-0001.wav \
1532
+ $repo/test_wavs/1221-135766-0001.wav \
1533
+ $repo/test_wavs/1221-135766-0002.wav
1534
+
1535
+ log "CTC decoding on CPU with kaldi decoders using OpenFst"
1536
+
1537
+ log "Exporting model with torchscript"
1538
+
1539
+ pushd $repo/exp
1540
+ ln -s pretrained.pt epoch-99.pt
1541
+ popd
1542
+
1543
+ ./conformer_ctc/export.py \
1544
+ --epoch 99 \
1545
+ --avg 1 \
1546
+ --exp-dir $repo/exp \
1547
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1548
+ --jit 1
1549
+
1550
+ ls -lh $repo/exp
1551
+
1552
+
1553
+ log "Generating H.fst, HL.fst"
1554
+
1555
+ ./local/prepare_lang_fst.py --lang-dir $repo/data/lang_bpe_500 --ngram-G $repo/data/lm/G_3_gram.fst.txt
1556
+
1557
+ ls -lh $repo/data/lang_bpe_500
1558
+
1559
+ log "Decoding with H on CPU with OpenFst"
1560
+
1561
+ ./conformer_ctc/jit_pretrained_decode_with_H.py \
1562
+ --nn-model $repo/exp/cpu_jit.pt \
1563
+ --H $repo/data/lang_bpe_500/H.fst \
1564
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1565
+ $repo/test_wavs/1089-134686-0001.wav \
1566
+ $repo/test_wavs/1221-135766-0001.wav \
1567
+ $repo/test_wavs/1221-135766-0002.wav
1568
+
1569
+ log "Decoding with HL on CPU with OpenFst"
1570
+
1571
+ ./conformer_ctc/jit_pretrained_decode_with_HL.py \
1572
+ --nn-model $repo/exp/cpu_jit.pt \
1573
+ --HL $repo/data/lang_bpe_500/HL.fst \
1574
+ --words $repo/data/lang_bpe_500/words.txt \
1575
+ $repo/test_wavs/1089-134686-0001.wav \
1576
+ $repo/test_wavs/1221-135766-0001.wav \
1577
+ $repo/test_wavs/1221-135766-0002.wav
1578
+
1579
+ log "Decoding with HLG on CPU with OpenFst"
1580
+
1581
+ ./conformer_ctc/jit_pretrained_decode_with_HLG.py \
1582
+ --nn-model $repo/exp/cpu_jit.pt \
1583
+ --HLG $repo/data/lang_bpe_500/HLG.fst \
1584
+ --words $repo/data/lang_bpe_500/words.txt \
1585
+ $repo/test_wavs/1089-134686-0001.wav \
1586
+ $repo/test_wavs/1221-135766-0001.wav \
1587
+ $repo/test_wavs/1221-135766-0002.wav
1588
+
1589
+ rm -rf $repo
1590
+ }
1591
+
1592
+ function test_transducer_bpe_500_2021_12_23() {
1593
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-bpe-500-2021-12-23
1594
+
1595
+ log "Downloading pre-trained model from $repo_url"
1596
+ git lfs install
1597
+ git clone $repo_url
1598
+ repo=$(basename $repo_url)
1599
+
1600
+ log "Display test files"
1601
+ tree $repo/
1602
+ ls -lh $repo/test_wavs/*.wav
1603
+
1604
+ log "Beam search decoding"
1605
+
1606
+ ./transducer/pretrained.py \
1607
+ --method beam_search \
1608
+ --beam-size 4 \
1609
+ --checkpoint $repo/exp/pretrained.pt \
1610
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
1611
+ $repo/test_wavs/1089-134686-0001.wav \
1612
+ $repo/test_wavs/1221-135766-0001.wav \
1613
+ $repo/test_wavs/1221-135766-0002.wav
1614
+
1615
+ rm -rf $repo
1616
+ }
1617
+
1618
+ prepare_data
1619
+ run_diagnostics
1620
+ test_streaming_zipformer_ctc_hlg
1621
+ test_pruned_transducer_stateless_2022_03_12
1622
+ test_pruned_transducer_stateless2_2022_04_29
1623
+ test_pruned_transducer_stateless3_2022_04_29
1624
+ test_pruned_transducer_stateless5_2022_05_13
1625
+ test_pruned_transducer_stateless7_2022_11_11
1626
+ test_pruned_transducer_stateless8_2022_11_14
1627
+ test_pruned_transducer_stateless7_ctc_2022_12_01
1628
+ test_zipformer_mmi_2022_12_08
1629
+ test_pruned_transducer_stateless7_streaming_2022_12_29
1630
+ test_pruned_transducer_stateless7_ctc_bs_2023_01_29
1631
+ test_conformer_ctc3_2022_11_27
1632
+ test_lstm_transducer_stateless2_2022_09_03
1633
+ test_pruned_transducer_stateless3_2022_05_13
1634
+ test_streaming_pruned_transducer_stateless2_20220625
1635
+ test_streaming_zipformer_2023_05_17
1636
+ test_zipformer_2023_05_18
1637
+ test_transducer_stateless2_torchaudio_2022_04_19
1638
+ test_zipformer_transducer_ctc_2023_06_13
1639
+ test_100h_transducer_stateless_multi_datasets_bpe_500_2022_02_21
1640
+ test_transducer_stateless_multi_datasets_bpe_500_2022_03_01
1641
+ test_transducer_stateless_bpe_500_2022_02_07
1642
+ test_zipformer_ctc_en_2023_10_02
1643
+ # test_conformer_ctc_jit_bpe_500_2021_11_09 # failes for torch != 1.13.x and torch != 2.0.x
1644
+ test_transducer_bpe_500_2021_12_23
.github/scripts/librispeech/ASR/run_rknn.sh ADDED
@@ -0,0 +1,275 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ python3 -m pip install kaldi-native-fbank soundfile librosa
6
+
7
+ log() {
8
+ # This function is from espnet
9
+ local fname=${BASH_SOURCE[1]##*/}
10
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
11
+ }
12
+
13
+ cd egs/librispeech/ASR
14
+
15
+ # https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed
16
+ # sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
17
+ function export_2023_02_20() {
18
+ d=exp_2023_02_20
19
+
20
+ mkdir $d
21
+ pushd $d
22
+
23
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/exp/pretrained.pt
24
+ mv pretrained.pt epoch-99.pt
25
+
26
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/data/lang_char_bpe/tokens.txt
27
+
28
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/test_wavs/0.wav
29
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/test_wavs/1.wav
30
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/test_wavs/2.wav
31
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/test_wavs/3.wav
32
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/test_wavs/4.wav
33
+ ls -lh
34
+ popd
35
+
36
+ ./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
37
+ --dynamic-batch 0 \
38
+ --enable-int8-quantization 0 \
39
+ --tokens $d/tokens.txt \
40
+ --use-averaged-model 0 \
41
+ --epoch 99 \
42
+ --avg 1 \
43
+ --exp-dir $d/ \
44
+ --decode-chunk-len 64 \
45
+ --num-encoder-layers "2,4,3,2,4" \
46
+ --feedforward-dims "1024,1024,1536,1536,1024" \
47
+ --nhead "8,8,8,8,8" \
48
+ --encoder-dims "384,384,384,384,384" \
49
+ --attention-dims "192,192,192,192,192" \
50
+ --encoder-unmasked-dims "256,256,256,256,256" \
51
+ --zipformer-downsampling-factors "1,2,4,8,2" \
52
+ --cnn-module-kernels "31,31,31,31,31" \
53
+ --decoder-dim 512 \
54
+ --joiner-dim 512
55
+
56
+ ls -lh $d/
57
+
58
+ ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
59
+ --encoder-model-filename $d/encoder-epoch-99-avg-1.onnx \
60
+ --decoder-model-filename $d/decoder-epoch-99-avg-1.onnx \
61
+ --joiner-model-filename $d/joiner-epoch-99-avg-1.onnx \
62
+ --tokens $d/tokens.txt \
63
+ $d/0.wav
64
+
65
+ ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
66
+ --encoder-model-filename $d/encoder-epoch-99-avg-1.onnx \
67
+ --decoder-model-filename $d/decoder-epoch-99-avg-1.onnx \
68
+ --joiner-model-filename $d/joiner-epoch-99-avg-1.onnx \
69
+ --tokens $d/tokens.txt \
70
+ $d/1.wav
71
+
72
+ for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
73
+ dst=sherpa-onnx-$platform-streaming-zipformer-bilingual-zh-en-2023-02-20
74
+ mkdir -p $dst
75
+
76
+ ./pruned_transducer_stateless7_streaming/export_rknn.py \
77
+ --in-encoder $d/encoder-epoch-99-avg-1.onnx \
78
+ --in-decoder $d/decoder-epoch-99-avg-1.onnx \
79
+ --in-joiner $d/joiner-epoch-99-avg-1.onnx \
80
+ --out-encoder $dst/encoder.rknn \
81
+ --out-decoder $dst/decoder.rknn \
82
+ --out-joiner $dst/joiner.rknn \
83
+ --target-platform $platform 2>/dev/null
84
+
85
+ ls -lh $dst/
86
+
87
+ ./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
88
+ --encoder $d/encoder-epoch-99-avg-1.onnx \
89
+ --decoder $d/decoder-epoch-99-avg-1.onnx \
90
+ --joiner $d/joiner-epoch-99-avg-1.onnx \
91
+ --tokens $d/tokens.txt \
92
+ --wav $d/0.wav
93
+
94
+ cp $d/tokens.txt $dst
95
+ mkdir $dst/test_wavs
96
+ cp $d/*.wav $dst/test_wavs
97
+
98
+ tar cjvf $dst.tar.bz2 $dst
99
+ ls -lh $dst.tar.bz2
100
+ mv $dst.tar.bz2 /icefall/
101
+ ls -lh $dst/
102
+ echo "---"
103
+
104
+ rm -rf $dst
105
+ done
106
+ }
107
+
108
+ # https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t
109
+ # sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16
110
+ function export_2023_02_16() {
111
+ d=exp_2023_02_16
112
+
113
+ mkdir $d
114
+ pushd $d
115
+
116
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/exp/pretrained.pt
117
+ mv pretrained.pt epoch-99.pt
118
+
119
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/data/lang_char_bpe/tokens.txt
120
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/test_wavs/0.wav
121
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/test_wavs/1.wav
122
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/test_wavs/2.wav
123
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/test_wavs/3.wav
124
+ curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/test_wavs/4.wav
125
+
126
+ ls -lh
127
+
128
+ popd
129
+
130
+ ./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
131
+ --dynamic-batch 0 \
132
+ --enable-int8-quantization 0 \
133
+ --tokens $d/tokens.txt \
134
+ --use-averaged-model 0 \
135
+ --epoch 99 \
136
+ --avg 1 \
137
+ --exp-dir $d/ \
138
+ --decode-chunk-len 64 \
139
+ \
140
+ --num-encoder-layers 2,2,2,2,2 \
141
+ --feedforward-dims 768,768,768,768,768 \
142
+ --nhead 4,4,4,4,4 \
143
+ --encoder-dims 256,256,256,256,256 \
144
+ --attention-dims 192,192,192,192,192 \
145
+ --encoder-unmasked-dims 192,192,192,192,192 \
146
+ \
147
+ --zipformer-downsampling-factors "1,2,4,8,2" \
148
+ --cnn-module-kernels "31,31,31,31,31" \
149
+ --decoder-dim 512 \
150
+ --joiner-dim 512
151
+
152
+ ls -lh $d/
153
+
154
+ ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
155
+ --encoder-model-filename $d/encoder-epoch-99-avg-1.onnx \
156
+ --decoder-model-filename $d/decoder-epoch-99-avg-1.onnx \
157
+ --joiner-model-filename $d/joiner-epoch-99-avg-1.onnx \
158
+ --tokens $d/tokens.txt \
159
+ $d/0.wav
160
+
161
+ ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
162
+ --encoder-model-filename $d/encoder-epoch-99-avg-1.onnx \
163
+ --decoder-model-filename $d/decoder-epoch-99-avg-1.onnx \
164
+ --joiner-model-filename $d/joiner-epoch-99-avg-1.onnx \
165
+ --tokens $d/tokens.txt \
166
+ $d/1.wav
167
+
168
+ for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
169
+ dst=sherpa-onnx-$platform-streaming-zipformer-small-bilingual-zh-en-2023-02-16
170
+ mkdir -p $dst
171
+
172
+ ./pruned_transducer_stateless7_streaming/export_rknn.py \
173
+ --in-encoder $d/encoder-epoch-99-avg-1.onnx \
174
+ --in-decoder $d/decoder-epoch-99-avg-1.onnx \
175
+ --in-joiner $d/joiner-epoch-99-avg-1.onnx \
176
+ --out-encoder $dst/encoder.rknn \
177
+ --out-decoder $dst/decoder.rknn \
178
+ --out-joiner $dst/joiner.rknn \
179
+ --target-platform $platform 2>/dev/null
180
+
181
+ ls -lh $dst/
182
+
183
+ ./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
184
+ --encoder $d/encoder-epoch-99-avg-1.onnx \
185
+ --decoder $d/decoder-epoch-99-avg-1.onnx \
186
+ --joiner $d/joiner-epoch-99-avg-1.onnx \
187
+ --tokens $d/tokens.txt \
188
+ --wav $d/0.wav
189
+
190
+ cp $d/tokens.txt $dst
191
+ mkdir $dst/test_wavs
192
+ cp $d/*.wav $dst/test_wavs
193
+
194
+ tar cjvf $dst.tar.bz2 $dst
195
+ ls -lh $dst.tar.bz2
196
+ mv $dst.tar.bz2 /icefall/
197
+ ls -lh $dst/
198
+ echo "---"
199
+
200
+ rm -rf $dst
201
+ done
202
+ }
203
+
204
+ # https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english
205
+ function export_2023_06_26() {
206
+ d=exp_2023_06_26
207
+
208
+ mkdir $d
209
+ pushd $d
210
+
211
+ curl -SL -O https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/exp/pretrained.pt
212
+ mv pretrained.pt epoch-99.pt
213
+
214
+ curl -SL -O https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/data/lang_bpe_500/tokens.txt
215
+
216
+ curl -SL -o 0.wav https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/data/lang_bpe_500/tokens.txt
217
+ curl -SL -o 1.wav https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/test_wavs/1221-135766-0001.wav
218
+ curl -SL -o 2.wav https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/test_wavs/1221-135766-0002.wav
219
+
220
+ ls -lh
221
+
222
+ popd
223
+
224
+ ./zipformer/export-onnx-streaming.py \
225
+ --dynamic-batch 0 \
226
+ --enable-int8-quantization 0 \
227
+ --tokens $d/tokens.txt \
228
+ --use-averaged-model 0 \
229
+ --epoch 99 \
230
+ --avg 1 \
231
+ --exp-dir $d \
232
+ --use-ctc 0 \
233
+ --use-transducer 1 \
234
+ \
235
+ --chunk-size 32 \
236
+ --left-context-frames 128 \
237
+ --causal 1
238
+
239
+ ls -lh $d/
240
+
241
+ for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
242
+ dst=sherpa-onnx-$platform-streaming-zipformer-en-2023-06-26
243
+ mkdir -p $dst
244
+
245
+ ./zipformer/export_rknn_transducer_streaming.py \
246
+ --in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
247
+ --in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
248
+ --in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
249
+ --out-encoder $dst/encoder.rknn \
250
+ --out-decoder $dst/decoder.rknn \
251
+ --out-joiner $dst/joiner.rknn \
252
+ --target-platform $platform
253
+
254
+ ls -lh $dst/
255
+
256
+ cp $d/tokens.txt $dst
257
+ mkdir $dst/test_wavs
258
+ cp $d/*.wav $dst/test_wavs
259
+
260
+ tar cjvf $dst.tar.bz2 $dst
261
+ ls -lh $dst.tar.bz2
262
+ mv $dst.tar.bz2 /icefall/
263
+ ls -lh $dst/
264
+ echo "---"
265
+
266
+ rm -rf $dst
267
+ done
268
+ }
269
+
270
+ if [[ $rknn_toolkit2_version == "2.1.0" ]]; then
271
+ export_2023_02_16
272
+ export_2023_02_20
273
+ else
274
+ export_2023_06_26
275
+ fi
.github/scripts/ljspeech/TTS/run-matcha.sh ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ apt-get update
6
+ apt-get install -y sox
7
+
8
+ python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
9
+ python3 -m pip install espnet_tts_frontend
10
+ python3 -m pip install numba conformer==0.3.2 diffusers librosa
11
+
12
+ log() {
13
+ # This function is from espnet
14
+ local fname=${BASH_SOURCE[1]##*/}
15
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
16
+ }
17
+
18
+ cd egs/ljspeech/TTS
19
+
20
+ sed -i.bak s/600/8/g ./prepare.sh
21
+ sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
22
+ sed -i.bak s/500/5/g ./prepare.sh
23
+ git diff
24
+
25
+ function prepare_data() {
26
+ # We have created a subset of the data for testing
27
+ #
28
+ mkdir -p download
29
+ pushd download
30
+ wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
31
+ tar xvf LJSpeech-1.1.tar.bz2
32
+ popd
33
+
34
+ ./prepare.sh
35
+ tree .
36
+ }
37
+
38
+ function train() {
39
+ pushd ./matcha
40
+ sed -i.bak s/1500/3/g ./train.py
41
+ git diff .
42
+ popd
43
+
44
+ ./matcha/train.py \
45
+ --exp-dir matcha/exp \
46
+ --num-epochs 1 \
47
+ --save-every-n 1 \
48
+ --num-buckets 2 \
49
+ --tokens data/tokens.txt \
50
+ --max-duration 20
51
+
52
+ ls -lh matcha/exp
53
+ }
54
+
55
+ function infer() {
56
+
57
+ curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
58
+
59
+ ./matcha/infer.py \
60
+ --num-buckets 2 \
61
+ --epoch 1 \
62
+ --exp-dir ./matcha/exp \
63
+ --tokens data/tokens.txt \
64
+ --vocoder ./generator_v1 \
65
+ --input-text "how are you doing?" \
66
+ --output-wav ./generated.wav
67
+
68
+ ls -lh *.wav
69
+ soxi ./generated.wav
70
+ rm -v ./generated.wav
71
+ rm -v generator_v1
72
+ }
73
+
74
+ function export_onnx() {
75
+ pushd matcha/exp
76
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/epoch-4000.pt
77
+ popd
78
+
79
+ pushd data/fbank
80
+ rm -fv *.json
81
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json
82
+ popd
83
+
84
+ ./matcha/export_onnx.py \
85
+ --exp-dir ./matcha/exp \
86
+ --epoch 4000 \
87
+ --tokens ./data/tokens.txt \
88
+ --cmvn ./data/fbank/cmvn.json
89
+
90
+ ls -lh *.onnx
91
+
92
+ if false; then
93
+ # The CI machine does not have enough memory to run it
94
+ #
95
+ curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
96
+ curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
97
+ curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
98
+ python3 ./matcha/export_onnx_hifigan.py
99
+ else
100
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
101
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
102
+ curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v3.onnx
103
+ fi
104
+
105
+ ls -lh *.onnx
106
+
107
+ for v in v1 v2 v3; do
108
+ python3 ./matcha/onnx_pretrained.py \
109
+ --acoustic-model ./model-steps-6.onnx \
110
+ --vocoder ./hifigan_$v.onnx \
111
+ --tokens ./data/tokens.txt \
112
+ --input-text "how are you doing?" \
113
+ --output-wav /icefall/generated-matcha-tts-steps-6-$v.wav
114
+ done
115
+
116
+ ls -lh /icefall/*.wav
117
+ soxi /icefall/generated-matcha-tts-steps-6-*.wav
118
+
119
+ cp ./model-steps-*.onnx /icefall
120
+
121
+ d=matcha-icefall-en_US-ljspeech
122
+ mkdir $d
123
+ cp -v data/tokens.txt $d
124
+ cp model-steps-3.onnx $d
125
+ pushd $d
126
+ curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
127
+ tar xf espeak-ng-data.tar.bz2
128
+ rm espeak-ng-data.tar.bz2
129
+
130
+ cat >README.md <<EOF
131
+ # Introduction
132
+
133
+ This model is trained using the dataset from
134
+ https://keithito.com/LJ-Speech-Dataset/
135
+
136
+ The dataset contains only 1 female speaker.
137
+
138
+ You can find the training code at
139
+ https://github.com/k2-fsa/icefall/tree/master/egs/ljspeech/TTS#matcha
140
+ EOF
141
+
142
+ ls -lh
143
+
144
+ popd
145
+
146
+ tar cvjf $d.tar.bz2 $d
147
+ mv $d.tar.bz2 /icefall
148
+ mv $d /icefall
149
+ }
150
+
151
+ prepare_data
152
+ train
153
+ infer
154
+ export_onnx
155
+
156
+ rm -rfv generator_v* matcha/exp
157
+ git checkout .
.github/scripts/ljspeech/TTS/run.sh ADDED
@@ -0,0 +1,157 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
6
+ python3 -m pip install espnet_tts_frontend
7
+ python3 -m pip install numba
8
+
9
+ log() {
10
+ # This function is from espnet
11
+ local fname=${BASH_SOURCE[1]##*/}
12
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
13
+ }
14
+
15
+ cd egs/ljspeech/TTS
16
+
17
+ sed -i.bak s/600/8/g ./prepare.sh
18
+ sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
19
+ sed -i.bak s/500/5/g ./prepare.sh
20
+ git diff
21
+
22
+ function prepare_data() {
23
+ # We have created a subset of the data for testing
24
+ #
25
+ mkdir -p download
26
+ pushd download
27
+ wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
28
+ tar xvf LJSpeech-1.1.tar.bz2
29
+ popd
30
+
31
+ ./prepare.sh
32
+ tree .
33
+ }
34
+
35
+ function train() {
36
+ pushd ./vits
37
+ sed -i.bak s/200/3/g ./train.py
38
+ git diff .
39
+ popd
40
+
41
+ for t in low medium high; do
42
+ ./vits/train.py \
43
+ --exp-dir vits/exp-$t \
44
+ --model-type $t \
45
+ --num-epochs 1 \
46
+ --save-every-n 1 \
47
+ --num-buckets 2 \
48
+ --tokens data/tokens.txt \
49
+ --max-duration 20
50
+
51
+ ls -lh vits/exp-$t
52
+ done
53
+ }
54
+
55
+ function infer() {
56
+ for t in low medium high; do
57
+ ./vits/infer.py \
58
+ --num-buckets 2 \
59
+ --model-type $t \
60
+ --epoch 1 \
61
+ --exp-dir ./vits/exp-$t \
62
+ --tokens data/tokens.txt \
63
+ --max-duration 20
64
+ done
65
+ }
66
+
67
+ function export_onnx() {
68
+ for t in low medium high; do
69
+ ./vits/export-onnx.py \
70
+ --model-type $t \
71
+ --epoch 1 \
72
+ --exp-dir ./vits/exp-$t \
73
+ --tokens data/tokens.txt
74
+
75
+ ls -lh vits/exp-$t/
76
+ done
77
+ }
78
+
79
+ function test_medium() {
80
+ git clone https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-medium-2024-03-12
81
+
82
+ ./vits/export-onnx.py \
83
+ --model-type medium \
84
+ --epoch 820 \
85
+ --exp-dir ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp \
86
+ --tokens ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt
87
+
88
+ ls -lh ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp
89
+
90
+ ./vits/test_onnx.py \
91
+ --model-filename ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp/vits-epoch-820.onnx \
92
+ --tokens ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt \
93
+ --output-filename /icefall/test-medium.wav
94
+
95
+ ls -lh /icefall/test-medium.wav
96
+
97
+ d=/icefall/vits-icefall-en_US-ljspeech-medium
98
+ mkdir $d
99
+ cp -v ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt $d/
100
+ cp -v ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp/vits-epoch-820.onnx $d/model.onnx
101
+
102
+ rm -rf icefall-tts-ljspeech-vits-medium-2024-03-12
103
+
104
+ pushd $d
105
+ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
106
+ tar xf espeak-ng-data.tar.bz2
107
+ rm espeak-ng-data.tar.bz2
108
+ cd ..
109
+ tar cjf vits-icefall-en_US-ljspeech-medium.tar.bz2 vits-icefall-en_US-ljspeech-medium
110
+ rm -rf vits-icefall-en_US-ljspeech-medium
111
+ ls -lh *.tar.bz2
112
+ popd
113
+ }
114
+
115
+ function test_low() {
116
+ git clone https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-low-2024-03-12
117
+
118
+ ./vits/export-onnx.py \
119
+ --model-type low \
120
+ --epoch 1600 \
121
+ --exp-dir ./icefall-tts-ljspeech-vits-low-2024-03-12/exp \
122
+ --tokens ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt
123
+
124
+ ls -lh ./icefall-tts-ljspeech-vits-low-2024-03-12/exp
125
+
126
+ ./vits/test_onnx.py \
127
+ --model-filename ./icefall-tts-ljspeech-vits-low-2024-03-12/exp/vits-epoch-1600.onnx \
128
+ --tokens ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt \
129
+ --output-filename /icefall/test-low.wav
130
+
131
+ ls -lh /icefall/test-low.wav
132
+
133
+ d=/icefall/vits-icefall-en_US-ljspeech-low
134
+ mkdir $d
135
+ cp -v ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt $d/
136
+ cp -v ./icefall-tts-ljspeech-vits-low-2024-03-12/exp/vits-epoch-1600.onnx $d/model.onnx
137
+
138
+ rm -rf icefall-tts-ljspeech-vits-low-2024-03-12
139
+
140
+ pushd $d
141
+ wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
142
+ tar xf espeak-ng-data.tar.bz2
143
+ rm espeak-ng-data.tar.bz2
144
+ cd ..
145
+ tar cjf vits-icefall-en_US-ljspeech-low.tar.bz2 vits-icefall-en_US-ljspeech-low
146
+ rm -rf vits-icefall-en_US-ljspeech-low
147
+ ls -lh *.tar.bz2
148
+ popd
149
+ }
150
+
151
+ prepare_data
152
+ train
153
+ infer
154
+ export_onnx
155
+ rm -rf vits/exp-{low,medium,high}
156
+ test_medium
157
+ test_low
.github/scripts/multi_zh-hans/ASR/run.sh ADDED
@@ -0,0 +1,756 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ git config --global user.name "k2-fsa"
6
+ git config --global user.email "csukuangfj@gmail.com"
7
+ git config --global lfs.allowincompletepush true
8
+
9
+ python3 -m pip install onnxmltools==1.13.0 onnx==1.17.0 onnxruntime==1.17.1 sherpa-onnx
10
+
11
+ log() {
12
+ # This function is from espnet
13
+ local fname=${BASH_SOURCE[1]##*/}
14
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
15
+ }
16
+
17
+ cd egs/multi_zh-hans/ASR
18
+
19
+ log "pwd: $PWD"
20
+
21
# Export the non-streaming 2023-9-2 zipformer to ONNX (fp32/int8/fp16) and
# run a quick decoding sanity check on the bundled test waves.
# Sets the global $repo as a side effect; the checkout is removed at the end.
function run_2023_9_2() {
  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")
  pushd "$repo"
  cd exp
  git lfs pull --include pretrained.pt
  # -f keeps the function re-runnable: a plain `ln -s` aborts under `set -e`
  # if the link is left over from a previous run.
  ln -sf pretrained.pt epoch-99.pt
  cd ../data/lang_bpe_2000
  ls -lh
  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
  git lfs pull --include "*.model"
  ls -lh
  popd

  log "--------------------------------------------"
  log "Export non-streaming ONNX transducer models "
  log "--------------------------------------------"
  ./zipformer/export-onnx.py \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$repo/exp" \
    --causal False \
    --fp16 1

  ls -lh "$repo/exp"

  # Test waves shared by the fp32/int8/fp16 decoding runs below.
  waves=(
    "$repo/test_wavs/DEV_T0000000000.wav"
    "$repo/test_wavs/DEV_T0000000001.wav"
    "$repo/test_wavs/DEV_T0000000002.wav"
    "$repo/test_wavs/TEST_MEETING_T0000000113.wav"
    "$repo/test_wavs/TEST_MEETING_T0000000219.wav"
    "$repo/test_wavs/TEST_MEETING_T0000000351.wav"
  )

  # fp32
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-99-avg-1.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-99-avg-1.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-99-avg-1.onnx" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    "${waves[@]}"

  # int8 encoder/joiner with fp32 decoder
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-99-avg-1.int8.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-99-avg-1.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-99-avg-1.int8.onnx" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    "${waves[@]}"

  # fp16
  ./zipformer/onnx_pretrained.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-99-avg-1.fp16.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-99-avg-1.fp16.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-99-avg-1.fp16.onnx" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    "${waves[@]}"

  rm -rf "$repo"
}
89
+
90
# Export the 2023-11-05 streaming zipformer (CTC and transducer heads) to
# ONNX (fp32/int8/fp16), sanity-check each variant, and publish the results
# to Hugging Face plus tarballs for the sherpa-onnx release.
# Leaves the checkout in place and sets the global $repo (reused by
# run_2023_12_12_streaming).
function run_2023_11_05_streaming() {
  repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  # Intentionally global: run_2023_12_12_streaming reads $repo.
  repo=$(basename "$repo_url")

  pushd "$repo"
  cd exp/
  git lfs pull --include pretrained.pt
  # Remove stale artifacts so the link/export below start clean.
  rm -fv epoch-20.pt
  rm -fv *.onnx
  ln -s pretrained.pt epoch-20.pt
  cd ../data/lang_bpe_2000
  ls -lh
  git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
  git lfs pull --include "*.model"
  ls -lh
  popd

  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir "$repo/exp" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    --causal 1 \
    --avg 1 \
    --epoch 20 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    --fp16 1

  ls -lh "$repo/exp/"

  log "------------------------------------------------------------"
  log "Test exported streaming ONNX CTC models (greedy search) "
  log "------------------------------------------------------------"

  test_wavs=(
    DEV_T0000000000.wav
    DEV_T0000000001.wav
    DEV_T0000000002.wav
    TEST_MEETING_T0000000113.wav
    TEST_MEETING_T0000000219.wav
    TEST_MEETING_T0000000351.wav
  )

  for w in "${test_wavs[@]}"; do
    log "----fp32----"
    ./zipformer/onnx_pretrained-streaming-ctc.py \
      --model-filename "$repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx" \
      --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
      "$repo/test_wavs/$w"

    log "----int8----"

    ./zipformer/onnx_pretrained-streaming-ctc.py \
      --model-filename "$repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx" \
      --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
      "$repo/test_wavs/$w"

    log "----fp16----"

    ./zipformer/onnx_pretrained-streaming-ctc.py \
      --model-filename "$repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx" \
      --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
      "$repo/test_wavs/$w"
  done

  log "Upload onnx CTC models to huggingface"
  name=(
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13
    sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13
  )
  for n in "${name[@]}"; do
    url=https://huggingface.co/k2-fsa/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
    dst=$(basename "$url")
    # Copy the model variant that matches the destination repo name.
    case $n in
      sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13)
        cp -v "$repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13)
        cp -v "$repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13)
        cp -v "$repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx" "$dst"
        ;;
    esac

    cp -v "$repo/data/lang_bpe_2000/tokens.txt" "$dst"
    cp -v "$repo/data/lang_bpe_2000/bpe.model" "$dst"
    mkdir -p "$dst/test_wavs"
    cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"
    cd "$dst"
    git lfs track "*.onnx" "bpe.model" "*.wav"
    ls -lh
    file bpe.model
    git status
    git add .
    # Push is best-effort: CI should continue even if the HF push is rejected.
    git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    # Strip git metadata so the tarball only contains the released files.
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv "$dst.tar.bz2" "$dst"
    ls -lh *.tar.bz2
    mv -v "$dst.tar.bz2" ../../../
  done

  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"

  ./zipformer/export-onnx-streaming.py \
    --exp-dir "$repo/exp" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    --causal 1 \
    --avg 1 \
    --epoch 20 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    --fp16 1

  ls -lh "$repo/exp"

  log "------------------------------------------------------------"
  log "Test exported streaming ONNX transducer models (Python code)"
  log "------------------------------------------------------------"

  log "test fp32"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    "$repo/test_wavs/DEV_T0000000000.wav"

  log "test int8"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    "$repo/test_wavs/DEV_T0000000000.wav"

  log "test fp16"
  ./zipformer/onnx_pretrained-streaming.py \
    --encoder-model-filename "$repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx" \
    --decoder-model-filename "$repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx" \
    --joiner-model-filename "$repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    "$repo/test_wavs/DEV_T0000000000.wav"

  name=(
    sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13
    sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13
    sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13
  )

  for n in "${name[@]}"; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
    dst=$(basename "$url")
    case $n in
      sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13)
        cp -v "$repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx" "$dst"
        cp -v "$repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx" "$dst"
        cp -v "$repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13)
        cp -v "$repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx" "$dst"
        cp -v "$repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx" "$dst"
        cp -v "$repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13)
        cp -v "$repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx" "$dst"
        cp -v "$repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx" "$dst"
        cp -v "$repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx" "$dst"
        ;;
    esac

    cp -v "$repo/data/lang_bpe_2000/tokens.txt" "$dst"
    cp -v "$repo/data/lang_bpe_2000/bpe.model" "$dst"
    mkdir -p "$dst/test_wavs"
    cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"
    cd "$dst"
    git lfs track "*.onnx" "bpe.model" "*.wav"
    ls -lh
    file bpe.model
    git status
    git add .
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv "$dst.tar.bz2" "$dst"
    ls -lh *.tar.bz2
    mv -v "$dst.tar.bz2" ../../../
  done
}
291
+
292
# Re-publish the transducer ONNX files exported by run_2023_11_05_streaming
# under the k2-fsa/...-2023-12-12 Hugging Face repo and produce a tarball.
#
# Fix: this function reads the global $repo, which is only set by
# run_2023_11_05_streaming. Previously a standalone invocation silently
# expanded $repo to the empty string and failed on `cp /exp/...`; now it
# fails fast with a clear message.
function run_2023_12_12_streaming() {
  log "Upload onnx transducer models to huggingface"

  if [ -z "${repo:-}" ]; then
    log "Error: \$repo is not set; run run_2023_11_05_streaming first"
    exit 1
  fi

  url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
  GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
  dst=$(basename "$url")
  cp -v "$repo"/exp/encoder*.onnx "$dst"
  cp -v "$repo"/exp/decoder*.onnx "$dst"
  cp -v "$repo"/exp/joiner*.onnx "$dst"
  cp -v "$repo/data/lang_bpe_2000/tokens.txt" "$dst"
  cp -v "$repo/data/lang_bpe_2000/bpe.model" "$dst"
  mkdir -p "$dst/test_wavs"
  cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"
  cd "$dst"
  git lfs track "*.onnx" bpe.model "*.wav"
  git add .
  # Push is best-effort: CI should continue even if the HF push is rejected.
  git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true

  log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
  # Strip git metadata so the tarball only contains the released files.
  rm -rf .git
  rm -fv .gitattributes
  cd ..
  tar cjfv "$dst.tar.bz2" "$dst"
  ls -lh *.tar.bz2
  mv -v "$dst.tar.bz2" ../../../
}
318
+
319
# Export yuekai's "large" streaming zipformer to ONNX (CTC head first, then
# transducer head), verify each variant with the sherpa-onnx CLI, and publish
# the results to Hugging Face plus release tarballs.
function run_yuekai_large() {
  # The repo is private; $HF_TOKEN is embedded in the clone URL.
  repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")
  pushd "$repo"
  git lfs pull --include pretrained.pt
  mv pretrained.pt epoch-99.pt
  # tokens.txt is not part of the model repo; fetch it from the ONNX mirror.
  curl -SL -O https://huggingface.co/pingzxy/icefall-asr-multi-zh-hans-zipformer-large-onnx/resolve/main/tokens.txt
  popd

  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  # The --num-encoder-layers/--feedforward-dim/--encoder-dim/
  # --encoder-unmasked-dim group describes the "large" architecture.
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir "$repo/" \
    --tokens "$repo/tokens.txt" \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    --num-encoder-layers 2,2,4,5,4,2 \
    --feedforward-dim 768,1024,1536,2048,1536,768 \
    --encoder-dim 256,384,512,768,512,256 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    --fp16 1 \
    --use-whisper-features 1

  ls -lh "$repo/"
  pushd "$repo"

  cat >README.md <<EOF
# Introduction

This model is converted
from
https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large

The training code can be found at
https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-large-model
EOF

  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.onnx model.onnx

  ls -lh *.onnx

  # -p keeps the function re-runnable if test_wavs already exists.
  mkdir -p test_wavs
  cd test_wavs
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/0.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/1.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/8k.wav
  popd

  for w in 0.wav 1.wav 8k.wav; do
    log "---fp32---"
    sherpa-onnx \
      --zipformer2-ctc-model="$repo/model.onnx" \
      --tokens="$repo/tokens.txt" \
      "$repo/test_wavs/$w"

    log "---int8---"

    sherpa-onnx \
      --zipformer2-ctc-model="$repo/model.int8.onnx" \
      --tokens="$repo/tokens.txt" \
      "$repo/test_wavs/$w"

    log "---fp16---"

    sherpa-onnx \
      --zipformer2-ctc-model="$repo/model.fp16.onnx" \
      --tokens="$repo/tokens.txt" \
      "$repo/test_wavs/$w"
  done

  name=(
    sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30
  )
  for n in "${name[@]}"; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
    dst=$(basename "$url")
    # Copy the model variant that matches the destination repo name.
    case $n in
      sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30)
        cp -v "$repo/model.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30)
        cp -v "$repo/model.int8.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30)
        cp -v "$repo/model.fp16.onnx" "$dst"
        ;;
    esac

    cp -v "$repo/tokens.txt" "$dst"
    cp -v "$repo/README.md" "$dst"
    mkdir -p "$dst/test_wavs"
    cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"
    cd "$dst"
    git lfs track "*.onnx" "*.wav"
    ls -lh
    git status
    git add .
    # Push is best-effort: CI should continue even if the HF push is rejected.
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv "$dst.tar.bz2" "$dst"
    ls -lh *.tar.bz2
    mv -v "$dst.tar.bz2" ../../../
  done

  # Drop the CTC exports before exporting the transducer head.
  rm "$repo"/*.onnx

  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"

  ./zipformer/export-onnx-streaming.py \
    --exp-dir "$repo" \
    --tokens "$repo/tokens.txt" \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    --num-encoder-layers 2,2,4,5,4,2 \
    --feedforward-dim 768,1024,1536,2048,1536,768 \
    --encoder-dim 256,384,512,768,512,256 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    --fp16 1 \
    --use-whisper-features 1

  ls -lh "$repo"
  pushd "$repo"
  # Shorten the exported filenames for distribution.
  for m in encoder decoder joiner; do
    mv -v $m-epoch-99-avg-1-chunk-16-left-128.onnx $m.onnx
    mv -v $m-epoch-99-avg-1-chunk-16-left-128.fp16.onnx $m.fp16.onnx
    mv -v $m-epoch-99-avg-1-chunk-16-left-128.int8.onnx $m.int8.onnx
  done
  ls -lh *.onnx
  popd

  for w in 0.wav 1.wav 8k.wav; do
    log "---fp32---"
    sherpa-onnx \
      --encoder="$repo/encoder.onnx" \
      --decoder="$repo/decoder.onnx" \
      --joiner="$repo/joiner.onnx" \
      --tokens="$repo/tokens.txt" \
      "$repo/test_wavs/$w"

    log "---int8---"

    sherpa-onnx \
      --encoder="$repo/encoder.int8.onnx" \
      --decoder="$repo/decoder.onnx" \
      --joiner="$repo/joiner.int8.onnx" \
      --tokens="$repo/tokens.txt" \
      "$repo/test_wavs/$w"

    log "---fp16---"

    sherpa-onnx \
      --encoder="$repo/encoder.fp16.onnx" \
      --decoder="$repo/decoder.fp16.onnx" \
      --joiner="$repo/joiner.fp16.onnx" \
      --tokens="$repo/tokens.txt" \
      "$repo/test_wavs/$w"
  done

  name=(
    sherpa-onnx-streaming-zipformer-zh-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30
  )
  for n in "${name[@]}"; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
    dst=$(basename "$url")
    case $n in
      sherpa-onnx-streaming-zipformer-zh-2025-06-30)
        cp -v "$repo/encoder.onnx" "$dst"
        cp -v "$repo/decoder.onnx" "$dst"
        cp -v "$repo/joiner.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30)
        cp -v "$repo/encoder.int8.onnx" "$dst"
        cp -v "$repo/decoder.onnx" "$dst"
        cp -v "$repo/joiner.int8.onnx" "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30)
        cp -v "$repo/encoder.fp16.onnx" "$dst"
        cp -v "$repo/decoder.fp16.onnx" "$dst"
        cp -v "$repo/joiner.fp16.onnx" "$dst"
        ;;
    esac

    cp -v "$repo/tokens.txt" "$dst"
    cp -v "$repo/README.md" "$dst"
    mkdir -p "$dst/test_wavs"
    cp -v "$repo"/test_wavs/*.wav "$dst/test_wavs"
    cd "$dst"
    git lfs track "*.onnx" "*.wav"
    ls -lh
    git status
    git add .
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..
    tar cjfv "$dst.tar.bz2" "$dst"
    ls -lh *.tar.bz2
    mv -v "$dst.tar.bz2" ../../../
  done
}
545
+
546
# Export yuekai's "xl" streaming zipformer to ONNX (CTC head, then transducer
# head), verify with the sherpa-onnx CLI, and publish int8/fp16 variants.
# The model is too large for a single protobuf, hence --use-external-data.
function run_yuekai_xl() {
  # The repo is private; $HF_TOKEN is embedded in the clone URL.
  repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl
  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")

  pushd "$repo"
  git lfs pull --include pretrained.pt
  git lfs pull --include data/lang_bpe_2000/bpe.model
  mv pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "----------------------------------------"
  log "Export streaming ONNX CTC models "
  log "----------------------------------------"
  # The dim/head flags below describe the "xl" architecture.
  ./zipformer/export-onnx-streaming-ctc.py \
    --exp-dir "$repo/" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 1 \
    --num-encoder-layers 2,3,5,6,5,3 \
    --feedforward-dim 1536,2048,3072,4096,3072,1536 \
    --encoder-dim 512,768,1024,1536,1024,512 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    --decoder-dim 768 --joiner-dim 768 \
    --value-head-dim 18 \
    --query-head-dim 48 \
    --num-heads 4,4,4,8,4,4 \
    --fp16 1 \
    --use-whisper-features 1 \
    --use-external-data 1

  # NOTE(review): the mv targets below assume the exported ctc-*.onnx files
  # land in the current directory (not in $repo/) when --use-external-data
  # is given — confirm against export-onnx-streaming-ctc.py before changing.
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
  mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx

  ls -lh *.onnx

  # -p keeps the function re-runnable if test_wavs already exists.
  mkdir -p test_wavs
  pushd test_wavs
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/0.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/1.wav
  curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/8k.wav
  popd

  for w in 0.wav 1.wav 8k.wav; do
    log "---int8---"

    sherpa-onnx \
      --zipformer2-ctc-model=./model.int8.onnx \
      --tokens="$repo/data/lang_bpe_2000/tokens.txt" \
      test_wavs/$w

    log "---fp16---"

    sherpa-onnx \
      --zipformer2-ctc-model=./model.fp16.onnx \
      --tokens="$repo/data/lang_bpe_2000/tokens.txt" \
      test_wavs/$w
  done

  pushd "$repo"
  cat >README.md <<EOF
# Introduction

This model is converted
from
https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl

The training code can be found at
https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-xl-model
EOF
  popd

  name=(
    sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30
  )

  for n in "${name[@]}"; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
    dst=$(basename "$url")
    # Copy the model variant that matches the destination repo name.
    case $n in
      sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30)
        cp -v model.fp16.onnx "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30)
        cp -v model.int8.onnx "$dst"
        ;;
    esac

    cp -v "$repo/data/lang_bpe_2000/tokens.txt" "$dst"
    cp -v "$repo/data/lang_bpe_2000/bpe.model" "$dst"
    cp -v "$repo/README.md" "$dst"
    mkdir -p "$dst/test_wavs"
    cp -v ./test_wavs/*.wav "$dst/test_wavs"
    cd "$dst"
    git lfs track "*.onnx" "*.wav" "bpe.model"
    ls -lh
    git status
    git add .
    # Push is best-effort: CI should continue even if the HF push is rejected.
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..

    ls -lh "$dst"
    tar cjfv "$dst.tar.bz2" "$dst"
    ls -lh *.tar.bz2
    mv -v "$dst.tar.bz2" ../../../
  done

  # Clean up CTC artifacts (including external-data weights) before the
  # transducer export.
  rm -fv *.onnx *.weights

  log "----------------------------------------"
  log "Export streaming ONNX transducer models "
  log "----------------------------------------"

  ./zipformer/export-onnx-streaming.py \
    --exp-dir "$repo/" \
    --tokens "$repo/data/lang_bpe_2000/tokens.txt" \
    --causal 1 \
    --avg 1 \
    --epoch 99 \
    --use-averaged-model 0 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --use-ctc 0 \
    --num-encoder-layers 2,3,5,6,5,3 \
    --feedforward-dim 1536,2048,3072,4096,3072,1536 \
    --encoder-dim 512,768,1024,1536,1024,512 \
    --encoder-unmasked-dim 192,192,256,320,256,192 \
    --decoder-dim 768 --joiner-dim 768 \
    --value-head-dim 18 \
    --query-head-dim 48 \
    --num-heads 4,4,4,8,4,4 \
    --fp16 1 \
    --use-whisper-features 1 \
    --use-external-data 1

  ls -lh *.onnx
  ls -lh *.weights

  # NOTE(review): the encoder files are taken from the current directory while
  # decoder/joiner are taken from $repo/ — presumably because only the encoder
  # is written with external data; verify against export-onnx-streaming.py
  # before changing these paths.
  mv encoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx encoder.fp16.onnx
  mv encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx encoder.int8.onnx

  mv "$repo"/decoder-epoch-99-avg-1-chunk-16-left-128.onnx decoder.onnx
  mv "$repo"/decoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx decoder.fp16.onnx

  mv "$repo"/joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx joiner.int8.onnx
  mv "$repo"/joiner-epoch-99-avg-1-chunk-16-left-128.fp16.onnx joiner.fp16.onnx

  name=(
    sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30
    sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30
  )

  for n in "${name[@]}"; do
    url=https://huggingface.co/csukuangfj/$n
    GIT_LFS_SKIP_SMUDGE=1 git clone "$url"
    dst=$(basename "$url")
    case $n in
      sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30)
        cp -v encoder.fp16.onnx "$dst"
        cp -v decoder.fp16.onnx "$dst"
        cp -v joiner.fp16.onnx "$dst"
        ;;
      sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30)
        cp -v encoder.int8.onnx "$dst"
        cp -v decoder.onnx "$dst"
        cp -v joiner.int8.onnx "$dst"
        ;;
    esac

    cp -v "$repo/data/lang_bpe_2000/tokens.txt" "$dst"
    cp -v "$repo/data/lang_bpe_2000/bpe.model" "$dst"
    cp -v "$repo/README.md" "$dst"
    mkdir -p "$dst/test_wavs"
    cp -v ./test_wavs/*.wav "$dst/test_wavs"
    cd "$dst"
    git lfs track "*.onnx" "*.wav" "bpe.model"
    ls -lh
    git status
    git add .
    git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true

    log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
    rm -rf .git
    rm -fv .gitattributes
    cd ..

    ls -lh "$dst"
    tar cjfv "$dst.tar.bz2" "$dst"
    ls -lh *.tar.bz2
    mv -v "$dst.tar.bz2" ../../../
  done

  rm -fv *.onnx *.weights
}
751
+
752
# Select which export(s) to run. Only one is enabled per CI run; the others
# are kept commented out for manual/selective use.
# run_yuekai_large
# run_yuekai_xl
# run_2023_9_2
run_2023_11_05_streaming
# run_2023_12_12_streaming
.github/scripts/multi_zh-hans/ASR/run_rknn.sh ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash

# Convert the 2023-11-05 streaming zipformer transducer to RKNN for a set of
# Rockchip NPUs and stage one tarball per target platform under /icefall/.
set -ex

python3 -m pip install kaldi-native-fbank soundfile librosa

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

cd egs/multi_zh-hans/ASR

# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12-chinese
function export_2023_11_05() {
  d=exp
  # Fix: a plain `mkdir` aborts under `set -e` when the directory is left
  # over from a previous run; -p makes the step idempotent.
  mkdir -p $d
  pushd $d
  curl -SL -O https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main/data/lang_bpe_2000/tokens.txt
  curl -SL -O https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main/exp/pretrained.pt
  mv pretrained.pt epoch-99.pt

  curl -SL -o 0.wav https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main/test_wavs/DEV_T0000000000.wav
  curl -SL -o 1.wav https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main/test_wavs/DEV_T0000000001.wav
  curl -SL -o 2.wav https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main/test_wavs/DEV_T0000000002.wav
  ls -lh
  popd

  # Export to ONNX first; RKNN conversion consumes these files below.
  ./zipformer/export-onnx-streaming.py \
    --dynamic-batch 0 \
    --enable-int8-quantization 0 \
    --tokens $d/tokens.txt \
    --use-averaged-model 0 \
    --epoch 99 \
    --avg 1 \
    --exp-dir $d \
    --use-ctc 0 \
    --use-transducer 1 \
    --chunk-size 32 \
    --left-context-frames 128 \
    --causal 1

  for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
    dst=sherpa-onnx-$platform-streaming-zipformer-multi-zh-hans-2023-12-12
    mkdir -p $dst

    ./zipformer/export_rknn_transducer_streaming.py \
      --in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
      --in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
      --in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
      --out-encoder $dst/encoder.rknn \
      --out-decoder $dst/decoder.rknn \
      --out-joiner $dst/joiner.rknn \
      --target-platform $platform

    cp $d/tokens.txt $dst
    # Fix: -p so a leftover test_wavs directory does not abort the loop.
    mkdir -p $dst/test_wavs
    cp $d/*.wav $dst/test_wavs

    tar cjvf $dst.tar.bz2 $dst
    ls -lh $dst.tar.bz2
    mv $dst.tar.bz2 /icefall/
    ls -lh $dst/
    echo "---"

    rm -rf $dst
  done
}

export_2023_11_05
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash

# This script assumes that test-clean and test-other are downloaded
# to egs/librispeech/ASR/download/LibriSpeech and generates manifest
# files in egs/librispeech/ASR/data/manifests

set -e

cd egs/librispeech/ASR

# Link the shared download cache into place unless one already exists.
if [ ! -e download ]; then
  ln -s ~/tmp/download .
fi

mkdir -p data/manifests
lhotse prepare librispeech -j 2 -p test-clean -p test-other ./download/LibriSpeech data/manifests
ls -lh data/manifests
.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash

# Decode the GigaSpeech dev/test sets with a pre-trained
# pruned_transducer_stateless2 model, but only on scheduled runs, manual
# dispatch, or PRs labeled "run-decode".
set -e

log() {
  # This function is from espnet
  local fname=${BASH_SOURCE[1]##*/}
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}

cd egs/gigaspeech/ASR

repo_url=https://huggingface.co/wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2

log "Downloading pre-trained model from $repo_url"
git lfs install
git clone $repo_url
repo=$(basename $repo_url)

echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
# Inside [[ ]] the operands are not word-split, so the old `x"..."` guard
# idiom is unnecessary; the comparison below is equivalent.
if [[ "${GITHUB_EVENT_NAME}" == "schedule" || "${GITHUB_EVENT_NAME}" == "workflow_dispatch" || "${GITHUB_EVENT_LABEL_NAME}" == "run-decode" ]]; then
  mkdir -p pruned_transducer_stateless2/exp
  ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt pruned_transducer_stateless2/exp/epoch-999.pt
  ln -s $PWD/$repo/data/lang_bpe_500 data/

  ls -lh data
  ls -lh data/lang_bpe_500
  ls -lh data/fbank
  ls -lh pruned_transducer_stateless2/exp

  # Fetch pre-computed dev/test fbank features and cuts.
  pushd data/fbank
  curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
  curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
  curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
  curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca

  ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
  ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
  popd


  log "Decoding dev and test"

  # use a small value for decoding with CPU
  max_duration=100

  # Test only greedy_search to reduce CI running time
  # for method in greedy_search fast_beam_search modified_beam_search; do
  for method in greedy_search; do
    log "Decoding with $method"

    ./pruned_transducer_stateless2/decode.py \
      --decoding-method $method \
      --epoch 999 \
      --avg 1 \
      --max-duration $max_duration \
      --exp-dir pruned_transducer_stateless2/exp
  done

  rm pruned_transducer_stateless2/exp/*.pt
fi
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -e
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ cd egs/gigaspeech/ASR
12
+
13
+ repo_url=https://huggingface.co/yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17
14
+
15
+ log "Downloading pre-trained model from $repo_url"
16
+ git lfs install
17
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
18
+ repo=$(basename $repo_url)
19
+
20
+ log "Display test files"
21
+ tree $repo/
22
+ ls -lh $repo/test_wavs/*.wav
23
+
24
+ pushd $repo/exp
25
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
26
+ git lfs pull --include "data/lang_bpe_500/tokens.txt"
27
+ git lfs pull --include "exp/jit_script.pt"
28
+ git lfs pull --include "exp/pretrained.pt"
29
+ rm epoch-30.pt
30
+ ln -s pretrained.pt epoch-30.pt
31
+ rm *.onnx
32
+ ls -lh
33
+ popd
34
+
35
+ log "----------------------------------------"
36
+ log "Export ONNX transducer models "
37
+ log "----------------------------------------"
38
+
39
+ ./zipformer/export-onnx.py \
40
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
41
+ --use-averaged-model 0 \
42
+ --epoch 30 \
43
+ --avg 1 \
44
+ --exp-dir $repo/exp
45
+
46
+ ls -lh $repo/exp
47
+
48
+ log "------------------------------------------------------------"
49
+ log "Test exported ONNX transducer models (Python code) "
50
+ log "------------------------------------------------------------"
51
+
52
+ log "test fp32"
53
+ ./zipformer/onnx_pretrained.py \
54
+ --encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.onnx \
55
+ --decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
56
+ --joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.onnx \
57
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
58
+ $repo/test_wavs/1089-134686-0001.wav \
59
+ $repo/test_wavs/1221-135766-0001.wav \
60
+ $repo/test_wavs/1221-135766-0002.wav
61
+
62
+ log "test int8"
63
+ ./zipformer/onnx_pretrained.py \
64
+ --encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.int8.onnx \
65
+ --decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
66
+ --joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.int8.onnx \
67
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
68
+ $repo/test_wavs/1089-134686-0001.wav \
69
+ $repo/test_wavs/1221-135766-0001.wav \
70
+ $repo/test_wavs/1221-135766-0002.wav
71
+
72
+ log "Upload models to huggingface"
73
+ git config --global user.name "k2-fsa"
74
+ git config --global user.email "xxx@gmail.com"
75
+
76
+ url=https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-gigaspeech-2023-12-12
77
+ GIT_LFS_SKIP_SMUDGE=1 git clone $url
78
+ dst=$(basename $url)
79
+ cp -v $repo/exp/*.onnx $dst
80
+ cp -v $repo/data/lang_bpe_500/tokens.txt $dst
81
+ cp -v $repo/data/lang_bpe_500/bpe.model $dst
82
+ mkdir -p $dst/test_wavs
83
+ cp -v $repo/test_wavs/*.wav $dst/test_wavs
84
+ cd $dst
85
+ git lfs track "*.onnx"
86
+ git add .
87
+ git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
88
+
89
+ log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
90
+ rm -rf .git
91
+ rm -fv .gitattributes
92
+ cd ..
93
+ tar cjfv $dst.tar.bz2 $dst
94
+ ls -lh
95
+ mv -v $dst.tar.bz2 ../../../
96
+
97
+ log "Export to torchscript model"
98
+ ./zipformer/export.py \
99
+ --exp-dir $repo/exp \
100
+ --use-averaged-model false \
101
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
102
+ --epoch 30 \
103
+ --avg 1 \
104
+ --jit 1
105
+
106
+ ls -lh $repo/exp/*.pt
107
+
108
+ log "Decode with models exported by torch.jit.script()"
109
+
110
+ ./zipformer/jit_pretrained.py \
111
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
112
+ --nn-model-filename $repo/exp/jit_script.pt \
113
+ $repo/test_wavs/1089-134686-0001.wav \
114
+ $repo/test_wavs/1221-135766-0001.wav \
115
+ $repo/test_wavs/1221-135766-0002.wav
116
+
117
+ for method in greedy_search modified_beam_search fast_beam_search; do
118
+ log "$method"
119
+
120
+ ./zipformer/pretrained.py \
121
+ --method $method \
122
+ --beam-size 4 \
123
+ --checkpoint $repo/exp/pretrained.pt \
124
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
125
+ $repo/test_wavs/1089-134686-0001.wav \
126
+ $repo/test_wavs/1221-135766-0001.wav \
127
+ $repo/test_wavs/1221-135766-0002.wav
128
+ done
129
+
130
+ echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
131
+ echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
132
+ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
133
+ mkdir -p zipformer/exp
134
+ ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt
135
+ mkdir -p data
136
+ ln -s $PWD/$repo/data/lang_bpe_500 data/
137
+
138
+ ls -lh data
139
+ ls -lh zipformer/exp
140
+
141
+ mkdir -p data/fbank
142
+ pushd data/fbank
143
+
144
+ curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
145
+ curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
146
+ curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
147
+ curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca
148
+
149
+ ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
150
+ ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
151
+
152
+ popd
153
+
154
+ log "Decoding test-clean and test-other"
155
+
156
+ # use a small value for decoding with CPU
157
+ max_duration=100
158
+
159
+ for method in greedy_search; do
160
+ log "Decoding with $method"
161
+
162
+ ./zipformer/decode.py \
163
+ --decoding-method $method \
164
+ --epoch 30 \
165
+ --avg 1 \
166
+ --use-averaged-model 0 \
167
+ --max-duration $max_duration \
168
+ --exp-dir zipformer/exp
169
+ done
170
+
171
+ rm zipformer/exp/*.pt
172
+ fi
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh ADDED
@@ -0,0 +1,191 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+ #
3
+ set -e
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ cd egs/librispeech/ASR
12
+
13
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
14
+
15
+ log "Downloading pre-trained model from $repo_url"
16
+ git lfs install
17
+ git clone $repo_url
18
+ repo=$(basename $repo_url)
19
+ abs_repo=$(realpath $repo)
20
+
21
+ log "Display test files"
22
+ tree $repo/
23
+ ls -lh $repo/test_wavs/*.wav
24
+
25
+ pushd $repo/exp
26
+ ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
27
+ ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
28
+ popd
29
+
30
+ log "Test exporting with torch.jit.trace()"
31
+
32
+ ./lstm_transducer_stateless2/export.py \
33
+ --exp-dir $repo/exp \
34
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
35
+ --epoch 99 \
36
+ --avg 1 \
37
+ --use-averaged-model 0 \
38
+ --jit-trace 1
39
+
40
+ log "Decode with models exported by torch.jit.trace()"
41
+
42
+ ./lstm_transducer_stateless2/jit_pretrained.py \
43
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
44
+ --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
45
+ --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
46
+ --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
47
+ $repo/test_wavs/1089-134686-0001.wav \
48
+ $repo/test_wavs/1221-135766-0001.wav \
49
+ $repo/test_wavs/1221-135766-0002.wav
50
+
51
+ for sym in 1 2 3; do
52
+ log "Greedy search with --max-sym-per-frame $sym"
53
+
54
+ ./lstm_transducer_stateless2/pretrained.py \
55
+ --method greedy_search \
56
+ --max-sym-per-frame $sym \
57
+ --checkpoint $repo/exp/pretrained.pt \
58
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
59
+ $repo/test_wavs/1089-134686-0001.wav \
60
+ $repo/test_wavs/1221-135766-0001.wav \
61
+ $repo/test_wavs/1221-135766-0002.wav
62
+ done
63
+
64
+ for method in modified_beam_search beam_search fast_beam_search; do
65
+ log "$method"
66
+
67
+ ./lstm_transducer_stateless2/pretrained.py \
68
+ --method $method \
69
+ --beam-size 4 \
70
+ --checkpoint $repo/exp/pretrained.pt \
71
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
72
+ $repo/test_wavs/1089-134686-0001.wav \
73
+ $repo/test_wavs/1221-135766-0001.wav \
74
+ $repo/test_wavs/1221-135766-0002.wav
75
+ done
76
+
77
+ echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
78
+ echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
79
+
80
+ if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"shallow-fusion" ]]; then
81
+ lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
82
+ log "Download pre-trained RNN-LM model from ${lm_repo_url}"
83
+ GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
84
+ lm_repo=$(basename $lm_repo_url)
85
+ pushd $lm_repo
86
+ git lfs pull --include "exp/pretrained.pt"
87
+ mv exp/pretrained.pt exp/epoch-88.pt
88
+ popd
89
+
90
+ mkdir -p lstm_transducer_stateless2/exp
91
+ ln -sf $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
92
+ ln -s $PWD/$repo/data/lang_bpe_500 data/
93
+
94
+ ls -lh data
95
+ ls -lh lstm_transducer_stateless2/exp
96
+
97
+ log "Decoding test-clean and test-other with RNN LM"
98
+
99
+ ./lstm_transducer_stateless2/decode.py \
100
+ --use-averaged-model 0 \
101
+ --epoch 999 \
102
+ --avg 1 \
103
+ --exp-dir lstm_transducer_stateless2/exp \
104
+ --max-duration 600 \
105
+ --decoding-method modified_beam_search_lm_shallow_fusion \
106
+ --beam 4 \
107
+ --use-shallow-fusion 1 \
108
+ --lm-type rnn \
109
+ --lm-exp-dir $lm_repo/exp \
110
+ --lm-epoch 88 \
111
+ --lm-avg 1 \
112
+ --lm-scale 0.3 \
113
+ --rnn-lm-num-layers 3 \
114
+ --rnn-lm-tie-weights 1
115
+ fi
116
+
117
+ if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then
118
+ bigram_repo_url=https://huggingface.co/marcoyang/librispeech_bigram
119
+ log "Download bi-gram LM from ${bigram_repo_url}"
120
+ GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
121
+ bigramlm_repo=$(basename $bigram_repo_url)
122
+ pushd $bigramlm_repo
123
+ git lfs pull --include "2gram.fst.txt"
124
+ cp 2gram.fst.txt $abs_repo/data/lang_bpe_500/.
125
+ popd
126
+
127
+ lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
128
+ log "Download pre-trained RNN-LM model from ${lm_repo_url}"
129
+ GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
130
+ lm_repo=$(basename $lm_repo_url)
131
+ pushd $lm_repo
132
+ git lfs pull --include "exp/pretrained.pt"
133
+ mv exp/pretrained.pt exp/epoch-88.pt
134
+ popd
135
+
136
+ mkdir -p lstm_transducer_stateless2/exp
137
+ ln -sf $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
138
+ ln -s $PWD/$repo/data/lang_bpe_500 data/
139
+
140
+ ls -lh data
141
+ ls -lh lstm_transducer_stateless2/exp
142
+
143
+ log "Decoding test-clean and test-other"
144
+
145
+ ./lstm_transducer_stateless2/decode.py \
146
+ --use-averaged-model 0 \
147
+ --epoch 999 \
148
+ --avg 1 \
149
+ --exp-dir lstm_transducer_stateless2/exp \
150
+ --max-duration 600 \
151
+ --decoding-method modified_beam_search_LODR \
152
+ --beam 4 \
153
+ --use-shallow-fusion 1 \
154
+ --lm-type rnn \
155
+ --lm-exp-dir $lm_repo/exp \
156
+ --lm-scale 0.4 \
157
+ --lm-epoch 88 \
158
+ --rnn-lm-avg 1 \
159
+ --rnn-lm-num-layers 3 \
160
+ --rnn-lm-tie-weights 1 \
161
+ --tokens-ngram 2 \
162
+ --ngram-lm-scale -0.16
163
+ fi
164
+
165
+ if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" ]]; then
166
+ mkdir -p lstm_transducer_stateless2/exp
167
+ ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
168
+ ln -s $PWD/$repo/data/lang_bpe_500 data/
169
+
170
+ ls -lh data
171
+ ls -lh lstm_transducer_stateless2/exp
172
+
173
+ log "Decoding test-clean and test-other"
174
+
175
+ # use a small value for decoding with CPU
176
+ max_duration=100
177
+
178
+ for method in greedy_search fast_beam_search; do
179
+ log "Decoding with $method"
180
+
181
+ ./lstm_transducer_stateless2/decode.py \
182
+ --decoding-method $method \
183
+ --epoch 999 \
184
+ --avg 1 \
185
+ --use-averaged-model 0 \
186
+ --max-duration $max_duration \
187
+ --exp-dir lstm_transducer_stateless2/exp
188
+ done
189
+
190
+ rm lstm_transducer_stateless2/exp/*.pt
191
+ fi
.github/scripts/run-multi-corpora-zipformer.sh ADDED
@@ -0,0 +1,135 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -e
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ cd egs/multi_zh-hans/ASR
12
+
13
+ log "==== Test icefall-asr-multi-zh-hans-zipformer-2023-9-2 ===="
14
+ repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2/
15
+
16
+ log "Downloading pre-trained model from $repo_url"
17
+ git lfs install
18
+ git clone $repo_url
19
+ repo=$(basename $repo_url)
20
+
21
+
22
+ log "Display test files"
23
+ tree $repo/
24
+ ls -lh $repo/test_wavs/*.wav
25
+
26
+ pushd $repo/exp
27
+ ln -s epoch-20.pt epoch-99.pt
28
+ popd
29
+
30
+ ls -lh $repo/exp/*.pt
31
+
32
+
33
+ ./zipformer/pretrained.py \
34
+ --checkpoint $repo/exp/epoch-99.pt \
35
+ --tokens $repo/data/lang_bpe_2000/tokens.txt \
36
+ --method greedy_search \
37
+ $repo/test_wavs/DEV_T0000000000.wav \
38
+ $repo/test_wavs/DEV_T0000000001.wav \
39
+ $repo/test_wavs/DEV_T0000000002.wav
40
+
41
+ for method in modified_beam_search fast_beam_search; do
42
+ log "$method"
43
+
44
+ ./zipformer/pretrained.py \
45
+ --method $method \
46
+ --beam-size 4 \
47
+ --checkpoint $repo/exp/epoch-99.pt \
48
+ --tokens $repo/data/lang_bpe_2000/tokens.txt \
49
+ $repo/test_wavs/DEV_T0000000000.wav \
50
+ $repo/test_wavs/DEV_T0000000001.wav \
51
+ $repo/test_wavs/DEV_T0000000002.wav
52
+ done
53
+
54
+ rm -rf $repo
55
+
56
+ log "==== Test icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24 ===="
57
+ repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24/
58
+
59
+ log "Downloading pre-trained model from $repo_url"
60
+ git lfs install
61
+ git clone $repo_url
62
+ repo=$(basename $repo_url)
63
+
64
+
65
+ log "Display test files"
66
+ tree $repo/
67
+ ls -lh $repo/test_wavs/*.wav
68
+
69
+ pushd $repo/exp
70
+ ln -s epoch-20.pt epoch-99.pt
71
+ popd
72
+
73
+ ls -lh $repo/exp/*.pt
74
+
75
+
76
+ ./zipformer/pretrained.py \
77
+ --checkpoint $repo/exp/epoch-99.pt \
78
+ --tokens $repo/data/lang_bpe_2000/tokens.txt \
79
+ --use-ctc 1 \
80
+ --method greedy_search \
81
+ $repo/test_wavs/DEV_T0000000000.wav \
82
+ $repo/test_wavs/DEV_T0000000001.wav \
83
+ $repo/test_wavs/DEV_T0000000002.wav
84
+
85
+ for method in modified_beam_search fast_beam_search; do
86
+ log "$method"
87
+
88
+ ./zipformer/pretrained.py \
89
+ --method $method \
90
+ --beam-size 4 \
91
+ --use-ctc 1 \
92
+ --checkpoint $repo/exp/epoch-99.pt \
93
+ --tokens $repo/data/lang_bpe_2000/tokens.txt \
94
+ $repo/test_wavs/DEV_T0000000000.wav \
95
+ $repo/test_wavs/DEV_T0000000001.wav \
96
+ $repo/test_wavs/DEV_T0000000002.wav
97
+ done
98
+
99
+ rm -rf $repo
100
+
101
+ cd ../../../egs/multi_zh_en/ASR
102
+ log "==== Test icefall-asr-zipformer-multi-zh-en-2023-11-22 ===="
103
+ repo_url=https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/
104
+
105
+ log "Downloading pre-trained model from $repo_url"
106
+ git lfs install
107
+ git clone $repo_url
108
+ repo=$(basename $repo_url)
109
+
110
+ log "Display test files"
111
+ tree $repo/
112
+ ls -lh $repo/test_wavs/*.wav
113
+
114
+ ./zipformer/pretrained.py \
115
+ --checkpoint $repo/exp/pretrained.pt \
116
+ --bpe-model $repo/data/lang_bbpe_2000/bbpe.model \
117
+ --method greedy_search \
118
+ $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav \
119
+ $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav \
120
+ $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav
121
+
122
+ for method in modified_beam_search fast_beam_search; do
123
+ log "$method"
124
+
125
+ ./zipformer/pretrained.py \
126
+ --method $method \
127
+ --beam-size 4 \
128
+ --checkpoint $repo/exp/pretrained.pt \
129
+ --bpe-model $repo/data/lang_bbpe_2000/bbpe.model \
130
+ $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav \
131
+ $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav \
132
+ $repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav
133
+ done
134
+
135
+ rm -rf $repo
.github/scripts/run-swbd-conformer-ctc-2023-08-26.sh ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -e
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ cd egs/swbd/ASR
12
+
13
+ repo_url=https://huggingface.co/zrjin/icefall-asr-swbd-conformer-ctc-2023-8-26
14
+
15
+ log "Downloading pre-trained model from $repo_url"
16
+ git lfs install
17
+ git clone $repo_url
18
+ repo=$(basename $repo_url)
19
+
20
+
21
+ log "Display test files"
22
+ tree $repo/
23
+ ls -lh $repo/test_wavs/*.wav
24
+
25
+ pushd $repo/exp
26
+ ln -s epoch-98.pt epoch-99.pt
27
+ popd
28
+
29
+ ls -lh $repo/exp/*.pt
30
+
31
+ for method in ctc-decoding 1best; do
32
+ log "$method"
33
+
34
+ ./conformer_ctc/pretrained.py \
35
+ --method $method \
36
+ --checkpoint $repo/exp/epoch-99.pt \
37
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
38
+ --words-file $repo/data/lang_bpe_500/words.txt \
39
+ --HLG $repo/data/lang_bpe_500/HLG.pt \
40
+ --G $repo/data/lm/G_4_gram.pt \
41
+ $repo/test_wavs/1089-134686-0001.wav \
42
+ $repo/test_wavs/1221-135766-0001.wav \
43
+ $repo/test_wavs/1221-135766-0002.wav
44
+ done
.github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -e
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ cd egs/wenetspeech/ASR
12
+
13
+ repo_url=https://huggingface.co/luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2
14
+
15
+ log "Downloading pre-trained model from $repo_url"
16
+ git lfs install
17
+ git clone $repo_url
18
+ repo=$(basename $repo_url)
19
+
20
+ log "Display test files"
21
+ tree $repo/
22
+ ls -lh $repo/test_wavs/*.wav
23
+
24
+ pushd $repo/exp
25
+ ln -s pretrained_epoch_10_avg_2.pt pretrained.pt
26
+ ln -s pretrained_epoch_10_avg_2.pt epoch-99.pt
27
+ popd
28
+
29
+ log "Test exporting to ONNX format"
30
+
31
+ ./pruned_transducer_stateless2/export-onnx.py \
32
+ --exp-dir $repo/exp \
33
+ --tokens $repo/data/lang_char/tokens.txt \
34
+ --epoch 99 \
35
+ --avg 1
36
+
37
+ log "Export to torchscript model"
38
+
39
+ ./pruned_transducer_stateless2/export.py \
40
+ --exp-dir $repo/exp \
41
+ --tokens $repo/data/lang_char/tokens.txt \
42
+ --epoch 99 \
43
+ --avg 1 \
44
+ --jit 1
45
+
46
+ ./pruned_transducer_stateless2/export.py \
47
+ --exp-dir $repo/exp \
48
+ --tokens $repo/data/lang_char/tokens.txt \
49
+ --epoch 99 \
50
+ --avg 1 \
51
+ --jit-trace 1
52
+
53
+ ls -lh $repo/exp/*.onnx
54
+ ls -lh $repo/exp/*.pt
55
+
56
+ log "Decode with ONNX models"
57
+
58
+ ./pruned_transducer_stateless2/onnx_check.py \
59
+ --jit-filename $repo/exp/cpu_jit.pt \
60
+ --onnx-encoder-filename $repo/exp/encoder-epoch-10-avg-2.onnx \
61
+ --onnx-decoder-filename $repo/exp/decoder-epoch-10-avg-2.onnx \
62
+ --onnx-joiner-filename $repo/exp/joiner-epoch-10-avg-2.onnx \
63
+ --onnx-joiner-encoder-proj-filename $repo/exp/joiner_encoder_proj-epoch-10-avg-2.onnx \
64
+ --onnx-joiner-decoder-proj-filename $repo/exp/joiner_decoder_proj-epoch-10-avg-2.onnx
65
+
66
+ ./pruned_transducer_stateless2/onnx_pretrained.py \
67
+ --tokens $repo/data/lang_char/tokens.txt \
68
+ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
69
+ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
70
+ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
71
+ $repo/test_wavs/DEV_T0000000000.wav \
72
+ $repo/test_wavs/DEV_T0000000001.wav \
73
+ $repo/test_wavs/DEV_T0000000002.wav
74
+
75
+ log "Decode with models exported by torch.jit.trace()"
76
+
77
+ ./pruned_transducer_stateless2/jit_pretrained.py \
78
+ --tokens $repo/data/lang_char/tokens.txt \
79
+ --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
80
+ --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
81
+ --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
82
+ $repo/test_wavs/DEV_T0000000000.wav \
83
+ $repo/test_wavs/DEV_T0000000001.wav \
84
+ $repo/test_wavs/DEV_T0000000002.wav
85
+
86
+ ./pruned_transducer_stateless2/jit_pretrained.py \
87
+ --tokens $repo/data/lang_char/tokens.txt \
88
+ --encoder-model-filename $repo/exp/encoder_jit_script.pt \
89
+ --decoder-model-filename $repo/exp/decoder_jit_script.pt \
90
+ --joiner-model-filename $repo/exp/joiner_jit_script.pt \
91
+ $repo/test_wavs/DEV_T0000000000.wav \
92
+ $repo/test_wavs/DEV_T0000000001.wav \
93
+ $repo/test_wavs/DEV_T0000000002.wav
94
+
95
+ for sym in 1 2 3; do
96
+ log "Greedy search with --max-sym-per-frame $sym"
97
+
98
+ ./pruned_transducer_stateless2/pretrained.py \
99
+ --checkpoint $repo/exp/epoch-99.pt \
100
+ --lang-dir $repo/data/lang_char \
101
+ --decoding-method greedy_search \
102
+ --max-sym-per-frame $sym \
103
+ $repo/test_wavs/DEV_T0000000000.wav \
104
+ $repo/test_wavs/DEV_T0000000001.wav \
105
+ $repo/test_wavs/DEV_T0000000002.wav
106
+ done
107
+
108
+ for method in modified_beam_search beam_search fast_beam_search; do
109
+ log "$method"
110
+
111
+ ./pruned_transducer_stateless2/pretrained.py \
112
+ --decoding-method $method \
113
+ --beam-size 4 \
114
+ --checkpoint $repo/exp/epoch-99.pt \
115
+ --lang-dir $repo/data/lang_char \
116
+ $repo/test_wavs/DEV_T0000000000.wav \
117
+ $repo/test_wavs/DEV_T0000000001.wav \
118
+ $repo/test_wavs/DEV_T0000000002.wav
119
+ done
.github/scripts/test-ncnn-export.sh ADDED
@@ -0,0 +1,230 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -e
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ pushd egs/librispeech/ASR
12
+
13
+ log "Install ncnn and pnnx"
14
+
15
+ # We are using a modified ncnn here. Will try to merge it to the official repo
16
+ # of ncnn
17
+ git clone https://github.com/csukuangfj/ncnn
18
+ pushd ncnn
19
+ git submodule init
20
+ git submodule update python/pybind11
21
+ python3 setup.py bdist_wheel
22
+ ls -lh dist/
23
+ pip install dist/*.whl
24
+ cd tools/pnnx
25
+ mkdir build
26
+ cd build
27
+
28
+ echo "which python3"
29
+
30
+ which python3
31
+ #/opt/hostedtoolcache/Python/3.8.16/x64/bin/python3
32
+
33
+ cmake -D Python3_EXECUTABLE=$(which python3) ..
34
+ make -j4 pnnx
35
+
36
+ ./src/pnnx || echo "pass"
37
+
38
+ popd
39
+
40
+ export PATH=$PWD/ncnn/tools/pnnx/build/src:$PATH
41
+
42
+ log "=========================================================================="
43
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
44
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
45
+ repo=$(basename $repo_url)
46
+
47
+ pushd $repo
48
+ git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
49
+
50
+ cd exp
51
+ ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
52
+ popd
53
+
54
+ log "Export via torch.jit.trace()"
55
+
56
+ ./conv_emformer_transducer_stateless2/export-for-ncnn.py \
57
+ --exp-dir $repo/exp \
58
+ --epoch 99 \
59
+ --avg 1 \
60
+ --use-averaged-model 0 \
61
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
62
+ --num-encoder-layers 12 \
63
+ --chunk-length 32 \
64
+ --cnn-module-kernel 31 \
65
+ --left-context-length 32 \
66
+ --right-context-length 8 \
67
+ --memory-size 32
68
+
69
+ pnnx $repo/exp/encoder_jit_trace-pnnx.pt
70
+ pnnx $repo/exp/decoder_jit_trace-pnnx.pt
71
+ pnnx $repo/exp/joiner_jit_trace-pnnx.pt
72
+
73
+ python3 ./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
74
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
75
+ --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
76
+ --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
77
+ --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
78
+ --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
79
+ --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
80
+ --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
81
+ $repo/test_wavs/1089-134686-0001.wav
82
+
83
+ rm -rf $repo
84
+ log "--------------------------------------------------------------------------"
85
+
86
+ log "=========================================================================="
87
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
88
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
89
+ repo=$(basename $repo_url)
90
+
91
+ pushd $repo
92
+ git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
93
+
94
+ cd exp
95
+ ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
96
+ popd
97
+
98
+ log "Export via torch.jit.trace()"
99
+
100
+ ./lstm_transducer_stateless2/export-for-ncnn.py \
101
+ --exp-dir $repo/exp \
102
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
103
+ --epoch 99 \
104
+ --avg 1 \
105
+ --use-averaged-model 0
106
+
107
+ pnnx $repo/exp/encoder_jit_trace-pnnx.pt
108
+ pnnx $repo/exp/decoder_jit_trace-pnnx.pt
109
+ pnnx $repo/exp/joiner_jit_trace-pnnx.pt
110
+
111
+ python3 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
112
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
113
+ --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
114
+ --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
115
+ --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
116
+ --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
117
+ --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
118
+ --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
119
+ $repo/test_wavs/1089-134686-0001.wav
120
+
121
+ python3 ./lstm_transducer_stateless2/ncnn-decode.py \
122
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
123
+ --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
124
+ --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
125
+ --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
126
+ --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
127
+ --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
128
+ --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
129
+ $repo/test_wavs/1089-134686-0001.wav
130
+
131
+ rm -rf $repo
132
+ log "--------------------------------------------------------------------------"
133
+
134
+ log "=========================================================================="
135
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
136
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
137
+ repo=$(basename $repo_url)
138
+
139
+ pushd $repo
140
+ git lfs pull --include "exp/pretrained.pt"
141
+
142
+ cd exp
143
+ ln -s pretrained.pt epoch-99.pt
144
+ popd
145
+
146
+ ./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
147
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
148
+ --exp-dir $repo/exp \
149
+ --use-averaged-model 0 \
150
+ --epoch 99 \
151
+ --avg 1 \
152
+ \
153
+ --decode-chunk-len 32 \
154
+ --num-encoder-layers "2,4,3,2,4" \
155
+ --feedforward-dims "1024,1024,2048,2048,1024" \
156
+ --nhead "8,8,8,8,8" \
157
+ --encoder-dims "384,384,384,384,384" \
158
+ --attention-dims "192,192,192,192,192" \
159
+ --encoder-unmasked-dims "256,256,256,256,256" \
160
+ --zipformer-downsampling-factors "1,2,4,8,2" \
161
+ --cnn-module-kernels "31,31,31,31,31" \
162
+ --decoder-dim 512 \
163
+ --joiner-dim 512
164
+
165
+ pnnx $repo/exp/encoder_jit_trace-pnnx.pt
166
+ pnnx $repo/exp/decoder_jit_trace-pnnx.pt
167
+ pnnx $repo/exp/joiner_jit_trace-pnnx.pt
168
+
169
+ python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
170
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
171
+ --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
172
+ --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
173
+ --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
174
+ --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
175
+ --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
176
+ --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
177
+ $repo/test_wavs/1089-134686-0001.wav
178
+
179
+ rm -rf $repo
180
+ log "--------------------------------------------------------------------------"
181
+
182
+ log "=========================================================================="
183
+ repo_url=https://huggingface.co/pfluo/k2fsa-zipformer-chinese-english-mixed
184
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
185
+ repo=$(basename $repo_url)
186
+
187
+ pushd $repo
188
+ git lfs pull --include "data/lang_char_bpe/L.pt"
189
+ git lfs pull --include "data/lang_char_bpe/L_disambig.pt"
190
+ git lfs pull --include "data/lang_char_bpe/Linv.pt"
191
+ git lfs pull --include "exp/pretrained.pt"
192
+
193
+ cd exp
194
+ ln -s pretrained.pt epoch-9999.pt
195
+ popd
196
+
197
+ ./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py \
198
+ --tokens $repo/data/lang_char_bpe/tokens.txt \
199
+ --exp-dir $repo/exp \
200
+ --use-averaged-model 0 \
201
+ --epoch 9999 \
202
+ --avg 1 \
203
+ --decode-chunk-len 32 \
204
+ --num-encoder-layers "2,4,3,2,4" \
205
+ --feedforward-dims "1024,1024,1536,1536,1024" \
206
+ --nhead "8,8,8,8,8" \
207
+ --encoder-dims "384,384,384,384,384" \
208
+ --attention-dims "192,192,192,192,192" \
209
+ --encoder-unmasked-dims "256,256,256,256,256" \
210
+ --zipformer-downsampling-factors "1,2,4,8,2" \
211
+ --cnn-module-kernels "31,31,31,31,31" \
212
+ --decoder-dim 512 \
213
+ --joiner-dim 512
214
+
215
+ pnnx $repo/exp/encoder_jit_trace-pnnx.pt
216
+ pnnx $repo/exp/decoder_jit_trace-pnnx.pt
217
+ pnnx $repo/exp/joiner_jit_trace-pnnx.pt
218
+
219
+ python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
220
+ --tokens $repo/data/lang_char_bpe/tokens.txt \
221
+ --encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
222
+ --encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
223
+ --decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
224
+ --decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
225
+ --joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
226
+ --joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
227
+ $repo/test_wavs/0.wav
228
+
229
+ rm -rf $repo
230
+ log "--------------------------------------------------------------------------"
.github/scripts/test-onnx-export.sh ADDED
@@ -0,0 +1,466 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -e
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ cd egs/librispeech/ASR
12
+
13
+ log "=========================================================================="
14
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
15
+ log "Downloading pre-trained model from $repo_url"
16
+ git lfs install
17
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
18
+ repo=$(basename $repo_url)
19
+
20
+ pushd $repo
21
+ git lfs pull --include "exp/pretrained.pt"
22
+ cd exp
23
+ ln -s pretrained.pt epoch-99.pt
24
+ popd
25
+
26
+ log "Export via torch.jit.script()"
27
+ ./zipformer/export.py \
28
+ --use-averaged-model 0 \
29
+ --exp-dir $repo/exp \
30
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
31
+ --epoch 99 \
32
+ --avg 1 \
33
+ --jit 1
34
+
35
+ log "Test export to ONNX format"
36
+ ./zipformer/export-onnx.py \
37
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
38
+ --use-averaged-model 0 \
39
+ --epoch 99 \
40
+ --avg 1 \
41
+ --exp-dir $repo/exp \
42
+ --num-encoder-layers "2,2,3,4,3,2" \
43
+ --downsampling-factor "1,2,4,8,4,2" \
44
+ --feedforward-dim "512,768,1024,1536,1024,768" \
45
+ --num-heads "4,4,4,8,4,4" \
46
+ --encoder-dim "192,256,384,512,384,256" \
47
+ --query-head-dim 32 \
48
+ --value-head-dim 12 \
49
+ --pos-head-dim 4 \
50
+ --pos-dim 48 \
51
+ --encoder-unmasked-dim "192,192,256,256,256,192" \
52
+ --cnn-module-kernel "31,31,15,15,15,31" \
53
+ --decoder-dim 512 \
54
+ --joiner-dim 512 \
55
+ --causal False \
56
+ --chunk-size "16,32,64,-1" \
57
+ --left-context-frames "64,128,256,-1"
58
+
59
+ ls -lh $repo/exp
60
+
61
+ log "Run onnx_check.py"
62
+
63
+ ./zipformer/onnx_check.py \
64
+ --jit-filename $repo/exp/jit_script.pt \
65
+ --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
66
+ --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
67
+ --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
68
+
69
+ log "Run onnx_pretrained.py"
70
+
71
+ ./zipformer/onnx_pretrained.py \
72
+ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
73
+ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
74
+ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
75
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
76
+ $repo/test_wavs/1089-134686-0001.wav
77
+
78
+ rm -rf $repo
79
+
80
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17
81
+ log "Downloading pre-trained model from $repo_url"
82
+ git lfs install
83
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
84
+ repo=$(basename $repo_url)
85
+
86
+ pushd $repo
87
+ git lfs pull --include "exp/pretrained.pt"
88
+
89
+ cd exp
90
+ ln -s pretrained.pt epoch-99.pt
91
+ popd
92
+
93
+ log "Test export streaming model to ONNX format"
94
+ ./zipformer/export-onnx-streaming.py \
95
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
96
+ --use-averaged-model 0 \
97
+ --epoch 99 \
98
+ --avg 1 \
99
+ --exp-dir $repo/exp \
100
+ --num-encoder-layers "2,2,3,4,3,2" \
101
+ --downsampling-factor "1,2,4,8,4,2" \
102
+ --feedforward-dim "512,768,1024,1536,1024,768" \
103
+ --num-heads "4,4,4,8,4,4" \
104
+ --encoder-dim "192,256,384,512,384,256" \
105
+ --query-head-dim 32 \
106
+ --value-head-dim 12 \
107
+ --pos-head-dim 4 \
108
+ --pos-dim 48 \
109
+ --encoder-unmasked-dim "192,192,256,256,256,192" \
110
+ --cnn-module-kernel "31,31,15,15,15,31" \
111
+ --decoder-dim 512 \
112
+ --joiner-dim 512 \
113
+ --causal True \
114
+ --chunk-size 16 \
115
+ --left-context-frames 64
116
+
117
+ ls -lh $repo/exp
118
+
119
+ log "Run onnx_pretrained-streaming.py"
120
+
121
+ ./zipformer/onnx_pretrained-streaming.py \
122
+ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1-chunk-16-left-64.onnx \
123
+ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1-chunk-16-left-64.onnx \
124
+ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1-chunk-16-left-64.onnx \
125
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
126
+ $repo/test_wavs/1089-134686-0001.wav
127
+
128
+ rm -rf $repo
129
+
130
+ log "--------------------------------------------------------------------------"
131
+
132
+ log "=========================================================================="
133
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
134
+ log "Downloading pre-trained model from $repo_url"
135
+ git lfs install
136
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
137
+ repo=$(basename $repo_url)
138
+
139
+ pushd $repo
140
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
141
+ git lfs pull --include "exp/pretrained.pt"
142
+ cd exp
143
+ ln -s pretrained.pt epoch-99.pt
144
+ popd
145
+
146
+ log "Export via torch.jit.trace()"
147
+
148
+ ./pruned_transducer_stateless7_streaming/jit_trace_export.py \
149
+ --bpe-model $repo/data/lang_bpe_500/bpe.model \
150
+ --use-averaged-model 0 \
151
+ --epoch 99 \
152
+ --avg 1 \
153
+ --decode-chunk-len 32 \
154
+ --exp-dir $repo/exp/
155
+
156
+ log "Test exporting to ONNX format"
157
+
158
+ ./pruned_transducer_stateless7_streaming/export-onnx.py \
159
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
160
+ --use-averaged-model 0 \
161
+ --epoch 99 \
162
+ --avg 1 \
163
+ --decode-chunk-len 32 \
164
+ --exp-dir $repo/exp/
165
+
166
+ ls -lh $repo/exp
167
+
168
+ log "Run onnx_check.py"
169
+
170
+ ./pruned_transducer_stateless7_streaming/onnx_check.py \
171
+ --jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
172
+ --jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
173
+ --jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
174
+ --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
175
+ --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
176
+ --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
177
+
178
+ log "Run onnx_pretrained.py"
179
+
180
+ ./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
181
+ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
182
+ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
183
+ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
184
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
185
+ $repo/test_wavs/1089-134686-0001.wav
186
+
187
+ rm -rf $repo
188
+ log "--------------------------------------------------------------------------"
189
+
190
+ log "=========================================================================="
191
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
192
+ log "Downloading pre-trained model from $repo_url"
193
+ git lfs install
194
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
195
+ repo=$(basename $repo_url)
196
+
197
+ pushd $repo
198
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
199
+ git lfs pull --include "exp/pretrained-iter-1224000-avg-14.pt"
200
+
201
+ cd exp
202
+ ln -s pretrained-iter-1224000-avg-14.pt epoch-9999.pt
203
+ popd
204
+
205
+ log "Export via torch.jit.script()"
206
+
207
+ ./pruned_transducer_stateless3/export.py \
208
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
209
+ --epoch 9999 \
210
+ --avg 1 \
211
+ --exp-dir $repo/exp/ \
212
+ --jit 1
213
+
214
+ log "Test exporting to ONNX format"
215
+
216
+ ./pruned_transducer_stateless3/export-onnx.py \
217
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
218
+ --epoch 9999 \
219
+ --avg 1 \
220
+ --exp-dir $repo/exp/
221
+
222
+ ls -lh $repo/exp
223
+
224
+ log "Run onnx_check.py"
225
+
226
+ ./pruned_transducer_stateless3/onnx_check.py \
227
+ --jit-filename $repo/exp/cpu_jit.pt \
228
+ --onnx-encoder-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
229
+ --onnx-decoder-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
230
+ --onnx-joiner-filename $repo/exp/joiner-epoch-9999-avg-1.onnx
231
+
232
+ log "Run onnx_pretrained.py"
233
+
234
+ ./pruned_transducer_stateless3/onnx_pretrained.py \
235
+ --encoder-model-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
236
+ --decoder-model-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
237
+ --joiner-model-filename $repo/exp/joiner-epoch-9999-avg-1.onnx \
238
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
239
+ $repo/test_wavs/1089-134686-0001.wav \
240
+ $repo/test_wavs/1221-135766-0001.wav \
241
+ $repo/test_wavs/1221-135766-0002.wav
242
+
243
+ rm -rf $repo
244
+ log "--------------------------------------------------------------------------"
245
+
246
+ log "=========================================================================="
247
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13
248
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
249
+ repo=$(basename $repo_url)
250
+
251
+ pushd $repo
252
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
253
+ git lfs pull --include "exp/pretrained-epoch-39-avg-7.pt"
254
+
255
+ cd exp
256
+ ln -s pretrained-epoch-39-avg-7.pt epoch-99.pt
257
+ popd
258
+
259
+ log "Export via torch.jit.script()"
260
+
261
+ ./pruned_transducer_stateless5/export.py \
262
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
263
+ --epoch 99 \
264
+ --avg 1 \
265
+ --use-averaged-model 0 \
266
+ --exp-dir $repo/exp \
267
+ --num-encoder-layers 18 \
268
+ --dim-feedforward 2048 \
269
+ --nhead 8 \
270
+ --encoder-dim 512 \
271
+ --decoder-dim 512 \
272
+ --joiner-dim 512 \
273
+ --jit 1
274
+
275
+ log "Test exporting to ONNX format"
276
+
277
+ ./pruned_transducer_stateless5/export-onnx.py \
278
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
279
+ --epoch 99 \
280
+ --avg 1 \
281
+ --use-averaged-model 0 \
282
+ --exp-dir $repo/exp \
283
+ --num-encoder-layers 18 \
284
+ --dim-feedforward 2048 \
285
+ --nhead 8 \
286
+ --encoder-dim 512 \
287
+ --decoder-dim 512 \
288
+ --joiner-dim 512
289
+
290
+ ls -lh $repo/exp
291
+
292
+ log "Run onnx_check.py"
293
+
294
+ ./pruned_transducer_stateless5/onnx_check.py \
295
+ --jit-filename $repo/exp/cpu_jit.pt \
296
+ --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
297
+ --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
298
+ --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
299
+
300
+ log "Run onnx_pretrained.py"
301
+
302
+ ./pruned_transducer_stateless5/onnx_pretrained.py \
303
+ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
304
+ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
305
+ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
306
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
307
+ $repo/test_wavs/1089-134686-0001.wav \
308
+ $repo/test_wavs/1221-135766-0001.wav \
309
+ $repo/test_wavs/1221-135766-0002.wav
310
+
311
+ rm -rf $repo
312
+ log "--------------------------------------------------------------------------"
313
+
314
+ log "=========================================================================="
315
+ repo_url=
316
+
317
+ rm -rf $repo
318
+ log "--------------------------------------------------------------------------"
319
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11
320
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
321
+ repo=$(basename $repo_url)
322
+
323
+ pushd $repo
324
+ git lfs pull --include "exp/pretrained.pt"
325
+
326
+ cd exp
327
+ ln -s pretrained.pt epoch-99.pt
328
+ popd
329
+
330
+ log "Export via torch.jit.script()"
331
+
332
+ ./pruned_transducer_stateless7/export.py \
333
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
334
+ --use-averaged-model 0 \
335
+ --epoch 99 \
336
+ --avg 1 \
337
+ --exp-dir $repo/exp \
338
+ --feedforward-dims "1024,1024,2048,2048,1024" \
339
+ --jit 1
340
+
341
+ log "Test exporting to ONNX format"
342
+
343
+ ./pruned_transducer_stateless7/export-onnx.py \
344
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
345
+ --use-averaged-model 0 \
346
+ --epoch 99 \
347
+ --avg 1 \
348
+ --exp-dir $repo/exp \
349
+ --feedforward-dims "1024,1024,2048,2048,1024"
350
+
351
+ ls -lh $repo/exp
352
+
353
+ log "Run onnx_check.py"
354
+
355
+ ./pruned_transducer_stateless7/onnx_check.py \
356
+ --jit-filename $repo/exp/cpu_jit.pt \
357
+ --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
358
+ --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
359
+ --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
360
+
361
+ log "Run onnx_pretrained.py"
362
+
363
+ ./pruned_transducer_stateless7/onnx_pretrained.py \
364
+ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
365
+ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
366
+ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
367
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
368
+ $repo/test_wavs/1089-134686-0001.wav \
369
+ $repo/test_wavs/1221-135766-0001.wav \
370
+ $repo/test_wavs/1221-135766-0002.wav
371
+
372
+ log "=========================================================================="
373
+ repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
374
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
375
+ repo=$(basename $repo_url)
376
+
377
+ pushd $repo
378
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
379
+ git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
380
+
381
+ cd exp
382
+ ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
383
+ popd
384
+
385
+ log "Test exporting to ONNX format"
386
+
387
+ ./conv_emformer_transducer_stateless2/export-onnx.py \
388
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
389
+ --use-averaged-model 0 \
390
+ --epoch 99 \
391
+ --avg 1 \
392
+ --exp-dir $repo/exp \
393
+ --num-encoder-layers 12 \
394
+ --chunk-length 32 \
395
+ --cnn-module-kernel 31 \
396
+ --left-context-length 32 \
397
+ --right-context-length 8 \
398
+ --memory-size 32
399
+
400
+ log "Run onnx_pretrained.py"
401
+
402
+ ./conv_emformer_transducer_stateless2/onnx_pretrained.py \
403
+ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
404
+ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
405
+ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
406
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
407
+ $repo/test_wavs/1221-135766-0001.wav
408
+
409
+ rm -rf $repo
410
+ log "--------------------------------------------------------------------------"
411
+
412
+ log "=========================================================================="
413
+ repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
414
+ GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
415
+ repo=$(basename $repo_url)
416
+
417
+ pushd $repo
418
+ git lfs pull --include "data/lang_bpe_500/bpe.model"
419
+ git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
420
+
421
+ cd exp
422
+ ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
423
+ popd
424
+
425
+ log "Export via torch.jit.trace()"
426
+
427
+ ./lstm_transducer_stateless2/export.py \
428
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
429
+ --use-averaged-model 0 \
430
+ --epoch 99 \
431
+ --avg 1 \
432
+ --exp-dir $repo/exp/ \
433
+ --jit-trace 1
434
+
435
+ log "Test exporting to ONNX format"
436
+
437
+ ./lstm_transducer_stateless2/export-onnx.py \
438
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
439
+ --use-averaged-model 0 \
440
+ --epoch 99 \
441
+ --avg 1 \
442
+ --exp-dir $repo/exp
443
+
444
+ ls -lh $repo/exp
445
+
446
+ log "Run onnx_check.py"
447
+
448
+ ./lstm_transducer_stateless2/onnx_check.py \
449
+ --jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
450
+ --jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
451
+ --jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
452
+ --onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
453
+ --onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
454
+ --onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
455
+
456
+ log "Run onnx_pretrained.py"
457
+
458
+ ./lstm_transducer_stateless2/onnx_pretrained.py \
459
+ --encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
460
+ --decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
461
+ --joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
462
+ --tokens $repo/data/lang_bpe_500/tokens.txt \
463
+ $repo/test_wavs/1221-135766-0001.wav
464
+
465
+ rm -rf $repo
466
+ log "--------------------------------------------------------------------------"
.github/scripts/wenetspeech/ASR/run_rknn.sh ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ python3 -m pip install kaldi-native-fbank soundfile librosa
6
+
7
+ log() {
8
+ # This function is from espnet
9
+ local fname=${BASH_SOURCE[1]##*/}
10
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
11
+ }
12
+
13
+ cd egs/wenetspeech/ASR
14
+
15
+ #https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-small-chinese
16
+ function export_2025_03_02() {
17
+ d=exp_2025_03_02
18
+ mkdir $d
19
+ pushd $d
20
+ curl -SL -O https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/data/lang_char/tokens.txt
21
+ curl -SL -O https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/exp/pretrained.pt
22
+ mv pretrained.pt epoch-99.pt
23
+
24
+ curl -SL -o 0.wav https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/test_wavs/DEV_T0000000000.wav
25
+ curl -SL -o 1.wav https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/test_wavs/DEV_T0000000001.wav
26
+ curl -SL -o 2.wav https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/test_wavs/DEV_T0000000002.wav
27
+ ls -lh
28
+ popd
29
+
30
+ ./zipformer/export-onnx-streaming.py \
31
+ --dynamic-batch 0 \
32
+ --enable-int8-quantization 0 \
33
+ --tokens $d/tokens.txt \
34
+ --use-averaged-model 0 \
35
+ --epoch 99 \
36
+ --avg 1 \
37
+ --exp-dir $d \
38
+ --use-ctc 0 \
39
+ --use-transducer 1 \
40
+ \
41
+ --num-encoder-layers 2,2,2,2,2,2 \
42
+ --feedforward-dim 512,768,768,768,768,768 \
43
+ --encoder-dim 192,256,256,256,256,256 \
44
+ --encoder-unmasked-dim 192,192,192,192,192,192 \
45
+ \
46
+ --chunk-size 32 \
47
+ --left-context-frames 128 \
48
+ --causal 1
49
+
50
+ for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
51
+ dst=sherpa-onnx-$platform-streaming-zipformer-small-zh-2025-03-02
52
+ mkdir -p $dst
53
+
54
+ ./zipformer/export_rknn_transducer_streaming.py \
55
+ --in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
56
+ --in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
57
+ --in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
58
+ --out-encoder $dst/encoder.rknn \
59
+ --out-decoder $dst/decoder.rknn \
60
+ --out-joiner $dst/joiner.rknn \
61
+ --target-platform $platform
62
+
63
+ cp $d/tokens.txt $dst
64
+ mkdir $dst/test_wavs
65
+ cp $d/*.wav $dst/test_wavs
66
+
67
+ tar cjvf $dst.tar.bz2 $dst
68
+ ls -lh $dst.tar.bz2
69
+ mv $dst.tar.bz2 /icefall/
70
+ ls -lh $dst/
71
+ echo "---"
72
+
73
+ rm -rf $dst
74
+ done
75
+ rm -rf $d
76
+ }
77
+
78
+ # https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-large-chinese
79
+ function export_2025_03_03() {
80
+ d=exp_2025_03_03
81
+ mkdir $d
82
+ pushd $d
83
+ curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt
84
+ curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/pretrained.pt
85
+ mv pretrained.pt epoch-99.pt
86
+
87
+ curl -SL -o 0.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000000.wav
88
+ curl -SL -o 1.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000001.wav
89
+ curl -SL -o 2.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000002.wav
90
+ ls -lh
91
+ popd
92
+
93
+ ./zipformer/export-onnx-streaming.py \
94
+ --dynamic-batch 0 \
95
+ --enable-int8-quantization 0 \
96
+ --tokens $d/tokens.txt \
97
+ --use-averaged-model 0 \
98
+ --epoch 99 \
99
+ --avg 1 \
100
+ --exp-dir $d \
101
+ --use-ctc 0 \
102
+ --use-transducer 1 \
103
+ \
104
+ --chunk-size 32 \
105
+ --left-context-frames 128 \
106
+ --causal 1
107
+
108
+ for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
109
+ dst=sherpa-onnx-$platform-streaming-zipformer-zh-2025-03-03
110
+ mkdir -p $dst
111
+
112
+ ./zipformer/export_rknn_transducer_streaming.py \
113
+ --in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
114
+ --in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
115
+ --in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
116
+ --out-encoder $dst/encoder.rknn \
117
+ --out-decoder $dst/decoder.rknn \
118
+ --out-joiner $dst/joiner.rknn \
119
+ --target-platform $platform
120
+
121
+ cp $d/tokens.txt $dst
122
+ mkdir $dst/test_wavs
123
+ cp $d/*.wav $dst/test_wavs
124
+
125
+ tar cjvf $dst.tar.bz2 $dst
126
+ ls -lh $dst.tar.bz2
127
+ mv $dst.tar.bz2 /icefall/
128
+ ls -lh $dst/
129
+ echo "---"
130
+ ls -lh $dst.tar.bz2
131
+
132
+ rm -rf $dst
133
+ done
134
+ rm -rf $d
135
+ }
136
+
137
+ function export_2023_06_15() {
138
+ d=exp_2023_06_15
139
+ mkdir $d
140
+ pushd $d
141
+ curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt
142
+ curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/pretrained.pt
143
+ mv pretrained.pt epoch-99.pt
144
+
145
+ curl -SL -o 0.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000000.wav
146
+ curl -SL -o 1.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000001.wav
147
+ curl -SL -o 2.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000002.wav
148
+ ls -lh
149
+ popd
150
+
151
+ ./zipformer/export-onnx-streaming.py \
152
+ --dynamic-batch 0 \
153
+ --enable-int8-quantization 0 \
154
+ --tokens $d/tokens.txt \
155
+ --use-averaged-model 0 \
156
+ --epoch 99 \
157
+ --avg 1 \
158
+ --exp-dir $d \
159
+ --use-ctc 0 \
160
+ --use-transducer 1 \
161
+ \
162
+ --chunk-size 32 \
163
+ --left-context-frames 128 \
164
+ --causal 1
165
+
166
+ for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
167
+ dst=sherpa-onnx-$platform-streaming-zipformer-zh-2023-06-15
168
+ mkdir -p $dst
169
+
170
+ ./zipformer/export_rknn_transducer_streaming.py \
171
+ --in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
172
+ --in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
173
+ --in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
174
+ --out-encoder $dst/encoder.rknn \
175
+ --out-decoder $dst/decoder.rknn \
176
+ --out-joiner $dst/joiner.rknn \
177
+ --target-platform $platform
178
+
179
+ cp $d/tokens.txt $dst
180
+ mkdir $dst/test_wavs
181
+ cp $d/*.wav $dst/test_wavs
182
+
183
+ tar cjvf $dst.tar.bz2 $dst
184
+ ls -lh $dst.tar.bz2
185
+ mv $dst.tar.bz2 /icefall/
186
+ ls -lh $dst/
187
+ echo "---"
188
+ ls -lh $dst.tar.bz2
189
+
190
+ rm -rf $dst
191
+ done
192
+ }
193
+
194
+ export_2025_03_02
195
+ export_2025_03_03
196
+ export_2023_06_15
.github/scripts/yesno/ASR/run.sh ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env bash
2
+
3
+ set -ex
4
+
5
+ log() {
6
+ # This function is from espnet
7
+ local fname=${BASH_SOURCE[1]##*/}
8
+ echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
9
+ }
10
+
11
+ cd egs/yesno/ASR
12
+
13
+ log "data preparation"
14
+ ./prepare.sh
15
+
16
+ log "training"
17
+ python3 ./tdnn/train.py
18
+
19
+ log "decoding"
20
+ python3 ./tdnn/decode.py
21
+
22
+ log "export to pretrained.pt"
23
+
24
+ python3 ./tdnn/export.py --epoch 14 --avg 2
25
+
26
+ python3 ./tdnn/pretrained.py \
27
+ --checkpoint ./tdnn/exp/pretrained.pt \
28
+ --HLG ./data/lang_phone/HLG.pt \
29
+ --words-file ./data/lang_phone/words.txt \
30
+ download/waves_yesno/0_0_0_1_0_0_0_1.wav \
31
+ download/waves_yesno/0_0_1_0_0_0_1_0.wav
32
+
33
+ log "Test exporting to torchscript"
34
+ python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
35
+
36
+ python3 ./tdnn/jit_pretrained.py \
37
+ --nn-model ./tdnn/exp/cpu_jit.pt \
38
+ --HLG ./data/lang_phone/HLG.pt \
39
+ --words-file ./data/lang_phone/words.txt \
40
+ download/waves_yesno/0_0_0_1_0_0_0_1.wav \
41
+ download/waves_yesno/0_0_1_0_0_0_1_0.wav
42
+
43
+ log "Test exporting to onnx"
44
+ python3 ./tdnn/export_onnx.py --epoch 14 --avg 2
45
+
46
+ log "Test float32 model"
47
+ python3 ./tdnn/onnx_pretrained.py \
48
+ --nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \
49
+ --HLG ./data/lang_phone/HLG.pt \
50
+ --words-file ./data/lang_phone/words.txt \
51
+ download/waves_yesno/0_0_0_1_0_0_0_1.wav \
52
+ download/waves_yesno/0_0_1_0_0_0_1_0.wav
53
+
54
+ log "Test int8 model"
55
+ python3 ./tdnn/onnx_pretrained.py \
56
+ --nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \
57
+ --HLG ./data/lang_phone/HLG.pt \
58
+ --words-file ./data/lang_phone/words.txt \
59
+ download/waves_yesno/0_0_0_1_0_0_0_1.wav \
60
+ download/waves_yesno/0_0_1_0_0_0_1_0.wav
61
+
62
+ log "Test decoding with H"
63
+ python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
64
+
65
+ python3 ./tdnn/jit_pretrained_decode_with_H.py \
66
+ --nn-model ./tdnn/exp/cpu_jit.pt \
67
+ --H ./data/lang_phone/H.fst \
68
+ --tokens ./data/lang_phone/tokens.txt \
69
+ ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \
70
+ ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \
71
+ ./download/waves_yesno/0_0_1_0_0_1_1_1.wav
72
+
73
+ log "Test decoding with HL"
74
+ python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
75
+
76
+ python3 ./tdnn/jit_pretrained_decode_with_HL.py \
77
+ --nn-model ./tdnn/exp/cpu_jit.pt \
78
+ --HL ./data/lang_phone/HL.fst \
79
+ --words ./data/lang_phone/words.txt \
80
+ ./download/waves_yesno/0_0_0_1_0_0_0_1.wav \
81
+ ./download/waves_yesno/0_0_1_0_0_0_1_0.wav \
82
+ ./download/waves_yesno/0_0_1_0_0_1_1_1.wav
83
+
84
+ log "Show generated files"
85
+ ls -lh tdnn/exp
86
+ ls -lh data/lang_phone
.github/workflows/aishell.yml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: aishell
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+
8
+ pull_request:
9
+ branches:
10
+ - master
11
+
12
+ workflow_dispatch:
13
+
14
+ concurrency:
15
+ group: aishell-${{ github.ref }}
16
+ cancel-in-progress: true
17
+
18
+ jobs:
19
+ generate_build_matrix:
20
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
21
+
22
+ # see https://github.com/pytorch/pytorch/pull/50633
23
+ runs-on: ubuntu-latest
24
+ outputs:
25
+ matrix: ${{ steps.set-matrix.outputs.matrix }}
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ with:
29
+ fetch-depth: 0
30
+ - name: Generating build matrix
31
+ id: set-matrix
32
+ run: |
33
+ # outputting for debugging purposes
34
+ python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
35
+ MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
36
+ echo "::set-output name=matrix::${MATRIX}"
37
+ aishell:
38
+ needs: generate_build_matrix
39
+ name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
40
+ runs-on: ubuntu-latest
41
+ strategy:
42
+ fail-fast: false
43
+ matrix:
44
+ ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
45
+
46
+ steps:
47
+ - uses: actions/checkout@v4
48
+ with:
49
+ fetch-depth: 0
50
+
51
+ - name: Free space
52
+ shell: bash
53
+ run: |
54
+ df -h
55
+ rm -rf /opt/hostedtoolcache
56
+ df -h
57
+ echo "pwd: $PWD"
58
+ echo "github.workspace ${{ github.workspace }}"
59
+
60
+ - name: Run aishell tests
61
+ uses: addnab/docker-run-action@v3
62
+ with:
63
+ image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
64
+ options: |
65
+ --volume ${{ github.workspace }}/:/icefall
66
+ shell: bash
67
+ run: |
68
+ export PYTHONPATH=/icefall:$PYTHONPATH
69
+ cd /icefall
70
+ git config --global --add safe.directory /icefall
71
+
72
+ .github/scripts/aishell/ASR/run.sh
.github/workflows/audioset.yml ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: audioset
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+
8
+ pull_request:
9
+ branches:
10
+ - master
11
+
12
+ workflow_dispatch:
13
+
14
+ concurrency:
15
+ group: audioset-${{ github.ref }}
16
+ cancel-in-progress: true
17
+
18
+ jobs:
19
+ generate_build_matrix:
20
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
21
+ # see https://github.com/pytorch/pytorch/pull/50633
22
+ runs-on: ubuntu-latest
23
+ outputs:
24
+ matrix: ${{ steps.set-matrix.outputs.matrix }}
25
+ steps:
26
+ - uses: actions/checkout@v4
27
+ with:
28
+ fetch-depth: 0
29
+ - name: Generating build matrix
30
+ id: set-matrix
31
+ run: |
32
+ # outputting for debugging purposes
33
+ python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
34
+ MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
35
+ echo "::set-output name=matrix::${MATRIX}"
36
+
37
+ audioset:
38
+ needs: generate_build_matrix
39
+ name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
40
+ runs-on: ubuntu-latest
41
+ strategy:
42
+ fail-fast: false
43
+ matrix:
44
+ ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
45
+
46
+ steps:
47
+ - uses: actions/checkout@v4
48
+ with:
49
+ fetch-depth: 0
50
+
51
+ - name: Free space
52
+ shell: bash
53
+ run: |
54
+ ls -lh
55
+ df -h
56
+ rm -rf /opt/hostedtoolcache
57
+ df -h
58
+ echo "pwd: $PWD"
59
+ echo "github.workspace ${{ github.workspace }}"
60
+
61
+ - name: Run tests
62
+ uses: addnab/docker-run-action@v3
63
+ with:
64
+ image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
65
+ options: |
66
+ --volume ${{ github.workspace }}/:/icefall
67
+ shell: bash
68
+ run: |
69
+ export PYTHONPATH=/icefall:$PYTHONPATH
70
+ cd /icefall
71
+ git config --global --add safe.directory /icefall
72
+
73
+ .github/scripts/audioset/AT/run.sh
74
+
75
+ - name: Show model files
76
+ shell: bash
77
+ run: |
78
+ sudo chown -R runner ./model-onnx
79
+ ls -lh ./model-onnx
80
+ chmod -x ./model-onnx/class_labels_indices.csv
81
+
82
+ echo "----------"
83
+ ls -lh ./model-onnx/*
84
+
85
+ - name: Upload model to huggingface
86
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
87
+ env:
88
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
89
+ uses: nick-fields/retry@v3
90
+ with:
91
+ max_attempts: 20
92
+ timeout_seconds: 200
93
+ shell: bash
94
+ command: |
95
+ git config --global user.email "csukuangfj@gmail.com"
96
+ git config --global user.name "Fangjun Kuang"
97
+
98
+ rm -rf huggingface
99
+ export GIT_LFS_SKIP_SMUDGE=1
100
+
101
+ git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 huggingface
102
+ cd huggingface
103
+ git fetch
104
+ git pull
105
+ git merge -m "merge remote" --ff origin main
106
+ cp ../model-onnx/*.onnx ./
107
+ cp ../model-onnx/*.csv ./
108
+ cp -a ../model-onnx/test_wavs ./
109
+ ls -lh
110
+ git add .
111
+ git status
112
+ git commit -m "update models"
113
+ git status
114
+
115
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 main || true
116
+ rm -rf huggingface
117
+
118
+ - name: Prepare for release
119
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
120
+ shell: bash
121
+ run: |
122
+ d=sherpa-onnx-zipformer-audio-tagging-2024-04-09
123
+ mv ./model-onnx $d
124
+ tar cjvf ${d}.tar.bz2 $d
125
+ ls -lh
126
+
127
+ - name: Release exported onnx models
128
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
129
+ uses: svenstaro/upload-release-action@v2
130
+ with:
131
+ file_glob: true
132
+ overwrite: true
133
+ file: sherpa-onnx-*.tar.bz2
134
+ repo_name: k2-fsa/sherpa-onnx
135
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
136
+ tag: audio-tagging-models
137
+
.github/workflows/baker_zh.yml ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: baker_zh
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+ - baker-matcha-2
8
+
9
+ pull_request:
10
+ branches:
11
+ - master
12
+
13
+ workflow_dispatch:
14
+
15
+ concurrency:
16
+ group: baker-zh-${{ github.ref }}
17
+ cancel-in-progress: true
18
+
19
+ jobs:
20
+ generate_build_matrix:
21
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
22
+ # see https://github.com/pytorch/pytorch/pull/50633
23
+ runs-on: ubuntu-latest
24
+ outputs:
25
+ matrix: ${{ steps.set-matrix.outputs.matrix }}
26
+ steps:
27
+ - uses: actions/checkout@v4
28
+ with:
29
+ fetch-depth: 0
30
+ - name: Generating build matrix
31
+ id: set-matrix
32
+ run: |
33
+ # outputting for debugging purposes
34
+ python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
35
+ MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
36
+ echo "::set-output name=matrix::${MATRIX}"
37
+
38
+ baker_zh:
39
+ needs: generate_build_matrix
40
+ name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
41
+ runs-on: ubuntu-latest
42
+ strategy:
43
+ fail-fast: false
44
+ matrix:
45
+ ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
46
+
47
+ steps:
48
+ - uses: actions/checkout@v4
49
+ with:
50
+ fetch-depth: 0
51
+
52
+ - name: Free space
53
+ shell: bash
54
+ run: |
55
+ ls -lh
56
+ df -h
57
+ rm -rf /opt/hostedtoolcache
58
+ df -h
59
+ echo "pwd: $PWD"
60
+ echo "github.workspace ${{ github.workspace }}"
61
+
62
+ - name: Run tests
63
+ uses: addnab/docker-run-action@v3
64
+ with:
65
+ image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
66
+ options: |
67
+ --volume ${{ github.workspace }}/:/icefall
68
+ shell: bash
69
+ run: |
70
+ export PYTHONPATH=/icefall:$PYTHONPATH
71
+ cd /icefall
72
+
73
+ pip install onnx==1.17.0
74
+
75
+ pip list
76
+
77
+ git config --global --add safe.directory /icefall
78
+
79
+ .github/scripts/baker_zh/TTS/run-matcha.sh
80
+
81
+ - name: display files
82
+ shell: bash
83
+ run: |
84
+ ls -lh
85
+
86
+ - uses: actions/upload-artifact@v4
87
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
88
+ with:
89
+ name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
90
+ path: ./*.wav
91
+
92
+ - uses: actions/upload-artifact@v4
93
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
94
+ with:
95
+ name: step-2
96
+ path: ./model-steps-2.onnx
97
+
98
+ - uses: actions/upload-artifact@v4
99
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
100
+ with:
101
+ name: step-3
102
+ path: ./model-steps-3.onnx
103
+
104
+ - uses: actions/upload-artifact@v4
105
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
106
+ with:
107
+ name: step-4
108
+ path: ./model-steps-4.onnx
109
+
110
+ - uses: actions/upload-artifact@v4
111
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
112
+ with:
113
+ name: step-5
114
+ path: ./model-steps-5.onnx
115
+
116
+ - uses: actions/upload-artifact@v4
117
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
118
+ with:
119
+ name: step-6
120
+ path: ./model-steps-6.onnx
121
+
122
+ - name: Upload models to huggingface
123
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
124
+ shell: bash
125
+ env:
126
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
127
+ run: |
128
+ d=matcha-icefall-zh-baker
129
+
130
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/$d hf
131
+ cp -av $d/* hf/
132
+
133
+ pushd hf
134
+ git add .
135
+
136
+ git config --global user.name "csukuangfj"
137
+ git config --global user.email "csukuangfj@gmail.com"
138
+ git config --global lfs.allowincompletepush true
139
+
140
+ git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
141
+ popd
142
+
143
+ - name: Release exported onnx models
144
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
145
+ uses: svenstaro/upload-release-action@v2
146
+ with:
147
+ file_glob: true
148
+ overwrite: true
149
+ file: matcha-icefall-*.tar.bz2
150
+ repo_name: k2-fsa/sherpa-onnx
151
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
152
+ tag: tts-models
.github/workflows/build-cpu-docker.yml ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: build-cpu-docker
2
+ on:
3
+ workflow_dispatch:
4
+
5
+ concurrency:
6
+ group: build-cpu-docker-${{ github.ref }}
7
+ cancel-in-progress: true
8
+
9
+ jobs:
10
+ generate_build_matrix:
11
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
12
+ # see https://github.com/pytorch/pytorch/pull/50633
13
+ runs-on: ubuntu-latest
14
+ outputs:
15
+ matrix: ${{ steps.set-matrix.outputs.matrix }}
16
+ steps:
17
+ - uses: actions/checkout@v4
18
+ with:
19
+ fetch-depth: 0
20
+ - name: Generating build matrix
21
+ id: set-matrix
22
+ run: |
23
+ # outputting for debugging purposes
24
+ python ./.github/scripts/docker/generate_build_matrix.py
25
+ MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
26
+ echo "::set-output name=matrix::${MATRIX}"
27
+ build-cpu-docker:
28
+ needs: generate_build_matrix
29
+ name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
30
+ runs-on: ubuntu-latest
31
+ strategy:
32
+ fail-fast: false
33
+ matrix:
34
+ ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
35
+
36
+ steps:
37
+ # refer to https://github.com/actions/checkout
38
+ - uses: actions/checkout@v4
39
+ with:
40
+ fetch-depth: 0
41
+
42
+ - name: Free space
43
+ shell: bash
44
+ run: |
45
+ df -h
46
+ rm -rf /opt/hostedtoolcache
47
+ df -h
48
+
49
+ - name: 'Login to GitHub Container Registry'
50
+ uses: docker/login-action@v2
51
+ with:
52
+ registry: ghcr.io
53
+ username: ${{ github.actor }}
54
+ password: ${{ secrets.GITHUB_TOKEN }}
55
+
56
+ - name: Build docker Image
57
+ shell: bash
58
+ run: |
59
+ cd .github/scripts/docker
60
+ torch_version=${{ matrix.torch-version }}
61
+ torchaudio_version=${{ matrix.torchaudio-version }}
62
+
63
+ echo "torch_version: $torch_version"
64
+ echo "torchaudio_version: $torchaudio_version"
65
+
66
+ version=${{ matrix.version }}
67
+
68
+ tag=ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version
69
+ echo "tag: $tag"
70
+
71
+ docker build \
72
+ -t $tag \
73
+ --build-arg PYTHON_VERSION=${{ matrix.python-version }} \
74
+ --build-arg TORCH_VERSION=$torch_version \
75
+ --build-arg TORCHAUDIO_VERSION=$torchaudio_version \
76
+ --build-arg K2_VERSION=${{ matrix.k2-version }} \
77
+ --build-arg KALDIFEAT_VERSION=${{ matrix.kaldifeat-version }} \
78
+ .
79
+
80
+ docker image ls
81
+ docker push $tag
.github/workflows/build-doc.yml ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2022 Xiaomi Corp. (author: Fangjun Kuang)
2
+
3
+ # See ../../LICENSE for clarification regarding multiple authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ # refer to https://github.com/actions/starter-workflows/pull/47/files
18
+
19
+ # You can access it at https://k2-fsa.github.io/icefall/
20
+ name: Generate doc
21
+ on:
22
+ push:
23
+ branches:
24
+ - master
25
+ - doc
26
+ pull_request:
27
+ types: [labeled]
28
+
29
+ workflow_dispatch:
30
+
31
+ concurrency:
32
+ group: build_doc-${{ github.ref }}
33
+ cancel-in-progress: true
34
+
35
+ jobs:
36
+ build-doc:
37
+ # if: github.event.label.name == 'doc' || github.event_name == 'push'
38
+ runs-on: ${{ matrix.os }}
39
+ strategy:
40
+ fail-fast: false
41
+ matrix:
42
+ os: [ubuntu-latest]
43
+ python-version: ["3.8"]
44
+ steps:
45
+ # refer to https://github.com/actions/checkout
46
+ - uses: actions/checkout@v4
47
+ with:
48
+ fetch-depth: 0
49
+
50
+ - name: Setup Python ${{ matrix.python-version }}
51
+ uses: actions/setup-python@v2
52
+ with:
53
+ python-version: ${{ matrix.python-version }}
54
+
55
+ - name: Display Python version
56
+ run: python -c "import sys; print(sys.version)"
57
+
58
+ - name: Build doc
59
+ shell: bash
60
+ run: |
61
+ .github/scripts/generate-piper-phonemize-page.py
62
+ cd docs
63
+ python3 -m pip install -r ./requirements.txt
64
+ make html
65
+ touch build/html/.nojekyll
66
+
67
+ cp -v ../piper_phonemize.html ./build/html/
68
+
69
+ - name: Deploy
70
+ uses: peaceiris/actions-gh-pages@v3
71
+ with:
72
+ github_token: ${{ secrets.GITHUB_TOKEN }}
73
+ publish_dir: ./docs/build/html
74
+ publish_branch: gh-pages
.github/workflows/build-docker-image.yml ADDED
@@ -0,0 +1,84 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # see also
2
+ # https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
3
+ name: Build docker image
4
+ on:
5
+ workflow_dispatch:
6
+
7
+ concurrency:
8
+ group: build_docker-${{ github.ref }}
9
+ cancel-in-progress: true
10
+
11
+ jobs:
12
+ build-docker-image:
13
+ name: ${{ matrix.image }}
14
+ runs-on: ${{ matrix.os }}
15
+ strategy:
16
+ fail-fast: false
17
+ matrix:
18
+ os: [ubuntu-latest]
19
+ image: ["torch2.4.1-cuda12.4", "torch2.4.1-cuda12.1", "torch2.4.1-cuda11.8", "torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
20
+
21
+ steps:
22
+ # refer to https://github.com/actions/checkout
23
+ - uses: actions/checkout@v2
24
+ with:
25
+ fetch-depth: 0
26
+
27
+ - name: Rename
28
+ shell: bash
29
+ run: |
30
+ image=${{ matrix.image }}
31
+ mv -v ./docker/$image.dockerfile ./Dockerfile
32
+
33
+ - name: Free space
34
+ shell: bash
35
+ run: |
36
+ df -h
37
+ rm -rf /opt/hostedtoolcache
38
+ df -h
39
+
40
+ - name: Free more space
41
+ shell: bash
42
+ run: |
43
+ # https://github.com/orgs/community/discussions/25678
44
+ cd /opt
45
+ find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
46
+
47
+ sudo rm -rf /usr/share/dotnet
48
+ sudo rm -rf "/usr/local/share/boost"
49
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
50
+
51
+ - name: Free Disk Space (Ubuntu)
52
+ uses: jlumbroso/free-disk-space@main
53
+ with:
54
+ # this might remove tools that are actually needed,
55
+ # if set to "true" but frees about 6 GB
56
+ tool-cache: false
57
+
58
+ # all of these default to true, but feel free to set to
59
+ # "false" if necessary for your workflow
60
+ android: true
61
+ dotnet: true
62
+ haskell: true
63
+ large-packages: true
64
+ docker-images: false
65
+ swap-storage: true
66
+
67
+ - name: Check space
68
+ shell: bash
69
+ run: |
70
+ df -h
71
+
72
+ - name: Log in to Docker Hub
73
+ uses: docker/login-action@v2
74
+ with:
75
+ username: ${{ secrets.DOCKER_USERNAME }}
76
+ password: ${{ secrets.DOCKER_PASSWORD }}
77
+
78
+ - name: Build and push
79
+ uses: docker/build-push-action@v4
80
+ with:
81
+ context: .
82
+ file: ./Dockerfile
83
+ push: true
84
+ tags: k2fsa/icefall:${{ matrix.image }}
.github/workflows/ksponspeech.yml ADDED
@@ -0,0 +1,167 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: ksponspeech
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - ksponspeech
7
+
8
+ workflow_dispatch:
9
+
10
+ jobs:
11
+ ksponspeech:
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ matrix:
15
+ os: [ubuntu-latest]
16
+ python-version: [3.8]
17
+ fail-fast: false
18
+
19
+ steps:
20
+ - uses: actions/checkout@v4
21
+ with:
22
+ fetch-depth: 0
23
+
24
+ - name: Setup Python ${{ matrix.python-version }}
25
+ uses: actions/setup-python@v5
26
+ with:
27
+ python-version: ${{ matrix.python-version }}
28
+ cache: 'pip'
29
+ cache-dependency-path: '**/requirements-ci.txt'
30
+
31
+ - name: Install Python dependencies
32
+ run: |
33
+ grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
34
+ pip uninstall -y protobuf
35
+ pip install --no-binary protobuf protobuf==3.20.*
36
+
37
+ - name: Cache kaldifeat
38
+ id: my-cache
39
+ uses: actions/cache@v2
40
+ with:
41
+ path: |
42
+ ~/tmp/kaldifeat
43
+ key: cache-tmp-${{ matrix.python-version }}-2023-05-22
44
+
45
+ - name: Install kaldifeat
46
+ if: steps.my-cache.outputs.cache-hit != 'true'
47
+ shell: bash
48
+ run: |
49
+ .github/scripts/install-kaldifeat.sh
50
+
51
+ - name: Test
52
+ shell: bash
53
+ run: |
54
+ export PYTHONPATH=$PWD:$PYTHONPATH
55
+ export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
56
+ export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
57
+
58
+ .github/scripts/ksponspeech/ASR/run.sh
59
+
60
+ - name: Show model files (2024-06-24)
61
+ shell: bash
62
+ run: |
63
+ src=/tmp/model-2024-06-24
64
+ ls -lh $src
65
+
66
+ - name: Show model files (2024-06-16)
67
+ shell: bash
68
+ run: |
69
+ src=/tmp/model-2024-06-16
70
+ ls -lh $src
71
+
72
+ - name: Upload model to huggingface (2024-06-24)
73
+ env:
74
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
75
+ uses: nick-fields/retry@v3
76
+ with:
77
+ max_attempts: 20
78
+ timeout_seconds: 200
79
+ shell: bash
80
+ command: |
81
+ src=/tmp/model-2024-06-24
82
+ git config --global user.email "csukuangfj@gmail.com"
83
+ git config --global user.name "Fangjun Kuang"
84
+
85
+ rm -rf hf
86
+ export GIT_LFS_SKIP_SMUDGE=1
87
+ export GIT_CLONE_PROTECTION_ACTIVE=false
88
+
89
+ git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24 hf
90
+ cd hf
91
+ git fetch
92
+ git pull
93
+ git merge -m "merge remote" --ff origin main
94
+ cp -av $src/* ./
95
+ ls -lh
96
+ git lfs track "bpe.model"
97
+ git lfs track "*.onnx"
98
+ git add .
99
+ git status
100
+ git commit -m "update models"
101
+ git status
102
+
103
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24 main || true
104
+ rm -rf hf
105
+
106
+ - name: Upload model to huggingface (2024-06-16)
107
+ env:
108
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
109
+ uses: nick-fields/retry@v3
110
+ with:
111
+ max_attempts: 20
112
+ timeout_seconds: 200
113
+ shell: bash
114
+ command: |
115
+ src=/tmp/model-2024-06-16
116
+ git config --global user.email "csukuangfj@gmail.com"
117
+ git config --global user.name "Fangjun Kuang"
118
+
119
+ rm -rf hf
120
+ export GIT_LFS_SKIP_SMUDGE=1
121
+ export GIT_CLONE_PROTECTION_ACTIVE=false
122
+
123
+ git clone https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 hf
124
+ cd hf
125
+ git fetch
126
+ git pull
127
+ git merge -m "merge remote" --ff origin main
128
+ cp -v $src/* ./
129
+ ls -lh
130
+ git lfs track "bpe.model"
131
+ git lfs track "*.onnx"
132
+ cp -av test_wavs $src/
133
+ git add .
134
+ git status
135
+ git commit -m "update models"
136
+ git status
137
+
138
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 main || true
139
+ rm -rf hf
140
+
141
+ - name: Prepare for release (2024-06-16)
142
+ shell: bash
143
+ run: |
144
+ src=/tmp/model-2024-06-16
145
+ d=sherpa-onnx-streaming-zipformer-korean-2024-06-16
146
+ mv $src ./$d
147
+ tar cjvf ${d}.tar.bz2 $d
148
+ ls -lh
149
+
150
+ - name: Prepare for release (2024-06-24)
151
+ shell: bash
152
+ run: |
153
+ src=/tmp/model-2024-06-24
154
+ d=sherpa-onnx-zipformer-korean-2024-06-24
155
+ mv $src ./$d
156
+ tar cjvf ${d}.tar.bz2 $d
157
+ ls -lh
158
+
159
+ - name: Release exported onnx models
160
+ uses: svenstaro/upload-release-action@v2
161
+ with:
162
+ file_glob: true
163
+ overwrite: true
164
+ file: sherpa-onnx-*.tar.bz2
165
+ repo_name: k2-fsa/sherpa-onnx
166
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
167
+ tag: asr-models
.github/workflows/librispeech.yml ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: librispeech
2
+ on:
3
+ push:
4
+ branches:
5
+ - master
6
+
7
+ pull_request:
8
+ branches:
9
+ - master
10
+
11
+ workflow_dispatch:
12
+
13
+ concurrency:
14
+ group: librispeech-${{ github.ref }}
15
+ cancel-in-progress: true
16
+
17
+ jobs:
18
+ generate_build_matrix:
19
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
20
+ # see https://github.com/pytorch/pytorch/pull/50633
21
+ runs-on: ubuntu-latest
22
+ outputs:
23
+ matrix: ${{ steps.set-matrix.outputs.matrix }}
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ with:
27
+ fetch-depth: 0
28
+ - name: Generating build matrix
29
+ id: set-matrix
30
+ run: |
31
+ # outputting for debugging purposes
32
+ python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
33
+ # MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
34
+ MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.6.0")
35
+ echo "::set-output name=matrix::${MATRIX}"
36
+ librispeech:
37
+ needs: generate_build_matrix
38
+ name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
39
+ runs-on: ubuntu-latest
40
+ strategy:
41
+ fail-fast: false
42
+ matrix:
43
+ ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
44
+
45
+ steps:
46
+ # refer to https://github.com/actions/checkout
47
+ - uses: actions/checkout@v4
48
+ with:
49
+ fetch-depth: 0
50
+
51
+ - name: Free space
52
+ shell: bash
53
+ run: |
54
+ df -h
55
+ rm -rf /opt/hostedtoolcache
56
+ df -h
57
+ echo "pwd: $PWD"
58
+ echo "github.workspace ${{ github.workspace }}"
59
+
60
+ - name: Test zipformer/train.py with LibriSpeech
61
+ uses: addnab/docker-run-action@v3
62
+ with:
63
+ image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
64
+ options: |
65
+ --volume ${{ github.workspace }}/:/icefall
66
+ shell: bash
67
+ run: |
68
+ export PYTHONPATH=/icefall:$PYTHONPATH
69
+ cd /icefall
70
+ git config --global --add safe.directory /icefall
71
+
72
+ .github/scripts/librispeech/ASR/run.sh
.github/workflows/ljspeech.yml ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: ljspeech
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+
8
+ pull_request:
9
+ branches:
10
+ - master
11
+
12
+ workflow_dispatch:
13
+
14
+ concurrency:
15
+ group: ljspeech-${{ github.ref }}
16
+ cancel-in-progress: true
17
+
18
+ jobs:
19
+ generate_build_matrix:
20
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
21
+ # see https://github.com/pytorch/pytorch/pull/50633
22
+ runs-on: ubuntu-latest
23
+ outputs:
24
+ matrix: ${{ steps.set-matrix.outputs.matrix }}
25
+ steps:
26
+ - uses: actions/checkout@v4
27
+ with:
28
+ fetch-depth: 0
29
+ - name: Generating build matrix
30
+ id: set-matrix
31
+ run: |
32
+ # outputting for debugging purposes
33
+ python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
34
+ MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
35
+ echo "::set-output name=matrix::${MATRIX}"
36
+
37
+ ljspeech:
38
+ needs: generate_build_matrix
39
+ name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
40
+ runs-on: ubuntu-latest
41
+ strategy:
42
+ fail-fast: false
43
+ matrix:
44
+ ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
45
+
46
+ steps:
47
+ - uses: actions/checkout@v4
48
+ with:
49
+ fetch-depth: 0
50
+
51
+ - name: Free space
52
+ shell: bash
53
+ run: |
54
+ ls -lh
55
+ df -h
56
+ rm -rf /opt/hostedtoolcache
57
+ df -h
58
+ echo "pwd: $PWD"
59
+ echo "github.workspace ${{ github.workspace }}"
60
+
61
+ - name: Run tests
62
+ uses: addnab/docker-run-action@v3
63
+ with:
64
+ image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
65
+ options: |
66
+ --volume ${{ github.workspace }}/:/icefall
67
+ shell: bash
68
+ run: |
69
+ export PYTHONPATH=/icefall:$PYTHONPATH
70
+ cd /icefall
71
+ git config --global --add safe.directory /icefall
72
+
73
+ pip install "matplotlib<=3.9.4"
74
+
75
+ pip list
76
+
77
+ .github/scripts/ljspeech/TTS/run-matcha.sh
78
+ .github/scripts/ljspeech/TTS/run.sh
79
+
80
+ - name: display files
81
+ shell: bash
82
+ run: |
83
+ ls -lh
84
+
85
+ - uses: actions/upload-artifact@v4
86
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
87
+ with:
88
+ name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
89
+ path: ./*.wav
90
+
91
+ - name: Release exported onnx models
92
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
93
+ uses: svenstaro/upload-release-action@v2
94
+ with:
95
+ file_glob: true
96
+ overwrite: true
97
+ file: vits-icefall-*.tar.bz2
98
+ repo_name: k2-fsa/sherpa-onnx
99
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
100
+ tag: tts-models
101
+
102
+ - uses: actions/upload-artifact@v4
103
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
104
+ with:
105
+ name: step-2
106
+ path: ./model-steps-2.onnx
107
+
108
+ - uses: actions/upload-artifact@v4
109
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
110
+ with:
111
+ name: step-3
112
+ path: ./model-steps-3.onnx
113
+
114
+ - uses: actions/upload-artifact@v4
115
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
116
+ with:
117
+ name: step-4
118
+ path: ./model-steps-4.onnx
119
+
120
+ - uses: actions/upload-artifact@v4
121
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
122
+ with:
123
+ name: step-5
124
+ path: ./model-steps-5.onnx
125
+
126
+ - uses: actions/upload-artifact@v4
127
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
128
+ with:
129
+ name: step-6
130
+ path: ./model-steps-6.onnx
131
+
132
+ - name: Upload models to huggingface
133
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
134
+ shell: bash
135
+ env:
136
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
137
+ run: |
138
+ d=matcha-icefall-en_US-ljspeech
139
+
140
+ GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/$d hf
141
+ cp -av $d/* hf/
142
+
143
+ pushd hf
144
+
145
+ git lfs track "cmn_dict"
146
+ git lfs track "ru_dict"
147
+
148
+ git add .
149
+
150
+ git config --global user.name "csukuangfj"
151
+ git config --global user.email "csukuangfj@gmail.com"
152
+ git config --global lfs.allowincompletepush true
153
+
154
+ git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
155
+ popd
156
+
157
+ - name: Release exported onnx models
158
+ if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
159
+ uses: svenstaro/upload-release-action@v2
160
+ with:
161
+ file_glob: true
162
+ overwrite: true
163
+ file: matcha-icefall-*.tar.bz2
164
+ repo_name: k2-fsa/sherpa-onnx
165
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
166
+ tag: tts-models
.github/workflows/multi-zh-hans.yml ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: multi-zh-hans
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+
8
+ workflow_dispatch:
9
+
10
+ concurrency:
11
+ group: multi-zh-hans-${{ github.ref }}
12
+ cancel-in-progress: true
13
+
14
+ permissions:
15
+ contents: write
16
+
17
+ jobs:
18
+ generate_build_matrix:
19
+ if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
20
+ # see https://github.com/pytorch/pytorch/pull/50633
21
+ runs-on: ubuntu-latest
22
+ outputs:
23
+ matrix: ${{ steps.set-matrix.outputs.matrix }}
24
+ steps:
25
+ - uses: actions/checkout@v4
26
+ with:
27
+ fetch-depth: 0
28
+ - name: Generating build matrix
29
+ id: set-matrix
30
+ run: |
31
+ # outputting for debugging purposes
32
+ python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11"
33
+ MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11")
34
+ echo "::set-output name=matrix::${MATRIX}"
35
+ multi-zh-hans:
36
+ needs: generate_build_matrix
37
+ name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
38
+ runs-on: ubuntu-latest
39
+ strategy:
40
+ fail-fast: false
41
+ matrix:
42
+ ${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
43
+
44
+ steps:
45
+ - uses: actions/checkout@v4
46
+ with:
47
+ fetch-depth: 0
48
+
49
+ - name: Free space
50
+ shell: bash
51
+ run: |
52
+ df -h
53
+ rm -rf /opt/hostedtoolcache
54
+ df -h
55
+ echo "pwd: $PWD"
56
+ echo "github.workspace ${{ github.workspace }}"
57
+
58
+ - name: Test with multi_zh-hans
59
+ uses: addnab/docker-run-action@v3
60
+ with:
61
+ image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
62
+ options: |
63
+ --volume ${{ github.workspace }}/:/icefall
64
+ shell: bash
65
+ run: |
66
+ export PYTHONPATH=/icefall:$PYTHONPATH
67
+ export HF_TOKEN=${{ secrets.HF_TOKEN }}
68
+ cd /icefall
69
+ git config --global --add safe.directory /icefall
70
+
71
+ .github/scripts/multi_zh-hans/ASR/run.sh
72
+
73
+ - name: Show models
74
+ shell: bash
75
+ run: |
76
+ ls -lh *.tar.bz2
77
+
78
+ - name: upload model to https://github.com/k2-fsa/sherpa-onnx
79
+ uses: svenstaro/upload-release-action@v2
80
+ with:
81
+ file_glob: true
82
+ file: ./*.tar.bz2
83
+ overwrite: true
84
+ repo_name: k2-fsa/sherpa-onnx
85
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
86
+ tag: asr-models
.github/workflows/rknn.yml ADDED
@@ -0,0 +1,134 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: rknn
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+ - rknn-zipformer2
8
+
9
+ pull_request:
10
+ branches:
11
+ - master
12
+
13
+ workflow_dispatch:
14
+
15
+ concurrency:
16
+ group: rknn-${{ github.ref }}
17
+ cancel-in-progress: true
18
+
19
+ jobs:
20
+ rknn:
21
+ name: RKNN ${{ matrix.recipe }} ${{ matrix.rknn_toolkit2_version }}
22
+ runs-on: ubuntu-latest
23
+ strategy:
24
+ fail-fast: false
25
+ matrix:
26
+ python-version: ["3.10"]
27
+ k2-version: ["1.24.4.dev20241029"]
28
+ kaldifeat-version: ["1.25.5.dev20241029"]
29
+ torch-version: ["2.0.0"]
30
+ torchaudio-version: ["2.0.1"]
31
+ version: ["20241218"]
32
+ # recipe: ["librispeech", "wenetspeech", "multi_zh-hans"]
33
+ recipe: ["librispeech"]
34
+ rknn_toolkit2_version: ["2.2.0", "2.1.0"]
35
+
36
+
37
+ steps:
38
+ - uses: actions/checkout@v4
39
+ with:
40
+ fetch-depth: 0
41
+
42
+ - name: Export RKNN model
43
+ uses: addnab/docker-run-action@v3
44
+ with:
45
+ image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
46
+ options: |
47
+ --volume ${{ github.workspace }}/:/icefall
48
+ shell: bash
49
+ run: |
50
+ cat /etc/*release
51
+ lsb_release -a
52
+ uname -a
53
+ python3 --version
54
+ export PYTHONPATH=/icefall:$PYTHONPATH
55
+ cd /icefall
56
+ git config --global --add safe.directory /icefall
57
+
58
+ python3 -m torch.utils.collect_env
59
+ python3 -m k2.version
60
+ pip list
61
+ export rknn_toolkit2_version=${{ matrix.rknn_toolkit2_version }}
62
+
63
+ if [[ $rknn_toolkit2_version == "2.1.0" ]]; then
64
+ # for the folder pruned_transducer_stateless7_streaming
65
+ curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.1.0%2B708089d1-cp310-cp310-linux_x86_64.whl
66
+ else
67
+ # for the folder zipformer/
68
+ curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
69
+ fi
70
+
71
+ # Install rknn
72
+ pip install ./*.whl "numpy<=1.26.4"
73
+ pip list | grep rknn
74
+ echo "---"
75
+ pip list
76
+ echo "---"
77
+
78
+ recipe=${{ matrix.recipe }}
79
+ .github/scripts/$recipe/ASR/run_rknn.sh > log-$recipe.txt 2>&1 || true
80
+
81
+ - uses: actions/upload-artifact@v4
82
+ with:
83
+ name: log-${{ matrix.recipe }}-${{ matrix.rknn_toolkit2_version }}
84
+ path: ./log-*.txt
85
+
86
+ - name: Display results
87
+ shell: bash
88
+ run: |
89
+ ls -lh *rk*.tar.bz2 || true
90
+
91
+ - name: Release to GitHub
92
+ uses: svenstaro/upload-release-action@v2
93
+ with:
94
+ file_glob: true
95
+ overwrite: true
96
+ file: sherpa-onnx-*.tar.bz2
97
+ repo_name: k2-fsa/sherpa-onnx
98
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
99
+ tag: asr-models
100
+
101
+ - name: Upload model to huggingface
102
+ if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
103
+ env:
104
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
105
+ uses: nick-fields/retry@v3
106
+ with:
107
+ max_attempts: 20
108
+ timeout_seconds: 200
109
+ shell: bash
110
+ command: |
111
+ git config --global user.email "csukuangfj@gmail.com"
112
+ git config --global user.name "Fangjun Kuang"
113
+
114
+ rm -rf huggingface
115
+ export GIT_LFS_SKIP_SMUDGE=1
116
+
117
+ git clone https://huggingface.co/csukuangfj/sherpa-onnx-rknn-models huggingface
118
+ cd huggingface
119
+
120
+ git fetch
121
+ git pull
122
+ git merge -m "merge remote" --ff origin main
123
+ dst=streaming-asr
124
+ mkdir -p $dst
125
+ cp ../*rk*.tar.bz2 $dst/ || true
126
+
127
+ ls -lh $dst
128
+ git add .
129
+ git status
130
+ git commit -m "update models"
131
+ git status
132
+
133
+ git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-rknn-models main || true
134
+ rm -rf huggingface
.github/workflows/run-docker-image.yml ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Run docker image
2
+ on:
3
+ workflow_dispatch:
4
+
5
+ concurrency:
6
+ group: run_docker_image-${{ github.ref }}
7
+ cancel-in-progress: true
8
+
9
+ jobs:
10
+ run-docker-image:
11
+ name: ${{ matrix.image }}
12
+ runs-on: ${{ matrix.os }}
13
+ strategy:
14
+ fail-fast: false
15
+ matrix:
16
+ os: [ubuntu-latest]
17
+ image: ["torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
18
+ steps:
19
+ # refer to https://github.com/actions/checkout
20
+ - uses: actions/checkout@v2
21
+ with:
22
+ fetch-depth: 0
23
+
24
+ - name: Free space
25
+ shell: bash
26
+ run: |
27
+ df -h
28
+ rm -rf /opt/hostedtoolcache
29
+ df -h
30
+
31
+ - name: Free more space
32
+ shell: bash
33
+ run: |
34
+ # https://github.com/orgs/community/discussions/25678
35
+ cd /opt
36
+ find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
37
+
38
+ sudo rm -rf /usr/share/dotnet
39
+ sudo rm -rf "/usr/local/share/boost"
40
+ sudo rm -rf "$AGENT_TOOLSDIRECTORY"
41
+
42
+ - name: Free Disk Space (Ubuntu)
43
+ uses: jlumbroso/free-disk-space@main
44
+ with:
45
+ # this might remove tools that are actually needed,
46
+ # if set to "true" but frees about 6 GB
47
+ tool-cache: false
48
+
49
+ # all of these default to true, but feel free to set to
50
+ # "false" if necessary for your workflow
51
+ android: true
52
+ dotnet: true
53
+ haskell: true
54
+ large-packages: true
55
+ docker-images: false
56
+ swap-storage: true
57
+
58
+ - name: Check space
59
+ shell: bash
60
+ run: |
61
+ df -h
62
+
63
+ - name: Run the build process with Docker
64
+ uses: addnab/docker-run-action@v3
65
+ with:
66
+ image: k2fsa/icefall:${{ matrix.image }}
67
+ shell: bash
68
+ run: |
69
+ uname -a
70
+ cat /etc/*release
71
+
72
+ find / -name libcuda* 2>/dev/null
73
+
74
+ ls -lh /usr/local/
75
+ ls -lh /usr/local/cuda*
76
+
77
+ nvcc --version
78
+
79
+ ls -lh /usr/local/cuda-*/compat/*
80
+
81
+ # For torch1.9.0-cuda10.2
82
+ export LD_LIBRARY_PATH=/usr/local/cuda-10.2/compat:$LD_LIBRARY_PATH
83
+
84
+ # For torch1.12.1-cuda11.3
85
+ export LD_LIBRARY_PATH=/usr/local/cuda-11.3/compat:$LD_LIBRARY_PATH
86
+
87
+ # For torch2.0.0-cuda11.7
88
+ export LD_LIBRARY_PATH=/usr/local/cuda-11.7/compat:$LD_LIBRARY_PATH
89
+
90
+ # For torch2.1.0-cuda11.8
91
+ export LD_LIBRARY_PATH=/usr/local/cuda-11.8/compat:$LD_LIBRARY_PATH
92
+
93
+ # For torch2.1.0-cuda12.1
94
+ export LD_LIBRARY_PATH=/usr/local/cuda-12.1/compat:$LD_LIBRARY_PATH
95
+
96
+
97
+ which nvcc
98
+ cuda_dir=$(dirname $(which nvcc))
99
+ echo "cuda_dir: $cuda_dir"
100
+
101
+ find $cuda_dir -name libcuda.so*
102
+ echo "--------------------"
103
+
104
+ find / -name libcuda.so* 2>/dev/null
105
+
106
+ # for torch1.13.0-cuda11.6
107
+ if [ -e /opt/conda/lib/stubs/libcuda.so ]; then
108
+ cd /opt/conda/lib/stubs && ln -s libcuda.so libcuda.so.1 && cd -
109
+ export LD_LIBRARY_PATH=/opt/conda/lib/stubs:$LD_LIBRARY_PATH
110
+ fi
111
+
112
+ find / -name libcuda.so* 2>/dev/null
113
+ echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
114
+
115
+ python3 --version
116
+ which python3
117
+
118
+ python3 -m pip list
119
+
120
+ echo "----------torch----------"
121
+ python3 -m torch.utils.collect_env
122
+
123
+ echo "----------k2----------"
124
+ python3 -c "import k2; print(k2.__file__)"
125
+ python3 -c "import k2; print(k2.__dev_version__)"
126
+ python3 -m k2.version
127
+
128
+ echo "----------lhotse----------"
129
+ python3 -c "import lhotse; print(lhotse.__file__)"
130
+ python3 -c "import lhotse; print(lhotse.__version__)"
131
+
132
+ echo "----------kaldifeat----------"
133
+ python3 -c "import kaldifeat; print(kaldifeat.__file__)"
134
+ python3 -c "import kaldifeat; print(kaldifeat.__version__)"
135
+
136
+ echo "Test yesno recipe"
137
+
138
+ cd egs/yesno/ASR
139
+
140
+ ./prepare.sh
141
+
142
+ ./tdnn/train.py
143
+
144
+ ./tdnn/decode.py
.github/workflows/run-gigaspeech-2022-05-13.yml ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com)
2
+
3
+ # See ../../LICENSE for clarification regarding multiple authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ name: run-gigaspeech-2022-05-13
18
+ # stateless transducer + k2 pruned rnnt-loss + reworked conformer
19
+
20
+ on:
21
+ push:
22
+ branches:
23
+ - master
24
+ pull_request:
25
+ types: [labeled]
26
+
27
+ schedule:
28
+ # minute (0-59)
29
+ # hour (0-23)
30
+ # day of the month (1-31)
31
+ # month (1-12)
32
+ # day of the week (0-6)
33
+ # nightly build at 15:50 UTC time every day
34
+ - cron: "50 15 * * *"
35
+
36
+ workflow_dispatch:
37
+
38
+ concurrency:
39
+ group: run_gigaspeech_2022_05_13-${{ github.ref }}
40
+ cancel-in-progress: true
41
+
42
+ jobs:
43
+ run_gigaspeech_2022_05_13:
44
+ if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
45
+ runs-on: ${{ matrix.os }}
46
+ strategy:
47
+ matrix:
48
+ os: [ubuntu-latest]
49
+ python-version: [3.8]
50
+
51
+ fail-fast: false
52
+
53
+ steps:
54
+ - uses: actions/checkout@v2
55
+ with:
56
+ fetch-depth: 0
57
+
58
+ - name: Setup Python ${{ matrix.python-version }}
59
+ uses: actions/setup-python@v2
60
+ with:
61
+ python-version: ${{ matrix.python-version }}
62
+ cache: 'pip'
63
+ cache-dependency-path: '**/requirements-ci.txt'
64
+
65
+ - name: Install Python dependencies
66
+ run: |
67
+ grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
68
+ pip uninstall -y protobuf
69
+ pip install --no-binary protobuf protobuf==3.20.*
70
+
71
+ - name: Cache kaldifeat
72
+ id: my-cache
73
+ uses: actions/cache@v2
74
+ with:
75
+ path: |
76
+ ~/tmp/kaldifeat
77
+ key: cache-tmp-${{ matrix.python-version }}-2023-05-22
78
+
79
+ - name: Install kaldifeat
80
+ if: steps.my-cache.outputs.cache-hit != 'true'
81
+ shell: bash
82
+ run: |
83
+ .github/scripts/install-kaldifeat.sh
84
+
85
+ - name: Download GigaSpeech dev/test dataset
86
+ shell: bash
87
+ run: |
88
+ sudo apt-get install -y -q git-lfs
89
+
90
+ .github/scripts/download-gigaspeech-dev-test-dataset.sh
91
+
92
+ - name: Inference with pre-trained model
93
+ shell: bash
94
+ env:
95
+ GITHUB_EVENT_NAME: ${{ github.event_name }}
96
+ GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
97
+ run: |
98
+ ln -s ~/tmp/giga-dev-dataset-fbank/data egs/gigaspeech/ASR/
99
+
100
+ ls -lh egs/gigaspeech/ASR/data/fbank
101
+
102
+ export PYTHONPATH=$PWD:$PYTHONPATH
103
+ export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
104
+ export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
105
+
106
+ .github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
107
+
108
+ - name: Display decoding results for gigaspeech pruned_transducer_stateless2
109
+ if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
110
+ shell: bash
111
+ run: |
112
+ cd egs/gigaspeech/ASR/
113
+ tree ./pruned_transducer_stateless2/exp
114
+
115
+ sudo apt-get -qq install tree
116
+
117
+ cd pruned_transducer_stateless2
118
+ echo "results for pruned_transducer_stateless2"
119
+ echo "===greedy search==="
120
+ find exp/greedy_search -name "log-*" -exec grep -n --color "best for dev" {} + | sort -n -k2
121
+ find exp/greedy_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2
122
+
123
+ - name: Upload decoding results for gigaspeech pruned_transducer_stateless2
124
+ uses: actions/upload-artifact@v4
125
+ if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
126
+ with:
127
+ name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
128
+ path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/
.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml ADDED
@@ -0,0 +1,136 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
2
+
3
+ # See ../../LICENSE for clarification regarding multiple authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ name: run-gigaspeech-zipformer-2023-10-17
18
+ # zipformer
19
+
20
+ on:
21
+ push:
22
+ branches:
23
+ - master
24
+
25
+ pull_request:
26
+ types: [labeled]
27
+
28
+ schedule:
29
+ # minute (0-59)
30
+ # hour (0-23)
31
+ # day of the month (1-31)
32
+ # month (1-12)
33
+ # day of the week (0-6)
34
+ # nightly build at 15:50 UTC time every day
35
+ - cron: "50 15 * * *"
36
+
37
+ workflow_dispatch:
38
+
39
+ concurrency:
40
+ group: run_gigaspeech_2023_10_17_zipformer-${{ github.ref }}
41
+ cancel-in-progress: true
42
+
43
+ jobs:
44
+ run_gigaspeech_2023_10_17_zipformer:
45
+ if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
46
+ runs-on: ${{ matrix.os }}
47
+ strategy:
48
+ matrix:
49
+ os: [ubuntu-latest]
50
+ python-version: [3.8]
51
+
52
+ fail-fast: false
53
+
54
+ steps:
55
+ - uses: actions/checkout@v2
56
+ with:
57
+ fetch-depth: 0
58
+
59
+ - name: Setup Python ${{ matrix.python-version }}
60
+ uses: actions/setup-python@v2
61
+ with:
62
+ python-version: ${{ matrix.python-version }}
63
+ cache: 'pip'
64
+ cache-dependency-path: '**/requirements-ci.txt'
65
+
66
+ - name: Install Python dependencies
67
+ run: |
68
+ grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
69
+ pip uninstall -y protobuf
70
+ pip install --no-binary protobuf protobuf==3.20.*
71
+
72
+ - name: Cache kaldifeat
73
+ id: my-cache
74
+ uses: actions/cache@v2
75
+ with:
76
+ path: |
77
+ ~/tmp/kaldifeat
78
+ key: cache-tmp-${{ matrix.python-version }}-2023-05-22
79
+
80
+ - name: Install kaldifeat
81
+ if: steps.my-cache.outputs.cache-hit != 'true'
82
+ shell: bash
83
+ run: |
84
+ .github/scripts/install-kaldifeat.sh
85
+
86
+ - name: Inference with pre-trained model
87
+ shell: bash
88
+ env:
89
+ GITHUB_EVENT_NAME: ${{ github.event_name }}
90
+ GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
91
+ HF_TOKEN: ${{ secrets.HF_TOKEN }}
92
+ run: |
93
+ sudo apt-get -qq install git-lfs tree
94
+ export PYTHONPATH=$PWD:$PYTHONPATH
95
+ export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
96
+ export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
97
+
98
+ .github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
99
+
100
+ - name: upload model to https://github.com/k2-fsa/sherpa-onnx
101
+ uses: svenstaro/upload-release-action@v2
102
+ with:
103
+ file_glob: true
104
+ file: ./*.tar.bz2
105
+ overwrite: true
106
+ repo_name: k2-fsa/sherpa-onnx
107
+ repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
108
+ tag: asr-models
109
+
110
+ - name: Display decoding results for gigaspeech zipformer
111
+ if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch'
112
+ shell: bash
113
+ run: |
114
+ cd egs/gigaspeech/ASR/
115
+ tree ./zipformer/exp
116
+
117
+ cd zipformer
118
+ echo "results for zipformer"
119
+ echo "===greedy search==="
120
+ find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
121
+ find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
122
+
123
+ # echo "===fast_beam_search==="
124
+ # find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
125
+ # find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
126
+ #
127
+ # echo "===modified beam search==="
128
+ # find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
129
+ # find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
130
+
131
+ - name: Upload decoding results for gigaspeech zipformer
132
+ uses: actions/upload-artifact@v4
133
+ if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch'
134
+ with:
135
+ name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11
136
+ path: egs/gigaspeech/ASR/zipformer/exp/
.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml ADDED
@@ -0,0 +1,165 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: run-librispeech-lstm-transducer2-2022-09-03
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+ pull_request:
8
+ types: [labeled]
9
+
10
+ schedule:
11
+ # minute (0-59)
12
+ # hour (0-23)
13
+ # day of the month (1-31)
14
+ # month (1-12)
15
+ # day of the week (0-6)
16
+ # nightly build at 15:50 UTC time every day
17
+ - cron: "50 15 * * *"
18
+
19
+ workflow_dispatch:
20
+
21
+ concurrency:
22
+ group: run_librispeech_lstm_transducer_stateless2_2022_09_03-${{ github.ref }}
23
+ cancel-in-progress: true
24
+
25
+ jobs:
26
+ run_librispeech_lstm_transducer_stateless2_2022_09_03:
27
+ if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
28
+ runs-on: ${{ matrix.os }}
29
+ strategy:
30
+ matrix:
31
+ os: [ubuntu-latest]
32
+ python-version: [3.8]
33
+
34
+ fail-fast: false
35
+
36
+ steps:
37
+ - uses: actions/checkout@v2
38
+ with:
39
+ fetch-depth: 0
40
+
41
+ - name: Setup Python ${{ matrix.python-version }}
42
+ uses: actions/setup-python@v2
43
+ with:
44
+ python-version: ${{ matrix.python-version }}
45
+ cache: 'pip'
46
+ cache-dependency-path: '**/requirements-ci.txt'
47
+
48
+ - name: Install Python dependencies
49
+ run: |
50
+ grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
51
+ pip uninstall -y protobuf
52
+ pip install --no-binary protobuf protobuf==3.20.*
53
+
54
+ - name: Cache kaldifeat
55
+ id: my-cache
56
+ uses: actions/cache@v2
57
+ with:
58
+ path: |
59
+ ~/tmp/kaldifeat
60
+ key: cache-tmp-${{ matrix.python-version }}-2023-05-22
61
+
62
+ - name: Install kaldifeat
63
+ if: steps.my-cache.outputs.cache-hit != 'true'
64
+ shell: bash
65
+ run: |
66
+ .github/scripts/install-kaldifeat.sh
67
+
68
+ - name: Cache LibriSpeech test-clean and test-other datasets
69
+ id: libri-test-clean-and-test-other-data
70
+ uses: actions/cache@v2
71
+ with:
72
+ path: |
73
+ ~/tmp/download
74
+ key: cache-libri-test-clean-and-test-other
75
+
76
+ - name: Download LibriSpeech test-clean and test-other
77
+ if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
78
+ shell: bash
79
+ run: |
80
+ .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
81
+
82
+ - name: Prepare manifests for LibriSpeech test-clean and test-other
83
+ shell: bash
84
+ run: |
85
+ .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
86
+
87
+ - name: Cache LibriSpeech test-clean and test-other fbank features
88
+ id: libri-test-clean-and-test-other-fbank
89
+ uses: actions/cache@v2
90
+ with:
91
+ path: |
92
+ ~/tmp/fbank-libri
93
+ key: cache-libri-fbank-test-clean-and-test-other-v2
94
+
95
+ - name: Compute fbank for LibriSpeech test-clean and test-other
96
+ if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
97
+ shell: bash
98
+ run: |
99
+ .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
100
+
101
+ - name: Inference with pre-trained model
102
+ shell: bash
103
+ env:
104
+ GITHUB_EVENT_NAME: ${{ github.event_name }}
105
+ GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
106
+ run: |
107
+ mkdir -p egs/librispeech/ASR/data
108
+ ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
109
+ ls -lh egs/librispeech/ASR/data/*
110
+
111
+ sudo apt-get -qq install git-lfs tree
112
+ export PYTHONPATH=$PWD:$PYTHONPATH
113
+ export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
114
+ export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
115
+
116
+ .github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
117
+
118
+ - name: Display decoding results for lstm_transducer_stateless2
119
+ if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
120
+ shell: bash
121
+ run: |
122
+ cd egs/librispeech/ASR
123
+ tree lstm_transducer_stateless2/exp
124
+ cd lstm_transducer_stateless2/exp
125
+ echo "===greedy search==="
126
+ find greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
127
+ find greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
128
+
129
+ echo "===fast_beam_search==="
130
+ find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
131
+ find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
132
+
133
+ # echo "===modified beam search==="
134
+ # find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
135
+ # find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
136
+
137
+ - name: Display decoding results for lstm_transducer_stateless2
138
+ if: github.event.label.name == 'shallow-fusion'
139
+ shell: bash
140
+ run: |
141
+ cd egs/librispeech/ASR
142
+ tree lstm_transducer_stateless2/exp
143
+ cd lstm_transducer_stateless2/exp
144
+ echo "===modified_beam_search_lm_shallow_fusion==="
145
+ echo "===Using RNNLM==="
146
+ find modified_beam_search_lm_shallow_fusion -name "log-*rnn*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
147
+ find modified_beam_search_lm_shallow_fusion -name "log-*rnn*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
148
+
149
+ - name: Display decoding results for lstm_transducer_stateless2
150
+ if: github.event.label.name == 'LODR'
151
+ shell: bash
152
+ run: |
153
+ cd egs/librispeech/ASR
154
+ tree lstm_transducer_stateless2/exp
155
+ cd lstm_transducer_stateless2/exp
156
+ echo "===modified_beam_search_rnnlm_LODR==="
157
+ find modified_beam_search_LODR -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
158
+ find modified_beam_search_LODR -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
159
+
160
+ - name: Upload decoding results for lstm_transducer_stateless2
161
+ uses: actions/upload-artifact@v4
162
+ if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' || github.event_name == 'workflow_dispatch'
163
+ with:
164
+ name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03
165
+ path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/
.github/workflows/run-multi-corpora-zipformer.yml ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2023 Xiaomi Corp. (author: Zengrui Jin)
2
+
3
+ # See ../../LICENSE for clarification regarding multiple authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ name: run-multi-corpora-zipformer
18
+
19
+ on:
20
+ push:
21
+ branches:
22
+ - master
23
+ pull_request:
24
+ types: [labeled]
25
+
26
+ workflow_dispatch:
27
+
28
+ concurrency:
29
+ group: run_multi-corpora_zipformer-${{ github.ref }}
30
+ cancel-in-progress: true
31
+
32
+ jobs:
33
+ run_multi-corpora_zipformer:
34
+ if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'multi-zh_hans' || github.event.label.name == 'zipformer' || github.event.label.name == 'multi-corpora'
35
+ runs-on: ${{ matrix.os }}
36
+ strategy:
37
+ matrix:
38
+ os: [ubuntu-latest]
39
+ python-version: [3.8]
40
+
41
+ fail-fast: false
42
+
43
+ steps:
44
+ - uses: actions/checkout@v2
45
+ with:
46
+ fetch-depth: 0
47
+
48
+ - name: Setup Python ${{ matrix.python-version }}
49
+ uses: actions/setup-python@v2
50
+ with:
51
+ python-version: ${{ matrix.python-version }}
52
+ cache: 'pip'
53
+ cache-dependency-path: '**/requirements-ci.txt'
54
+
55
+ - name: Install Python dependencies
56
+ run: |
57
+ grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
58
+ pip uninstall -y protobuf
59
+ pip install --no-binary protobuf protobuf==3.20.*
60
+
61
+ - name: Cache kaldifeat
62
+ id: my-cache
63
+ uses: actions/cache@v2
64
+ with:
65
+ path: |
66
+ ~/tmp/kaldifeat
67
+ key: cache-tmp-${{ matrix.python-version }}-2023-05-22
68
+
69
+ - name: Install kaldifeat
70
+ if: steps.my-cache.outputs.cache-hit != 'true'
71
+ shell: bash
72
+ run: |
73
+ .github/scripts/install-kaldifeat.sh
74
+
75
+ - name: Inference with pre-trained model
76
+ shell: bash
77
+ env:
78
+ GITHUB_EVENT_NAME: ${{ github.event_name }}
79
+ GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
80
+ run: |
81
+ sudo apt-get -qq install git-lfs tree
82
+ export PYTHONPATH=$PWD:$PYTHONPATH
83
+ export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
84
+ export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
85
+
86
+ .github/scripts/run-multi-corpora-zipformer.sh
.github/workflows/run-ptb-rnn-lm.yml ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: run-ptb-rnn-lm-training
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - master
7
+ pull_request:
8
+ types: [labeled]
9
+
10
+ schedule:
11
+ # minute (0-59)
12
+ # hour (0-23)
13
+ # day of the month (1-31)
14
+ # month (1-12)
15
+ # day of the week (0-6)
16
+ # nightly build at 15:50 UTC time every day
17
+ - cron: "50 15 * * *"
18
+
19
+ workflow_dispatch:
20
+
21
+ concurrency:
22
+ group: run_ptb_rnn_lm_training-${{ github.ref }}
23
+ cancel-in-progress: true
24
+
25
+ jobs:
26
+ run_ptb_rnn_lm_training:
27
+ if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule'
28
+ runs-on: ${{ matrix.os }}
29
+ strategy:
30
+ matrix:
31
+ os: [ubuntu-latest]
32
+ python-version: ["3.8"]
33
+
34
+ fail-fast: false
35
+
36
+ steps:
37
+ - uses: actions/checkout@v2
38
+ with:
39
+ fetch-depth: 0
40
+
41
+ - name: Setup Python ${{ matrix.python-version }}
42
+ uses: actions/setup-python@v2
43
+ with:
44
+ python-version: ${{ matrix.python-version }}
45
+ cache: 'pip'
46
+ cache-dependency-path: '**/requirements-ci.txt'
47
+
48
+ - name: Install Python dependencies
49
+ run: |
50
+ grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
51
+ pip uninstall -y protobuf
52
+ pip install --no-binary protobuf protobuf==3.20.*
53
+
54
+ - name: Prepare data
55
+ shell: bash
56
+ run: |
57
+ export PYTHONPATH=$PWD:$PYTHONPATH
58
+ cd egs/ptb/LM
59
+ ./prepare.sh
60
+
61
+ - name: Run training
62
+ shell: bash
63
+ run: |
64
+ export PYTHONPATH=$PWD:$PYTHONPATH
65
+ cd egs/ptb/LM
66
+ ./train-rnn-lm.sh --world-size 1 --num-epochs 5 --use-epoch 4 --use-avg 2
67
+
68
+ - name: Upload pretrained models
69
+ uses: actions/upload-artifact@v4
70
+ if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule'
71
+ with:
72
+ name: python-${{ matrix.python-version }}-ubuntu-rnn-lm-ptb
73
+ path: egs/ptb/LM/my-rnnlm-exp/
.github/workflows/run-swbd-conformer-ctc.yml ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright 2023 Xiaomi Corp. (author: Zengrui Jin)
2
+
3
+ # See ../../LICENSE for clarification regarding multiple authors
4
+ #
5
+ # Licensed under the Apache License, Version 2.0 (the "License");
6
+ # you may not use this file except in compliance with the License.
7
+ # You may obtain a copy of the License at
8
+ #
9
+ # http://www.apache.org/licenses/LICENSE-2.0
10
+ #
11
+ # Unless required by applicable law or agreed to in writing, software
12
+ # distributed under the License is distributed on an "AS IS" BASIS,
13
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ # See the License for the specific language governing permissions and
15
+ # limitations under the License.
16
+
17
+ name: run-swbd-conformer_ctc
18
+
19
+ on:
20
+ push:
21
+ branches:
22
+ - master
23
+ pull_request:
24
+ types: [labeled]
25
+
26
+ workflow_dispatch:
27
+
28
+ concurrency:
29
+ group: run-swbd-conformer_ctc-${{ github.ref }}
30
+ cancel-in-progress: true
31
+
32
+ jobs:
33
+ run-swbd-conformer_ctc:
34
+ if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event.label.name == 'swbd'
35
+ runs-on: ${{ matrix.os }}
36
+ strategy:
37
+ matrix:
38
+ os: [ubuntu-latest]
39
+ python-version: [3.8]
40
+
41
+ fail-fast: false
42
+
43
+ steps:
44
+ - uses: actions/checkout@v2
45
+ with:
46
+ fetch-depth: 0
47
+
48
+ - name: Setup Python ${{ matrix.python-version }}
49
+ uses: actions/setup-python@v2
50
+ with:
51
+ python-version: ${{ matrix.python-version }}
52
+ cache: 'pip'
53
+ cache-dependency-path: '**/requirements-ci.txt'
54
+
55
+ - name: Install Python dependencies
56
+ run: |
57
+ grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
58
+ pip uninstall -y protobuf
59
+ pip install --no-binary protobuf protobuf==3.20.*
60
+
61
+ - name: Cache kaldifeat
62
+ id: my-cache
63
+ uses: actions/cache@v2
64
+ with:
65
+ path: |
66
+ ~/tmp/kaldifeat
67
+ key: cache-tmp-${{ matrix.python-version }}-2023-05-22
68
+
69
+ - name: Install kaldifeat
70
+ if: steps.my-cache.outputs.cache-hit != 'true'
71
+ shell: bash
72
+ run: |
73
+ .github/scripts/install-kaldifeat.sh
74
+
75
+ - name: Inference with pre-trained model
76
+ shell: bash
77
+ env:
78
+ GITHUB_EVENT_NAME: ${{ github.event_name }}
79
+ GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
80
+ run: |
81
+ sudo apt-get -qq install git-lfs tree
82
+ export PYTHONPATH=$PWD:$PYTHONPATH
83
+ export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
84
+ export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
85
+
86
+ .github/scripts/run-swbd-conformer-ctc-2023-08-26.sh