Upload icefall experiment results and logs
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .flake8 +35 -0
- .git-blame-ignore-revs +3 -0
- .gitattributes +22 -0
- .github/scripts/.gitignore +1 -0
- .github/scripts/aishell/ASR/run.sh +343 -0
- .github/scripts/audioset/AT/run.sh +94 -0
- .github/scripts/baker_zh/TTS/run-matcha.sh +167 -0
- .github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh +19 -0
- .github/scripts/docker/Dockerfile +75 -0
- .github/scripts/docker/generate_build_matrix.py +140 -0
- .github/scripts/download-gigaspeech-dev-test-dataset.sh +17 -0
- .github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh +25 -0
- .github/scripts/generate-piper-phonemize-page.py +90 -0
- .github/scripts/install-kaldifeat.sh +15 -0
- .github/scripts/ksponspeech/ASR/run.sh +132 -0
- .github/scripts/librispeech/ASR/run.sh +1644 -0
- .github/scripts/librispeech/ASR/run_rknn.sh +275 -0
- .github/scripts/ljspeech/TTS/run-matcha.sh +157 -0
- .github/scripts/ljspeech/TTS/run.sh +157 -0
- .github/scripts/multi_zh-hans/ASR/run.sh +756 -0
- .github/scripts/multi_zh-hans/ASR/run_rknn.sh +73 -0
- .github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh +13 -0
- .github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh +62 -0
- .github/scripts/run-gigaspeech-zipformer-2023-10-17.sh +172 -0
- .github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh +191 -0
- .github/scripts/run-multi-corpora-zipformer.sh +135 -0
- .github/scripts/run-swbd-conformer-ctc-2023-08-26.sh +44 -0
- .github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh +119 -0
- .github/scripts/test-ncnn-export.sh +230 -0
- .github/scripts/test-onnx-export.sh +466 -0
- .github/scripts/wenetspeech/ASR/run_rknn.sh +196 -0
- .github/scripts/yesno/ASR/run.sh +86 -0
- .github/workflows/aishell.yml +72 -0
- .github/workflows/audioset.yml +137 -0
- .github/workflows/baker_zh.yml +152 -0
- .github/workflows/build-cpu-docker.yml +81 -0
- .github/workflows/build-doc.yml +74 -0
- .github/workflows/build-docker-image.yml +84 -0
- .github/workflows/ksponspeech.yml +167 -0
- .github/workflows/librispeech.yml +72 -0
- .github/workflows/ljspeech.yml +166 -0
- .github/workflows/multi-zh-hans.yml +86 -0
- .github/workflows/rknn.yml +134 -0
- .github/workflows/run-docker-image.yml +144 -0
- .github/workflows/run-gigaspeech-2022-05-13.yml +128 -0
- .github/workflows/run-gigaspeech-zipformer-2023-10-17.yml +136 -0
- .github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml +165 -0
- .github/workflows/run-multi-corpora-zipformer.yml +86 -0
- .github/workflows/run-ptb-rnn-lm.yml +73 -0
- .github/workflows/run-swbd-conformer-ctc.yml +86 -0
.flake8
ADDED
|
@@ -0,0 +1,35 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[flake8]
|
| 2 |
+
show-source=true
|
| 3 |
+
statistics=true
|
| 4 |
+
max-line-length = 88
|
| 5 |
+
per-file-ignores =
|
| 6 |
+
# line too long
|
| 7 |
+
icefall/diagnostics.py: E501,
|
| 8 |
+
egs/*/ASR/*/conformer.py: E501,
|
| 9 |
+
egs/*/ASR/pruned_transducer_stateless*/*.py: E501,
|
| 10 |
+
egs/*/ASR/*/optim.py: E501,
|
| 11 |
+
egs/*/ASR/*/scaling.py: E501,
|
| 12 |
+
egs/librispeech/ASR/lstm_transducer_stateless*/*.py: E501, E203
|
| 13 |
+
egs/librispeech/ASR/conv_emformer_transducer_stateless*/*.py: E501, E203
|
| 14 |
+
egs/librispeech/ASR/conformer_ctc*/*py: E501,
|
| 15 |
+
egs/librispeech/ASR/zipformer_mmi/*.py: E501, E203
|
| 16 |
+
egs/librispeech/ASR/zipformer/*.py: E501, E203
|
| 17 |
+
egs/librispeech/ASR/RESULTS.md: E999,
|
| 18 |
+
egs/ljspeech/TTS/vits/*.py: E501, E203
|
| 19 |
+
# invalid escape sequence (cause by tex formular), W605
|
| 20 |
+
icefall/utils.py: E501, W605
|
| 21 |
+
|
| 22 |
+
exclude =
|
| 23 |
+
.git,
|
| 24 |
+
**/data/**,
|
| 25 |
+
icefall/shared/make_kn_lm.py,
|
| 26 |
+
icefall/__init__.py
|
| 27 |
+
icefall/ctc/__init__.py
|
| 28 |
+
|
| 29 |
+
ignore =
|
| 30 |
+
# E203 white space before ":"
|
| 31 |
+
E203,
|
| 32 |
+
# W503 line break before binary operator
|
| 33 |
+
W503,
|
| 34 |
+
# E226 missing whitespace around arithmetic operator
|
| 35 |
+
E226,
|
.git-blame-ignore-revs
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Migrate to 88 characters per line (see: https://github.com/lhotse-speech/lhotse/issues/890)
|
| 2 |
+
107df3b115a58f1b68a6458c3f94a130004be34c
|
| 3 |
+
d31db010371a4128856480382876acdc0d1739ed
|
.gitattributes
CHANGED
|
@@ -33,3 +33,25 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
docs/source/_static/kaldi-align/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
docs/source/_static/logo.png filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
docs/source/contributing/images/doc-contrib.png filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
docs/source/contributing/images/pre-commit-check-success.png filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
docs/source/contributing/images/pre-commit-check.png filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
docs/source/docker/img/docker-hub.png filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
docs/source/huggingface/pic/hugging-face-sherpa-2.png filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
docs/source/huggingface/pic/hugging-face-sherpa-3.png filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
docs/source/huggingface/pic/hugging-face-sherpa.png filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-conformer-ctc-tensorboard-log.jpg filter=lfs diff=lfs merge=lfs -text
|
| 46 |
+
docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-tdnn-lstm-ctc-tensorboard-log.jpg filter=lfs diff=lfs merge=lfs -text
|
| 47 |
+
docs/source/recipes/Non-streaming-ASR/aishell/images/aishell-transducer_stateless_modified-tensorboard-log.png filter=lfs diff=lfs merge=lfs -text
|
| 48 |
+
docs/source/recipes/Non-streaming-ASR/librispeech/images/librispeech-conformer-ctc-tensorboard-log.png filter=lfs diff=lfs merge=lfs -text
|
| 49 |
+
docs/source/recipes/Non-streaming-ASR/librispeech/images/librispeech-pruned-transducer-tensorboard-log.jpg filter=lfs diff=lfs merge=lfs -text
|
| 50 |
+
docs/source/recipes/Non-streaming-ASR/yesno/images/tdnn-tensorboard-log.png filter=lfs diff=lfs merge=lfs -text
|
| 51 |
+
docs/source/recipes/Streaming-ASR/librispeech/images/librispeech-lstm-transducer-tensorboard-log.png filter=lfs diff=lfs merge=lfs -text
|
| 52 |
+
docs/source/recipes/Streaming-ASR/librispeech/images/streaming-librispeech-pruned-transducer-tensorboard-log.jpg filter=lfs diff=lfs merge=lfs -text
|
| 53 |
+
egs/ami/ASR/xlsr_transducer/tts_2_en.wav filter=lfs diff=lfs merge=lfs -text
|
| 54 |
+
egs/libricss/SURT/heat.png filter=lfs diff=lfs merge=lfs -text
|
| 55 |
+
egs/libricss/SURT/surt.png filter=lfs diff=lfs merge=lfs -text
|
| 56 |
+
egs/librispeech/WSASR/figures/otc_training_graph.drawio.png filter=lfs diff=lfs merge=lfs -text
|
| 57 |
+
egs/speech_llm/ASR_LLM/assets/framework.png filter=lfs diff=lfs merge=lfs -text
|
.github/scripts/.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
piper_phonemize.html
|
.github/scripts/aishell/ASR/run.sh
ADDED
|
@@ -0,0 +1,343 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/aishell/ASR
|
| 12 |
+
|
| 13 |
+
function download_test_dev_manifests() {
|
| 14 |
+
git lfs install
|
| 15 |
+
|
| 16 |
+
fbank_url=https://huggingface.co/csukuangfj/aishell-test-dev-manifests
|
| 17 |
+
log "Downloading pre-commputed fbank from $fbank_url"
|
| 18 |
+
|
| 19 |
+
git clone https://huggingface.co/csukuangfj/aishell-test-dev-manifests
|
| 20 |
+
ln -s $PWD/aishell-test-dev-manifests/data .
|
| 21 |
+
}
|
| 22 |
+
|
| 23 |
+
function test_transducer_stateless3_2022_06_20() {
|
| 24 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-aishell-pruned-transducer-stateless3-2022-06-20
|
| 25 |
+
log "Downloading pre-trained model from $repo_url"
|
| 26 |
+
git clone $repo_url
|
| 27 |
+
repo=$(basename $repo_url)
|
| 28 |
+
|
| 29 |
+
log "Display test files"
|
| 30 |
+
tree $repo/
|
| 31 |
+
ls -lh $repo/test_wavs/*.wav
|
| 32 |
+
|
| 33 |
+
pushd $repo/exp
|
| 34 |
+
ln -s pretrained-epoch-29-avg-5-torch-1.10.0.pt pretrained.pt
|
| 35 |
+
popd
|
| 36 |
+
|
| 37 |
+
log "test greedy_search with pretrained.py"
|
| 38 |
+
|
| 39 |
+
for sym in 1 2 3; do
|
| 40 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 41 |
+
|
| 42 |
+
./pruned_transducer_stateless3/pretrained.py \
|
| 43 |
+
--method greedy_search \
|
| 44 |
+
--max-sym-per-frame $sym \
|
| 45 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 46 |
+
--lang-dir $repo/data/lang_char \
|
| 47 |
+
$repo/test_wavs/BAC009S0764W0121.wav \
|
| 48 |
+
$repo/test_wavs/BAC009S0764W0122.wav \
|
| 49 |
+
$repo/test_wavs/BAC009S0764W0123.wav
|
| 50 |
+
done
|
| 51 |
+
|
| 52 |
+
log "test beam search with pretrained.py"
|
| 53 |
+
|
| 54 |
+
for method in modified_beam_search beam_search fast_beam_search; do
|
| 55 |
+
log "$method"
|
| 56 |
+
|
| 57 |
+
./pruned_transducer_stateless3/pretrained.py \
|
| 58 |
+
--method $method \
|
| 59 |
+
--beam-size 4 \
|
| 60 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 61 |
+
--lang-dir $repo/data/lang_char \
|
| 62 |
+
$repo/test_wavs/BAC009S0764W0121.wav \
|
| 63 |
+
$repo/test_wavs/BAC009S0764W0122.wav \
|
| 64 |
+
$repo/test_wavs/BAC009S0764W0123.wav
|
| 65 |
+
done
|
| 66 |
+
|
| 67 |
+
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
| 68 |
+
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
| 69 |
+
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
| 70 |
+
mkdir -p pruned_transducer_stateless3/exp
|
| 71 |
+
ln -s $PWD/$repo/exp/pretrained.pt pruned_transducer_stateless3/exp/epoch-999.pt
|
| 72 |
+
ln -s $PWD/$repo/data/lang_char data/
|
| 73 |
+
|
| 74 |
+
ls -lh data
|
| 75 |
+
ls -lh pruned_transducer_stateless3/exp
|
| 76 |
+
|
| 77 |
+
log "Decoding test and dev"
|
| 78 |
+
|
| 79 |
+
# use a small value for decoding with CPU
|
| 80 |
+
max_duration=100
|
| 81 |
+
|
| 82 |
+
for method in greedy_search fast_beam_search modified_beam_search; do
|
| 83 |
+
log "Decoding with $method"
|
| 84 |
+
|
| 85 |
+
./pruned_transducer_stateless3/decode.py \
|
| 86 |
+
--decoding-method $method \
|
| 87 |
+
--epoch 999 \
|
| 88 |
+
--avg 1 \
|
| 89 |
+
--max-duration $max_duration \
|
| 90 |
+
--exp-dir pruned_transducer_stateless3/exp
|
| 91 |
+
done
|
| 92 |
+
|
| 93 |
+
rm pruned_transducer_stateless3/exp/*.pt
|
| 94 |
+
fi
|
| 95 |
+
|
| 96 |
+
rm -rf $repo
|
| 97 |
+
}
|
| 98 |
+
|
| 99 |
+
function test_zipformer_large_2023_10_24() {
|
| 100 |
+
log "CI testing large model"
|
| 101 |
+
repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-large-2023-10-24/
|
| 102 |
+
log "Downloading pre-trained model from $repo_url"
|
| 103 |
+
git clone $repo_url
|
| 104 |
+
repo=$(basename $repo_url)
|
| 105 |
+
|
| 106 |
+
log "Display test files"
|
| 107 |
+
tree $repo/
|
| 108 |
+
ls -lh $repo/test_wavs/*.wav
|
| 109 |
+
|
| 110 |
+
for method in modified_beam_search greedy_search fast_beam_search; do
|
| 111 |
+
log "$method"
|
| 112 |
+
|
| 113 |
+
./zipformer/pretrained.py \
|
| 114 |
+
--method $method \
|
| 115 |
+
--context-size 1 \
|
| 116 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 117 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 118 |
+
--num-encoder-layers 2,2,4,5,4,2 \
|
| 119 |
+
--feedforward-dim 512,768,1536,2048,1536,768 \
|
| 120 |
+
--encoder-dim 192,256,512,768,512,256 \
|
| 121 |
+
--encoder-unmasked-dim 192,192,256,320,256,192 \
|
| 122 |
+
$repo/test_wavs/BAC009S0764W0121.wav \
|
| 123 |
+
$repo/test_wavs/BAC009S0764W0122.wav \
|
| 124 |
+
$repo/test_wavs/BAC009S0764W0123.wav
|
| 125 |
+
done
|
| 126 |
+
rm -rf $repo
|
| 127 |
+
}
|
| 128 |
+
|
| 129 |
+
function test_zipformer_2023_10_24() {
|
| 130 |
+
repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-2023-10-24/
|
| 131 |
+
log "Downloading pre-trained model from $repo_url"
|
| 132 |
+
git clone $repo_url
|
| 133 |
+
repo=$(basename $repo_url)
|
| 134 |
+
|
| 135 |
+
log "Display test files"
|
| 136 |
+
tree $repo/
|
| 137 |
+
ls -lh $repo/test_wavs/*.wav
|
| 138 |
+
|
| 139 |
+
|
| 140 |
+
for method in modified_beam_search greedy_search fast_beam_search; do
|
| 141 |
+
log "$method"
|
| 142 |
+
|
| 143 |
+
./zipformer/pretrained.py \
|
| 144 |
+
--method $method \
|
| 145 |
+
--context-size 1 \
|
| 146 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 147 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 148 |
+
$repo/test_wavs/BAC009S0764W0121.wav \
|
| 149 |
+
$repo/test_wavs/BAC009S0764W0122.wav \
|
| 150 |
+
$repo/test_wavs/BAC009S0764W0123.wav
|
| 151 |
+
done
|
| 152 |
+
rm -rf $repo
|
| 153 |
+
}
|
| 154 |
+
|
| 155 |
+
function test_zipformer_small_2023_10_24() {
|
| 156 |
+
log "CI testing small model"
|
| 157 |
+
repo_url=https://huggingface.co/zrjin/icefall-asr-aishell-zipformer-small-2023-10-24/
|
| 158 |
+
log "Downloading pre-trained model from $repo_url"
|
| 159 |
+
git clone $repo_url
|
| 160 |
+
repo=$(basename $repo_url)
|
| 161 |
+
|
| 162 |
+
log "Display test files"
|
| 163 |
+
tree $repo/
|
| 164 |
+
ls -lh $repo/test_wavs/*.wav
|
| 165 |
+
|
| 166 |
+
|
| 167 |
+
for method in modified_beam_search greedy_search fast_beam_search; do
|
| 168 |
+
log "$method"
|
| 169 |
+
|
| 170 |
+
./zipformer/pretrained.py \
|
| 171 |
+
--method $method \
|
| 172 |
+
--context-size 1 \
|
| 173 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 174 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 175 |
+
--num-encoder-layers 2,2,2,2,2,2 \
|
| 176 |
+
--feedforward-dim 512,768,768,768,768,768 \
|
| 177 |
+
--encoder-dim 192,256,256,256,256,256 \
|
| 178 |
+
--encoder-unmasked-dim 192,192,192,192,192,192 \
|
| 179 |
+
$repo/test_wavs/BAC009S0764W0121.wav \
|
| 180 |
+
$repo/test_wavs/BAC009S0764W0122.wav \
|
| 181 |
+
$repo/test_wavs/BAC009S0764W0123.wav
|
| 182 |
+
done
|
| 183 |
+
rm -rf $repo
|
| 184 |
+
}
|
| 185 |
+
|
| 186 |
+
function test_transducer_stateless_modified_2022_03_01() {
|
| 187 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2022-03-01
|
| 188 |
+
|
| 189 |
+
log "Downloading pre-trained model from $repo_url"
|
| 190 |
+
git lfs install
|
| 191 |
+
git clone $repo_url
|
| 192 |
+
repo=$(basename $repo_url)
|
| 193 |
+
|
| 194 |
+
log "Display test files"
|
| 195 |
+
tree $repo/
|
| 196 |
+
ls -lh $repo/test_wavs/*.wav
|
| 197 |
+
|
| 198 |
+
for sym in 1 2 3; do
|
| 199 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 200 |
+
|
| 201 |
+
./transducer_stateless_modified/pretrained.py \
|
| 202 |
+
--method greedy_search \
|
| 203 |
+
--max-sym-per-frame $sym \
|
| 204 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 205 |
+
--lang-dir $repo/data/lang_char \
|
| 206 |
+
$repo/test_wavs/BAC009S0764W0121.wav \
|
| 207 |
+
$repo/test_wavs/BAC009S0764W0122.wav \
|
| 208 |
+
$repo/test_wavs/BAC009S0764W0123.wav
|
| 209 |
+
done
|
| 210 |
+
|
| 211 |
+
for method in modified_beam_search beam_search; do
|
| 212 |
+
log "$method"
|
| 213 |
+
|
| 214 |
+
./transducer_stateless_modified/pretrained.py \
|
| 215 |
+
--method $method \
|
| 216 |
+
--beam-size 4 \
|
| 217 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 218 |
+
--lang-dir $repo/data/lang_char \
|
| 219 |
+
$repo/test_wavs/BAC009S0764W0121.wav \
|
| 220 |
+
$repo/test_wavs/BAC009S0764W0122.wav \
|
| 221 |
+
$repo/test_wavs/BAC009S0764W0123.wav
|
| 222 |
+
done
|
| 223 |
+
rm -rf $repo
|
| 224 |
+
}
|
| 225 |
+
|
| 226 |
+
function test_transducer_stateless_modified_2_2022_03_01() {
|
| 227 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-aishell-transducer-stateless-modified-2-2022-03-01
|
| 228 |
+
|
| 229 |
+
log "Downloading pre-trained model from $repo_url"
|
| 230 |
+
git lfs install
|
| 231 |
+
git clone $repo_url
|
| 232 |
+
repo=$(basename $repo_url)
|
| 233 |
+
|
| 234 |
+
log "Display test files"
|
| 235 |
+
tree $repo/
|
| 236 |
+
ls -lh $repo/test_wavs/*.wav
|
| 237 |
+
|
| 238 |
+
for sym in 1 2 3; do
|
| 239 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 240 |
+
|
| 241 |
+
./transducer_stateless_modified-2/pretrained.py \
|
| 242 |
+
--method greedy_search \
|
| 243 |
+
--max-sym-per-frame $sym \
|
| 244 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 245 |
+
--lang-dir $repo/data/lang_char \
|
| 246 |
+
$repo/test_wavs/BAC009S0764W0121.wav \
|
| 247 |
+
$repo/test_wavs/BAC009S0764W0122.wav \
|
| 248 |
+
$repo/test_wavs/BAC009S0764W0123.wav
|
| 249 |
+
done
|
| 250 |
+
|
| 251 |
+
for method in modified_beam_search beam_search; do
|
| 252 |
+
log "$method"
|
| 253 |
+
|
| 254 |
+
./transducer_stateless_modified-2/pretrained.py \
|
| 255 |
+
--method $method \
|
| 256 |
+
--beam-size 4 \
|
| 257 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 258 |
+
--lang-dir $repo/data/lang_char \
|
| 259 |
+
$repo/test_wavs/BAC009S0764W0121.wav \
|
| 260 |
+
$repo/test_wavs/BAC009S0764W0122.wav \
|
| 261 |
+
$repo/test_wavs/BAC009S0764W0123.wav
|
| 262 |
+
done
|
| 263 |
+
rm -rf $repo
|
| 264 |
+
}
|
| 265 |
+
|
| 266 |
+
function test_conformer_ctc() {
|
| 267 |
+
repo_url=https://huggingface.co/csukuangfj/icefall_asr_aishell_conformer_ctc
|
| 268 |
+
log "Downloading pre-trained model from $repo_url"
|
| 269 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 270 |
+
repo=$(basename $repo_url)
|
| 271 |
+
pushd $repo
|
| 272 |
+
|
| 273 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 274 |
+
git lfs pull --include "data/lang_char/H.fst"
|
| 275 |
+
git lfs pull --include "data/lang_char/HL.fst"
|
| 276 |
+
git lfs pull --include "data/lang_char/HLG.fst"
|
| 277 |
+
|
| 278 |
+
popd
|
| 279 |
+
|
| 280 |
+
log "Display test files"
|
| 281 |
+
tree $repo/
|
| 282 |
+
ls -lh $repo/test_wavs/*.wav
|
| 283 |
+
|
| 284 |
+
log "CTC decoding"
|
| 285 |
+
|
| 286 |
+
log "Exporting model with torchscript"
|
| 287 |
+
|
| 288 |
+
pushd $repo/exp
|
| 289 |
+
ln -s pretrained.pt epoch-99.pt
|
| 290 |
+
popd
|
| 291 |
+
|
| 292 |
+
./conformer_ctc/export.py \
|
| 293 |
+
--epoch 99 \
|
| 294 |
+
--avg 1 \
|
| 295 |
+
--exp-dir $repo/exp \
|
| 296 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 297 |
+
--jit 1
|
| 298 |
+
|
| 299 |
+
ls -lh $repo/exp
|
| 300 |
+
|
| 301 |
+
ls -lh $repo/data/lang_char
|
| 302 |
+
|
| 303 |
+
log "Decoding with H on CPU with OpenFst"
|
| 304 |
+
|
| 305 |
+
./conformer_ctc/jit_pretrained_decode_with_H.py \
|
| 306 |
+
--nn-model $repo/exp/cpu_jit.pt \
|
| 307 |
+
--H $repo/data/lang_char/H.fst \
|
| 308 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 309 |
+
$repo/test_wavs/0.wav \
|
| 310 |
+
$repo/test_wavs/1.wav \
|
| 311 |
+
$repo/test_wavs/2.wav
|
| 312 |
+
|
| 313 |
+
log "Decoding with HL on CPU with OpenFst"
|
| 314 |
+
|
| 315 |
+
./conformer_ctc/jit_pretrained_decode_with_HL.py \
|
| 316 |
+
--nn-model $repo/exp/cpu_jit.pt \
|
| 317 |
+
--HL $repo/data/lang_char/HL.fst \
|
| 318 |
+
--words $repo/data/lang_char/words.txt \
|
| 319 |
+
$repo/test_wavs/0.wav \
|
| 320 |
+
$repo/test_wavs/1.wav \
|
| 321 |
+
$repo/test_wavs/2.wav
|
| 322 |
+
|
| 323 |
+
log "Decoding with HLG on CPU with OpenFst"
|
| 324 |
+
|
| 325 |
+
./conformer_ctc/jit_pretrained_decode_with_HLG.py \
|
| 326 |
+
--nn-model $repo/exp/cpu_jit.pt \
|
| 327 |
+
--HLG $repo/data/lang_char/HLG.fst \
|
| 328 |
+
--words $repo/data/lang_char/words.txt \
|
| 329 |
+
$repo/test_wavs/0.wav \
|
| 330 |
+
$repo/test_wavs/1.wav \
|
| 331 |
+
$repo/test_wavs/2.wav
|
| 332 |
+
|
| 333 |
+
rm -rf $repo
|
| 334 |
+
}
|
| 335 |
+
|
| 336 |
+
download_test_dev_manifests
|
| 337 |
+
test_transducer_stateless3_2022_06_20
|
| 338 |
+
test_zipformer_large_2023_10_24
|
| 339 |
+
test_zipformer_2023_10_24
|
| 340 |
+
test_zipformer_small_2023_10_24
|
| 341 |
+
test_transducer_stateless_modified_2022_03_01
|
| 342 |
+
test_transducer_stateless_modified_2_2022_03_01
|
| 343 |
+
# test_conformer_ctc # fails for torch 1.13.x and torch 2.0.x
|
.github/scripts/audioset/AT/run.sh
ADDED
|
@@ -0,0 +1,94 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
python3 -m pip install onnxoptimizer onnxsim
|
| 6 |
+
|
| 7 |
+
log() {
|
| 8 |
+
# This function is from espnet
|
| 9 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 10 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
cd egs/audioset/AT
|
| 14 |
+
|
| 15 |
+
function test_pretrained() {
|
| 16 |
+
repo_url=https://huggingface.co/marcoyang/icefall-audio-tagging-audioset-zipformer-2024-03-12
|
| 17 |
+
repo=$(basename $repo_url)
|
| 18 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 19 |
+
pushd $repo/exp
|
| 20 |
+
git lfs pull --include pretrained.pt
|
| 21 |
+
ln -s pretrained.pt epoch-99.pt
|
| 22 |
+
ls -lh
|
| 23 |
+
popd
|
| 24 |
+
|
| 25 |
+
log "test pretrained.pt"
|
| 26 |
+
|
| 27 |
+
python3 zipformer/pretrained.py \
|
| 28 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 29 |
+
--label-dict $repo/data/class_labels_indices.csv \
|
| 30 |
+
$repo/test_wavs/1.wav \
|
| 31 |
+
$repo/test_wavs/2.wav \
|
| 32 |
+
$repo/test_wavs/3.wav \
|
| 33 |
+
$repo/test_wavs/4.wav
|
| 34 |
+
|
| 35 |
+
log "test jit export"
|
| 36 |
+
ls -lh $repo/exp/
|
| 37 |
+
python3 zipformer/export.py \
|
| 38 |
+
--exp-dir $repo/exp \
|
| 39 |
+
--epoch 99 \
|
| 40 |
+
--avg 1 \
|
| 41 |
+
--use-averaged-model 0 \
|
| 42 |
+
--jit 1
|
| 43 |
+
ls -lh $repo/exp/
|
| 44 |
+
|
| 45 |
+
log "test jit models"
|
| 46 |
+
python3 zipformer/jit_pretrained.py \
|
| 47 |
+
--nn-model-filename $repo/exp/jit_script.pt \
|
| 48 |
+
--label-dict $repo/data/class_labels_indices.csv \
|
| 49 |
+
$repo/test_wavs/1.wav \
|
| 50 |
+
$repo/test_wavs/2.wav \
|
| 51 |
+
$repo/test_wavs/3.wav \
|
| 52 |
+
$repo/test_wavs/4.wav
|
| 53 |
+
|
| 54 |
+
log "test onnx export"
|
| 55 |
+
ls -lh $repo/exp/
|
| 56 |
+
python3 zipformer/export-onnx.py \
|
| 57 |
+
--exp-dir $repo/exp \
|
| 58 |
+
--epoch 99 \
|
| 59 |
+
--avg 1 \
|
| 60 |
+
--use-averaged-model 0
|
| 61 |
+
|
| 62 |
+
ls -lh $repo/exp/
|
| 63 |
+
|
| 64 |
+
pushd $repo/exp/
|
| 65 |
+
mv model-epoch-99-avg-1.onnx model.onnx
|
| 66 |
+
mv model-epoch-99-avg-1.int8.onnx model.int8.onnx
|
| 67 |
+
popd
|
| 68 |
+
|
| 69 |
+
ls -lh $repo/exp/
|
| 70 |
+
|
| 71 |
+
log "test onnx models"
|
| 72 |
+
for m in model.onnx model.int8.onnx; do
|
| 73 |
+
log "$m"
|
| 74 |
+
python3 zipformer/onnx_pretrained.py \
|
| 75 |
+
--model-filename $repo/exp/model.onnx \
|
| 76 |
+
--label-dict $repo/data/class_labels_indices.csv \
|
| 77 |
+
$repo/test_wavs/1.wav \
|
| 78 |
+
$repo/test_wavs/2.wav \
|
| 79 |
+
$repo/test_wavs/3.wav \
|
| 80 |
+
$repo/test_wavs/4.wav
|
| 81 |
+
done
|
| 82 |
+
|
| 83 |
+
log "prepare data for uploading to huggingface"
|
| 84 |
+
dst=/icefall/model-onnx
|
| 85 |
+
mkdir -p $dst
|
| 86 |
+
cp -v $repo/exp/*.onnx $dst/
|
| 87 |
+
cp -v $repo/data/* $dst/
|
| 88 |
+
cp -av $repo/test_wavs $dst
|
| 89 |
+
|
| 90 |
+
ls -lh $dst
|
| 91 |
+
ls -lh $dst/test_wavs
|
| 92 |
+
}
|
| 93 |
+
|
| 94 |
+
test_pretrained
|
.github/scripts/baker_zh/TTS/run-matcha.sh
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
apt-get update
|
| 6 |
+
apt-get install -y sox
|
| 7 |
+
|
| 8 |
+
python3 -m pip install numba conformer==0.3.2 diffusers librosa
|
| 9 |
+
python3 -m pip install jieba
|
| 10 |
+
|
| 11 |
+
|
| 12 |
+
log() {
|
| 13 |
+
# This function is from espnet
|
| 14 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 15 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
cd egs/baker_zh/TTS
|
| 19 |
+
|
| 20 |
+
sed -i.bak s/600/8/g ./prepare.sh
|
| 21 |
+
sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
|
| 22 |
+
sed -i.bak s/500/5/g ./prepare.sh
|
| 23 |
+
git diff
|
| 24 |
+
|
| 25 |
+
function prepare_data() {
|
| 26 |
+
# We have created a subset of the data for testing
|
| 27 |
+
#
|
| 28 |
+
mkdir -p download
|
| 29 |
+
pushd download
|
| 30 |
+
wget -q https://huggingface.co/csukuangfj/tmp-files/resolve/main/BZNSYP-samples.tar.bz2
|
| 31 |
+
tar xvf BZNSYP-samples.tar.bz2
|
| 32 |
+
mv BZNSYP-samples BZNSYP
|
| 33 |
+
rm BZNSYP-samples.tar.bz2
|
| 34 |
+
popd
|
| 35 |
+
|
| 36 |
+
./prepare.sh
|
| 37 |
+
tree .
|
| 38 |
+
}
|
| 39 |
+
|
| 40 |
+
function train() {
|
| 41 |
+
pushd ./matcha
|
| 42 |
+
sed -i.bak s/1500/3/g ./train.py
|
| 43 |
+
git diff .
|
| 44 |
+
popd
|
| 45 |
+
|
| 46 |
+
./matcha/train.py \
|
| 47 |
+
--exp-dir matcha/exp \
|
| 48 |
+
--num-epochs 1 \
|
| 49 |
+
--save-every-n 1 \
|
| 50 |
+
--num-buckets 2 \
|
| 51 |
+
--tokens data/tokens.txt \
|
| 52 |
+
--max-duration 20
|
| 53 |
+
|
| 54 |
+
ls -lh matcha/exp
|
| 55 |
+
}
|
| 56 |
+
|
| 57 |
+
function infer() {
|
| 58 |
+
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
|
| 59 |
+
|
| 60 |
+
./matcha/infer.py \
|
| 61 |
+
--num-buckets 2 \
|
| 62 |
+
--epoch 1 \
|
| 63 |
+
--exp-dir ./matcha/exp \
|
| 64 |
+
--tokens data/tokens.txt \
|
| 65 |
+
--cmvn ./data/fbank/cmvn.json \
|
| 66 |
+
--vocoder ./generator_v2 \
|
| 67 |
+
--input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
|
| 68 |
+
--output-wav ./generated.wav
|
| 69 |
+
|
| 70 |
+
ls -lh *.wav
|
| 71 |
+
soxi ./generated.wav
|
| 72 |
+
rm -v ./generated.wav
|
| 73 |
+
rm -v generator_v2
|
| 74 |
+
}
|
| 75 |
+
|
| 76 |
+
function export_onnx() {
|
| 77 |
+
pushd matcha/exp
|
| 78 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/epoch-2000.pt
|
| 79 |
+
popd
|
| 80 |
+
|
| 81 |
+
pushd data/fbank
|
| 82 |
+
rm -v *.json
|
| 83 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-baker-matcha-zh-2024-12-27/resolve/main/cmvn.json
|
| 84 |
+
popd
|
| 85 |
+
|
| 86 |
+
./matcha/export_onnx.py \
|
| 87 |
+
--exp-dir ./matcha/exp \
|
| 88 |
+
--epoch 2000 \
|
| 89 |
+
--tokens ./data/tokens.txt \
|
| 90 |
+
--cmvn ./data/fbank/cmvn.json
|
| 91 |
+
|
| 92 |
+
ls -lh *.onnx
|
| 93 |
+
|
| 94 |
+
if false; then
|
| 95 |
+
# The CI machine does not have enough memory to run it
|
| 96 |
+
#
|
| 97 |
+
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
|
| 98 |
+
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
|
| 99 |
+
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
|
| 100 |
+
python3 ./matcha/export_onnx_hifigan.py
|
| 101 |
+
else
|
| 102 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
|
| 103 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
|
| 104 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v3.onnx
|
| 105 |
+
fi
|
| 106 |
+
|
| 107 |
+
ls -lh *.onnx
|
| 108 |
+
|
| 109 |
+
python3 ./matcha/generate_lexicon.py
|
| 110 |
+
|
| 111 |
+
for v in v1 v2 v3; do
|
| 112 |
+
python3 ./matcha/onnx_pretrained.py \
|
| 113 |
+
--acoustic-model ./model-steps-6.onnx \
|
| 114 |
+
--vocoder ./hifigan_$v.onnx \
|
| 115 |
+
--tokens ./data/tokens.txt \
|
| 116 |
+
--lexicon ./lexicon.txt \
|
| 117 |
+
--input-text "当夜幕降临,星光点点,伴随着微风拂面,我在静谧中感受着时光的流转,思念如涟漪荡漾,梦境如画卷展开,我与自然融为一体,沉静在这片宁静的美丽之中,感受着生命的奇迹与温柔。" \
|
| 118 |
+
--output-wav /icefall/generated-matcha-tts-steps-6-$v.wav
|
| 119 |
+
done
|
| 120 |
+
|
| 121 |
+
ls -lh /icefall/*.wav
|
| 122 |
+
soxi /icefall/generated-matcha-tts-steps-6-*.wav
|
| 123 |
+
cp ./model-steps-*.onnx /icefall
|
| 124 |
+
|
| 125 |
+
d=matcha-icefall-zh-baker
|
| 126 |
+
mkdir $d
|
| 127 |
+
cp -v data/tokens.txt $d
|
| 128 |
+
cp -v lexicon.txt $d
|
| 129 |
+
cp model-steps-3.onnx $d
|
| 130 |
+
pushd $d
|
| 131 |
+
curl -SL -O https://github.com/csukuangfj/cppjieba/releases/download/sherpa-onnx-2024-04-19/dict.tar.bz2
|
| 132 |
+
tar xvf dict.tar.bz2
|
| 133 |
+
rm dict.tar.bz2
|
| 134 |
+
|
| 135 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/date.fst
|
| 136 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/number.fst
|
| 137 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-aishell3-vits-low-2024-04-06/resolve/main/data/phone.fst
|
| 138 |
+
|
| 139 |
+
cat >README.md <<EOF
|
| 140 |
+
# Introduction
|
| 141 |
+
|
| 142 |
+
This model is trained using the dataset from
|
| 143 |
+
https://en.data-baker.com/datasets/freeDatasets/
|
| 144 |
+
|
| 145 |
+
The dataset contains 10000 Chinese sentences of a native Chinese female speaker,
|
| 146 |
+
which is about 12 hours.
|
| 147 |
+
|
| 148 |
+
**Note**: The dataset is for non-commercial use only.
|
| 149 |
+
|
| 150 |
+
You can find the training code at
|
| 151 |
+
https://github.com/k2-fsa/icefall/tree/master/egs/baker_zh/TTS
|
| 152 |
+
EOF
|
| 153 |
+
|
| 154 |
+
ls -lh
|
| 155 |
+
popd
|
| 156 |
+
tar cvjf $d.tar.bz2 $d
|
| 157 |
+
mv $d.tar.bz2 /icefall
|
| 158 |
+
mv $d /icefall
|
| 159 |
+
}
|
| 160 |
+
|
| 161 |
+
prepare_data
|
| 162 |
+
train
|
| 163 |
+
infer
|
| 164 |
+
export_onnx
|
| 165 |
+
|
| 166 |
+
rm -rfv generator_v* matcha/exp
|
| 167 |
+
git checkout .
|
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
# This script computes fbank features for the test-clean and test-other datasets.
|
| 4 |
+
# The computed features are saved to ~/tmp/fbank-libri and are
|
| 5 |
+
# cached for later runs
|
| 6 |
+
|
| 7 |
+
set -e
|
| 8 |
+
|
| 9 |
+
export PYTHONPATH=$PWD:$PYTHONPATH
|
| 10 |
+
echo $PYTHONPATH
|
| 11 |
+
|
| 12 |
+
mkdir ~/tmp/fbank-libri
|
| 13 |
+
cd egs/librispeech/ASR
|
| 14 |
+
mkdir -p data
|
| 15 |
+
cd data
|
| 16 |
+
[ ! -e fbank ] && ln -s ~/tmp/fbank-libri fbank
|
| 17 |
+
cd ..
|
| 18 |
+
./local/compute_fbank_librispeech.py --dataset 'test-clean test-other'
|
| 19 |
+
ls -lh data/fbank/
|
.github/scripts/docker/Dockerfile
ADDED
|
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
ARG PYTHON_VERSION=3.8
|
| 2 |
+
FROM python:${PYTHON_VERSION}
|
| 3 |
+
|
| 4 |
+
ARG TORCHAUDIO_VERSION="0.13.0"
|
| 5 |
+
ARG TORCH_VERSION="1.13.0"
|
| 6 |
+
ARG K2_VERSION="1.24.4.dev20231220"
|
| 7 |
+
ARG KALDIFEAT_VERSION="1.25.3.dev20231221"
|
| 8 |
+
|
| 9 |
+
ARG _K2_VERSION="${K2_VERSION}+cpu.torch${TORCH_VERSION}"
|
| 10 |
+
ARG _KALDIFEAT_VERSION="${KALDIFEAT_VERSION}+cpu.torch${TORCH_VERSION}"
|
| 11 |
+
|
| 12 |
+
RUN apt-get update -y && \
|
| 13 |
+
apt-get install -qq -y \
|
| 14 |
+
cmake \
|
| 15 |
+
ffmpeg \
|
| 16 |
+
git \
|
| 17 |
+
git-lfs \
|
| 18 |
+
graphviz \
|
| 19 |
+
less \
|
| 20 |
+
tree \
|
| 21 |
+
vim \
|
| 22 |
+
&& \
|
| 23 |
+
apt-get clean && \
|
| 24 |
+
rm -rf /var/cache/apt/archives /var/lib/apt/lists
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
LABEL authors="Fangjun Kuang <csukuangfj@gmail.com>"
|
| 28 |
+
LABEL k2_version=${_K2_VERSION}
|
| 29 |
+
LABEL kaldifeat_version=${_KALDIFEAT_VERSION}
|
| 30 |
+
LABEL github_repo="https://github.com/k2-fsa/icefall"
|
| 31 |
+
|
| 32 |
+
# Install dependencies
|
| 33 |
+
RUN pip install --no-cache-dir \
|
| 34 |
+
torch==${TORCH_VERSION}+cpu -f https://download.pytorch.org/whl/torch \
|
| 35 |
+
torchaudio==${TORCHAUDIO_VERSION}+cpu -f https://download.pytorch.org/whl/torchaudio \
|
| 36 |
+
k2==${_K2_VERSION} -f https://k2-fsa.github.io/k2/cpu.html \
|
| 37 |
+
\
|
| 38 |
+
git+https://github.com/lhotse-speech/lhotse \
|
| 39 |
+
kaldifeat==${_KALDIFEAT_VERSION} -f https://csukuangfj.github.io/kaldifeat/cpu.html \
|
| 40 |
+
conformer==0.3.2 \
|
| 41 |
+
cython \
|
| 42 |
+
diffusers \
|
| 43 |
+
dill \
|
| 44 |
+
espnet_tts_frontend \
|
| 45 |
+
graphviz \
|
| 46 |
+
kaldi-decoder \
|
| 47 |
+
kaldi_native_io \
|
| 48 |
+
kaldialign \
|
| 49 |
+
kaldifst \
|
| 50 |
+
kaldilm \
|
| 51 |
+
librosa \
|
| 52 |
+
"matplotlib<=3.9.4" \
|
| 53 |
+
multi_quantization \
|
| 54 |
+
numba \
|
| 55 |
+
"numpy<2.0" \
|
| 56 |
+
onnxoptimizer \
|
| 57 |
+
onnxsim \
|
| 58 |
+
onnx==1.17.0 \
|
| 59 |
+
onnxmltools \
|
| 60 |
+
onnxruntime==1.17.1 \
|
| 61 |
+
piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html \
|
| 62 |
+
pypinyin==0.50.0 \
|
| 63 |
+
pytest \
|
| 64 |
+
sentencepiece>=0.1.96 \
|
| 65 |
+
six \
|
| 66 |
+
tensorboard \
|
| 67 |
+
typeguard
|
| 68 |
+
|
| 69 |
+
# RUN git clone https://github.com/k2-fsa/icefall /workspace/icefall && \
|
| 70 |
+
# cd /workspace/icefall && \
|
| 71 |
+
# pip install --no-cache-dir -r requirements.txt
|
| 72 |
+
#
|
| 73 |
+
# ENV PYTHONPATH /workspace/icefall:$PYTHONPATH
|
| 74 |
+
#
|
| 75 |
+
# WORKDIR /workspace/icefall
|
.github/scripts/docker/generate_build_matrix.py
ADDED
|
@@ -0,0 +1,140 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
# Copyright 2023 Xiaomi Corp. (authors: Fangjun Kuang)
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
import argparse
|
| 6 |
+
import json
|
| 7 |
+
|
| 8 |
+
|
| 9 |
+
def get_args():
|
| 10 |
+
parser = argparse.ArgumentParser()
|
| 11 |
+
parser.add_argument(
|
| 12 |
+
"--min-torch-version",
|
| 13 |
+
help="torch version",
|
| 14 |
+
)
|
| 15 |
+
|
| 16 |
+
parser.add_argument(
|
| 17 |
+
"--torch-version",
|
| 18 |
+
help="torch version",
|
| 19 |
+
)
|
| 20 |
+
|
| 21 |
+
parser.add_argument(
|
| 22 |
+
"--python-version",
|
| 23 |
+
help="python version",
|
| 24 |
+
)
|
| 25 |
+
return parser.parse_args()
|
| 26 |
+
|
| 27 |
+
|
| 28 |
+
def version_gt(a, b):
|
| 29 |
+
a_major, a_minor = list(map(int, a.split(".")))[:2]
|
| 30 |
+
b_major, b_minor = list(map(int, b.split(".")))[:2]
|
| 31 |
+
if a_major > b_major:
|
| 32 |
+
return True
|
| 33 |
+
|
| 34 |
+
if a_major == b_major and a_minor > b_minor:
|
| 35 |
+
return True
|
| 36 |
+
|
| 37 |
+
return False
|
| 38 |
+
|
| 39 |
+
|
| 40 |
+
def version_ge(a, b):
|
| 41 |
+
a_major, a_minor = list(map(int, a.split(".")))[:2]
|
| 42 |
+
b_major, b_minor = list(map(int, b.split(".")))[:2]
|
| 43 |
+
if a_major > b_major:
|
| 44 |
+
return True
|
| 45 |
+
|
| 46 |
+
if a_major == b_major and a_minor >= b_minor:
|
| 47 |
+
return True
|
| 48 |
+
|
| 49 |
+
return False
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def get_torchaudio_version(torch_version):
|
| 53 |
+
if torch_version == "1.13.0":
|
| 54 |
+
return "0.13.0"
|
| 55 |
+
elif torch_version == "1.13.1":
|
| 56 |
+
return "0.13.1"
|
| 57 |
+
elif torch_version == "2.0.0":
|
| 58 |
+
return "2.0.1"
|
| 59 |
+
elif torch_version == "2.0.1":
|
| 60 |
+
return "2.0.2"
|
| 61 |
+
else:
|
| 62 |
+
return torch_version
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
def get_matrix(min_torch_version, specified_torch_version, specified_python_version):
|
| 66 |
+
k2_version = "1.24.4.dev20250630"
|
| 67 |
+
kaldifeat_version = "1.25.5.dev20250630"
|
| 68 |
+
version = "20250630"
|
| 69 |
+
|
| 70 |
+
# torchaudio 2.5.0 does not support python 3.13
|
| 71 |
+
python_version = ["3.8", "3.9", "3.10", "3.11", "3.12", "3.13"]
|
| 72 |
+
torch_version = []
|
| 73 |
+
torch_version += ["1.13.0", "1.13.1"]
|
| 74 |
+
torch_version += ["2.0.0", "2.0.1"]
|
| 75 |
+
torch_version += ["2.1.0", "2.1.1", "2.1.2"]
|
| 76 |
+
torch_version += ["2.2.0", "2.2.1", "2.2.2"]
|
| 77 |
+
# Test only torch >= 2.3.0
|
| 78 |
+
torch_version += ["2.3.0", "2.3.1"]
|
| 79 |
+
torch_version += ["2.4.0"]
|
| 80 |
+
torch_version += ["2.4.1"]
|
| 81 |
+
torch_version += ["2.5.0"]
|
| 82 |
+
torch_version += ["2.5.1"]
|
| 83 |
+
torch_version += ["2.6.0", "2.7.0", "2.7.1"]
|
| 84 |
+
|
| 85 |
+
if specified_torch_version:
|
| 86 |
+
torch_version = [specified_torch_version]
|
| 87 |
+
|
| 88 |
+
if specified_python_version:
|
| 89 |
+
python_version = [specified_python_version]
|
| 90 |
+
|
| 91 |
+
matrix = []
|
| 92 |
+
for p in python_version:
|
| 93 |
+
for t in torch_version:
|
| 94 |
+
if min_torch_version and version_gt(min_torch_version, t):
|
| 95 |
+
continue
|
| 96 |
+
|
| 97 |
+
# torchaudio <= 1.13.x supports only python <= 3.10
|
| 98 |
+
|
| 99 |
+
if version_gt(p, "3.10") and not version_gt(t, "2.0"):
|
| 100 |
+
continue
|
| 101 |
+
|
| 102 |
+
# only torch>=2.2.0 supports python 3.12
|
| 103 |
+
if version_gt(p, "3.11") and not version_gt(t, "2.1"):
|
| 104 |
+
continue
|
| 105 |
+
|
| 106 |
+
if version_gt(p, "3.12") and not version_gt(t, "2.4"):
|
| 107 |
+
continue
|
| 108 |
+
|
| 109 |
+
if version_gt(t, "2.4") and version_gt("3.10", p):
|
| 110 |
+
# torch>=2.5 requires python 3.10
|
| 111 |
+
continue
|
| 112 |
+
|
| 113 |
+
k2_version_2 = k2_version
|
| 114 |
+
kaldifeat_version_2 = kaldifeat_version
|
| 115 |
+
|
| 116 |
+
matrix.append(
|
| 117 |
+
{
|
| 118 |
+
"k2-version": k2_version_2,
|
| 119 |
+
"kaldifeat-version": kaldifeat_version_2,
|
| 120 |
+
"version": version,
|
| 121 |
+
"python-version": p,
|
| 122 |
+
"torch-version": t,
|
| 123 |
+
"torchaudio-version": get_torchaudio_version(t),
|
| 124 |
+
}
|
| 125 |
+
)
|
| 126 |
+
return matrix
|
| 127 |
+
|
| 128 |
+
|
| 129 |
+
def main():
|
| 130 |
+
args = get_args()
|
| 131 |
+
matrix = get_matrix(
|
| 132 |
+
min_torch_version=args.min_torch_version,
|
| 133 |
+
specified_torch_version=args.torch_version,
|
| 134 |
+
specified_python_version=args.python_version,
|
| 135 |
+
)
|
| 136 |
+
print(json.dumps({"include": matrix}))
|
| 137 |
+
|
| 138 |
+
|
| 139 |
+
if __name__ == "__main__":
|
| 140 |
+
main()
|
.github/scripts/download-gigaspeech-dev-test-dataset.sh
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
# This script downloads the pre-computed fbank features for
|
| 4 |
+
# dev and test datasets of GigaSpeech.
|
| 5 |
+
#
|
| 6 |
+
# You will find directories `~/tmp/giga-dev-dataset-fbank` after running
|
| 7 |
+
# this script.
|
| 8 |
+
|
| 9 |
+
set -e
|
| 10 |
+
|
| 11 |
+
mkdir -p ~/tmp
|
| 12 |
+
cd ~/tmp
|
| 13 |
+
|
| 14 |
+
git lfs install
|
| 15 |
+
git clone https://huggingface.co/csukuangfj/giga-dev-dataset-fbank
|
| 16 |
+
|
| 17 |
+
ls -lh giga-dev-dataset-fbank/data/fbank
|
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
# This script downloads the test-clean and test-other datasets
|
| 4 |
+
# of LibriSpeech and unzip them to the folder ~/tmp/download,
|
| 5 |
+
# which is cached by GitHub actions for later runs.
|
| 6 |
+
#
|
| 7 |
+
# You will find directories ~/tmp/download/LibriSpeech after running
|
| 8 |
+
# this script.
|
| 9 |
+
|
| 10 |
+
set -e
|
| 11 |
+
|
| 12 |
+
mkdir ~/tmp/download
|
| 13 |
+
cd egs/librispeech/ASR
|
| 14 |
+
ln -s ~/tmp/download .
|
| 15 |
+
cd download
|
| 16 |
+
wget -q --no-check-certificate https://www.openslr.org/resources/12/test-clean.tar.gz
|
| 17 |
+
tar xf test-clean.tar.gz
|
| 18 |
+
rm test-clean.tar.gz
|
| 19 |
+
|
| 20 |
+
wget -q --no-check-certificate https://www.openslr.org/resources/12/test-other.tar.gz
|
| 21 |
+
tar xf test-other.tar.gz
|
| 22 |
+
rm test-other.tar.gz
|
| 23 |
+
pwd
|
| 24 |
+
ls -lh
|
| 25 |
+
ls -lh LibriSpeech
|
.github/scripts/generate-piper-phonemize-page.py
ADDED
|
@@ -0,0 +1,90 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
def get_v1_2_0_files():
|
| 5 |
+
prefix = (
|
| 6 |
+
"https://github.com/csukuangfj/piper-phonemize/releases/download/2023.12.5/"
|
| 7 |
+
)
|
| 8 |
+
files = [
|
| 9 |
+
"piper_phonemize-1.2.0-cp310-cp310-macosx_10_14_x86_64.whl",
|
| 10 |
+
"piper_phonemize-1.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 11 |
+
"piper_phonemize-1.2.0-cp311-cp311-macosx_10_14_x86_64.whl",
|
| 12 |
+
"piper_phonemize-1.2.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 13 |
+
"piper_phonemize-1.2.0-cp312-cp312-macosx_10_14_x86_64.whl",
|
| 14 |
+
"piper_phonemize-1.2.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 15 |
+
"piper_phonemize-1.2.0-cp37-cp37m-macosx_10_14_x86_64.whl",
|
| 16 |
+
"piper_phonemize-1.2.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 17 |
+
"piper_phonemize-1.2.0-cp38-cp38-macosx_10_14_x86_64.whl",
|
| 18 |
+
"piper_phonemize-1.2.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 19 |
+
"piper_phonemize-1.2.0-cp39-cp39-macosx_10_14_x86_64.whl",
|
| 20 |
+
"piper_phonemize-1.2.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 21 |
+
]
|
| 22 |
+
ans = [prefix + f for f in files]
|
| 23 |
+
ans.sort()
|
| 24 |
+
return ans
|
| 25 |
+
|
| 26 |
+
|
| 27 |
+
def get_v1_3_0_files():
|
| 28 |
+
prefix = (
|
| 29 |
+
"https://github.com/csukuangfj/piper-phonemize/releases/download/2025.06.23/"
|
| 30 |
+
)
|
| 31 |
+
files = [
|
| 32 |
+
"piper_phonemize-1.3.0-cp310-cp310-macosx_10_9_universal2.whl",
|
| 33 |
+
"piper_phonemize-1.3.0-cp310-cp310-macosx_10_9_x86_64.whl",
|
| 34 |
+
"piper_phonemize-1.3.0-cp310-cp310-macosx_11_0_arm64.whl",
|
| 35 |
+
"piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
|
| 36 |
+
"piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_i686.manylinux2014_i686.whl",
|
| 37 |
+
"piper_phonemize-1.3.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 38 |
+
"piper_phonemize-1.3.0-cp310-cp310-win_amd64.whl",
|
| 39 |
+
"piper_phonemize-1.3.0-cp311-cp311-macosx_10_9_universal2.whl",
|
| 40 |
+
"piper_phonemize-1.3.0-cp311-cp311-macosx_10_9_x86_64.whl",
|
| 41 |
+
"piper_phonemize-1.3.0-cp311-cp311-macosx_11_0_arm64.whl",
|
| 42 |
+
"piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
|
| 43 |
+
"piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_i686.manylinux2014_i686.whl",
|
| 44 |
+
"piper_phonemize-1.3.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 45 |
+
"piper_phonemize-1.3.0-cp311-cp311-win_amd64.whl",
|
| 46 |
+
"piper_phonemize-1.3.0-cp312-cp312-macosx_10_13_universal2.whl",
|
| 47 |
+
"piper_phonemize-1.3.0-cp312-cp312-macosx_10_13_x86_64.whl",
|
| 48 |
+
"piper_phonemize-1.3.0-cp312-cp312-macosx_11_0_arm64.whl",
|
| 49 |
+
"piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
|
| 50 |
+
"piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_i686.manylinux2014_i686.whl",
|
| 51 |
+
"piper_phonemize-1.3.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 52 |
+
"piper_phonemize-1.3.0-cp312-cp312-win_amd64.whl",
|
| 53 |
+
"piper_phonemize-1.3.0-cp313-cp313-macosx_10_13_universal2.whl",
|
| 54 |
+
"piper_phonemize-1.3.0-cp313-cp313-macosx_10_13_x86_64.whl",
|
| 55 |
+
"piper_phonemize-1.3.0-cp313-cp313-macosx_11_0_arm64.whl",
|
| 56 |
+
"piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
|
| 57 |
+
"piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_i686.manylinux2014_i686.whl",
|
| 58 |
+
"piper_phonemize-1.3.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 59 |
+
"piper_phonemize-1.3.0-cp313-cp313-win_amd64.whl",
|
| 60 |
+
"piper_phonemize-1.3.0-cp38-cp38-macosx_10_9_universal2.whl",
|
| 61 |
+
"piper_phonemize-1.3.0-cp38-cp38-macosx_10_9_x86_64.whl",
|
| 62 |
+
"piper_phonemize-1.3.0-cp38-cp38-macosx_11_0_arm64.whl",
|
| 63 |
+
"piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
|
| 64 |
+
"piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_i686.manylinux2014_i686.whl",
|
| 65 |
+
"piper_phonemize-1.3.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 66 |
+
"piper_phonemize-1.3.0-cp38-cp38-win_amd64.whl",
|
| 67 |
+
"piper_phonemize-1.3.0-cp39-cp39-macosx_10_9_universal2.whl",
|
| 68 |
+
"piper_phonemize-1.3.0-cp39-cp39-macosx_10_9_x86_64.whl",
|
| 69 |
+
"piper_phonemize-1.3.0-cp39-cp39-macosx_11_0_arm64.whl",
|
| 70 |
+
"piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl",
|
| 71 |
+
"piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_i686.manylinux2014_i686.whl",
|
| 72 |
+
"piper_phonemize-1.3.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl",
|
| 73 |
+
"piper_phonemize-1.3.0-cp39-cp39-win_amd64.whl",
|
| 74 |
+
]
|
| 75 |
+
ans = [prefix + f for f in files]
|
| 76 |
+
ans.sort()
|
| 77 |
+
return ans
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def main():
|
| 81 |
+
files = get_v1_3_0_files() + get_v1_2_0_files()
|
| 82 |
+
|
| 83 |
+
with open("piper_phonemize.html", "w") as f:
|
| 84 |
+
for url in files:
|
| 85 |
+
file = url.split("/")[-1]
|
| 86 |
+
f.write(f'<a href="{url}">{file}</a><br/>\n')
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
if __name__ == "__main__":
|
| 90 |
+
main()
|
.github/scripts/install-kaldifeat.sh
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
# This script installs kaldifeat into the directory ~/tmp/kaldifeat
|
| 4 |
+
# which is cached by GitHub actions for later runs.
|
| 5 |
+
|
| 6 |
+
set -e
|
| 7 |
+
|
| 8 |
+
mkdir -p ~/tmp
|
| 9 |
+
cd ~/tmp
|
| 10 |
+
git clone https://github.com/csukuangfj/kaldifeat
|
| 11 |
+
cd kaldifeat
|
| 12 |
+
mkdir build
|
| 13 |
+
cd build
|
| 14 |
+
cmake -DCMAKE_BUILD_TYPE=Release ..
|
| 15 |
+
make -j2 _kaldifeat
|
.github/scripts/ksponspeech/ASR/run.sh
ADDED
|
@@ -0,0 +1,132 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/ksponspeech/ASR
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
function test_pretrained_non_streaming() {
|
| 15 |
+
git lfs install
|
| 16 |
+
git clone https://huggingface.co/johnBamma/icefall-asr-ksponspeech-zipformer-2024-06-24
|
| 17 |
+
repo=icefall-asr-ksponspeech-zipformer-2024-06-24
|
| 18 |
+
pushd $repo
|
| 19 |
+
mkdir test_wavs
|
| 20 |
+
cd test_wavs
|
| 21 |
+
curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/0.wav
|
| 22 |
+
curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/1.wav
|
| 23 |
+
curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/2.wav
|
| 24 |
+
curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/3.wav
|
| 25 |
+
curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/trans.txt
|
| 26 |
+
cd ../exp
|
| 27 |
+
ln -s pretrained.pt epoch-99.pt
|
| 28 |
+
ls -lh
|
| 29 |
+
popd
|
| 30 |
+
|
| 31 |
+
log 'test pretrained.py'
|
| 32 |
+
./zipformer/pretrained.py \
|
| 33 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 34 |
+
--tokens $repo/data/lang_bpe_5000/tokens.txt \
|
| 35 |
+
--method greedy_search \
|
| 36 |
+
$repo/test_wavs/0.wav \
|
| 37 |
+
$repo/test_wavs/1.wav \
|
| 38 |
+
$repo/test_wavs/2.wav \
|
| 39 |
+
$repo/test_wavs/3.wav
|
| 40 |
+
|
| 41 |
+
log 'test export-onnx.py'
|
| 42 |
+
|
| 43 |
+
./zipformer/export-onnx.py \
|
| 44 |
+
--tokens $repo/data/lang_bpe_5000/tokens.txt \
|
| 45 |
+
--use-averaged-model 0 \
|
| 46 |
+
--epoch 99 \
|
| 47 |
+
--avg 1 \
|
| 48 |
+
--exp-dir $repo/exp/
|
| 49 |
+
|
| 50 |
+
ls -lh $repo/exp
|
| 51 |
+
|
| 52 |
+
ls -lh $repo/data/lang_bpe_5000/
|
| 53 |
+
|
| 54 |
+
log 'test exported onnx models'
|
| 55 |
+
./zipformer/onnx_pretrained.py \
|
| 56 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 57 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 58 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
| 59 |
+
--tokens $repo/data/lang_bpe_5000/tokens.txt \
|
| 60 |
+
$repo/test_wavs/0.wav
|
| 61 |
+
|
| 62 |
+
dst=/tmp/model-2024-06-24
|
| 63 |
+
mkdir -p $dst
|
| 64 |
+
|
| 65 |
+
cp -av $repo/test_wavs $dst
|
| 66 |
+
cp -v $repo/exp/*.onnx $dst
|
| 67 |
+
cp -v $repo/exp/*.onnx $dst
|
| 68 |
+
cp -v $repo/data/lang_bpe_5000/tokens.txt $dst
|
| 69 |
+
cp -v $repo/data/lang_bpe_5000/bpe.model $dst
|
| 70 |
+
rm -rf $repo
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
function test_pretrained_streaming() {
|
| 74 |
+
git lfs install
|
| 75 |
+
git clone https://huggingface.co/johnBamma/icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
|
| 76 |
+
repo=icefall-asr-ksponspeech-pruned-transducer-stateless7-streaming-2024-06-12
|
| 77 |
+
pushd $repo
|
| 78 |
+
mkdir test_wavs
|
| 79 |
+
cd test_wavs
|
| 80 |
+
curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/0.wav
|
| 81 |
+
curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/1.wav
|
| 82 |
+
curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/2.wav
|
| 83 |
+
curl -SL -O https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16/resolve/main/test_wavs/3.wav
|
| 84 |
+
cd ../exp
|
| 85 |
+
ln -s pretrained.pt epoch-99.pt
|
| 86 |
+
ls -lh
|
| 87 |
+
popd
|
| 88 |
+
|
| 89 |
+
log 'test pretrained.py'
|
| 90 |
+
./pruned_transducer_stateless7_streaming/pretrained.py \
|
| 91 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 92 |
+
--tokens $repo/data/lang_bpe_5000/tokens.txt \
|
| 93 |
+
--method greedy_search \
|
| 94 |
+
$repo/test_wavs/0.wav \
|
| 95 |
+
$repo/test_wavs/1.wav \
|
| 96 |
+
$repo/test_wavs/2.wav \
|
| 97 |
+
$repo/test_wavs/3.wav
|
| 98 |
+
|
| 99 |
+
log 'test export-onnx.py'
|
| 100 |
+
|
| 101 |
+
./pruned_transducer_stateless7_streaming/export-onnx.py \
|
| 102 |
+
--tokens $repo/data/lang_bpe_5000/tokens.txt \
|
| 103 |
+
--use-averaged-model 0 \
|
| 104 |
+
--epoch 99 \
|
| 105 |
+
--avg 1 \
|
| 106 |
+
--decode-chunk-len 32 \
|
| 107 |
+
--exp-dir $repo/exp/
|
| 108 |
+
|
| 109 |
+
ls -lh $repo/exp
|
| 110 |
+
|
| 111 |
+
ls -lh $repo/data/lang_bpe_5000/
|
| 112 |
+
|
| 113 |
+
log 'test exported onnx models'
|
| 114 |
+
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
|
| 115 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 116 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 117 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
| 118 |
+
--tokens $repo/data/lang_bpe_5000/tokens.txt \
|
| 119 |
+
$repo/test_wavs/0.wav
|
| 120 |
+
|
| 121 |
+
dst=/tmp/model-2024-06-16
|
| 122 |
+
mkdir -p $dst
|
| 123 |
+
|
| 124 |
+
cp -v $repo/exp/*.onnx $dst
|
| 125 |
+
cp -v $repo/exp/*.onnx $dst
|
| 126 |
+
cp -v $repo/data/lang_bpe_5000/tokens.txt $dst
|
| 127 |
+
cp -v $repo/data/lang_bpe_5000/bpe.model $dst
|
| 128 |
+
rm -rf $repo
|
| 129 |
+
}
|
| 130 |
+
|
| 131 |
+
test_pretrained_non_streaming
|
| 132 |
+
test_pretrained_streaming
|
.github/scripts/librispeech/ASR/run.sh
ADDED
|
@@ -0,0 +1,1644 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/librispeech/ASR
|
| 12 |
+
|
| 13 |
+
function prepare_data() {
|
| 14 |
+
# We don't download the LM file since it is so large that it will
|
| 15 |
+
# cause OOM error for CI later.
|
| 16 |
+
mkdir -p download/lm
|
| 17 |
+
pushd download/lm
|
| 18 |
+
wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-lm-norm.txt.gz
|
| 19 |
+
wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-lexicon.txt
|
| 20 |
+
wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/librispeech-vocab.txt
|
| 21 |
+
ls -lh
|
| 22 |
+
gunzip librispeech-lm-norm.txt.gz
|
| 23 |
+
|
| 24 |
+
ls -lh
|
| 25 |
+
popd
|
| 26 |
+
|
| 27 |
+
pushd download/
|
| 28 |
+
wget -q https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main/LibriSpeech.tar.bz2
|
| 29 |
+
tar xf LibriSpeech.tar.bz2
|
| 30 |
+
rm LibriSpeech.tar.bz2
|
| 31 |
+
|
| 32 |
+
cd LibriSpeech
|
| 33 |
+
ln -s train-clean-100 train-clean-360
|
| 34 |
+
ln -s train-other-500 train-other-500
|
| 35 |
+
popd
|
| 36 |
+
|
| 37 |
+
mkdir -p data/manifests
|
| 38 |
+
|
| 39 |
+
lhotse prepare librispeech -j 2 -p dev-clean -p dev-other -p test-clean -p test-other -p train-clean-100 download/LibriSpeech data/manifests
|
| 40 |
+
ls -lh data/manifests
|
| 41 |
+
|
| 42 |
+
./local/compute_fbank_librispeech.py --dataset "dev-clean dev-other test-clean test-other train-clean-100" --perturb-speed False
|
| 43 |
+
ls -lh data/fbank
|
| 44 |
+
|
| 45 |
+
./prepare.sh --stage 5 --stop-stage 6
|
| 46 |
+
}
|
| 47 |
+
|
| 48 |
+
function run_diagnostics() {
|
| 49 |
+
./zipformer/train.py \
|
| 50 |
+
--world-size 1 \
|
| 51 |
+
--num-epochs 1 \
|
| 52 |
+
--start-epoch 1 \
|
| 53 |
+
--use-fp16 0 \
|
| 54 |
+
--exp-dir zipformer/exp-small \
|
| 55 |
+
--causal 0 \
|
| 56 |
+
--num-encoder-layers 1,1,1,1,1,1 \
|
| 57 |
+
--feedforward-dim 64,96,96,96,96,96 \
|
| 58 |
+
--encoder-dim 32,64,64,64,64,64 \
|
| 59 |
+
--encoder-unmasked-dim 32,32,32,32,32,32 \
|
| 60 |
+
--base-lr 0.04 \
|
| 61 |
+
--full-libri 0 \
|
| 62 |
+
--enable-musan 0 \
|
| 63 |
+
--max-duration 30 \
|
| 64 |
+
--print-diagnostics 1
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
function test_streaming_zipformer_ctc_hlg() {
|
| 68 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18
|
| 69 |
+
|
| 70 |
+
log "Downloading pre-trained model from $repo_url"
|
| 71 |
+
git lfs install
|
| 72 |
+
git clone $repo_url
|
| 73 |
+
repo=$(basename $repo_url)
|
| 74 |
+
|
| 75 |
+
rm $repo/exp-ctc-rnnt-small/*.onnx
|
| 76 |
+
ls -lh $repo/exp-ctc-rnnt-small
|
| 77 |
+
|
| 78 |
+
# export models to onnx
|
| 79 |
+
./zipformer/export-onnx-streaming-ctc.py \
|
| 80 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 81 |
+
--epoch 30 \
|
| 82 |
+
--avg 3 \
|
| 83 |
+
--exp-dir $repo/exp-ctc-rnnt-small \
|
| 84 |
+
--causal 1 \
|
| 85 |
+
--use-ctc 1 \
|
| 86 |
+
--chunk-size 16 \
|
| 87 |
+
--left-context-frames 128 \
|
| 88 |
+
\
|
| 89 |
+
--num-encoder-layers 2,2,2,2,2,2 \
|
| 90 |
+
--feedforward-dim 512,768,768,768,768,768 \
|
| 91 |
+
--encoder-dim 192,256,256,256,256,256 \
|
| 92 |
+
--encoder-unmasked-dim 192,192,192,192,192,192
|
| 93 |
+
|
| 94 |
+
ls -lh $repo/exp-ctc-rnnt-small
|
| 95 |
+
|
| 96 |
+
for wav in 0.wav 1.wav 8k.wav; do
|
| 97 |
+
python3 ./zipformer/onnx_pretrained_ctc_HLG_streaming.py \
|
| 98 |
+
--nn-model $repo/exp-ctc-rnnt-small/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx \
|
| 99 |
+
--words $repo/data/lang_bpe_500/words.txt \
|
| 100 |
+
--HLG $repo/data/lang_bpe_500/HLG.fst \
|
| 101 |
+
$repo/test_wavs/$wav
|
| 102 |
+
done
|
| 103 |
+
|
| 104 |
+
rm -rf $repo
|
| 105 |
+
}
|
| 106 |
+
|
| 107 |
+
function test_pruned_transducer_stateless_2022_03_12() {
|
| 108 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless-2022-03-12
|
| 109 |
+
|
| 110 |
+
log "Downloading pre-trained model from $repo_url"
|
| 111 |
+
git lfs install
|
| 112 |
+
git clone $repo_url
|
| 113 |
+
repo=$(basename $repo_url)
|
| 114 |
+
|
| 115 |
+
log "Display test files"
|
| 116 |
+
tree $repo/
|
| 117 |
+
ls -lh $repo/test_wavs/*.wav
|
| 118 |
+
|
| 119 |
+
for sym in 1 2 3; do
|
| 120 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 121 |
+
|
| 122 |
+
./pruned_transducer_stateless/pretrained.py \
|
| 123 |
+
--method greedy_search \
|
| 124 |
+
--max-sym-per-frame $sym \
|
| 125 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 126 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 127 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 128 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 129 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 130 |
+
done
|
| 131 |
+
|
| 132 |
+
for method in fast_beam_search modified_beam_search beam_search; do
|
| 133 |
+
log "$method"
|
| 134 |
+
|
| 135 |
+
./pruned_transducer_stateless/pretrained.py \
|
| 136 |
+
--method $method \
|
| 137 |
+
--beam-size 4 \
|
| 138 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 139 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 140 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 141 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 142 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 143 |
+
done
|
| 144 |
+
rm -rf $repo
|
| 145 |
+
}
|
| 146 |
+
|
| 147 |
+
function test_pruned_transducer_stateless2_2022_04_29() {
|
| 148 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless2-2022-04-29
|
| 149 |
+
|
| 150 |
+
log "Downloading pre-trained model from $repo_url"
|
| 151 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 152 |
+
repo=$(basename $repo_url)
|
| 153 |
+
|
| 154 |
+
pushd $repo
|
| 155 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 156 |
+
git lfs pull --include "exp/pretrained-epoch-38-avg-10.pt"
|
| 157 |
+
popd
|
| 158 |
+
|
| 159 |
+
log "Display test files"
|
| 160 |
+
tree $repo/
|
| 161 |
+
ls -lh $repo/test_wavs/*.wav
|
| 162 |
+
|
| 163 |
+
pushd $repo/exp
|
| 164 |
+
ln -s pretrained-epoch-38-avg-10.pt pretrained.pt
|
| 165 |
+
popd
|
| 166 |
+
|
| 167 |
+
for sym in 1 2 3; do
|
| 168 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 169 |
+
|
| 170 |
+
./pruned_transducer_stateless2/pretrained.py \
|
| 171 |
+
--method greedy_search \
|
| 172 |
+
--max-sym-per-frame $sym \
|
| 173 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 174 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 175 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 176 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 177 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 178 |
+
done
|
| 179 |
+
|
| 180 |
+
for method in modified_beam_search beam_search fast_beam_search; do
|
| 181 |
+
log "$method"
|
| 182 |
+
|
| 183 |
+
./pruned_transducer_stateless2/pretrained.py \
|
| 184 |
+
--method $method \
|
| 185 |
+
--beam-size 4 \
|
| 186 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 187 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 188 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 189 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 190 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 191 |
+
done
|
| 192 |
+
rm -rf $repo
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
function test_pruned_transducer_stateless3_2022_04_29() {
|
| 196 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-04-29
|
| 197 |
+
|
| 198 |
+
log "Downloading pre-trained model from $repo_url"
|
| 199 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 200 |
+
repo=$(basename $repo_url)
|
| 201 |
+
pushd $repo
|
| 202 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 203 |
+
git lfs pull --include "exp/pretrained-epoch-25-avg-6.pt"
|
| 204 |
+
popd
|
| 205 |
+
|
| 206 |
+
log "Display test files"
|
| 207 |
+
tree $repo/
|
| 208 |
+
ls -lh $repo/test_wavs/*.wav
|
| 209 |
+
|
| 210 |
+
pushd $repo/exp
|
| 211 |
+
ln -s pretrained-epoch-25-avg-6.pt pretrained.pt
|
| 212 |
+
popd
|
| 213 |
+
|
| 214 |
+
for sym in 1 2 3; do
|
| 215 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 216 |
+
|
| 217 |
+
./pruned_transducer_stateless3/pretrained.py \
|
| 218 |
+
--method greedy_search \
|
| 219 |
+
--max-sym-per-frame $sym \
|
| 220 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 221 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 222 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 223 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 224 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 225 |
+
done
|
| 226 |
+
|
| 227 |
+
for method in modified_beam_search beam_search fast_beam_search; do
|
| 228 |
+
log "$method"
|
| 229 |
+
|
| 230 |
+
./pruned_transducer_stateless3/pretrained.py \
|
| 231 |
+
--method $method \
|
| 232 |
+
--beam-size 4 \
|
| 233 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 234 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 235 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 236 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 237 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 238 |
+
done
|
| 239 |
+
rm -rf $repo
|
| 240 |
+
}
|
| 241 |
+
|
| 242 |
+
function test_pruned_transducer_stateless5_2022_05_13() {
|
| 243 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13
|
| 244 |
+
|
| 245 |
+
log "Downloading pre-trained model from $repo_url"
|
| 246 |
+
git lfs install
|
| 247 |
+
git clone $repo_url
|
| 248 |
+
repo=$(basename $repo_url)
|
| 249 |
+
|
| 250 |
+
log "Display test files"
|
| 251 |
+
tree $repo/
|
| 252 |
+
ls -lh $repo/test_wavs/*.wav
|
| 253 |
+
|
| 254 |
+
pushd $repo/exp
|
| 255 |
+
ln -s pretrained-epoch-39-avg-7.pt pretrained.pt
|
| 256 |
+
popd
|
| 257 |
+
|
| 258 |
+
for sym in 1 2 3; do
|
| 259 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 260 |
+
|
| 261 |
+
./pruned_transducer_stateless5/pretrained.py \
|
| 262 |
+
--method greedy_search \
|
| 263 |
+
--max-sym-per-frame $sym \
|
| 264 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 265 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 266 |
+
--num-encoder-layers 18 \
|
| 267 |
+
--dim-feedforward 2048 \
|
| 268 |
+
--nhead 8 \
|
| 269 |
+
--encoder-dim 512 \
|
| 270 |
+
--decoder-dim 512 \
|
| 271 |
+
--joiner-dim 512 \
|
| 272 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 273 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 274 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 275 |
+
done
|
| 276 |
+
|
| 277 |
+
for method in modified_beam_search beam_search fast_beam_search; do
|
| 278 |
+
log "$method"
|
| 279 |
+
|
| 280 |
+
./pruned_transducer_stateless5/pretrained.py \
|
| 281 |
+
--method $method \
|
| 282 |
+
--beam-size 4 \
|
| 283 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 284 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 285 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 286 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 287 |
+
$repo/test_wavs/1221-135766-0002.wav \
|
| 288 |
+
--num-encoder-layers 18 \
|
| 289 |
+
--dim-feedforward 2048 \
|
| 290 |
+
--nhead 8 \
|
| 291 |
+
--encoder-dim 512 \
|
| 292 |
+
--decoder-dim 512 \
|
| 293 |
+
--joiner-dim 512
|
| 294 |
+
done
|
| 295 |
+
rm -rf $repo
|
| 296 |
+
}
|
| 297 |
+
|
| 298 |
+
function test_pruned_transducer_stateless7_2022_11_11() {
|
| 299 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11
|
| 300 |
+
|
| 301 |
+
log "Downloading pre-trained model from $repo_url"
|
| 302 |
+
git lfs install
|
| 303 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 304 |
+
repo=$(basename $repo_url)
|
| 305 |
+
|
| 306 |
+
log "Display test files"
|
| 307 |
+
tree $repo/
|
| 308 |
+
ls -lh $repo/test_wavs/*.wav
|
| 309 |
+
|
| 310 |
+
pushd $repo/exp
|
| 311 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 312 |
+
git lfs pull --include "exp/cpu_jit.pt"
|
| 313 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 314 |
+
ln -s pretrained.pt epoch-99.pt
|
| 315 |
+
ls -lh *.pt
|
| 316 |
+
popd
|
| 317 |
+
|
| 318 |
+
log "Export to torchscript model"
|
| 319 |
+
./pruned_transducer_stateless7/export.py \
|
| 320 |
+
--exp-dir $repo/exp \
|
| 321 |
+
--use-averaged-model false \
|
| 322 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 323 |
+
--epoch 99 \
|
| 324 |
+
--avg 1 \
|
| 325 |
+
--jit 1
|
| 326 |
+
|
| 327 |
+
ls -lh $repo/exp/*.pt
|
| 328 |
+
|
| 329 |
+
log "Decode with models exported by torch.jit.script()"
|
| 330 |
+
|
| 331 |
+
./pruned_transducer_stateless7/jit_pretrained.py \
|
| 332 |
+
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
| 333 |
+
--nn-model-filename $repo/exp/cpu_jit.pt \
|
| 334 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 335 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 336 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 337 |
+
|
| 338 |
+
for sym in 1 2 3; do
|
| 339 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 340 |
+
|
| 341 |
+
./pruned_transducer_stateless7/pretrained.py \
|
| 342 |
+
--method greedy_search \
|
| 343 |
+
--max-sym-per-frame $sym \
|
| 344 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 345 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 346 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 347 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 348 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 349 |
+
done
|
| 350 |
+
|
| 351 |
+
for method in modified_beam_search beam_search fast_beam_search; do
|
| 352 |
+
log "$method"
|
| 353 |
+
|
| 354 |
+
./pruned_transducer_stateless7/pretrained.py \
|
| 355 |
+
--method $method \
|
| 356 |
+
--beam-size 4 \
|
| 357 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 358 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 359 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 360 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 361 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 362 |
+
done
|
| 363 |
+
rm -rf $repo
|
| 364 |
+
}
|
| 365 |
+
|
| 366 |
+
function test_pruned_transducer_stateless8_2022_11_14() {
|
| 367 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14
|
| 368 |
+
|
| 369 |
+
log "Downloading pre-trained model from $repo_url"
|
| 370 |
+
git lfs install
|
| 371 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 372 |
+
repo=$(basename $repo_url)
|
| 373 |
+
|
| 374 |
+
log "Display test files"
|
| 375 |
+
tree $repo/
|
| 376 |
+
ls -lh $repo/test_wavs/*.wav
|
| 377 |
+
|
| 378 |
+
pushd $repo/exp
|
| 379 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 380 |
+
git lfs pull --include "exp/cpu_jit.pt"
|
| 381 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 382 |
+
ln -s pretrained.pt epoch-99.pt
|
| 383 |
+
ls -lh *.pt
|
| 384 |
+
popd
|
| 385 |
+
|
| 386 |
+
log "Decode with models exported by torch.jit.script()"
|
| 387 |
+
|
| 388 |
+
./pruned_transducer_stateless8/jit_pretrained.py \
|
| 389 |
+
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
| 390 |
+
--nn-model-filename $repo/exp/cpu_jit.pt \
|
| 391 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 392 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 393 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 394 |
+
|
| 395 |
+
log "Export to torchscript model"
|
| 396 |
+
./pruned_transducer_stateless8/export.py \
|
| 397 |
+
--exp-dir $repo/exp \
|
| 398 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 399 |
+
--use-averaged-model false \
|
| 400 |
+
--epoch 99 \
|
| 401 |
+
--avg 1 \
|
| 402 |
+
--jit 1
|
| 403 |
+
|
| 404 |
+
ls -lh $repo/exp/*.pt
|
| 405 |
+
|
| 406 |
+
log "Decode with models exported by torch.jit.script()"
|
| 407 |
+
|
| 408 |
+
./pruned_transducer_stateless8/jit_pretrained.py \
|
| 409 |
+
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
| 410 |
+
--nn-model-filename $repo/exp/cpu_jit.pt \
|
| 411 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 412 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 413 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 414 |
+
|
| 415 |
+
for sym in 1 2 3; do
|
| 416 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 417 |
+
|
| 418 |
+
./pruned_transducer_stateless8/pretrained.py \
|
| 419 |
+
--method greedy_search \
|
| 420 |
+
--max-sym-per-frame $sym \
|
| 421 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 422 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 423 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 424 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 425 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 426 |
+
done
|
| 427 |
+
|
| 428 |
+
for method in modified_beam_search beam_search fast_beam_search; do
|
| 429 |
+
log "$method"
|
| 430 |
+
|
| 431 |
+
./pruned_transducer_stateless8/pretrained.py \
|
| 432 |
+
--method $method \
|
| 433 |
+
--beam-size 4 \
|
| 434 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 435 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 436 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 437 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 438 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 439 |
+
done
|
| 440 |
+
rm -rf $repo
|
| 441 |
+
}
|
| 442 |
+
|
| 443 |
+
function test_pruned_transducer_stateless7_ctc_2022_12_01() {
|
| 444 |
+
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-ctc-2022-12-01
|
| 445 |
+
|
| 446 |
+
log "Downloading pre-trained model from $repo_url"
|
| 447 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 448 |
+
repo=$(basename $repo_url)
|
| 449 |
+
|
| 450 |
+
log "Display test files"
|
| 451 |
+
tree $repo/
|
| 452 |
+
ls -lh $repo/test_wavs/*.wav
|
| 453 |
+
|
| 454 |
+
pushd $repo/exp
|
| 455 |
+
git lfs pull --include "data/lang_bpe_500/HLG.pt"
|
| 456 |
+
git lfs pull --include "data/lang_bpe_500/L.pt"
|
| 457 |
+
git lfs pull --include "data/lang_bpe_500/LG.pt"
|
| 458 |
+
git lfs pull --include "data/lang_bpe_500/Linv.pt"
|
| 459 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 460 |
+
git lfs pull --include "data/lm/G_4_gram.pt"
|
| 461 |
+
git lfs pull --include "exp/cpu_jit.pt"
|
| 462 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 463 |
+
ln -s pretrained.pt epoch-99.pt
|
| 464 |
+
ls -lh *.pt
|
| 465 |
+
popd
|
| 466 |
+
|
| 467 |
+
log "Export to torchscript model"
|
| 468 |
+
./pruned_transducer_stateless7_ctc/export.py \
|
| 469 |
+
--exp-dir $repo/exp \
|
| 470 |
+
--use-averaged-model false \
|
| 471 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 472 |
+
--epoch 99 \
|
| 473 |
+
--avg 1 \
|
| 474 |
+
--jit 1
|
| 475 |
+
|
| 476 |
+
ls -lh $repo/exp/*.pt
|
| 477 |
+
|
| 478 |
+
log "Decode with models exported by torch.jit.script()"
|
| 479 |
+
|
| 480 |
+
./pruned_transducer_stateless7_ctc/jit_pretrained.py \
|
| 481 |
+
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
| 482 |
+
--nn-model-filename $repo/exp/cpu_jit.pt \
|
| 483 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 484 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 485 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 486 |
+
|
| 487 |
+
for m in ctc-decoding 1best; do
|
| 488 |
+
./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
|
| 489 |
+
--model-filename $repo/exp/cpu_jit.pt \
|
| 490 |
+
--words-file $repo/data/lang_bpe_500/words.txt \
|
| 491 |
+
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
| 492 |
+
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
| 493 |
+
--G $repo/data/lm/G_4_gram.pt \
|
| 494 |
+
--method $m \
|
| 495 |
+
--sample-rate 16000 \
|
| 496 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 497 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 498 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 499 |
+
done
|
| 500 |
+
|
| 501 |
+
for sym in 1 2 3; do
|
| 502 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 503 |
+
|
| 504 |
+
./pruned_transducer_stateless7_ctc/pretrained.py \
|
| 505 |
+
--method greedy_search \
|
| 506 |
+
--max-sym-per-frame $sym \
|
| 507 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 508 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 509 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 510 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 511 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 512 |
+
done
|
| 513 |
+
|
| 514 |
+
for method in modified_beam_search beam_search fast_beam_search; do
|
| 515 |
+
log "$method"
|
| 516 |
+
|
| 517 |
+
./pruned_transducer_stateless7_ctc/pretrained.py \
|
| 518 |
+
--method $method \
|
| 519 |
+
--beam-size 4 \
|
| 520 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 521 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 522 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 523 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 524 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 525 |
+
done
|
| 526 |
+
|
| 527 |
+
for m in ctc-decoding 1best; do
|
| 528 |
+
./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
|
| 529 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 530 |
+
--words-file $repo/data/lang_bpe_500/words.txt \
|
| 531 |
+
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
| 532 |
+
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
| 533 |
+
--G $repo/data/lm/G_4_gram.pt \
|
| 534 |
+
--method $m \
|
| 535 |
+
--sample-rate 16000 \
|
| 536 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 537 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 538 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 539 |
+
done
|
| 540 |
+
rm -rf $repo
|
| 541 |
+
}
|
| 542 |
+
|
| 543 |
+
function test_zipformer_mmi_2022_12_08() {
|
| 544 |
+
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-mmi-2022-12-08
|
| 545 |
+
|
| 546 |
+
log "Downloading pre-trained model from $repo_url"
|
| 547 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 548 |
+
repo=$(basename $repo_url)
|
| 549 |
+
|
| 550 |
+
log "Display test files"
|
| 551 |
+
tree $repo/
|
| 552 |
+
ls -lh $repo/test_wavs/*.wav
|
| 553 |
+
|
| 554 |
+
pushd $repo/exp
|
| 555 |
+
git lfs pull --include "data/lang_bpe_500/3gram.pt"
|
| 556 |
+
git lfs pull --include "data/lang_bpe_500/4gram.pt"
|
| 557 |
+
git lfs pull --include "data/lang_bpe_500/L.pt"
|
| 558 |
+
git lfs pull --include "data/lang_bpe_500/LG.pt"
|
| 559 |
+
git lfs pull --include "data/lang_bpe_500/Linv.pt"
|
| 560 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 561 |
+
git lfs pull --include "exp/cpu_jit.pt"
|
| 562 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 563 |
+
ln -s pretrained.pt epoch-99.pt
|
| 564 |
+
ls -lh *.pt
|
| 565 |
+
popd
|
| 566 |
+
|
| 567 |
+
log "Export to torchscript model"
|
| 568 |
+
./zipformer_mmi/export.py \
|
| 569 |
+
--exp-dir $repo/exp \
|
| 570 |
+
--use-averaged-model false \
|
| 571 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 572 |
+
--epoch 99 \
|
| 573 |
+
--avg 1 \
|
| 574 |
+
--jit 1
|
| 575 |
+
|
| 576 |
+
ls -lh $repo/exp/*.pt
|
| 577 |
+
|
| 578 |
+
log "Decode with models exported by torch.jit.script()"
|
| 579 |
+
|
| 580 |
+
./zipformer_mmi/jit_pretrained.py \
|
| 581 |
+
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
| 582 |
+
--nn-model-filename $repo/exp/cpu_jit.pt \
|
| 583 |
+
--lang-dir $repo/data/lang_bpe_500 \
|
| 584 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 585 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 586 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 587 |
+
|
| 588 |
+
for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do
|
| 589 |
+
log "$method"
|
| 590 |
+
|
| 591 |
+
./zipformer_mmi/pretrained.py \
|
| 592 |
+
--method $method \
|
| 593 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 594 |
+
--lang-dir $repo/data/lang_bpe_500 \
|
| 595 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 596 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 597 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 598 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 599 |
+
done
|
| 600 |
+
rm -rf $repo
|
| 601 |
+
}
|
| 602 |
+
|
| 603 |
+
# CI check for the streaming pruned_transducer_stateless7 LibriSpeech recipe:
# downloads the pretrained HF repo, exports with torch.jit.script() and
# torch.jit.trace(), then decodes test wavs with several search methods.
function test_pruned_transducer_stateless7_streaming_2022_12_29() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  # Skip smudge so the clone is fast; the large files are pulled selectively below.
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/cpu_jit.pt"
  git lfs pull --include "exp/pretrained.pt"
  git lfs pull --include "exp/encoder_jit_trace.pt"
  git lfs pull --include "exp/decoder_jit_trace.pt"
  git lfs pull --include "exp/joiner_jit_trace.pt"
  cd exp
  # export.py expects checkpoints named epoch-NN.pt; alias the pretrained one.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless7_streaming/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --decode-chunk-len 32 \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless7_streaming/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --nn-model-filename $repo/exp/cpu_jit.pt \
    --decode-chunk-len 32 \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  log "Export to torchscript model by torch.jit.trace()"
  ./pruned_transducer_stateless7_streaming/jit_trace_export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --decode-chunk-len 32 \
    --epoch 99 \
    --avg 1

  log "Decode with models exported by torch.jit.trace()"

  ./pruned_transducer_stateless7_streaming/jit_trace_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
    --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
    --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
    --decode-chunk-len 32 \
    $repo/test_wavs/1089-134686-0001.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless7_streaming/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --decode-chunk-len 32 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless7_streaming/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --decode-chunk-len 32 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}
|
| 698 |
+
|
| 699 |
+
# CI check for pruned_transducer_stateless7_ctc_bs: exercises torchscript
# export, transducer decoding, and CTC decoding (ctc-decoding / 1best).
function test_pruned_transducer_stateless7_ctc_bs_2023_01_29() {
  repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29

  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  # NOTE(review): include patterns are repo-root-relative, so pulling
  # data/... while inside exp/ works; the cwd only matters for the ln below.
  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/L.pt"
  git lfs pull --include "data/lang_bpe_500/LG.pt"
  git lfs pull --include "data/lang_bpe_500/HLG.pt"
  git lfs pull --include "data/lang_bpe_500/Linv.pt"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/cpu_jit.pt"
  git lfs pull --include "exp/pretrained.pt"
  # export.py expects checkpoints named epoch-NN.pt; alias the pretrained one.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless7_ctc_bs/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless7_ctc_bs/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --nn-model-filename $repo/exp/cpu_jit.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  # CTC decoding with the torchscript model.
  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \
      --model-filename $repo/exp/cpu_jit.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --bpe-model $repo/data/lang_bpe_500/bpe.model \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  # CTC decoding with the raw checkpoint.
  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
      --checkpoint $repo/exp/pretrained.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --bpe-model $repo/data/lang_bpe_500/bpe.model \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done
  rm -rf $repo
}
|
| 795 |
+
|
| 796 |
+
# CI check for conformer_ctc3: decodes with the downloaded torch.jit.trace()
# model, re-exports with --jit-trace, decodes again, then decodes with the
# raw checkpoint — each with ctc-decoding and HLG 1best.
function test_conformer_ctc3_2022_11_27() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conformer-ctc3-2022-11-27

  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/HLG.pt"
  git lfs pull --include "data/lang_bpe_500/L.pt"
  git lfs pull --include "data/lang_bpe_500/LG.pt"
  git lfs pull --include "data/lang_bpe_500/Linv.pt"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "data/lm/G_4_gram.pt"
  git lfs pull --include "exp/jit_trace.pt"
  git lfs pull --include "exp/pretrained.pt"
  # export.py expects checkpoints named epoch-NN.pt; alias the pretrained one.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  # First pass: decode with the jit_trace.pt shipped in the HF repo.
  log "Decode with models exported by torch.jit.trace()"

  for m in ctc-decoding 1best; do
    ./conformer_ctc3/jit_pretrained.py \
      --model-filename $repo/exp/jit_trace.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --bpe-model $repo/data/lang_bpe_500/bpe.model \
      --G $repo/data/lm/G_4_gram.pt \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  log "Export to torchscript model"

  ./conformer_ctc3/export.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --jit-trace 1 \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0

  ls -lh $repo/exp/*.pt

  # Second pass: decode with the freshly exported jit_trace.pt.
  log "Decode with models exported by torch.jit.trace()"

  for m in ctc-decoding 1best; do
    ./conformer_ctc3/jit_pretrained.py \
      --model-filename $repo/exp/jit_trace.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --bpe-model $repo/data/lang_bpe_500/bpe.model \
      --G $repo/data/lm/G_4_gram.pt \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  # Decode with the raw (non-torchscript) checkpoint.
  for m in ctc-decoding 1best; do
    ./conformer_ctc3/pretrained.py \
      --checkpoint $repo/exp/pretrained.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --G $repo/data/lm/G_4_gram.pt \
      --method $m \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done
  rm -rf $repo
}
|
| 879 |
+
|
| 880 |
+
# CI check for lstm_transducer_stateless2: full clone (no LFS skip),
# torch.jit.trace() export, then decoding with traced models and raw checkpoint.
function test_lstm_transducer_stateless2_2022_09_03() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)
  # NOTE(review): abs_repo appears unused inside this function — possibly
  # consumed by later steps outside this view; confirm before removing.
  abs_repo=$(realpath $repo)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  # export.py expects pretrained.pt / epoch-NN.pt names; alias the checkpoint.
  ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
  ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
  popd

  log "Test exporting with torch.jit.trace()"

  ./lstm_transducer_stateless2/export.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0 \
    --jit-trace 1

  log "Decode with models exported by torch.jit.trace()"

  ./lstm_transducer_stateless2/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
    --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
    --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./lstm_transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./lstm_transducer_stateless2/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done
  rm -rf $repo
}
|
| 946 |
+
|
| 947 |
+
# CI check for pruned_transducer_stateless3: exports with both
# torch.jit.script() and torch.jit.trace(), decodes with each, then runs
# greedy/beam decoding on the raw checkpoint.
function test_pruned_transducer_stateless3_2022_05_13() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  # export.py expects pretrained.pt / epoch-NN.pt names; alias the checkpoint.
  ln -s pretrained-iter-1224000-avg-14.pt pretrained.pt
  ln -s pretrained-iter-1224000-avg-14.pt epoch-99.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless3/export.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ./pruned_transducer_stateless3/export.py \
    --exp-dir $repo/exp \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit-trace 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.trace()"

  ./pruned_transducer_stateless3/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --encoder-model-filename $repo/exp/encoder_jit_trace.pt \
    --decoder-model-filename $repo/exp/decoder_jit_trace.pt \
    --joiner-model-filename $repo/exp/joiner_jit_trace.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless3/jit_pretrained.py \
    --bpe-model $repo/data/lang_bpe_500/bpe.model \
    --encoder-model-filename $repo/exp/encoder_jit_script.pt \
    --decoder-model-filename $repo/exp/decoder_jit_script.pt \
    --joiner-model-filename $repo/exp/joiner_jit_script.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless3/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless3/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  rm -rf $repo
}
|
| 1033 |
+
|
| 1034 |
+
# CI check for the streaming pruned_transducer_stateless2 model: decodes in
# simulated-streaming mode (causal convolution) with greedy and beam searches.
function test_streaming_pruned_transducer_stateless2_20220625() {
  repo_url=https://huggingface.co/pkufool/icefall_librispeech_streaming_pruned_transducer_stateless2_20220625

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  # pretrained.py expects the checkpoint to be named pretrained.pt.
  ln -s pretrained-epoch-24-avg-10.pt pretrained.pt
  popd

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --simulate-streaming 1 \
      --causal-convolution 1 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless2/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --simulate-streaming 1 \
      --causal-convolution 1 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done
  rm -rf $repo
}
|
| 1081 |
+
|
| 1082 |
+
# CI check for the streaming zipformer (causal, chunk 16, left context 128):
# torchscript export plus streaming and offline decoding.
function test_streaming_zipformer_2023_05_17() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  # Skip smudge so the clone is fast; the large files are pulled selectively below.
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "data/lang_bpe_500/tokens.txt"
  git lfs pull --include "exp/jit_script_chunk_16_left_128.pt"
  git lfs pull --include "exp/pretrained.pt"
  # export.py expects checkpoints named epoch-NN.pt; alias the pretrained one.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --causal 1 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./zipformer/jit_pretrained_streaming.py \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --nn-model-filename $repo/exp/jit_script_chunk_16_left_128.pt \
    $repo/test_wavs/1089-134686-0001.wav

  for method in greedy_search modified_beam_search fast_beam_search; do
    log "$method"

    ./zipformer/pretrained.py \
      --causal 1 \
      --chunk-size 16 \
      --left-context-frames 128 \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done
  rm -rf $repo
}
|
| 1141 |
+
|
| 1142 |
+
# CI check for the non-streaming zipformer: torchscript export, jit decoding,
# and greedy/modified/fast beam search with the raw checkpoint.
function test_zipformer_2023_05_18() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  # Skip smudge so the clone is fast; the large files are pulled selectively below.
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "data/lang_bpe_500/tokens.txt"
  git lfs pull --include "exp/jit_script.pt"
  git lfs pull --include "exp/pretrained.pt"
  # export.py expects checkpoints named epoch-NN.pt; alias the pretrained one.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer/export.py \
    --exp-dir $repo/exp \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./zipformer/jit_pretrained.py \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --nn-model-filename $repo/exp/jit_script.pt \
    $repo/test_wavs/1089-134686-0001.wav \
    $repo/test_wavs/1221-135766-0001.wav \
    $repo/test_wavs/1221-135766-0002.wav

  for method in greedy_search modified_beam_search fast_beam_search; do
    log "$method"

    ./zipformer/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done
  rm -rf $repo
}
|
| 1197 |
+
|
| 1198 |
+
# CI check for transducer_stateless2 (torchaudio RNN-T loss variant):
# decodes the test wavs with greedy and beam searches on the raw checkpoint.
function test_transducer_stateless2_torchaudio_2022_04_19() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless2-torchaudio-2022-04-19

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in fast_beam_search modified_beam_search beam_search; do
    log "$method"

    ./transducer_stateless2/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done
  rm -rf $repo
}
|
| 1237 |
+
|
| 1238 |
+
# CI check for the zipformer transducer+CTC model: torchscript export with
# both heads enabled, then CTC decoding (ctc-decoding / 1best) via the jit
# model and the raw checkpoint.
function test_zipformer_transducer_ctc_2023_06_13() {
  repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-transducer-ctc-2023-06-13

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  # Skip smudge so the clone is fast; the large files are pulled selectively below.
  GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  pushd $repo/exp
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "data/lang_bpe_500/tokens.txt"
  git lfs pull --include "data/lang_bpe_500/HLG.pt"
  git lfs pull --include "data/lang_bpe_500/L.pt"
  git lfs pull --include "data/lang_bpe_500/LG.pt"
  git lfs pull --include "data/lang_bpe_500/Linv.pt"
  git lfs pull --include "data/lm/G_4_gram.pt"
  git lfs pull --include "exp/jit_script.pt"
  git lfs pull --include "exp/pretrained.pt"
  # export.py expects checkpoints named epoch-NN.pt; alias the pretrained one.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer/export.py \
    --exp-dir $repo/exp \
    --use-transducer 1 \
    --use-ctc 1 \
    --use-averaged-model false \
    --tokens $repo/data/lang_bpe_500/tokens.txt \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh $repo/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  for method in ctc-decoding 1best; do
    ./zipformer/jit_pretrained_ctc.py \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --model-filename $repo/exp/jit_script.pt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --G $repo/data/lm/G_4_gram.pt \
      --method $method \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in ctc-decoding 1best; do
    log "$method"

    ./zipformer/pretrained_ctc.py \
      --use-transducer 1 \
      --use-ctc 1 \
      --method $method \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      --HLG $repo/data/lang_bpe_500/HLG.pt \
      --G $repo/data/lm/G_4_gram.pt \
      --words-file $repo/data/lang_bpe_500/words.txt \
      --sample-rate 16000 \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done
  rm -rf $repo
}
|
| 1312 |
+
|
| 1313 |
+
# CI check for the 100h transducer_stateless_multi_datasets model:
# greedy and beam-search decoding of the test wavs.
function test_100h_transducer_stateless_multi_datasets_bpe_500_2022_02_21() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-100h-transducer-stateless-multi-datasets-bpe-500-2022-02-21

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done
  rm -rf $repo
}
|
| 1352 |
+
|
| 1353 |
+
# CI check for the full transducer_stateless_multi_datasets model:
# greedy and beam-search decoding of the test wavs.
function test_transducer_stateless_multi_datasets_bpe_500_2022_03_01() {
  repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-multi-datasets-bpe-500-2022-03-01

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone $repo_url
  repo=$(basename $repo_url)

  log "Display test files"
  tree $repo/
  ls -lh $repo/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame $sym \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method $method \
      --beam-size 4 \
      --checkpoint $repo/exp/pretrained.pt \
      --tokens $repo/data/lang_bpe_500/tokens.txt \
      $repo/test_wavs/1089-134686-0001.wav \
      $repo/test_wavs/1221-135766-0001.wav \
      $repo/test_wavs/1221-135766-0002.wav
  done
  rm -rf $repo
}
|
| 1392 |
+
|
| 1393 |
+
function test_transducer_stateless_bpe_500_2022_02_07() {
|
| 1394 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07
|
| 1395 |
+
|
| 1396 |
+
log "Downloading pre-trained model from $repo_url"
|
| 1397 |
+
git lfs install
|
| 1398 |
+
git clone $repo_url
|
| 1399 |
+
repo=$(basename $repo_url)
|
| 1400 |
+
|
| 1401 |
+
log "Display test files"
|
| 1402 |
+
tree $repo/
|
| 1403 |
+
ls -lh $repo/test_wavs/*.wav
|
| 1404 |
+
|
| 1405 |
+
for sym in 1 2 3; do
|
| 1406 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 1407 |
+
|
| 1408 |
+
./transducer_stateless/pretrained.py \
|
| 1409 |
+
--method greedy_search \
|
| 1410 |
+
--max-sym-per-frame $sym \
|
| 1411 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 1412 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 1413 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 1414 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 1415 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 1416 |
+
done
|
| 1417 |
+
|
| 1418 |
+
for method in fast_beam_search modified_beam_search beam_search; do
|
| 1419 |
+
log "$method"
|
| 1420 |
+
|
| 1421 |
+
./transducer_stateless/pretrained.py \
|
| 1422 |
+
--method $method \
|
| 1423 |
+
--beam-size 4 \
|
| 1424 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 1425 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 1426 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 1427 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 1428 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 1429 |
+
done
|
| 1430 |
+
rm -rf $repo
|
| 1431 |
+
}
|
| 1432 |
+
|
| 1433 |
+
function test_zipformer_ctc_en_2023_10_02() {
|
| 1434 |
+
repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-en-2023-10-02
|
| 1435 |
+
log "Downloading pre-trained model from $repo_url"
|
| 1436 |
+
git lfs install
|
| 1437 |
+
git clone $repo_url
|
| 1438 |
+
repo=$(basename $repo_url)
|
| 1439 |
+
|
| 1440 |
+
log "Display test files"
|
| 1441 |
+
tree $repo/
|
| 1442 |
+
ls -lh $repo/test_wavs/*.wav
|
| 1443 |
+
|
| 1444 |
+
log "CTC greedy search"
|
| 1445 |
+
|
| 1446 |
+
./zipformer/onnx_pretrained_ctc.py \
|
| 1447 |
+
--nn-model $repo/model.onnx \
|
| 1448 |
+
--tokens $repo/tokens.txt \
|
| 1449 |
+
$repo/test_wavs/0.wav \
|
| 1450 |
+
$repo/test_wavs/1.wav \
|
| 1451 |
+
$repo/test_wavs/2.wav
|
| 1452 |
+
|
| 1453 |
+
log "CTC H decoding"
|
| 1454 |
+
|
| 1455 |
+
./zipformer/onnx_pretrained_ctc_H.py \
|
| 1456 |
+
--nn-model $repo/model.onnx \
|
| 1457 |
+
--tokens $repo/tokens.txt \
|
| 1458 |
+
--H $repo/H.fst \
|
| 1459 |
+
$repo/test_wavs/0.wav \
|
| 1460 |
+
$repo/test_wavs/1.wav \
|
| 1461 |
+
$repo/test_wavs/2.wav
|
| 1462 |
+
|
| 1463 |
+
log "CTC HL decoding"
|
| 1464 |
+
|
| 1465 |
+
./zipformer/onnx_pretrained_ctc_HL.py \
|
| 1466 |
+
--nn-model $repo/model.onnx \
|
| 1467 |
+
--words $repo/words.txt \
|
| 1468 |
+
--HL $repo/HL.fst \
|
| 1469 |
+
$repo/test_wavs/0.wav \
|
| 1470 |
+
$repo/test_wavs/1.wav \
|
| 1471 |
+
$repo/test_wavs/2.wav
|
| 1472 |
+
|
| 1473 |
+
log "CTC HLG decoding"
|
| 1474 |
+
|
| 1475 |
+
./zipformer/onnx_pretrained_ctc_HLG.py \
|
| 1476 |
+
--nn-model $repo/model.onnx \
|
| 1477 |
+
--words $repo/words.txt \
|
| 1478 |
+
--HLG $repo/HLG.fst \
|
| 1479 |
+
$repo/test_wavs/0.wav \
|
| 1480 |
+
$repo/test_wavs/1.wav \
|
| 1481 |
+
$repo/test_wavs/2.wav
|
| 1482 |
+
|
| 1483 |
+
rm -rf $repo
|
| 1484 |
+
}
|
| 1485 |
+
|
| 1486 |
+
function test_conformer_ctc_jit_bpe_500_2021_11_09() {
|
| 1487 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
|
| 1488 |
+
log "Downloading pre-trained model from $repo_url"
|
| 1489 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 1490 |
+
repo=$(basename $repo_url)
|
| 1491 |
+
pushd $repo
|
| 1492 |
+
|
| 1493 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 1494 |
+
git lfs pull --include "data/lang_bpe_500/HLG.pt"
|
| 1495 |
+
git lfs pull --include "data/lang_bpe_500/L.pt"
|
| 1496 |
+
git lfs pull --include "data/lang_bpe_500/L_disambig.pt"
|
| 1497 |
+
git lfs pull --include "data/lang_bpe_500/Linv.pt"
|
| 1498 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 1499 |
+
git lfs pull --include "data/lang_bpe_500/lexicon.txt"
|
| 1500 |
+
git lfs pull --include "data/lang_bpe_500/lexicon_disambig.txt"
|
| 1501 |
+
git lfs pull --include "data/lang_bpe_500/tokens.txt"
|
| 1502 |
+
git lfs pull --include "data/lang_bpe_500/words.txt"
|
| 1503 |
+
git lfs pull --include "data/lm/G_3_gram.fst.txt"
|
| 1504 |
+
|
| 1505 |
+
popd
|
| 1506 |
+
|
| 1507 |
+
log "Display test files"
|
| 1508 |
+
tree $repo/
|
| 1509 |
+
ls -lh $repo/test_wavs/*.wav
|
| 1510 |
+
|
| 1511 |
+
log "CTC decoding"
|
| 1512 |
+
|
| 1513 |
+
./conformer_ctc/pretrained.py \
|
| 1514 |
+
--method ctc-decoding \
|
| 1515 |
+
--num-classes 500 \
|
| 1516 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 1517 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 1518 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 1519 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 1520 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 1521 |
+
|
| 1522 |
+
log "HLG decoding"
|
| 1523 |
+
|
| 1524 |
+
./conformer_ctc/pretrained.py \
|
| 1525 |
+
--method 1best \
|
| 1526 |
+
--num-classes 500 \
|
| 1527 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 1528 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 1529 |
+
--words-file $repo/data/lang_bpe_500/words.txt \
|
| 1530 |
+
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
| 1531 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 1532 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 1533 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 1534 |
+
|
| 1535 |
+
log "CTC decoding on CPU with kaldi decoders using OpenFst"
|
| 1536 |
+
|
| 1537 |
+
log "Exporting model with torchscript"
|
| 1538 |
+
|
| 1539 |
+
pushd $repo/exp
|
| 1540 |
+
ln -s pretrained.pt epoch-99.pt
|
| 1541 |
+
popd
|
| 1542 |
+
|
| 1543 |
+
./conformer_ctc/export.py \
|
| 1544 |
+
--epoch 99 \
|
| 1545 |
+
--avg 1 \
|
| 1546 |
+
--exp-dir $repo/exp \
|
| 1547 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 1548 |
+
--jit 1
|
| 1549 |
+
|
| 1550 |
+
ls -lh $repo/exp
|
| 1551 |
+
|
| 1552 |
+
|
| 1553 |
+
log "Generating H.fst, HL.fst"
|
| 1554 |
+
|
| 1555 |
+
./local/prepare_lang_fst.py --lang-dir $repo/data/lang_bpe_500 --ngram-G $repo/data/lm/G_3_gram.fst.txt
|
| 1556 |
+
|
| 1557 |
+
ls -lh $repo/data/lang_bpe_500
|
| 1558 |
+
|
| 1559 |
+
log "Decoding with H on CPU with OpenFst"
|
| 1560 |
+
|
| 1561 |
+
./conformer_ctc/jit_pretrained_decode_with_H.py \
|
| 1562 |
+
--nn-model $repo/exp/cpu_jit.pt \
|
| 1563 |
+
--H $repo/data/lang_bpe_500/H.fst \
|
| 1564 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 1565 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 1566 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 1567 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 1568 |
+
|
| 1569 |
+
log "Decoding with HL on CPU with OpenFst"
|
| 1570 |
+
|
| 1571 |
+
./conformer_ctc/jit_pretrained_decode_with_HL.py \
|
| 1572 |
+
--nn-model $repo/exp/cpu_jit.pt \
|
| 1573 |
+
--HL $repo/data/lang_bpe_500/HL.fst \
|
| 1574 |
+
--words $repo/data/lang_bpe_500/words.txt \
|
| 1575 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 1576 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 1577 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 1578 |
+
|
| 1579 |
+
log "Decoding with HLG on CPU with OpenFst"
|
| 1580 |
+
|
| 1581 |
+
./conformer_ctc/jit_pretrained_decode_with_HLG.py \
|
| 1582 |
+
--nn-model $repo/exp/cpu_jit.pt \
|
| 1583 |
+
--HLG $repo/data/lang_bpe_500/HLG.fst \
|
| 1584 |
+
--words $repo/data/lang_bpe_500/words.txt \
|
| 1585 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 1586 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 1587 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 1588 |
+
|
| 1589 |
+
rm -rf $repo
|
| 1590 |
+
}
|
| 1591 |
+
|
| 1592 |
+
function test_transducer_bpe_500_2021_12_23() {
|
| 1593 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-bpe-500-2021-12-23
|
| 1594 |
+
|
| 1595 |
+
log "Downloading pre-trained model from $repo_url"
|
| 1596 |
+
git lfs install
|
| 1597 |
+
git clone $repo_url
|
| 1598 |
+
repo=$(basename $repo_url)
|
| 1599 |
+
|
| 1600 |
+
log "Display test files"
|
| 1601 |
+
tree $repo/
|
| 1602 |
+
ls -lh $repo/test_wavs/*.wav
|
| 1603 |
+
|
| 1604 |
+
log "Beam search decoding"
|
| 1605 |
+
|
| 1606 |
+
./transducer/pretrained.py \
|
| 1607 |
+
--method beam_search \
|
| 1608 |
+
--beam-size 4 \
|
| 1609 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 1610 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 1611 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 1612 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 1613 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 1614 |
+
|
| 1615 |
+
rm -rf $repo
|
| 1616 |
+
}
|
| 1617 |
+
|
| 1618 |
+
prepare_data
|
| 1619 |
+
run_diagnostics
|
| 1620 |
+
test_streaming_zipformer_ctc_hlg
|
| 1621 |
+
test_pruned_transducer_stateless_2022_03_12
|
| 1622 |
+
test_pruned_transducer_stateless2_2022_04_29
|
| 1623 |
+
test_pruned_transducer_stateless3_2022_04_29
|
| 1624 |
+
test_pruned_transducer_stateless5_2022_05_13
|
| 1625 |
+
test_pruned_transducer_stateless7_2022_11_11
|
| 1626 |
+
test_pruned_transducer_stateless8_2022_11_14
|
| 1627 |
+
test_pruned_transducer_stateless7_ctc_2022_12_01
|
| 1628 |
+
test_zipformer_mmi_2022_12_08
|
| 1629 |
+
test_pruned_transducer_stateless7_streaming_2022_12_29
|
| 1630 |
+
test_pruned_transducer_stateless7_ctc_bs_2023_01_29
|
| 1631 |
+
test_conformer_ctc3_2022_11_27
|
| 1632 |
+
test_lstm_transducer_stateless2_2022_09_03
|
| 1633 |
+
test_pruned_transducer_stateless3_2022_05_13
|
| 1634 |
+
test_streaming_pruned_transducer_stateless2_20220625
|
| 1635 |
+
test_streaming_zipformer_2023_05_17
|
| 1636 |
+
test_zipformer_2023_05_18
|
| 1637 |
+
test_transducer_stateless2_torchaudio_2022_04_19
|
| 1638 |
+
test_zipformer_transducer_ctc_2023_06_13
|
| 1639 |
+
test_100h_transducer_stateless_multi_datasets_bpe_500_2022_02_21
|
| 1640 |
+
test_transducer_stateless_multi_datasets_bpe_500_2022_03_01
|
| 1641 |
+
test_transducer_stateless_bpe_500_2022_02_07
|
| 1642 |
+
test_zipformer_ctc_en_2023_10_02
|
| 1643 |
+
# test_conformer_ctc_jit_bpe_500_2021_11_09 # failes for torch != 1.13.x and torch != 2.0.x
|
| 1644 |
+
test_transducer_bpe_500_2021_12_23
|
.github/scripts/librispeech/ASR/run_rknn.sh
ADDED
|
@@ -0,0 +1,275 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
python3 -m pip install kaldi-native-fbank soundfile librosa
|
| 6 |
+
|
| 7 |
+
log() {
|
| 8 |
+
# This function is from espnet
|
| 9 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 10 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
cd egs/librispeech/ASR
|
| 14 |
+
|
| 15 |
+
# https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed
|
| 16 |
+
# sherpa-onnx-streaming-zipformer-bilingual-zh-en-2023-02-20
|
| 17 |
+
function export_2023_02_20() {
|
| 18 |
+
d=exp_2023_02_20
|
| 19 |
+
|
| 20 |
+
mkdir $d
|
| 21 |
+
pushd $d
|
| 22 |
+
|
| 23 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/exp/pretrained.pt
|
| 24 |
+
mv pretrained.pt epoch-99.pt
|
| 25 |
+
|
| 26 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/data/lang_char_bpe/tokens.txt
|
| 27 |
+
|
| 28 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/test_wavs/0.wav
|
| 29 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/test_wavs/1.wav
|
| 30 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/test_wavs/2.wav
|
| 31 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/test_wavs/3.wav
|
| 32 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-chinese-english-mixed/resolve/main/test_wavs/4.wav
|
| 33 |
+
ls -lh
|
| 34 |
+
popd
|
| 35 |
+
|
| 36 |
+
./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
|
| 37 |
+
--dynamic-batch 0 \
|
| 38 |
+
--enable-int8-quantization 0 \
|
| 39 |
+
--tokens $d/tokens.txt \
|
| 40 |
+
--use-averaged-model 0 \
|
| 41 |
+
--epoch 99 \
|
| 42 |
+
--avg 1 \
|
| 43 |
+
--exp-dir $d/ \
|
| 44 |
+
--decode-chunk-len 64 \
|
| 45 |
+
--num-encoder-layers "2,4,3,2,4" \
|
| 46 |
+
--feedforward-dims "1024,1024,1536,1536,1024" \
|
| 47 |
+
--nhead "8,8,8,8,8" \
|
| 48 |
+
--encoder-dims "384,384,384,384,384" \
|
| 49 |
+
--attention-dims "192,192,192,192,192" \
|
| 50 |
+
--encoder-unmasked-dims "256,256,256,256,256" \
|
| 51 |
+
--zipformer-downsampling-factors "1,2,4,8,2" \
|
| 52 |
+
--cnn-module-kernels "31,31,31,31,31" \
|
| 53 |
+
--decoder-dim 512 \
|
| 54 |
+
--joiner-dim 512
|
| 55 |
+
|
| 56 |
+
ls -lh $d/
|
| 57 |
+
|
| 58 |
+
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
|
| 59 |
+
--encoder-model-filename $d/encoder-epoch-99-avg-1.onnx \
|
| 60 |
+
--decoder-model-filename $d/decoder-epoch-99-avg-1.onnx \
|
| 61 |
+
--joiner-model-filename $d/joiner-epoch-99-avg-1.onnx \
|
| 62 |
+
--tokens $d/tokens.txt \
|
| 63 |
+
$d/0.wav
|
| 64 |
+
|
| 65 |
+
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
|
| 66 |
+
--encoder-model-filename $d/encoder-epoch-99-avg-1.onnx \
|
| 67 |
+
--decoder-model-filename $d/decoder-epoch-99-avg-1.onnx \
|
| 68 |
+
--joiner-model-filename $d/joiner-epoch-99-avg-1.onnx \
|
| 69 |
+
--tokens $d/tokens.txt \
|
| 70 |
+
$d/1.wav
|
| 71 |
+
|
| 72 |
+
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
|
| 73 |
+
dst=sherpa-onnx-$platform-streaming-zipformer-bilingual-zh-en-2023-02-20
|
| 74 |
+
mkdir -p $dst
|
| 75 |
+
|
| 76 |
+
./pruned_transducer_stateless7_streaming/export_rknn.py \
|
| 77 |
+
--in-encoder $d/encoder-epoch-99-avg-1.onnx \
|
| 78 |
+
--in-decoder $d/decoder-epoch-99-avg-1.onnx \
|
| 79 |
+
--in-joiner $d/joiner-epoch-99-avg-1.onnx \
|
| 80 |
+
--out-encoder $dst/encoder.rknn \
|
| 81 |
+
--out-decoder $dst/decoder.rknn \
|
| 82 |
+
--out-joiner $dst/joiner.rknn \
|
| 83 |
+
--target-platform $platform 2>/dev/null
|
| 84 |
+
|
| 85 |
+
ls -lh $dst/
|
| 86 |
+
|
| 87 |
+
./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
|
| 88 |
+
--encoder $d/encoder-epoch-99-avg-1.onnx \
|
| 89 |
+
--decoder $d/decoder-epoch-99-avg-1.onnx \
|
| 90 |
+
--joiner $d/joiner-epoch-99-avg-1.onnx \
|
| 91 |
+
--tokens $d/tokens.txt \
|
| 92 |
+
--wav $d/0.wav
|
| 93 |
+
|
| 94 |
+
cp $d/tokens.txt $dst
|
| 95 |
+
mkdir $dst/test_wavs
|
| 96 |
+
cp $d/*.wav $dst/test_wavs
|
| 97 |
+
|
| 98 |
+
tar cjvf $dst.tar.bz2 $dst
|
| 99 |
+
ls -lh $dst.tar.bz2
|
| 100 |
+
mv $dst.tar.bz2 /icefall/
|
| 101 |
+
ls -lh $dst/
|
| 102 |
+
echo "---"
|
| 103 |
+
|
| 104 |
+
rm -rf $dst
|
| 105 |
+
done
|
| 106 |
+
}
|
| 107 |
+
|
| 108 |
+
# https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t
|
| 109 |
+
# sherpa-onnx-streaming-zipformer-small-bilingual-zh-en-2023-02-16
|
| 110 |
+
function export_2023_02_16() {
|
| 111 |
+
d=exp_2023_02_16
|
| 112 |
+
|
| 113 |
+
mkdir $d
|
| 114 |
+
pushd $d
|
| 115 |
+
|
| 116 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/exp/pretrained.pt
|
| 117 |
+
mv pretrained.pt epoch-99.pt
|
| 118 |
+
|
| 119 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/data/lang_char_bpe/tokens.txt
|
| 120 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/test_wavs/0.wav
|
| 121 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/test_wavs/1.wav
|
| 122 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/test_wavs/2.wav
|
| 123 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/test_wavs/3.wav
|
| 124 |
+
curl -SL -O https://huggingface.co/csukuangfj/k2fsa-zipformer-bilingual-zh-en-t/resolve/main/test_wavs/4.wav
|
| 125 |
+
|
| 126 |
+
ls -lh
|
| 127 |
+
|
| 128 |
+
popd
|
| 129 |
+
|
| 130 |
+
./pruned_transducer_stateless7_streaming/export-onnx-zh.py \
|
| 131 |
+
--dynamic-batch 0 \
|
| 132 |
+
--enable-int8-quantization 0 \
|
| 133 |
+
--tokens $d/tokens.txt \
|
| 134 |
+
--use-averaged-model 0 \
|
| 135 |
+
--epoch 99 \
|
| 136 |
+
--avg 1 \
|
| 137 |
+
--exp-dir $d/ \
|
| 138 |
+
--decode-chunk-len 64 \
|
| 139 |
+
\
|
| 140 |
+
--num-encoder-layers 2,2,2,2,2 \
|
| 141 |
+
--feedforward-dims 768,768,768,768,768 \
|
| 142 |
+
--nhead 4,4,4,4,4 \
|
| 143 |
+
--encoder-dims 256,256,256,256,256 \
|
| 144 |
+
--attention-dims 192,192,192,192,192 \
|
| 145 |
+
--encoder-unmasked-dims 192,192,192,192,192 \
|
| 146 |
+
\
|
| 147 |
+
--zipformer-downsampling-factors "1,2,4,8,2" \
|
| 148 |
+
--cnn-module-kernels "31,31,31,31,31" \
|
| 149 |
+
--decoder-dim 512 \
|
| 150 |
+
--joiner-dim 512
|
| 151 |
+
|
| 152 |
+
ls -lh $d/
|
| 153 |
+
|
| 154 |
+
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
|
| 155 |
+
--encoder-model-filename $d/encoder-epoch-99-avg-1.onnx \
|
| 156 |
+
--decoder-model-filename $d/decoder-epoch-99-avg-1.onnx \
|
| 157 |
+
--joiner-model-filename $d/joiner-epoch-99-avg-1.onnx \
|
| 158 |
+
--tokens $d/tokens.txt \
|
| 159 |
+
$d/0.wav
|
| 160 |
+
|
| 161 |
+
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
|
| 162 |
+
--encoder-model-filename $d/encoder-epoch-99-avg-1.onnx \
|
| 163 |
+
--decoder-model-filename $d/decoder-epoch-99-avg-1.onnx \
|
| 164 |
+
--joiner-model-filename $d/joiner-epoch-99-avg-1.onnx \
|
| 165 |
+
--tokens $d/tokens.txt \
|
| 166 |
+
$d/1.wav
|
| 167 |
+
|
| 168 |
+
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
|
| 169 |
+
dst=sherpa-onnx-$platform-streaming-zipformer-small-bilingual-zh-en-2023-02-16
|
| 170 |
+
mkdir -p $dst
|
| 171 |
+
|
| 172 |
+
./pruned_transducer_stateless7_streaming/export_rknn.py \
|
| 173 |
+
--in-encoder $d/encoder-epoch-99-avg-1.onnx \
|
| 174 |
+
--in-decoder $d/decoder-epoch-99-avg-1.onnx \
|
| 175 |
+
--in-joiner $d/joiner-epoch-99-avg-1.onnx \
|
| 176 |
+
--out-encoder $dst/encoder.rknn \
|
| 177 |
+
--out-decoder $dst/decoder.rknn \
|
| 178 |
+
--out-joiner $dst/joiner.rknn \
|
| 179 |
+
--target-platform $platform 2>/dev/null
|
| 180 |
+
|
| 181 |
+
ls -lh $dst/
|
| 182 |
+
|
| 183 |
+
./pruned_transducer_stateless7_streaming/test_rknn_on_cpu_simulator.py \
|
| 184 |
+
--encoder $d/encoder-epoch-99-avg-1.onnx \
|
| 185 |
+
--decoder $d/decoder-epoch-99-avg-1.onnx \
|
| 186 |
+
--joiner $d/joiner-epoch-99-avg-1.onnx \
|
| 187 |
+
--tokens $d/tokens.txt \
|
| 188 |
+
--wav $d/0.wav
|
| 189 |
+
|
| 190 |
+
cp $d/tokens.txt $dst
|
| 191 |
+
mkdir $dst/test_wavs
|
| 192 |
+
cp $d/*.wav $dst/test_wavs
|
| 193 |
+
|
| 194 |
+
tar cjvf $dst.tar.bz2 $dst
|
| 195 |
+
ls -lh $dst.tar.bz2
|
| 196 |
+
mv $dst.tar.bz2 /icefall/
|
| 197 |
+
ls -lh $dst/
|
| 198 |
+
echo "---"
|
| 199 |
+
|
| 200 |
+
rm -rf $dst
|
| 201 |
+
done
|
| 202 |
+
}
|
| 203 |
+
|
| 204 |
+
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#csukuangfj-sherpa-onnx-streaming-zipformer-en-2023-06-26-english
|
| 205 |
+
function export_2023_06_26() {
|
| 206 |
+
d=exp_2023_06_26
|
| 207 |
+
|
| 208 |
+
mkdir $d
|
| 209 |
+
pushd $d
|
| 210 |
+
|
| 211 |
+
curl -SL -O https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/exp/pretrained.pt
|
| 212 |
+
mv pretrained.pt epoch-99.pt
|
| 213 |
+
|
| 214 |
+
curl -SL -O https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/data/lang_bpe_500/tokens.txt
|
| 215 |
+
|
| 216 |
+
curl -SL -o 0.wav https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/data/lang_bpe_500/tokens.txt
|
| 217 |
+
curl -SL -o 1.wav https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/test_wavs/1221-135766-0001.wav
|
| 218 |
+
curl -SL -o 2.wav https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17/resolve/main/test_wavs/1221-135766-0002.wav
|
| 219 |
+
|
| 220 |
+
ls -lh
|
| 221 |
+
|
| 222 |
+
popd
|
| 223 |
+
|
| 224 |
+
./zipformer/export-onnx-streaming.py \
|
| 225 |
+
--dynamic-batch 0 \
|
| 226 |
+
--enable-int8-quantization 0 \
|
| 227 |
+
--tokens $d/tokens.txt \
|
| 228 |
+
--use-averaged-model 0 \
|
| 229 |
+
--epoch 99 \
|
| 230 |
+
--avg 1 \
|
| 231 |
+
--exp-dir $d \
|
| 232 |
+
--use-ctc 0 \
|
| 233 |
+
--use-transducer 1 \
|
| 234 |
+
\
|
| 235 |
+
--chunk-size 32 \
|
| 236 |
+
--left-context-frames 128 \
|
| 237 |
+
--causal 1
|
| 238 |
+
|
| 239 |
+
ls -lh $d/
|
| 240 |
+
|
| 241 |
+
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
|
| 242 |
+
dst=sherpa-onnx-$platform-streaming-zipformer-en-2023-06-26
|
| 243 |
+
mkdir -p $dst
|
| 244 |
+
|
| 245 |
+
./zipformer/export_rknn_transducer_streaming.py \
|
| 246 |
+
--in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 247 |
+
--in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 248 |
+
--in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 249 |
+
--out-encoder $dst/encoder.rknn \
|
| 250 |
+
--out-decoder $dst/decoder.rknn \
|
| 251 |
+
--out-joiner $dst/joiner.rknn \
|
| 252 |
+
--target-platform $platform
|
| 253 |
+
|
| 254 |
+
ls -lh $dst/
|
| 255 |
+
|
| 256 |
+
cp $d/tokens.txt $dst
|
| 257 |
+
mkdir $dst/test_wavs
|
| 258 |
+
cp $d/*.wav $dst/test_wavs
|
| 259 |
+
|
| 260 |
+
tar cjvf $dst.tar.bz2 $dst
|
| 261 |
+
ls -lh $dst.tar.bz2
|
| 262 |
+
mv $dst.tar.bz2 /icefall/
|
| 263 |
+
ls -lh $dst/
|
| 264 |
+
echo "---"
|
| 265 |
+
|
| 266 |
+
rm -rf $dst
|
| 267 |
+
done
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
if [[ $rknn_toolkit2_version == "2.1.0" ]]; then
|
| 271 |
+
export_2023_02_16
|
| 272 |
+
export_2023_02_20
|
| 273 |
+
else
|
| 274 |
+
export_2023_06_26
|
| 275 |
+
fi
|
.github/scripts/ljspeech/TTS/run-matcha.sh
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
apt-get update
|
| 6 |
+
apt-get install -y sox
|
| 7 |
+
|
| 8 |
+
python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
|
| 9 |
+
python3 -m pip install espnet_tts_frontend
|
| 10 |
+
python3 -m pip install numba conformer==0.3.2 diffusers librosa
|
| 11 |
+
|
| 12 |
+
log() {
|
| 13 |
+
# This function is from espnet
|
| 14 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 15 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 16 |
+
}
|
| 17 |
+
|
| 18 |
+
cd egs/ljspeech/TTS
|
| 19 |
+
|
| 20 |
+
sed -i.bak s/600/8/g ./prepare.sh
|
| 21 |
+
sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
|
| 22 |
+
sed -i.bak s/500/5/g ./prepare.sh
|
| 23 |
+
git diff
|
| 24 |
+
|
| 25 |
+
function prepare_data() {
|
| 26 |
+
# We have created a subset of the data for testing
|
| 27 |
+
#
|
| 28 |
+
mkdir -p download
|
| 29 |
+
pushd download
|
| 30 |
+
wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
|
| 31 |
+
tar xvf LJSpeech-1.1.tar.bz2
|
| 32 |
+
popd
|
| 33 |
+
|
| 34 |
+
./prepare.sh
|
| 35 |
+
tree .
|
| 36 |
+
}
|
| 37 |
+
|
| 38 |
+
function train() {
|
| 39 |
+
pushd ./matcha
|
| 40 |
+
sed -i.bak s/1500/3/g ./train.py
|
| 41 |
+
git diff .
|
| 42 |
+
popd
|
| 43 |
+
|
| 44 |
+
./matcha/train.py \
|
| 45 |
+
--exp-dir matcha/exp \
|
| 46 |
+
--num-epochs 1 \
|
| 47 |
+
--save-every-n 1 \
|
| 48 |
+
--num-buckets 2 \
|
| 49 |
+
--tokens data/tokens.txt \
|
| 50 |
+
--max-duration 20
|
| 51 |
+
|
| 52 |
+
ls -lh matcha/exp
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
function infer() {
|
| 56 |
+
|
| 57 |
+
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
|
| 58 |
+
|
| 59 |
+
./matcha/infer.py \
|
| 60 |
+
--num-buckets 2 \
|
| 61 |
+
--epoch 1 \
|
| 62 |
+
--exp-dir ./matcha/exp \
|
| 63 |
+
--tokens data/tokens.txt \
|
| 64 |
+
--vocoder ./generator_v1 \
|
| 65 |
+
--input-text "how are you doing?" \
|
| 66 |
+
--output-wav ./generated.wav
|
| 67 |
+
|
| 68 |
+
ls -lh *.wav
|
| 69 |
+
soxi ./generated.wav
|
| 70 |
+
rm -v ./generated.wav
|
| 71 |
+
rm -v generator_v1
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
function export_onnx() {
|
| 75 |
+
pushd matcha/exp
|
| 76 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/epoch-4000.pt
|
| 77 |
+
popd
|
| 78 |
+
|
| 79 |
+
pushd data/fbank
|
| 80 |
+
rm -fv *.json
|
| 81 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/data/cmvn.json
|
| 82 |
+
popd
|
| 83 |
+
|
| 84 |
+
./matcha/export_onnx.py \
|
| 85 |
+
--exp-dir ./matcha/exp \
|
| 86 |
+
--epoch 4000 \
|
| 87 |
+
--tokens ./data/tokens.txt \
|
| 88 |
+
--cmvn ./data/fbank/cmvn.json
|
| 89 |
+
|
| 90 |
+
ls -lh *.onnx
|
| 91 |
+
|
| 92 |
+
if false; then
|
| 93 |
+
# The CI machine does not have enough memory to run it
|
| 94 |
+
#
|
| 95 |
+
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v1
|
| 96 |
+
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v2
|
| 97 |
+
curl -SL -O https://github.com/csukuangfj/models/raw/refs/heads/master/hifigan/generator_v3
|
| 98 |
+
python3 ./matcha/export_onnx_hifigan.py
|
| 99 |
+
else
|
| 100 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v1.onnx
|
| 101 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v2.onnx
|
| 102 |
+
curl -SL -O https://huggingface.co/csukuangfj/icefall-tts-ljspeech-matcha-en-2024-10-28/resolve/main/exp/hifigan_v3.onnx
|
| 103 |
+
fi
|
| 104 |
+
|
| 105 |
+
ls -lh *.onnx
|
| 106 |
+
|
| 107 |
+
for v in v1 v2 v3; do
|
| 108 |
+
python3 ./matcha/onnx_pretrained.py \
|
| 109 |
+
--acoustic-model ./model-steps-6.onnx \
|
| 110 |
+
--vocoder ./hifigan_$v.onnx \
|
| 111 |
+
--tokens ./data/tokens.txt \
|
| 112 |
+
--input-text "how are you doing?" \
|
| 113 |
+
--output-wav /icefall/generated-matcha-tts-steps-6-$v.wav
|
| 114 |
+
done
|
| 115 |
+
|
| 116 |
+
ls -lh /icefall/*.wav
|
| 117 |
+
soxi /icefall/generated-matcha-tts-steps-6-*.wav
|
| 118 |
+
|
| 119 |
+
cp ./model-steps-*.onnx /icefall
|
| 120 |
+
|
| 121 |
+
d=matcha-icefall-en_US-ljspeech
|
| 122 |
+
mkdir $d
|
| 123 |
+
cp -v data/tokens.txt $d
|
| 124 |
+
cp model-steps-3.onnx $d
|
| 125 |
+
pushd $d
|
| 126 |
+
curl -SL -O https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
|
| 127 |
+
tar xf espeak-ng-data.tar.bz2
|
| 128 |
+
rm espeak-ng-data.tar.bz2
|
| 129 |
+
|
| 130 |
+
cat >README.md <<EOF
|
| 131 |
+
# Introduction
|
| 132 |
+
|
| 133 |
+
This model is trained using the dataset from
|
| 134 |
+
https://keithito.com/LJ-Speech-Dataset/
|
| 135 |
+
|
| 136 |
+
The dataset contains only 1 female speaker.
|
| 137 |
+
|
| 138 |
+
You can find the training code at
|
| 139 |
+
https://github.com/k2-fsa/icefall/tree/master/egs/ljspeech/TTS#matcha
|
| 140 |
+
EOF
|
| 141 |
+
|
| 142 |
+
ls -lh
|
| 143 |
+
|
| 144 |
+
popd
|
| 145 |
+
|
| 146 |
+
tar cvjf $d.tar.bz2 $d
|
| 147 |
+
mv $d.tar.bz2 /icefall
|
| 148 |
+
mv $d /icefall
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
prepare_data
|
| 152 |
+
train
|
| 153 |
+
infer
|
| 154 |
+
export_onnx
|
| 155 |
+
|
| 156 |
+
rm -rfv generator_v* matcha/exp
|
| 157 |
+
git checkout .
|
.github/scripts/ljspeech/TTS/run.sh
ADDED
|
@@ -0,0 +1,157 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
python3 -m pip install piper_phonemize -f https://k2-fsa.github.io/icefall/piper_phonemize.html
|
| 6 |
+
python3 -m pip install espnet_tts_frontend
|
| 7 |
+
python3 -m pip install numba
|
| 8 |
+
|
| 9 |
+
log() {
|
| 10 |
+
# This function is from espnet
|
| 11 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 12 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 13 |
+
}
|
| 14 |
+
|
| 15 |
+
cd egs/ljspeech/TTS
|
| 16 |
+
|
| 17 |
+
sed -i.bak s/600/8/g ./prepare.sh
|
| 18 |
+
sed -i.bak s/"first 100"/"first 3"/g ./prepare.sh
|
| 19 |
+
sed -i.bak s/500/5/g ./prepare.sh
|
| 20 |
+
git diff
|
| 21 |
+
|
| 22 |
+
function prepare_data() {
|
| 23 |
+
# We have created a subset of the data for testing
|
| 24 |
+
#
|
| 25 |
+
mkdir -p download
|
| 26 |
+
pushd download
|
| 27 |
+
wget -q https://huggingface.co/csukuangfj/ljspeech-subset-for-ci-test/resolve/main/LJSpeech-1.1.tar.bz2
|
| 28 |
+
tar xvf LJSpeech-1.1.tar.bz2
|
| 29 |
+
popd
|
| 30 |
+
|
| 31 |
+
./prepare.sh
|
| 32 |
+
tree .
|
| 33 |
+
}
|
| 34 |
+
|
| 35 |
+
function train() {
|
| 36 |
+
pushd ./vits
|
| 37 |
+
sed -i.bak s/200/3/g ./train.py
|
| 38 |
+
git diff .
|
| 39 |
+
popd
|
| 40 |
+
|
| 41 |
+
for t in low medium high; do
|
| 42 |
+
./vits/train.py \
|
| 43 |
+
--exp-dir vits/exp-$t \
|
| 44 |
+
--model-type $t \
|
| 45 |
+
--num-epochs 1 \
|
| 46 |
+
--save-every-n 1 \
|
| 47 |
+
--num-buckets 2 \
|
| 48 |
+
--tokens data/tokens.txt \
|
| 49 |
+
--max-duration 20
|
| 50 |
+
|
| 51 |
+
ls -lh vits/exp-$t
|
| 52 |
+
done
|
| 53 |
+
}
|
| 54 |
+
|
| 55 |
+
function infer() {
|
| 56 |
+
for t in low medium high; do
|
| 57 |
+
./vits/infer.py \
|
| 58 |
+
--num-buckets 2 \
|
| 59 |
+
--model-type $t \
|
| 60 |
+
--epoch 1 \
|
| 61 |
+
--exp-dir ./vits/exp-$t \
|
| 62 |
+
--tokens data/tokens.txt \
|
| 63 |
+
--max-duration 20
|
| 64 |
+
done
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
function export_onnx() {
|
| 68 |
+
for t in low medium high; do
|
| 69 |
+
./vits/export-onnx.py \
|
| 70 |
+
--model-type $t \
|
| 71 |
+
--epoch 1 \
|
| 72 |
+
--exp-dir ./vits/exp-$t \
|
| 73 |
+
--tokens data/tokens.txt
|
| 74 |
+
|
| 75 |
+
ls -lh vits/exp-$t/
|
| 76 |
+
done
|
| 77 |
+
}
|
| 78 |
+
|
| 79 |
+
function test_medium() {
|
| 80 |
+
git clone https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-medium-2024-03-12
|
| 81 |
+
|
| 82 |
+
./vits/export-onnx.py \
|
| 83 |
+
--model-type medium \
|
| 84 |
+
--epoch 820 \
|
| 85 |
+
--exp-dir ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp \
|
| 86 |
+
--tokens ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt
|
| 87 |
+
|
| 88 |
+
ls -lh ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp
|
| 89 |
+
|
| 90 |
+
./vits/test_onnx.py \
|
| 91 |
+
--model-filename ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp/vits-epoch-820.onnx \
|
| 92 |
+
--tokens ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt \
|
| 93 |
+
--output-filename /icefall/test-medium.wav
|
| 94 |
+
|
| 95 |
+
ls -lh /icefall/test-medium.wav
|
| 96 |
+
|
| 97 |
+
d=/icefall/vits-icefall-en_US-ljspeech-medium
|
| 98 |
+
mkdir $d
|
| 99 |
+
cp -v ./icefall-tts-ljspeech-vits-medium-2024-03-12/data/tokens.txt $d/
|
| 100 |
+
cp -v ./icefall-tts-ljspeech-vits-medium-2024-03-12/exp/vits-epoch-820.onnx $d/model.onnx
|
| 101 |
+
|
| 102 |
+
rm -rf icefall-tts-ljspeech-vits-medium-2024-03-12
|
| 103 |
+
|
| 104 |
+
pushd $d
|
| 105 |
+
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
|
| 106 |
+
tar xf espeak-ng-data.tar.bz2
|
| 107 |
+
rm espeak-ng-data.tar.bz2
|
| 108 |
+
cd ..
|
| 109 |
+
tar cjf vits-icefall-en_US-ljspeech-medium.tar.bz2 vits-icefall-en_US-ljspeech-medium
|
| 110 |
+
rm -rf vits-icefall-en_US-ljspeech-medium
|
| 111 |
+
ls -lh *.tar.bz2
|
| 112 |
+
popd
|
| 113 |
+
}
|
| 114 |
+
|
| 115 |
+
function test_low() {
|
| 116 |
+
git clone https://huggingface.co/csukuangfj/icefall-tts-ljspeech-vits-low-2024-03-12
|
| 117 |
+
|
| 118 |
+
./vits/export-onnx.py \
|
| 119 |
+
--model-type low \
|
| 120 |
+
--epoch 1600 \
|
| 121 |
+
--exp-dir ./icefall-tts-ljspeech-vits-low-2024-03-12/exp \
|
| 122 |
+
--tokens ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt
|
| 123 |
+
|
| 124 |
+
ls -lh ./icefall-tts-ljspeech-vits-low-2024-03-12/exp
|
| 125 |
+
|
| 126 |
+
./vits/test_onnx.py \
|
| 127 |
+
--model-filename ./icefall-tts-ljspeech-vits-low-2024-03-12/exp/vits-epoch-1600.onnx \
|
| 128 |
+
--tokens ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt \
|
| 129 |
+
--output-filename /icefall/test-low.wav
|
| 130 |
+
|
| 131 |
+
ls -lh /icefall/test-low.wav
|
| 132 |
+
|
| 133 |
+
d=/icefall/vits-icefall-en_US-ljspeech-low
|
| 134 |
+
mkdir $d
|
| 135 |
+
cp -v ./icefall-tts-ljspeech-vits-low-2024-03-12/data/tokens.txt $d/
|
| 136 |
+
cp -v ./icefall-tts-ljspeech-vits-low-2024-03-12/exp/vits-epoch-1600.onnx $d/model.onnx
|
| 137 |
+
|
| 138 |
+
rm -rf icefall-tts-ljspeech-vits-low-2024-03-12
|
| 139 |
+
|
| 140 |
+
pushd $d
|
| 141 |
+
wget -q https://github.com/k2-fsa/sherpa-onnx/releases/download/tts-models/espeak-ng-data.tar.bz2
|
| 142 |
+
tar xf espeak-ng-data.tar.bz2
|
| 143 |
+
rm espeak-ng-data.tar.bz2
|
| 144 |
+
cd ..
|
| 145 |
+
tar cjf vits-icefall-en_US-ljspeech-low.tar.bz2 vits-icefall-en_US-ljspeech-low
|
| 146 |
+
rm -rf vits-icefall-en_US-ljspeech-low
|
| 147 |
+
ls -lh *.tar.bz2
|
| 148 |
+
popd
|
| 149 |
+
}
|
| 150 |
+
|
| 151 |
+
prepare_data
|
| 152 |
+
train
|
| 153 |
+
infer
|
| 154 |
+
export_onnx
|
| 155 |
+
rm -rf vits/exp-{low,medium,high}
|
| 156 |
+
test_medium
|
| 157 |
+
test_low
|
.github/scripts/multi_zh-hans/ASR/run.sh
ADDED
|
@@ -0,0 +1,756 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
git config --global user.name "k2-fsa"
|
| 6 |
+
git config --global user.email "csukuangfj@gmail.com"
|
| 7 |
+
git config --global lfs.allowincompletepush true
|
| 8 |
+
|
| 9 |
+
python3 -m pip install onnxmltools==1.13.0 onnx==1.17.0 onnxruntime==1.17.1 sherpa-onnx
|
| 10 |
+
|
| 11 |
+
log() {
|
| 12 |
+
# This function is from espnet
|
| 13 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 14 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
cd egs/multi_zh-hans/ASR
|
| 18 |
+
|
| 19 |
+
log "pwd: $PWD"
|
| 20 |
+
|
| 21 |
+
function run_2023_9_2() {
|
| 22 |
+
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2
|
| 23 |
+
log "Downloading pre-trained model from $repo_url"
|
| 24 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 25 |
+
repo=$(basename $repo_url)
|
| 26 |
+
pushd $repo
|
| 27 |
+
cd exp
|
| 28 |
+
git lfs pull --include pretrained.pt
|
| 29 |
+
ln -s pretrained.pt epoch-99.pt
|
| 30 |
+
cd ../data/lang_bpe_2000
|
| 31 |
+
ls -lh
|
| 32 |
+
git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
|
| 33 |
+
git lfs pull --include "*.model"
|
| 34 |
+
ls -lh
|
| 35 |
+
popd
|
| 36 |
+
|
| 37 |
+
log "--------------------------------------------"
|
| 38 |
+
log "Export non-streaming ONNX transducer models "
|
| 39 |
+
log "--------------------------------------------"
|
| 40 |
+
./zipformer/export-onnx.py \
|
| 41 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 42 |
+
--use-averaged-model 0 \
|
| 43 |
+
--epoch 99 \
|
| 44 |
+
--avg 1 \
|
| 45 |
+
--exp-dir $repo/exp \
|
| 46 |
+
--causal False \
|
| 47 |
+
--fp16 1
|
| 48 |
+
|
| 49 |
+
ls -lh $repo/exp
|
| 50 |
+
|
| 51 |
+
./zipformer/onnx_pretrained.py \
|
| 52 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 53 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 54 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
| 55 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 56 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 57 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 58 |
+
$repo/test_wavs/DEV_T0000000002.wav \
|
| 59 |
+
$repo/test_wavs/TEST_MEETING_T0000000113.wav \
|
| 60 |
+
$repo/test_wavs/TEST_MEETING_T0000000219.wav \
|
| 61 |
+
$repo/test_wavs/TEST_MEETING_T0000000351.wav
|
| 62 |
+
|
| 63 |
+
./zipformer/onnx_pretrained.py \
|
| 64 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.int8.onnx \
|
| 65 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 66 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.int8.onnx \
|
| 67 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 68 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 69 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 70 |
+
$repo/test_wavs/DEV_T0000000002.wav \
|
| 71 |
+
$repo/test_wavs/TEST_MEETING_T0000000113.wav \
|
| 72 |
+
$repo/test_wavs/TEST_MEETING_T0000000219.wav \
|
| 73 |
+
$repo/test_wavs/TEST_MEETING_T0000000351.wav
|
| 74 |
+
|
| 75 |
+
./zipformer/onnx_pretrained.py \
|
| 76 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.fp16.onnx \
|
| 77 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.fp16.onnx \
|
| 78 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.fp16.onnx \
|
| 79 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 80 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 81 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 82 |
+
$repo/test_wavs/DEV_T0000000002.wav \
|
| 83 |
+
$repo/test_wavs/TEST_MEETING_T0000000113.wav \
|
| 84 |
+
$repo/test_wavs/TEST_MEETING_T0000000219.wav \
|
| 85 |
+
$repo/test_wavs/TEST_MEETING_T0000000351.wav
|
| 86 |
+
|
| 87 |
+
rm -rf $repo
|
| 88 |
+
}
|
| 89 |
+
|
| 90 |
+
function run_2023_11_05_streaming() {
|
| 91 |
+
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05
|
| 92 |
+
log "Downloading pre-trained model from $repo_url"
|
| 93 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 94 |
+
repo=$(basename $repo_url)
|
| 95 |
+
|
| 96 |
+
pushd $repo
|
| 97 |
+
cd exp/
|
| 98 |
+
git lfs pull --include pretrained.pt
|
| 99 |
+
rm -fv epoch-20.pt
|
| 100 |
+
rm -fv *.onnx
|
| 101 |
+
ln -s pretrained.pt epoch-20.pt
|
| 102 |
+
cd ../data/lang_bpe_2000
|
| 103 |
+
ls -lh
|
| 104 |
+
git lfs pull --include L.pt L_disambig.pt Linv.pt bpe.model
|
| 105 |
+
git lfs pull --include "*.model"
|
| 106 |
+
ls -lh
|
| 107 |
+
popd
|
| 108 |
+
|
| 109 |
+
log "----------------------------------------"
|
| 110 |
+
log "Export streaming ONNX CTC models "
|
| 111 |
+
log "----------------------------------------"
|
| 112 |
+
./zipformer/export-onnx-streaming-ctc.py \
|
| 113 |
+
--exp-dir $repo/exp \
|
| 114 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 115 |
+
--causal 1 \
|
| 116 |
+
--avg 1 \
|
| 117 |
+
--epoch 20 \
|
| 118 |
+
--use-averaged-model 0 \
|
| 119 |
+
--chunk-size 16 \
|
| 120 |
+
--left-context-frames 128 \
|
| 121 |
+
--use-ctc 1 \
|
| 122 |
+
--fp16 1
|
| 123 |
+
|
| 124 |
+
ls -lh $repo/exp/
|
| 125 |
+
|
| 126 |
+
log "------------------------------------------------------------"
|
| 127 |
+
log "Test exported streaming ONNX CTC models (greedy search) "
|
| 128 |
+
log "------------------------------------------------------------"
|
| 129 |
+
|
| 130 |
+
test_wavs=(
|
| 131 |
+
DEV_T0000000000.wav
|
| 132 |
+
DEV_T0000000001.wav
|
| 133 |
+
DEV_T0000000002.wav
|
| 134 |
+
TEST_MEETING_T0000000113.wav
|
| 135 |
+
TEST_MEETING_T0000000219.wav
|
| 136 |
+
TEST_MEETING_T0000000351.wav
|
| 137 |
+
)
|
| 138 |
+
|
| 139 |
+
for w in ${test_wavs[@]}; do
|
| 140 |
+
log "----fp32----"
|
| 141 |
+
./zipformer/onnx_pretrained-streaming-ctc.py \
|
| 142 |
+
--model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx \
|
| 143 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 144 |
+
$repo/test_wavs/$w
|
| 145 |
+
|
| 146 |
+
log "----int8----"
|
| 147 |
+
|
| 148 |
+
./zipformer/onnx_pretrained-streaming-ctc.py \
|
| 149 |
+
--model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
|
| 150 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 151 |
+
$repo/test_wavs/$w
|
| 152 |
+
|
| 153 |
+
log "----fp16----"
|
| 154 |
+
|
| 155 |
+
./zipformer/onnx_pretrained-streaming-ctc.py \
|
| 156 |
+
--model-filename $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
|
| 157 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 158 |
+
$repo/test_wavs/$w
|
| 159 |
+
done
|
| 160 |
+
|
| 161 |
+
log "Upload onnx CTC models to huggingface"
|
| 162 |
+
name=(
|
| 163 |
+
sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13
|
| 164 |
+
sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13
|
| 165 |
+
sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13
|
| 166 |
+
)
|
| 167 |
+
for n in ${name[@]}; do
|
| 168 |
+
url=https://huggingface.co/k2-fsa/$n
|
| 169 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
| 170 |
+
dst=$(basename $url)
|
| 171 |
+
if [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-2023-12-13 ]]; then
|
| 172 |
+
cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.onnx $dst
|
| 173 |
+
elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-int8-2023-12-13 ]]; then
|
| 174 |
+
cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
|
| 175 |
+
elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-multi-zh-hans-fp16-2023-12-13 ]]; then
|
| 176 |
+
cp -v $repo/exp/ctc-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
|
| 177 |
+
fi
|
| 178 |
+
|
| 179 |
+
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
|
| 180 |
+
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
|
| 181 |
+
mkdir -p $dst/test_wavs
|
| 182 |
+
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
| 183 |
+
cd $dst
|
| 184 |
+
git lfs track "*.onnx" "bpe.model" "*.wav"
|
| 185 |
+
ls -lh
|
| 186 |
+
file bpe.model
|
| 187 |
+
git status
|
| 188 |
+
git add .
|
| 189 |
+
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
|
| 190 |
+
|
| 191 |
+
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
|
| 192 |
+
rm -rf .git
|
| 193 |
+
rm -fv .gitattributes
|
| 194 |
+
cd ..
|
| 195 |
+
tar cjfv $dst.tar.bz2 $dst
|
| 196 |
+
ls -lh *.tar.bz2
|
| 197 |
+
mv -v $dst.tar.bz2 ../../../
|
| 198 |
+
done
|
| 199 |
+
|
| 200 |
+
log "----------------------------------------"
|
| 201 |
+
log "Export streaming ONNX transducer models "
|
| 202 |
+
log "----------------------------------------"
|
| 203 |
+
|
| 204 |
+
./zipformer/export-onnx-streaming.py \
|
| 205 |
+
--exp-dir $repo/exp \
|
| 206 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 207 |
+
--causal 1 \
|
| 208 |
+
--avg 1 \
|
| 209 |
+
--epoch 20 \
|
| 210 |
+
--use-averaged-model 0 \
|
| 211 |
+
--chunk-size 16 \
|
| 212 |
+
--left-context-frames 128 \
|
| 213 |
+
--use-ctc 0 \
|
| 214 |
+
--fp16 1
|
| 215 |
+
|
| 216 |
+
ls -lh $repo/exp
|
| 217 |
+
|
| 218 |
+
log "------------------------------------------------------------"
|
| 219 |
+
log "Test exported streaming ONNX transducer models (Python code)"
|
| 220 |
+
log "------------------------------------------------------------"
|
| 221 |
+
|
| 222 |
+
log "test fp32"
|
| 223 |
+
./zipformer/onnx_pretrained-streaming.py \
|
| 224 |
+
--encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx \
|
| 225 |
+
--decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
|
| 226 |
+
--joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx \
|
| 227 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 228 |
+
$repo/test_wavs/DEV_T0000000000.wav
|
| 229 |
+
|
| 230 |
+
log "test int8"
|
| 231 |
+
./zipformer/onnx_pretrained-streaming.py \
|
| 232 |
+
--encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
|
| 233 |
+
--decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx \
|
| 234 |
+
--joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx \
|
| 235 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 236 |
+
$repo/test_wavs/DEV_T0000000000.wav
|
| 237 |
+
|
| 238 |
+
log "test fp16"
|
| 239 |
+
./zipformer/onnx_pretrained-streaming.py \
|
| 240 |
+
--encoder-model-filename $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
|
| 241 |
+
--decoder-model-filename $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
|
| 242 |
+
--joiner-model-filename $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx \
|
| 243 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 244 |
+
$repo/test_wavs/DEV_T0000000000.wav
|
| 245 |
+
|
| 246 |
+
name=(
|
| 247 |
+
sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13
|
| 248 |
+
sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13
|
| 249 |
+
sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13
|
| 250 |
+
)
|
| 251 |
+
|
| 252 |
+
for n in ${name[@]}; do
|
| 253 |
+
url=https://huggingface.co/csukuangfj/$n
|
| 254 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
| 255 |
+
dst=$(basename $url)
|
| 256 |
+
if [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-13 ]]; then
|
| 257 |
+
cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
|
| 258 |
+
cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
|
| 259 |
+
cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.onnx $dst
|
| 260 |
+
elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-int8-2023-12-13 ]]; then
|
| 261 |
+
cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
|
| 262 |
+
cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.onnx $dst
|
| 263 |
+
cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.int8.onnx $dst
|
| 264 |
+
elif [[ $n == sherpa-onnx-streaming-zipformer-multi-zh-hans-fp16-2023-12-13 ]]; then
|
| 265 |
+
cp -v $repo/exp/encoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
|
| 266 |
+
cp -v $repo/exp/decoder-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
|
| 267 |
+
cp -v $repo/exp/joiner-epoch-20-avg-1-chunk-16-left-128.fp16.onnx $dst
|
| 268 |
+
fi
|
| 269 |
+
|
| 270 |
+
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
|
| 271 |
+
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
|
| 272 |
+
mkdir -p $dst/test_wavs
|
| 273 |
+
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
| 274 |
+
cd $dst
|
| 275 |
+
git lfs track "*.onnx" "bpe.model" "*.wav"
|
| 276 |
+
ls -lh
|
| 277 |
+
file bpe.model
|
| 278 |
+
git status
|
| 279 |
+
git add .
|
| 280 |
+
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
|
| 281 |
+
|
| 282 |
+
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
|
| 283 |
+
rm -rf .git
|
| 284 |
+
rm -fv .gitattributes
|
| 285 |
+
cd ..
|
| 286 |
+
tar cjfv $dst.tar.bz2 $dst
|
| 287 |
+
ls -lh *.tar.bz2
|
| 288 |
+
mv -v $dst.tar.bz2 ../../../
|
| 289 |
+
done
|
| 290 |
+
}
|
| 291 |
+
|
| 292 |
+
function run_2023_12_12_streaming() {
|
| 293 |
+
log "Upload onnx transducer models to huggingface"
|
| 294 |
+
|
| 295 |
+
url=https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12
|
| 296 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
| 297 |
+
dst=$(basename $url)
|
| 298 |
+
cp -v $repo/exp/encoder*.onnx $dst
|
| 299 |
+
cp -v $repo/exp/decoder*.onnx $dst
|
| 300 |
+
cp -v $repo/exp/joiner*.onnx $dst
|
| 301 |
+
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
|
| 302 |
+
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
|
| 303 |
+
mkdir -p $dst/test_wavs
|
| 304 |
+
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
| 305 |
+
cd $dst
|
| 306 |
+
git lfs track "*.onnx" bpe.model "*.wav"
|
| 307 |
+
git add .
|
| 308 |
+
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
|
| 309 |
+
|
| 310 |
+
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
|
| 311 |
+
rm -rf .git
|
| 312 |
+
rm -fv .gitattributes
|
| 313 |
+
cd ..
|
| 314 |
+
tar cjfv $dst.tar.bz2 $dst
|
| 315 |
+
ls -lh *.tar.bz2
|
| 316 |
+
mv -v $dst.tar.bz2 ../../../
|
| 317 |
+
}
|
| 318 |
+
|
| 319 |
+
function run_yuekai_large() {
|
| 320 |
+
repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
|
| 321 |
+
log "Downloading pre-trained model from $repo_url"
|
| 322 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 323 |
+
repo=$(basename $repo_url)
|
| 324 |
+
pushd $repo
|
| 325 |
+
git lfs pull --include pretrained.pt
|
| 326 |
+
mv pretrained.pt epoch-99.pt
|
| 327 |
+
curl -SL -O https://huggingface.co/pingzxy/icefall-asr-multi-zh-hans-zipformer-large-onnx/resolve/main/tokens.txt
|
| 328 |
+
popd
|
| 329 |
+
|
| 330 |
+
log "----------------------------------------"
|
| 331 |
+
log "Export streaming ONNX CTC models "
|
| 332 |
+
log "----------------------------------------"
|
| 333 |
+
./zipformer/export-onnx-streaming-ctc.py \
|
| 334 |
+
--exp-dir $repo/ \
|
| 335 |
+
--tokens $repo/tokens.txt \
|
| 336 |
+
--causal 1 \
|
| 337 |
+
--avg 1 \
|
| 338 |
+
--epoch 99 \
|
| 339 |
+
--use-averaged-model 0 \
|
| 340 |
+
--chunk-size 16 \
|
| 341 |
+
--left-context-frames 128 \
|
| 342 |
+
--use-ctc 1 \
|
| 343 |
+
\
|
| 344 |
+
--num-encoder-layers 2,2,4,5,4,2 \
|
| 345 |
+
--feedforward-dim 768,1024,1536,2048,1536,768 \
|
| 346 |
+
--encoder-dim 256,384,512,768,512,256 \
|
| 347 |
+
--encoder-unmasked-dim 192,192,256,320,256,192 \
|
| 348 |
+
\
|
| 349 |
+
--fp16 1 \
|
| 350 |
+
--use-whisper-features 1
|
| 351 |
+
|
| 352 |
+
|
| 353 |
+
ls -lh $repo/
|
| 354 |
+
pushd $repo
|
| 355 |
+
|
| 356 |
+
cat >README.md <<EOF
|
| 357 |
+
# Introduction
|
| 358 |
+
|
| 359 |
+
This model is converted
|
| 360 |
+
from
|
| 361 |
+
https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-large
|
| 362 |
+
|
| 363 |
+
The training code can be found at
|
| 364 |
+
https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-large-model
|
| 365 |
+
EOF
|
| 366 |
+
|
| 367 |
+
mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx
|
| 368 |
+
mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
|
| 369 |
+
mv -v ctc-epoch-99-avg-1-chunk-16-left-128.onnx model.onnx
|
| 370 |
+
|
| 371 |
+
ls -lh *.onnx
|
| 372 |
+
|
| 373 |
+
mkdir test_wavs
|
| 374 |
+
cd test_wavs
|
| 375 |
+
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/0.wav
|
| 376 |
+
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/1.wav
|
| 377 |
+
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/8k.wav
|
| 378 |
+
popd
|
| 379 |
+
|
| 380 |
+
for w in 0.wav 1.wav 8k.wav; do
|
| 381 |
+
log "---fp32---"
|
| 382 |
+
sherpa-onnx \
|
| 383 |
+
--zipformer2-ctc-model=$repo/model.onnx \
|
| 384 |
+
--tokens=$repo/tokens.txt \
|
| 385 |
+
$repo/test_wavs/$w
|
| 386 |
+
|
| 387 |
+
log "---int8---"
|
| 388 |
+
|
| 389 |
+
sherpa-onnx \
|
| 390 |
+
--zipformer2-ctc-model=$repo/model.int8.onnx \
|
| 391 |
+
--tokens=$repo/tokens.txt \
|
| 392 |
+
$repo/test_wavs/$w
|
| 393 |
+
|
| 394 |
+
log "---fp16---"
|
| 395 |
+
|
| 396 |
+
sherpa-onnx \
|
| 397 |
+
--zipformer2-ctc-model=$repo/model.fp16.onnx \
|
| 398 |
+
--tokens=$repo/tokens.txt \
|
| 399 |
+
$repo/test_wavs/$w
|
| 400 |
+
done
|
| 401 |
+
|
| 402 |
+
name=(
|
| 403 |
+
sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30
|
| 404 |
+
sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30
|
| 405 |
+
sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30
|
| 406 |
+
)
|
| 407 |
+
for n in ${name[@]}; do
|
| 408 |
+
url=https://huggingface.co/csukuangfj/$n
|
| 409 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
| 410 |
+
dst=$(basename $url)
|
| 411 |
+
if [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-2025-06-30 ]]; then
|
| 412 |
+
cp -v $repo/model.onnx $dst
|
| 413 |
+
elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-int8-2025-06-30 ]]; then
|
| 414 |
+
cp -v $repo/model.int8.onnx $dst
|
| 415 |
+
elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-fp16-2025-06-30 ]]; then
|
| 416 |
+
cp -v $repo/model.fp16.onnx $dst
|
| 417 |
+
fi
|
| 418 |
+
|
| 419 |
+
cp -v $repo/tokens.txt $dst
|
| 420 |
+
cp -v $repo/README.md $dst
|
| 421 |
+
mkdir -p $dst/test_wavs
|
| 422 |
+
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
| 423 |
+
cd $dst
|
| 424 |
+
git lfs track "*.onnx" "*.wav"
|
| 425 |
+
ls -lh
|
| 426 |
+
git status
|
| 427 |
+
git add .
|
| 428 |
+
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
|
| 429 |
+
|
| 430 |
+
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
|
| 431 |
+
rm -rf .git
|
| 432 |
+
rm -fv .gitattributes
|
| 433 |
+
cd ..
|
| 434 |
+
tar cjfv $dst.tar.bz2 $dst
|
| 435 |
+
ls -lh *.tar.bz2
|
| 436 |
+
mv -v $dst.tar.bz2 ../../../
|
| 437 |
+
done
|
| 438 |
+
|
| 439 |
+
rm $repo/*.onnx
|
| 440 |
+
|
| 441 |
+
log "----------------------------------------"
|
| 442 |
+
log "Export streaming ONNX transducer models "
|
| 443 |
+
log "----------------------------------------"
|
| 444 |
+
|
| 445 |
+
./zipformer/export-onnx-streaming.py \
|
| 446 |
+
--exp-dir $repo \
|
| 447 |
+
--tokens $repo/tokens.txt \
|
| 448 |
+
--causal 1 \
|
| 449 |
+
--avg 1 \
|
| 450 |
+
--epoch 99 \
|
| 451 |
+
--use-averaged-model 0 \
|
| 452 |
+
--chunk-size 16 \
|
| 453 |
+
--left-context-frames 128 \
|
| 454 |
+
--use-ctc 0 \
|
| 455 |
+
\
|
| 456 |
+
--num-encoder-layers 2,2,4,5,4,2 \
|
| 457 |
+
--feedforward-dim 768,1024,1536,2048,1536,768 \
|
| 458 |
+
--encoder-dim 256,384,512,768,512,256 \
|
| 459 |
+
--encoder-unmasked-dim 192,192,256,320,256,192 \
|
| 460 |
+
\
|
| 461 |
+
--fp16 1 \
|
| 462 |
+
--use-whisper-features 1
|
| 463 |
+
|
| 464 |
+
ls -lh $repo
|
| 465 |
+
pushd $repo
|
| 466 |
+
for m in encoder decoder joiner; do
|
| 467 |
+
mv -v $m-epoch-99-avg-1-chunk-16-left-128.onnx $m.onnx
|
| 468 |
+
mv -v $m-epoch-99-avg-1-chunk-16-left-128.fp16.onnx $m.fp16.onnx
|
| 469 |
+
mv -v $m-epoch-99-avg-1-chunk-16-left-128.int8.onnx $m.int8.onnx
|
| 470 |
+
done
|
| 471 |
+
ls -lh *.onnx
|
| 472 |
+
popd
|
| 473 |
+
|
| 474 |
+
for w in 0.wav 1.wav 8k.wav; do
|
| 475 |
+
log "---fp32---"
|
| 476 |
+
sherpa-onnx \
|
| 477 |
+
--encoder=$repo/encoder.onnx \
|
| 478 |
+
--decoder=$repo/decoder.onnx \
|
| 479 |
+
--joiner=$repo/joiner.onnx \
|
| 480 |
+
--tokens=$repo/tokens.txt \
|
| 481 |
+
$repo/test_wavs/$w
|
| 482 |
+
|
| 483 |
+
log "---int8---"
|
| 484 |
+
|
| 485 |
+
sherpa-onnx \
|
| 486 |
+
--encoder=$repo/encoder.int8.onnx \
|
| 487 |
+
--decoder=$repo/decoder.onnx \
|
| 488 |
+
--joiner=$repo/joiner.int8.onnx \
|
| 489 |
+
--tokens=$repo/tokens.txt \
|
| 490 |
+
$repo/test_wavs/$w
|
| 491 |
+
|
| 492 |
+
log "---fp16---"
|
| 493 |
+
|
| 494 |
+
sherpa-onnx \
|
| 495 |
+
--encoder=$repo/encoder.fp16.onnx \
|
| 496 |
+
--decoder=$repo/decoder.fp16.onnx \
|
| 497 |
+
--joiner=$repo/joiner.fp16.onnx \
|
| 498 |
+
--tokens=$repo/tokens.txt \
|
| 499 |
+
$repo/test_wavs/$w
|
| 500 |
+
done
|
| 501 |
+
|
| 502 |
+
name=(
|
| 503 |
+
sherpa-onnx-streaming-zipformer-zh-2025-06-30
|
| 504 |
+
sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30
|
| 505 |
+
sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30
|
| 506 |
+
)
|
| 507 |
+
for n in ${name[@]}; do
|
| 508 |
+
url=https://huggingface.co/csukuangfj/$n
|
| 509 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
| 510 |
+
dst=$(basename $url)
|
| 511 |
+
if [[ $n == sherpa-onnx-streaming-zipformer-zh-2025-06-30 ]]; then
|
| 512 |
+
cp -v $repo/encoder.onnx $dst
|
| 513 |
+
cp -v $repo/decoder.onnx $dst
|
| 514 |
+
cp -v $repo/joiner.onnx $dst
|
| 515 |
+
elif [[ $n == sherpa-onnx-streaming-zipformer-zh-int8-2025-06-30 ]]; then
|
| 516 |
+
cp -v $repo/encoder.int8.onnx $dst
|
| 517 |
+
cp -v $repo/decoder.onnx $dst
|
| 518 |
+
cp -v $repo/joiner.int8.onnx $dst
|
| 519 |
+
elif [[ $n == sherpa-onnx-streaming-zipformer-zh-fp16-2025-06-30 ]]; then
|
| 520 |
+
cp -v $repo/encoder.fp16.onnx $dst
|
| 521 |
+
cp -v $repo/decoder.fp16.onnx $dst
|
| 522 |
+
cp -v $repo/joiner.fp16.onnx $dst
|
| 523 |
+
fi
|
| 524 |
+
|
| 525 |
+
cp -v $repo/tokens.txt $dst
|
| 526 |
+
cp -v $repo/README.md $dst
|
| 527 |
+
mkdir -p $dst/test_wavs
|
| 528 |
+
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
| 529 |
+
cd $dst
|
| 530 |
+
git lfs track "*.onnx" "*.wav"
|
| 531 |
+
ls -lh
|
| 532 |
+
git status
|
| 533 |
+
git add .
|
| 534 |
+
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
|
| 535 |
+
|
| 536 |
+
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
|
| 537 |
+
rm -rf .git
|
| 538 |
+
rm -fv .gitattributes
|
| 539 |
+
cd ..
|
| 540 |
+
tar cjfv $dst.tar.bz2 $dst
|
| 541 |
+
ls -lh *.tar.bz2
|
| 542 |
+
mv -v $dst.tar.bz2 ../../../
|
| 543 |
+
done
|
| 544 |
+
}
|
| 545 |
+
|
| 546 |
+
function run_yuekai_xl() {
|
| 547 |
+
repo_url=https://csukuangfj:${HF_TOKEN}@huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl
|
| 548 |
+
log "Downloading pre-trained model from $repo_url"
|
| 549 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 550 |
+
repo=$(basename $repo_url)
|
| 551 |
+
|
| 552 |
+
pushd $repo
|
| 553 |
+
git lfs pull --include pretrained.pt
|
| 554 |
+
git lfs pull --include data/lang_bpe_2000/bpe.model
|
| 555 |
+
mv pretrained.pt epoch-99.pt
|
| 556 |
+
ls -lh *.pt
|
| 557 |
+
popd
|
| 558 |
+
|
| 559 |
+
log "----------------------------------------"
|
| 560 |
+
log "Export streaming ONNX CTC models "
|
| 561 |
+
log "----------------------------------------"
|
| 562 |
+
./zipformer/export-onnx-streaming-ctc.py \
|
| 563 |
+
--exp-dir $repo/ \
|
| 564 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 565 |
+
--causal 1 \
|
| 566 |
+
--avg 1 \
|
| 567 |
+
--epoch 99 \
|
| 568 |
+
--use-averaged-model 0 \
|
| 569 |
+
--chunk-size 16 \
|
| 570 |
+
--left-context-frames 128 \
|
| 571 |
+
--use-ctc 1 \
|
| 572 |
+
\
|
| 573 |
+
--num-encoder-layers 2,3,5,6,5,3 \
|
| 574 |
+
--feedforward-dim 1536,2048,3072,4096,3072,1536 \
|
| 575 |
+
--encoder-dim 512,768,1024,1536,1024,512 \
|
| 576 |
+
--encoder-unmasked-dim 192,192,256,320,256,192 \
|
| 577 |
+
--decoder-dim 768 --joiner-dim 768 \
|
| 578 |
+
--value-head-dim 18 \
|
| 579 |
+
--query-head-dim 48 \
|
| 580 |
+
--num-heads 4,4,4,8,4,4 \
|
| 581 |
+
\
|
| 582 |
+
--fp16 1 \
|
| 583 |
+
--use-whisper-features 1 \
|
| 584 |
+
--use-external-data 1
|
| 585 |
+
|
| 586 |
+
mv -v ctc-epoch-99-avg-1-chunk-16-left-128.int8.onnx model.int8.onnx
|
| 587 |
+
mv -v ctc-epoch-99-avg-1-chunk-16-left-128.fp16.onnx model.fp16.onnx
|
| 588 |
+
|
| 589 |
+
ls -lh *.onnx
|
| 590 |
+
|
| 591 |
+
mkdir test_wavs
|
| 592 |
+
pushd test_wavs
|
| 593 |
+
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/0.wav
|
| 594 |
+
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/1.wav
|
| 595 |
+
curl -SL -O https://huggingface.co/csukuangfj/sherpa-onnx-streaming-zipformer-small-ctc-zh-int8-2025-04-01/resolve/main/test_wavs/8k.wav
|
| 596 |
+
popd
|
| 597 |
+
|
| 598 |
+
for w in 0.wav 1.wav 8k.wav; do
|
| 599 |
+
log "---int8---"
|
| 600 |
+
|
| 601 |
+
sherpa-onnx \
|
| 602 |
+
--zipformer2-ctc-model=./model.int8.onnx \
|
| 603 |
+
--tokens=$repo/data/lang_bpe_2000/tokens.txt \
|
| 604 |
+
test_wavs/$w
|
| 605 |
+
|
| 606 |
+
log "---fp16---"
|
| 607 |
+
|
| 608 |
+
sherpa-onnx \
|
| 609 |
+
--zipformer2-ctc-model=./model.fp16.onnx \
|
| 610 |
+
--tokens=$repo/data/lang_bpe_2000/tokens.txt \
|
| 611 |
+
test_wavs/$w
|
| 612 |
+
done
|
| 613 |
+
|
| 614 |
+
pushd $repo
|
| 615 |
+
cat >README.md <<EOF
|
| 616 |
+
# Introduction
|
| 617 |
+
|
| 618 |
+
This model is converted
|
| 619 |
+
from
|
| 620 |
+
https://huggingface.co/yuekai/icefall-asr-multi-zh-hans-zipformer-xl
|
| 621 |
+
|
| 622 |
+
The training code can be found at
|
| 623 |
+
https://github.com/k2-fsa/icefall/blob/master/egs/multi_zh-hans/ASR/RESULTS.md#multi-chinese-datasets-char-based-training-results-streaming-on-zipformer-xl-model
|
| 624 |
+
EOF
|
| 625 |
+
popd
|
| 626 |
+
|
| 627 |
+
name=(
|
| 628 |
+
sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30
|
| 629 |
+
sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30
|
| 630 |
+
)
|
| 631 |
+
|
| 632 |
+
for n in ${name[@]}; do
|
| 633 |
+
url=https://huggingface.co/csukuangfj/$n
|
| 634 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
| 635 |
+
dst=$(basename $url)
|
| 636 |
+
if [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-fp16-2025-06-30 ]]; then
|
| 637 |
+
cp -v model.fp16.onnx $dst
|
| 638 |
+
elif [[ $n == sherpa-onnx-streaming-zipformer-ctc-zh-xlarge-int8-2025-06-30 ]]; then
|
| 639 |
+
cp -v model.int8.onnx $dst
|
| 640 |
+
fi
|
| 641 |
+
|
| 642 |
+
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
|
| 643 |
+
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
|
| 644 |
+
cp -v $repo/README.md $dst
|
| 645 |
+
mkdir -p $dst/test_wavs
|
| 646 |
+
cp -v ./test_wavs/*.wav $dst/test_wavs
|
| 647 |
+
cd $dst
|
| 648 |
+
git lfs track "*.onnx" "*.wav" "bpe.model"
|
| 649 |
+
ls -lh
|
| 650 |
+
git status
|
| 651 |
+
git add .
|
| 652 |
+
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
|
| 653 |
+
|
| 654 |
+
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
|
| 655 |
+
rm -rf .git
|
| 656 |
+
rm -fv .gitattributes
|
| 657 |
+
cd ..
|
| 658 |
+
|
| 659 |
+
ls -lh $dst
|
| 660 |
+
tar cjfv $dst.tar.bz2 $dst
|
| 661 |
+
ls -lh *.tar.bz2
|
| 662 |
+
mv -v $dst.tar.bz2 ../../../
|
| 663 |
+
done
|
| 664 |
+
|
| 665 |
+
rm -fv *.onnx *.weights
|
| 666 |
+
|
| 667 |
+
log "----------------------------------------"
|
| 668 |
+
log "Export streaming ONNX transducer models "
|
| 669 |
+
log "----------------------------------------"
|
| 670 |
+
|
| 671 |
+
./zipformer/export-onnx-streaming.py \
|
| 672 |
+
--exp-dir $repo/ \
|
| 673 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 674 |
+
--causal 1 \
|
| 675 |
+
--avg 1 \
|
| 676 |
+
--epoch 99 \
|
| 677 |
+
--use-averaged-model 0 \
|
| 678 |
+
--chunk-size 16 \
|
| 679 |
+
--left-context-frames 128 \
|
| 680 |
+
--use-ctc 0 \
|
| 681 |
+
\
|
| 682 |
+
--num-encoder-layers 2,3,5,6,5,3 \
|
| 683 |
+
--feedforward-dim 1536,2048,3072,4096,3072,1536 \
|
| 684 |
+
--encoder-dim 512,768,1024,1536,1024,512 \
|
| 685 |
+
--encoder-unmasked-dim 192,192,256,320,256,192 \
|
| 686 |
+
--decoder-dim 768 --joiner-dim 768 \
|
| 687 |
+
--value-head-dim 18 \
|
| 688 |
+
--query-head-dim 48 \
|
| 689 |
+
--num-heads 4,4,4,8,4,4 \
|
| 690 |
+
\
|
| 691 |
+
--fp16 1 \
|
| 692 |
+
--use-whisper-features 1 \
|
| 693 |
+
--use-external-data 1
|
| 694 |
+
|
| 695 |
+
ls -lh *.onnx
|
| 696 |
+
ls -lh *.weights
|
| 697 |
+
|
| 698 |
+
mv encoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx encoder.fp16.onnx
|
| 699 |
+
mv encoder-epoch-99-avg-1-chunk-16-left-128.int8.onnx encoder.int8.onnx
|
| 700 |
+
|
| 701 |
+
mv $repo/decoder-epoch-99-avg-1-chunk-16-left-128.onnx decoder.onnx
|
| 702 |
+
mv $repo/decoder-epoch-99-avg-1-chunk-16-left-128.fp16.onnx decoder.fp16.onnx
|
| 703 |
+
|
| 704 |
+
mv $repo/joiner-epoch-99-avg-1-chunk-16-left-128.int8.onnx joiner.int8.onnx
|
| 705 |
+
mv $repo/joiner-epoch-99-avg-1-chunk-16-left-128.fp16.onnx joiner.fp16.onnx
|
| 706 |
+
|
| 707 |
+
name=(
|
| 708 |
+
sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30
|
| 709 |
+
sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30
|
| 710 |
+
)
|
| 711 |
+
|
| 712 |
+
for n in ${name[@]}; do
|
| 713 |
+
url=https://huggingface.co/csukuangfj/$n
|
| 714 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
| 715 |
+
dst=$(basename $url)
|
| 716 |
+
if [[ $n == sherpa-onnx-streaming-zipformer-zh-xlarge-fp16-2025-06-30 ]]; then
|
| 717 |
+
cp -v encoder.fp16.onnx $dst
|
| 718 |
+
cp -v decoder.fp16.onnx $dst
|
| 719 |
+
cp -v joiner.fp16.onnx $dst
|
| 720 |
+
elif [[ $n == sherpa-onnx-streaming-zipformer-zh-xlarge-int8-2025-06-30 ]]; then
|
| 721 |
+
cp -v encoder.int8.onnx $dst
|
| 722 |
+
cp -v decoder.onnx $dst
|
| 723 |
+
cp -v joiner.int8.onnx $dst
|
| 724 |
+
fi
|
| 725 |
+
|
| 726 |
+
cp -v $repo/data/lang_bpe_2000/tokens.txt $dst
|
| 727 |
+
cp -v $repo/data/lang_bpe_2000/bpe.model $dst
|
| 728 |
+
cp -v $repo/README.md $dst
|
| 729 |
+
mkdir -p $dst/test_wavs
|
| 730 |
+
cp -v ./test_wavs/*.wav $dst/test_wavs
|
| 731 |
+
cd $dst
|
| 732 |
+
git lfs track "*.onnx" "*.wav" "bpe.model"
|
| 733 |
+
ls -lh
|
| 734 |
+
git status
|
| 735 |
+
git add .
|
| 736 |
+
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$dst main || true
|
| 737 |
+
|
| 738 |
+
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
|
| 739 |
+
rm -rf .git
|
| 740 |
+
rm -fv .gitattributes
|
| 741 |
+
cd ..
|
| 742 |
+
|
| 743 |
+
ls -lh $dst
|
| 744 |
+
tar cjfv $dst.tar.bz2 $dst
|
| 745 |
+
ls -lh *.tar.bz2
|
| 746 |
+
mv -v $dst.tar.bz2 ../../../
|
| 747 |
+
done
|
| 748 |
+
|
| 749 |
+
rm -fv *.onnx *.weights
|
| 750 |
+
}
|
| 751 |
+
|
| 752 |
+
# run_yuekai_large
|
| 753 |
+
# run_yuekai_xl
|
| 754 |
+
# run_2023_9_2
|
| 755 |
+
run_2023_11_05_streaming
|
| 756 |
+
# run_2023_12_12_streaming
|
.github/scripts/multi_zh-hans/ASR/run_rknn.sh
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
python3 -m pip install kaldi-native-fbank soundfile librosa
|
| 6 |
+
|
| 7 |
+
log() {
|
| 8 |
+
# This function is from espnet
|
| 9 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 10 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
cd egs/multi_zh-hans/ASR
|
| 14 |
+
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#sherpa-onnx-streaming-zipformer-multi-zh-hans-2023-12-12-chinese
|
| 18 |
+
function export_2023_11_05() {
|
| 19 |
+
d=exp
|
| 20 |
+
mkdir $d
|
| 21 |
+
pushd $d
|
| 22 |
+
curl -SL -O https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main/data/lang_bpe_2000/tokens.txt
|
| 23 |
+
curl -SL -O https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main/exp/pretrained.pt
|
| 24 |
+
mv pretrained.pt epoch-99.pt
|
| 25 |
+
|
| 26 |
+
curl -SL -o 0.wav https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main/test_wavs/DEV_T0000000000.wav
|
| 27 |
+
curl -SL -o 1.wav https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main/test_wavs/DEV_T0000000001.wav
|
| 28 |
+
curl -SL -o 2.wav https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-streaming-2023-11-05/resolve/main/test_wavs/DEV_T0000000002.wav
|
| 29 |
+
ls -lh
|
| 30 |
+
popd
|
| 31 |
+
|
| 32 |
+
./zipformer/export-onnx-streaming.py \
|
| 33 |
+
--dynamic-batch 0 \
|
| 34 |
+
--enable-int8-quantization 0 \
|
| 35 |
+
--tokens $d/tokens.txt \
|
| 36 |
+
--use-averaged-model 0 \
|
| 37 |
+
--epoch 99 \
|
| 38 |
+
--avg 1 \
|
| 39 |
+
--exp-dir $d \
|
| 40 |
+
--use-ctc 0 \
|
| 41 |
+
--use-transducer 1 \
|
| 42 |
+
--chunk-size 32 \
|
| 43 |
+
--left-context-frames 128 \
|
| 44 |
+
--causal 1
|
| 45 |
+
|
| 46 |
+
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
|
| 47 |
+
dst=sherpa-onnx-$platform-streaming-zipformer-multi-zh-hans-2023-12-12
|
| 48 |
+
mkdir -p $dst
|
| 49 |
+
|
| 50 |
+
./zipformer/export_rknn_transducer_streaming.py \
|
| 51 |
+
--in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 52 |
+
--in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 53 |
+
--in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 54 |
+
--out-encoder $dst/encoder.rknn \
|
| 55 |
+
--out-decoder $dst/decoder.rknn \
|
| 56 |
+
--out-joiner $dst/joiner.rknn \
|
| 57 |
+
--target-platform $platform
|
| 58 |
+
|
| 59 |
+
cp $d/tokens.txt $dst
|
| 60 |
+
mkdir $dst/test_wavs
|
| 61 |
+
cp $d/*.wav $dst/test_wavs
|
| 62 |
+
|
| 63 |
+
tar cjvf $dst.tar.bz2 $dst
|
| 64 |
+
ls -lh $dst.tar.bz2
|
| 65 |
+
mv $dst.tar.bz2 /icefall/
|
| 66 |
+
ls -lh $dst/
|
| 67 |
+
echo "---"
|
| 68 |
+
|
| 69 |
+
rm -rf $dst
|
| 70 |
+
done
|
| 71 |
+
}
|
| 72 |
+
|
| 73 |
+
export_2023_11_05
|
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
# This script assumes that test-clean and test-other are downloaded
|
| 4 |
+
# to egs/librispeech/ASR/download/LibriSpeech and generates manifest
|
| 5 |
+
# files in egs/librispeech/ASR/data/manifests
|
| 6 |
+
|
| 7 |
+
set -e
|
| 8 |
+
|
| 9 |
+
cd egs/librispeech/ASR
|
| 10 |
+
[ ! -e download ] && ln -s ~/tmp/download .
|
| 11 |
+
mkdir -p data/manifests
|
| 12 |
+
lhotse prepare librispeech -j 2 -p test-clean -p test-other ./download/LibriSpeech data/manifests
|
| 13 |
+
ls -lh data/manifests
|
.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
ADDED
|
@@ -0,0 +1,62 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -e
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/gigaspeech/ASR
|
| 12 |
+
|
| 13 |
+
repo_url=https://huggingface.co/wgb14/icefall-asr-gigaspeech-pruned-transducer-stateless2
|
| 14 |
+
|
| 15 |
+
log "Downloading pre-trained model from $repo_url"
|
| 16 |
+
git lfs install
|
| 17 |
+
git clone $repo_url
|
| 18 |
+
repo=$(basename $repo_url)
|
| 19 |
+
|
| 20 |
+
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
| 21 |
+
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
| 22 |
+
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
| 23 |
+
mkdir -p pruned_transducer_stateless2/exp
|
| 24 |
+
ln -s $PWD/$repo/exp/pretrained-iter-3488000-avg-20.pt pruned_transducer_stateless2/exp/epoch-999.pt
|
| 25 |
+
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
| 26 |
+
|
| 27 |
+
ls -lh data
|
| 28 |
+
ls -lh data/lang_bpe_500
|
| 29 |
+
ls -lh data/fbank
|
| 30 |
+
ls -lh pruned_transducer_stateless2/exp
|
| 31 |
+
|
| 32 |
+
pushd data/fbank
|
| 33 |
+
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
|
| 34 |
+
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
|
| 35 |
+
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
|
| 36 |
+
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca
|
| 37 |
+
|
| 38 |
+
ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
|
| 39 |
+
ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
|
| 40 |
+
popd
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
log "Decoding dev and test"
|
| 44 |
+
|
| 45 |
+
# use a small value for decoding with CPU
|
| 46 |
+
max_duration=100
|
| 47 |
+
|
| 48 |
+
# Test only greedy_search to reduce CI running time
|
| 49 |
+
# for method in greedy_search fast_beam_search modified_beam_search; do
|
| 50 |
+
for method in greedy_search; do
|
| 51 |
+
log "Decoding with $method"
|
| 52 |
+
|
| 53 |
+
./pruned_transducer_stateless2/decode.py \
|
| 54 |
+
--decoding-method $method \
|
| 55 |
+
--epoch 999 \
|
| 56 |
+
--avg 1 \
|
| 57 |
+
--max-duration $max_duration \
|
| 58 |
+
--exp-dir pruned_transducer_stateless2/exp
|
| 59 |
+
done
|
| 60 |
+
|
| 61 |
+
rm pruned_transducer_stateless2/exp/*.pt
|
| 62 |
+
fi
|
.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -e
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/gigaspeech/ASR
|
| 12 |
+
|
| 13 |
+
repo_url=https://huggingface.co/yfyeung/icefall-asr-gigaspeech-zipformer-2023-10-17
|
| 14 |
+
|
| 15 |
+
log "Downloading pre-trained model from $repo_url"
|
| 16 |
+
git lfs install
|
| 17 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 18 |
+
repo=$(basename $repo_url)
|
| 19 |
+
|
| 20 |
+
log "Display test files"
|
| 21 |
+
tree $repo/
|
| 22 |
+
ls -lh $repo/test_wavs/*.wav
|
| 23 |
+
|
| 24 |
+
pushd $repo/exp
|
| 25 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 26 |
+
git lfs pull --include "data/lang_bpe_500/tokens.txt"
|
| 27 |
+
git lfs pull --include "exp/jit_script.pt"
|
| 28 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 29 |
+
rm epoch-30.pt
|
| 30 |
+
ln -s pretrained.pt epoch-30.pt
|
| 31 |
+
rm *.onnx
|
| 32 |
+
ls -lh
|
| 33 |
+
popd
|
| 34 |
+
|
| 35 |
+
log "----------------------------------------"
|
| 36 |
+
log "Export ONNX transducer models "
|
| 37 |
+
log "----------------------------------------"
|
| 38 |
+
|
| 39 |
+
./zipformer/export-onnx.py \
|
| 40 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 41 |
+
--use-averaged-model 0 \
|
| 42 |
+
--epoch 30 \
|
| 43 |
+
--avg 1 \
|
| 44 |
+
--exp-dir $repo/exp
|
| 45 |
+
|
| 46 |
+
ls -lh $repo/exp
|
| 47 |
+
|
| 48 |
+
log "------------------------------------------------------------"
|
| 49 |
+
log "Test exported ONNX transducer models (Python code) "
|
| 50 |
+
log "------------------------------------------------------------"
|
| 51 |
+
|
| 52 |
+
log "test fp32"
|
| 53 |
+
./zipformer/onnx_pretrained.py \
|
| 54 |
+
--encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.onnx \
|
| 55 |
+
--decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
|
| 56 |
+
--joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.onnx \
|
| 57 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 58 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 59 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 60 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 61 |
+
|
| 62 |
+
log "test int8"
|
| 63 |
+
./zipformer/onnx_pretrained.py \
|
| 64 |
+
--encoder-model-filename $repo/exp/encoder-epoch-30-avg-1.int8.onnx \
|
| 65 |
+
--decoder-model-filename $repo/exp/decoder-epoch-30-avg-1.onnx \
|
| 66 |
+
--joiner-model-filename $repo/exp/joiner-epoch-30-avg-1.int8.onnx \
|
| 67 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 68 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 69 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 70 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 71 |
+
|
| 72 |
+
log "Upload models to huggingface"
|
| 73 |
+
git config --global user.name "k2-fsa"
|
| 74 |
+
git config --global user.email "xxx@gmail.com"
|
| 75 |
+
|
| 76 |
+
url=https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-gigaspeech-2023-12-12
|
| 77 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $url
|
| 78 |
+
dst=$(basename $url)
|
| 79 |
+
cp -v $repo/exp/*.onnx $dst
|
| 80 |
+
cp -v $repo/data/lang_bpe_500/tokens.txt $dst
|
| 81 |
+
cp -v $repo/data/lang_bpe_500/bpe.model $dst
|
| 82 |
+
mkdir -p $dst/test_wavs
|
| 83 |
+
cp -v $repo/test_wavs/*.wav $dst/test_wavs
|
| 84 |
+
cd $dst
|
| 85 |
+
git lfs track "*.onnx"
|
| 86 |
+
git add .
|
| 87 |
+
git commit -m "upload model" && git push https://k2-fsa:${HF_TOKEN}@huggingface.co/k2-fsa/$dst main || true
|
| 88 |
+
|
| 89 |
+
log "Upload models to https://github.com/k2-fsa/sherpa-onnx"
|
| 90 |
+
rm -rf .git
|
| 91 |
+
rm -fv .gitattributes
|
| 92 |
+
cd ..
|
| 93 |
+
tar cjfv $dst.tar.bz2 $dst
|
| 94 |
+
ls -lh
|
| 95 |
+
mv -v $dst.tar.bz2 ../../../
|
| 96 |
+
|
| 97 |
+
log "Export to torchscript model"
|
| 98 |
+
./zipformer/export.py \
|
| 99 |
+
--exp-dir $repo/exp \
|
| 100 |
+
--use-averaged-model false \
|
| 101 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 102 |
+
--epoch 30 \
|
| 103 |
+
--avg 1 \
|
| 104 |
+
--jit 1
|
| 105 |
+
|
| 106 |
+
ls -lh $repo/exp/*.pt
|
| 107 |
+
|
| 108 |
+
log "Decode with models exported by torch.jit.script()"
|
| 109 |
+
|
| 110 |
+
./zipformer/jit_pretrained.py \
|
| 111 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 112 |
+
--nn-model-filename $repo/exp/jit_script.pt \
|
| 113 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 114 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 115 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 116 |
+
|
| 117 |
+
for method in greedy_search modified_beam_search fast_beam_search; do
|
| 118 |
+
log "$method"
|
| 119 |
+
|
| 120 |
+
./zipformer/pretrained.py \
|
| 121 |
+
--method $method \
|
| 122 |
+
--beam-size 4 \
|
| 123 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 124 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 125 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 126 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 127 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 128 |
+
done
|
| 129 |
+
|
| 130 |
+
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
| 131 |
+
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
| 132 |
+
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" || x"${GITHUB_EVENT_LABEL_NAME}" == x"run-decode" ]]; then
|
| 133 |
+
mkdir -p zipformer/exp
|
| 134 |
+
ln -s $PWD/$repo/exp/pretrained.pt zipformer/exp/epoch-30.pt
|
| 135 |
+
mkdir -p data
|
| 136 |
+
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
| 137 |
+
|
| 138 |
+
ls -lh data
|
| 139 |
+
ls -lh zipformer/exp
|
| 140 |
+
|
| 141 |
+
mkdir -p data/fbank
|
| 142 |
+
pushd data/fbank
|
| 143 |
+
|
| 144 |
+
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_DEV.jsonl.gz
|
| 145 |
+
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/cuts_TEST.jsonl.gz
|
| 146 |
+
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_DEV.lca
|
| 147 |
+
curl -SL -O https://huggingface.co/csukuangfj/giga-dev-dataset-fbank/resolve/main/data/fbank/feats_TEST.lca
|
| 148 |
+
|
| 149 |
+
ln -sf cuts_DEV.jsonl.gz gigaspeech_cuts_DEV.jsonl.gz
|
| 150 |
+
ln -sf cuts_TEST.jsonl.gz gigaspeech_cuts_TEST.jsonl.gz
|
| 151 |
+
|
| 152 |
+
popd
|
| 153 |
+
|
| 154 |
+
log "Decoding test-clean and test-other"
|
| 155 |
+
|
| 156 |
+
# use a small value for decoding with CPU
|
| 157 |
+
max_duration=100
|
| 158 |
+
|
| 159 |
+
for method in greedy_search; do
|
| 160 |
+
log "Decoding with $method"
|
| 161 |
+
|
| 162 |
+
./zipformer/decode.py \
|
| 163 |
+
--decoding-method $method \
|
| 164 |
+
--epoch 30 \
|
| 165 |
+
--avg 1 \
|
| 166 |
+
--use-averaged-model 0 \
|
| 167 |
+
--max-duration $max_duration \
|
| 168 |
+
--exp-dir zipformer/exp
|
| 169 |
+
done
|
| 170 |
+
|
| 171 |
+
rm zipformer/exp/*.pt
|
| 172 |
+
fi
|
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
ADDED
|
@@ -0,0 +1,191 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
#
|
| 3 |
+
set -e
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/librispeech/ASR
|
| 12 |
+
|
| 13 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
| 14 |
+
|
| 15 |
+
log "Downloading pre-trained model from $repo_url"
|
| 16 |
+
git lfs install
|
| 17 |
+
git clone $repo_url
|
| 18 |
+
repo=$(basename $repo_url)
|
| 19 |
+
abs_repo=$(realpath $repo)
|
| 20 |
+
|
| 21 |
+
log "Display test files"
|
| 22 |
+
tree $repo/
|
| 23 |
+
ls -lh $repo/test_wavs/*.wav
|
| 24 |
+
|
| 25 |
+
pushd $repo/exp
|
| 26 |
+
ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
|
| 27 |
+
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
|
| 28 |
+
popd
|
| 29 |
+
|
| 30 |
+
log "Test exporting with torch.jit.trace()"
|
| 31 |
+
|
| 32 |
+
./lstm_transducer_stateless2/export.py \
|
| 33 |
+
--exp-dir $repo/exp \
|
| 34 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 35 |
+
--epoch 99 \
|
| 36 |
+
--avg 1 \
|
| 37 |
+
--use-averaged-model 0 \
|
| 38 |
+
--jit-trace 1
|
| 39 |
+
|
| 40 |
+
log "Decode with models exported by torch.jit.trace()"
|
| 41 |
+
|
| 42 |
+
./lstm_transducer_stateless2/jit_pretrained.py \
|
| 43 |
+
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
| 44 |
+
--encoder-model-filename $repo/exp/encoder_jit_trace.pt \
|
| 45 |
+
--decoder-model-filename $repo/exp/decoder_jit_trace.pt \
|
| 46 |
+
--joiner-model-filename $repo/exp/joiner_jit_trace.pt \
|
| 47 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 48 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 49 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 50 |
+
|
| 51 |
+
for sym in 1 2 3; do
|
| 52 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 53 |
+
|
| 54 |
+
./lstm_transducer_stateless2/pretrained.py \
|
| 55 |
+
--method greedy_search \
|
| 56 |
+
--max-sym-per-frame $sym \
|
| 57 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 58 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 59 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 60 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 61 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 62 |
+
done
|
| 63 |
+
|
| 64 |
+
for method in modified_beam_search beam_search fast_beam_search; do
|
| 65 |
+
log "$method"
|
| 66 |
+
|
| 67 |
+
./lstm_transducer_stateless2/pretrained.py \
|
| 68 |
+
--method $method \
|
| 69 |
+
--beam-size 4 \
|
| 70 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 71 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 72 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 73 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 74 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 75 |
+
done
|
| 76 |
+
|
| 77 |
+
echo "GITHUB_EVENT_NAME: ${GITHUB_EVENT_NAME}"
|
| 78 |
+
echo "GITHUB_EVENT_LABEL_NAME: ${GITHUB_EVENT_LABEL_NAME}"
|
| 79 |
+
|
| 80 |
+
if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"shallow-fusion" ]]; then
|
| 81 |
+
lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
|
| 82 |
+
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
|
| 83 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
|
| 84 |
+
lm_repo=$(basename $lm_repo_url)
|
| 85 |
+
pushd $lm_repo
|
| 86 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 87 |
+
mv exp/pretrained.pt exp/epoch-88.pt
|
| 88 |
+
popd
|
| 89 |
+
|
| 90 |
+
mkdir -p lstm_transducer_stateless2/exp
|
| 91 |
+
ln -sf $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
|
| 92 |
+
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
| 93 |
+
|
| 94 |
+
ls -lh data
|
| 95 |
+
ls -lh lstm_transducer_stateless2/exp
|
| 96 |
+
|
| 97 |
+
log "Decoding test-clean and test-other with RNN LM"
|
| 98 |
+
|
| 99 |
+
./lstm_transducer_stateless2/decode.py \
|
| 100 |
+
--use-averaged-model 0 \
|
| 101 |
+
--epoch 999 \
|
| 102 |
+
--avg 1 \
|
| 103 |
+
--exp-dir lstm_transducer_stateless2/exp \
|
| 104 |
+
--max-duration 600 \
|
| 105 |
+
--decoding-method modified_beam_search_lm_shallow_fusion \
|
| 106 |
+
--beam 4 \
|
| 107 |
+
--use-shallow-fusion 1 \
|
| 108 |
+
--lm-type rnn \
|
| 109 |
+
--lm-exp-dir $lm_repo/exp \
|
| 110 |
+
--lm-epoch 88 \
|
| 111 |
+
--lm-avg 1 \
|
| 112 |
+
--lm-scale 0.3 \
|
| 113 |
+
--rnn-lm-num-layers 3 \
|
| 114 |
+
--rnn-lm-tie-weights 1
|
| 115 |
+
fi
|
| 116 |
+
|
| 117 |
+
if [[ x"${GITHUB_EVENT_LABEL_NAME}" == x"LODR" ]]; then
|
| 118 |
+
bigram_repo_url=https://huggingface.co/marcoyang/librispeech_bigram
|
| 119 |
+
log "Download bi-gram LM from ${bigram_repo_url}"
|
| 120 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $bigram_repo_url
|
| 121 |
+
bigramlm_repo=$(basename $bigram_repo_url)
|
| 122 |
+
pushd $bigramlm_repo
|
| 123 |
+
git lfs pull --include "2gram.fst.txt"
|
| 124 |
+
cp 2gram.fst.txt $abs_repo/data/lang_bpe_500/.
|
| 125 |
+
popd
|
| 126 |
+
|
| 127 |
+
lm_repo_url=https://huggingface.co/ezerhouni/icefall-librispeech-rnn-lm
|
| 128 |
+
log "Download pre-trained RNN-LM model from ${lm_repo_url}"
|
| 129 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $lm_repo_url
|
| 130 |
+
lm_repo=$(basename $lm_repo_url)
|
| 131 |
+
pushd $lm_repo
|
| 132 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 133 |
+
mv exp/pretrained.pt exp/epoch-88.pt
|
| 134 |
+
popd
|
| 135 |
+
|
| 136 |
+
mkdir -p lstm_transducer_stateless2/exp
|
| 137 |
+
ln -sf $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
|
| 138 |
+
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
| 139 |
+
|
| 140 |
+
ls -lh data
|
| 141 |
+
ls -lh lstm_transducer_stateless2/exp
|
| 142 |
+
|
| 143 |
+
log "Decoding test-clean and test-other"
|
| 144 |
+
|
| 145 |
+
./lstm_transducer_stateless2/decode.py \
|
| 146 |
+
--use-averaged-model 0 \
|
| 147 |
+
--epoch 999 \
|
| 148 |
+
--avg 1 \
|
| 149 |
+
--exp-dir lstm_transducer_stateless2/exp \
|
| 150 |
+
--max-duration 600 \
|
| 151 |
+
--decoding-method modified_beam_search_LODR \
|
| 152 |
+
--beam 4 \
|
| 153 |
+
--use-shallow-fusion 1 \
|
| 154 |
+
--lm-type rnn \
|
| 155 |
+
--lm-exp-dir $lm_repo/exp \
|
| 156 |
+
--lm-scale 0.4 \
|
| 157 |
+
--lm-epoch 88 \
|
| 158 |
+
--rnn-lm-avg 1 \
|
| 159 |
+
--rnn-lm-num-layers 3 \
|
| 160 |
+
--rnn-lm-tie-weights 1 \
|
| 161 |
+
--tokens-ngram 2 \
|
| 162 |
+
--ngram-lm-scale -0.16
|
| 163 |
+
fi
|
| 164 |
+
|
| 165 |
+
if [[ x"${GITHUB_EVENT_NAME}" == x"schedule" || x"${GITHUB_EVENT_NAME}" == x"workflow_dispatch" ]]; then
|
| 166 |
+
mkdir -p lstm_transducer_stateless2/exp
|
| 167 |
+
ln -s $PWD/$repo/exp/pretrained.pt lstm_transducer_stateless2/exp/epoch-999.pt
|
| 168 |
+
ln -s $PWD/$repo/data/lang_bpe_500 data/
|
| 169 |
+
|
| 170 |
+
ls -lh data
|
| 171 |
+
ls -lh lstm_transducer_stateless2/exp
|
| 172 |
+
|
| 173 |
+
log "Decoding test-clean and test-other"
|
| 174 |
+
|
| 175 |
+
# use a small value for decoding with CPU
|
| 176 |
+
max_duration=100
|
| 177 |
+
|
| 178 |
+
for method in greedy_search fast_beam_search; do
|
| 179 |
+
log "Decoding with $method"
|
| 180 |
+
|
| 181 |
+
./lstm_transducer_stateless2/decode.py \
|
| 182 |
+
--decoding-method $method \
|
| 183 |
+
--epoch 999 \
|
| 184 |
+
--avg 1 \
|
| 185 |
+
--use-averaged-model 0 \
|
| 186 |
+
--max-duration $max_duration \
|
| 187 |
+
--exp-dir lstm_transducer_stateless2/exp
|
| 188 |
+
done
|
| 189 |
+
|
| 190 |
+
rm lstm_transducer_stateless2/exp/*.pt
|
| 191 |
+
fi
|
.github/scripts/run-multi-corpora-zipformer.sh
ADDED
|
@@ -0,0 +1,135 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -e
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/multi_zh-hans/ASR
|
| 12 |
+
|
| 13 |
+
log "==== Test icefall-asr-multi-zh-hans-zipformer-2023-9-2 ===="
|
| 14 |
+
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-2023-9-2/
|
| 15 |
+
|
| 16 |
+
log "Downloading pre-trained model from $repo_url"
|
| 17 |
+
git lfs install
|
| 18 |
+
git clone $repo_url
|
| 19 |
+
repo=$(basename $repo_url)
|
| 20 |
+
|
| 21 |
+
|
| 22 |
+
log "Display test files"
|
| 23 |
+
tree $repo/
|
| 24 |
+
ls -lh $repo/test_wavs/*.wav
|
| 25 |
+
|
| 26 |
+
pushd $repo/exp
|
| 27 |
+
ln -s epoch-20.pt epoch-99.pt
|
| 28 |
+
popd
|
| 29 |
+
|
| 30 |
+
ls -lh $repo/exp/*.pt
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
./zipformer/pretrained.py \
|
| 34 |
+
--checkpoint $repo/exp/epoch-99.pt \
|
| 35 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 36 |
+
--method greedy_search \
|
| 37 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 38 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 39 |
+
$repo/test_wavs/DEV_T0000000002.wav
|
| 40 |
+
|
| 41 |
+
for method in modified_beam_search fast_beam_search; do
|
| 42 |
+
log "$method"
|
| 43 |
+
|
| 44 |
+
./zipformer/pretrained.py \
|
| 45 |
+
--method $method \
|
| 46 |
+
--beam-size 4 \
|
| 47 |
+
--checkpoint $repo/exp/epoch-99.pt \
|
| 48 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 49 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 50 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 51 |
+
$repo/test_wavs/DEV_T0000000002.wav
|
| 52 |
+
done
|
| 53 |
+
|
| 54 |
+
rm -rf $repo
|
| 55 |
+
|
| 56 |
+
log "==== Test icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24 ===="
|
| 57 |
+
repo_url=https://huggingface.co/zrjin/icefall-asr-multi-zh-hans-zipformer-ctc-2023-10-24/
|
| 58 |
+
|
| 59 |
+
log "Downloading pre-trained model from $repo_url"
|
| 60 |
+
git lfs install
|
| 61 |
+
git clone $repo_url
|
| 62 |
+
repo=$(basename $repo_url)
|
| 63 |
+
|
| 64 |
+
|
| 65 |
+
log "Display test files"
|
| 66 |
+
tree $repo/
|
| 67 |
+
ls -lh $repo/test_wavs/*.wav
|
| 68 |
+
|
| 69 |
+
pushd $repo/exp
|
| 70 |
+
ln -s epoch-20.pt epoch-99.pt
|
| 71 |
+
popd
|
| 72 |
+
|
| 73 |
+
ls -lh $repo/exp/*.pt
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
./zipformer/pretrained.py \
|
| 77 |
+
--checkpoint $repo/exp/epoch-99.pt \
|
| 78 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 79 |
+
--use-ctc 1 \
|
| 80 |
+
--method greedy_search \
|
| 81 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 82 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 83 |
+
$repo/test_wavs/DEV_T0000000002.wav
|
| 84 |
+
|
| 85 |
+
for method in modified_beam_search fast_beam_search; do
|
| 86 |
+
log "$method"
|
| 87 |
+
|
| 88 |
+
./zipformer/pretrained.py \
|
| 89 |
+
--method $method \
|
| 90 |
+
--beam-size 4 \
|
| 91 |
+
--use-ctc 1 \
|
| 92 |
+
--checkpoint $repo/exp/epoch-99.pt \
|
| 93 |
+
--tokens $repo/data/lang_bpe_2000/tokens.txt \
|
| 94 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 95 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 96 |
+
$repo/test_wavs/DEV_T0000000002.wav
|
| 97 |
+
done
|
| 98 |
+
|
| 99 |
+
rm -rf $repo
|
| 100 |
+
|
| 101 |
+
cd ../../../egs/multi_zh_en/ASR
|
| 102 |
+
log "==== Test icefall-asr-zipformer-multi-zh-en-2023-11-22 ===="
|
| 103 |
+
repo_url=https://huggingface.co/zrjin/icefall-asr-zipformer-multi-zh-en-2023-11-22/
|
| 104 |
+
|
| 105 |
+
log "Downloading pre-trained model from $repo_url"
|
| 106 |
+
git lfs install
|
| 107 |
+
git clone $repo_url
|
| 108 |
+
repo=$(basename $repo_url)
|
| 109 |
+
|
| 110 |
+
log "Display test files"
|
| 111 |
+
tree $repo/
|
| 112 |
+
ls -lh $repo/test_wavs/*.wav
|
| 113 |
+
|
| 114 |
+
./zipformer/pretrained.py \
|
| 115 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 116 |
+
--bpe-model $repo/data/lang_bbpe_2000/bbpe.model \
|
| 117 |
+
--method greedy_search \
|
| 118 |
+
$repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav \
|
| 119 |
+
$repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav \
|
| 120 |
+
$repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav
|
| 121 |
+
|
| 122 |
+
for method in modified_beam_search fast_beam_search; do
|
| 123 |
+
log "$method"
|
| 124 |
+
|
| 125 |
+
./zipformer/pretrained.py \
|
| 126 |
+
--method $method \
|
| 127 |
+
--beam-size 4 \
|
| 128 |
+
--checkpoint $repo/exp/pretrained.pt \
|
| 129 |
+
--bpe-model $repo/data/lang_bbpe_2000/bbpe.model \
|
| 130 |
+
$repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_29.wav \
|
| 131 |
+
$repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_55.wav \
|
| 132 |
+
$repo/test_wavs/_1634_210_2577_1_1525157964032_3712259_75.wav
|
| 133 |
+
done
|
| 134 |
+
|
| 135 |
+
rm -rf $repo
|
.github/scripts/run-swbd-conformer-ctc-2023-08-26.sh
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -e
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/swbd/ASR
|
| 12 |
+
|
| 13 |
+
repo_url=https://huggingface.co/zrjin/icefall-asr-swbd-conformer-ctc-2023-8-26
|
| 14 |
+
|
| 15 |
+
log "Downloading pre-trained model from $repo_url"
|
| 16 |
+
git lfs install
|
| 17 |
+
git clone $repo_url
|
| 18 |
+
repo=$(basename $repo_url)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
log "Display test files"
|
| 22 |
+
tree $repo/
|
| 23 |
+
ls -lh $repo/test_wavs/*.wav
|
| 24 |
+
|
| 25 |
+
pushd $repo/exp
|
| 26 |
+
ln -s epoch-98.pt epoch-99.pt
|
| 27 |
+
popd
|
| 28 |
+
|
| 29 |
+
ls -lh $repo/exp/*.pt
|
| 30 |
+
|
| 31 |
+
for method in ctc-decoding 1best; do
|
| 32 |
+
log "$method"
|
| 33 |
+
|
| 34 |
+
./conformer_ctc/pretrained.py \
|
| 35 |
+
--method $method \
|
| 36 |
+
--checkpoint $repo/exp/epoch-99.pt \
|
| 37 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 38 |
+
--words-file $repo/data/lang_bpe_500/words.txt \
|
| 39 |
+
--HLG $repo/data/lang_bpe_500/HLG.pt \
|
| 40 |
+
--G $repo/data/lm/G_4_gram.pt \
|
| 41 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 42 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 43 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 44 |
+
done
|
.github/scripts/run-wenetspeech-pruned-transducer-stateless2.sh
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -e
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/wenetspeech/ASR
|
| 12 |
+
|
| 13 |
+
repo_url=https://huggingface.co/luomingshuang/icefall_asr_wenetspeech_pruned_transducer_stateless2
|
| 14 |
+
|
| 15 |
+
log "Downloading pre-trained model from $repo_url"
|
| 16 |
+
git lfs install
|
| 17 |
+
git clone $repo_url
|
| 18 |
+
repo=$(basename $repo_url)
|
| 19 |
+
|
| 20 |
+
log "Display test files"
|
| 21 |
+
tree $repo/
|
| 22 |
+
ls -lh $repo/test_wavs/*.wav
|
| 23 |
+
|
| 24 |
+
pushd $repo/exp
|
| 25 |
+
ln -s pretrained_epoch_10_avg_2.pt pretrained.pt
|
| 26 |
+
ln -s pretrained_epoch_10_avg_2.pt epoch-99.pt
|
| 27 |
+
popd
|
| 28 |
+
|
| 29 |
+
log "Test exporting to ONNX format"
|
| 30 |
+
|
| 31 |
+
./pruned_transducer_stateless2/export-onnx.py \
|
| 32 |
+
--exp-dir $repo/exp \
|
| 33 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 34 |
+
--epoch 99 \
|
| 35 |
+
--avg 1
|
| 36 |
+
|
| 37 |
+
log "Export to torchscript model"
|
| 38 |
+
|
| 39 |
+
./pruned_transducer_stateless2/export.py \
|
| 40 |
+
--exp-dir $repo/exp \
|
| 41 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 42 |
+
--epoch 99 \
|
| 43 |
+
--avg 1 \
|
| 44 |
+
--jit 1
|
| 45 |
+
|
| 46 |
+
./pruned_transducer_stateless2/export.py \
|
| 47 |
+
--exp-dir $repo/exp \
|
| 48 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 49 |
+
--epoch 99 \
|
| 50 |
+
--avg 1 \
|
| 51 |
+
--jit-trace 1
|
| 52 |
+
|
| 53 |
+
ls -lh $repo/exp/*.onnx
|
| 54 |
+
ls -lh $repo/exp/*.pt
|
| 55 |
+
|
| 56 |
+
log "Decode with ONNX models"
|
| 57 |
+
|
| 58 |
+
./pruned_transducer_stateless2/onnx_check.py \
|
| 59 |
+
--jit-filename $repo/exp/cpu_jit.pt \
|
| 60 |
+
--onnx-encoder-filename $repo/exp/encoder-epoch-10-avg-2.onnx \
|
| 61 |
+
--onnx-decoder-filename $repo/exp/decoder-epoch-10-avg-2.onnx \
|
| 62 |
+
--onnx-joiner-filename $repo/exp/joiner-epoch-10-avg-2.onnx \
|
| 63 |
+
--onnx-joiner-encoder-proj-filename $repo/exp/joiner_encoder_proj-epoch-10-avg-2.onnx \
|
| 64 |
+
--onnx-joiner-decoder-proj-filename $repo/exp/joiner_decoder_proj-epoch-10-avg-2.onnx
|
| 65 |
+
|
| 66 |
+
./pruned_transducer_stateless2/onnx_pretrained.py \
|
| 67 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 68 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 69 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 70 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
| 71 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 72 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 73 |
+
$repo/test_wavs/DEV_T0000000002.wav
|
| 74 |
+
|
| 75 |
+
log "Decode with models exported by torch.jit.trace()"
|
| 76 |
+
|
| 77 |
+
./pruned_transducer_stateless2/jit_pretrained.py \
|
| 78 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 79 |
+
--encoder-model-filename $repo/exp/encoder_jit_trace.pt \
|
| 80 |
+
--decoder-model-filename $repo/exp/decoder_jit_trace.pt \
|
| 81 |
+
--joiner-model-filename $repo/exp/joiner_jit_trace.pt \
|
| 82 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 83 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 84 |
+
$repo/test_wavs/DEV_T0000000002.wav
|
| 85 |
+
|
| 86 |
+
./pruned_transducer_stateless2/jit_pretrained.py \
|
| 87 |
+
--tokens $repo/data/lang_char/tokens.txt \
|
| 88 |
+
--encoder-model-filename $repo/exp/encoder_jit_script.pt \
|
| 89 |
+
--decoder-model-filename $repo/exp/decoder_jit_script.pt \
|
| 90 |
+
--joiner-model-filename $repo/exp/joiner_jit_script.pt \
|
| 91 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 92 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 93 |
+
$repo/test_wavs/DEV_T0000000002.wav
|
| 94 |
+
|
| 95 |
+
for sym in 1 2 3; do
|
| 96 |
+
log "Greedy search with --max-sym-per-frame $sym"
|
| 97 |
+
|
| 98 |
+
./pruned_transducer_stateless2/pretrained.py \
|
| 99 |
+
--checkpoint $repo/exp/epoch-99.pt \
|
| 100 |
+
--lang-dir $repo/data/lang_char \
|
| 101 |
+
--decoding-method greedy_search \
|
| 102 |
+
--max-sym-per-frame $sym \
|
| 103 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 104 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 105 |
+
$repo/test_wavs/DEV_T0000000002.wav
|
| 106 |
+
done
|
| 107 |
+
|
| 108 |
+
for method in modified_beam_search beam_search fast_beam_search; do
|
| 109 |
+
log "$method"
|
| 110 |
+
|
| 111 |
+
./pruned_transducer_stateless2/pretrained.py \
|
| 112 |
+
--decoding-method $method \
|
| 113 |
+
--beam-size 4 \
|
| 114 |
+
--checkpoint $repo/exp/epoch-99.pt \
|
| 115 |
+
--lang-dir $repo/data/lang_char \
|
| 116 |
+
$repo/test_wavs/DEV_T0000000000.wav \
|
| 117 |
+
$repo/test_wavs/DEV_T0000000001.wav \
|
| 118 |
+
$repo/test_wavs/DEV_T0000000002.wav
|
| 119 |
+
done
|
.github/scripts/test-ncnn-export.sh
ADDED
|
@@ -0,0 +1,230 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -e
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
pushd egs/librispeech/ASR
|
| 12 |
+
|
| 13 |
+
log "Install ncnn and pnnx"
|
| 14 |
+
|
| 15 |
+
# We are using a modified ncnn here. Will try to merge it to the official repo
|
| 16 |
+
# of ncnn
|
| 17 |
+
git clone https://github.com/csukuangfj/ncnn
|
| 18 |
+
pushd ncnn
|
| 19 |
+
git submodule init
|
| 20 |
+
git submodule update python/pybind11
|
| 21 |
+
python3 setup.py bdist_wheel
|
| 22 |
+
ls -lh dist/
|
| 23 |
+
pip install dist/*.whl
|
| 24 |
+
cd tools/pnnx
|
| 25 |
+
mkdir build
|
| 26 |
+
cd build
|
| 27 |
+
|
| 28 |
+
echo "which python3"
|
| 29 |
+
|
| 30 |
+
which python3
|
| 31 |
+
#/opt/hostedtoolcache/Python/3.8.16/x64/bin/python3
|
| 32 |
+
|
| 33 |
+
cmake -D Python3_EXECUTABLE=$(which python3) ..
|
| 34 |
+
make -j4 pnnx
|
| 35 |
+
|
| 36 |
+
./src/pnnx || echo "pass"
|
| 37 |
+
|
| 38 |
+
popd
|
| 39 |
+
|
| 40 |
+
export PATH=$PWD/ncnn/tools/pnnx/build/src:$PATH
|
| 41 |
+
|
| 42 |
+
log "=========================================================================="
|
| 43 |
+
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
|
| 44 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 45 |
+
repo=$(basename $repo_url)
|
| 46 |
+
|
| 47 |
+
pushd $repo
|
| 48 |
+
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
|
| 49 |
+
|
| 50 |
+
cd exp
|
| 51 |
+
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
|
| 52 |
+
popd
|
| 53 |
+
|
| 54 |
+
log "Export via torch.jit.trace()"
|
| 55 |
+
|
| 56 |
+
./conv_emformer_transducer_stateless2/export-for-ncnn.py \
|
| 57 |
+
--exp-dir $repo/exp \
|
| 58 |
+
--epoch 99 \
|
| 59 |
+
--avg 1 \
|
| 60 |
+
--use-averaged-model 0 \
|
| 61 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 62 |
+
--num-encoder-layers 12 \
|
| 63 |
+
--chunk-length 32 \
|
| 64 |
+
--cnn-module-kernel 31 \
|
| 65 |
+
--left-context-length 32 \
|
| 66 |
+
--right-context-length 8 \
|
| 67 |
+
--memory-size 32
|
| 68 |
+
|
| 69 |
+
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
|
| 70 |
+
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
|
| 71 |
+
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
|
| 72 |
+
|
| 73 |
+
python3 ./conv_emformer_transducer_stateless2/streaming-ncnn-decode.py \
|
| 74 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 75 |
+
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
| 76 |
+
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
| 77 |
+
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
| 78 |
+
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
| 79 |
+
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
| 80 |
+
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
| 81 |
+
$repo/test_wavs/1089-134686-0001.wav
|
| 82 |
+
|
| 83 |
+
rm -rf $repo
|
| 84 |
+
log "--------------------------------------------------------------------------"
|
| 85 |
+
|
| 86 |
+
log "=========================================================================="
|
| 87 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
| 88 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 89 |
+
repo=$(basename $repo_url)
|
| 90 |
+
|
| 91 |
+
pushd $repo
|
| 92 |
+
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
|
| 93 |
+
|
| 94 |
+
cd exp
|
| 95 |
+
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
|
| 96 |
+
popd
|
| 97 |
+
|
| 98 |
+
log "Export via torch.jit.trace()"
|
| 99 |
+
|
| 100 |
+
./lstm_transducer_stateless2/export-for-ncnn.py \
|
| 101 |
+
--exp-dir $repo/exp \
|
| 102 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 103 |
+
--epoch 99 \
|
| 104 |
+
--avg 1 \
|
| 105 |
+
--use-averaged-model 0
|
| 106 |
+
|
| 107 |
+
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
|
| 108 |
+
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
|
| 109 |
+
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
|
| 110 |
+
|
| 111 |
+
python3 ./lstm_transducer_stateless2/streaming-ncnn-decode.py \
|
| 112 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 113 |
+
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
| 114 |
+
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
| 115 |
+
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
| 116 |
+
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
| 117 |
+
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
| 118 |
+
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
| 119 |
+
$repo/test_wavs/1089-134686-0001.wav
|
| 120 |
+
|
| 121 |
+
python3 ./lstm_transducer_stateless2/ncnn-decode.py \
|
| 122 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 123 |
+
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
| 124 |
+
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
| 125 |
+
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
| 126 |
+
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
| 127 |
+
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
| 128 |
+
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
| 129 |
+
$repo/test_wavs/1089-134686-0001.wav
|
| 130 |
+
|
| 131 |
+
rm -rf $repo
|
| 132 |
+
log "--------------------------------------------------------------------------"
|
| 133 |
+
|
| 134 |
+
log "=========================================================================="
|
| 135 |
+
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
|
| 136 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 137 |
+
repo=$(basename $repo_url)
|
| 138 |
+
|
| 139 |
+
pushd $repo
|
| 140 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 141 |
+
|
| 142 |
+
cd exp
|
| 143 |
+
ln -s pretrained.pt epoch-99.pt
|
| 144 |
+
popd
|
| 145 |
+
|
| 146 |
+
./pruned_transducer_stateless7_streaming/export-for-ncnn.py \
|
| 147 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 148 |
+
--exp-dir $repo/exp \
|
| 149 |
+
--use-averaged-model 0 \
|
| 150 |
+
--epoch 99 \
|
| 151 |
+
--avg 1 \
|
| 152 |
+
\
|
| 153 |
+
--decode-chunk-len 32 \
|
| 154 |
+
--num-encoder-layers "2,4,3,2,4" \
|
| 155 |
+
--feedforward-dims "1024,1024,2048,2048,1024" \
|
| 156 |
+
--nhead "8,8,8,8,8" \
|
| 157 |
+
--encoder-dims "384,384,384,384,384" \
|
| 158 |
+
--attention-dims "192,192,192,192,192" \
|
| 159 |
+
--encoder-unmasked-dims "256,256,256,256,256" \
|
| 160 |
+
--zipformer-downsampling-factors "1,2,4,8,2" \
|
| 161 |
+
--cnn-module-kernels "31,31,31,31,31" \
|
| 162 |
+
--decoder-dim 512 \
|
| 163 |
+
--joiner-dim 512
|
| 164 |
+
|
| 165 |
+
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
|
| 166 |
+
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
|
| 167 |
+
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
|
| 168 |
+
|
| 169 |
+
python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
|
| 170 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 171 |
+
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
| 172 |
+
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
| 173 |
+
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
| 174 |
+
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
| 175 |
+
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
| 176 |
+
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
| 177 |
+
$repo/test_wavs/1089-134686-0001.wav
|
| 178 |
+
|
| 179 |
+
rm -rf $repo
|
| 180 |
+
log "--------------------------------------------------------------------------"
|
| 181 |
+
|
| 182 |
+
log "=========================================================================="
|
| 183 |
+
repo_url=https://huggingface.co/pfluo/k2fsa-zipformer-chinese-english-mixed
|
| 184 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 185 |
+
repo=$(basename $repo_url)
|
| 186 |
+
|
| 187 |
+
pushd $repo
|
| 188 |
+
git lfs pull --include "data/lang_char_bpe/L.pt"
|
| 189 |
+
git lfs pull --include "data/lang_char_bpe/L_disambig.pt"
|
| 190 |
+
git lfs pull --include "data/lang_char_bpe/Linv.pt"
|
| 191 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 192 |
+
|
| 193 |
+
cd exp
|
| 194 |
+
ln -s pretrained.pt epoch-9999.pt
|
| 195 |
+
popd
|
| 196 |
+
|
| 197 |
+
./pruned_transducer_stateless7_streaming/export-for-ncnn-zh.py \
|
| 198 |
+
--tokens $repo/data/lang_char_bpe/tokens.txt \
|
| 199 |
+
--exp-dir $repo/exp \
|
| 200 |
+
--use-averaged-model 0 \
|
| 201 |
+
--epoch 9999 \
|
| 202 |
+
--avg 1 \
|
| 203 |
+
--decode-chunk-len 32 \
|
| 204 |
+
--num-encoder-layers "2,4,3,2,4" \
|
| 205 |
+
--feedforward-dims "1024,1024,1536,1536,1024" \
|
| 206 |
+
--nhead "8,8,8,8,8" \
|
| 207 |
+
--encoder-dims "384,384,384,384,384" \
|
| 208 |
+
--attention-dims "192,192,192,192,192" \
|
| 209 |
+
--encoder-unmasked-dims "256,256,256,256,256" \
|
| 210 |
+
--zipformer-downsampling-factors "1,2,4,8,2" \
|
| 211 |
+
--cnn-module-kernels "31,31,31,31,31" \
|
| 212 |
+
--decoder-dim 512 \
|
| 213 |
+
--joiner-dim 512
|
| 214 |
+
|
| 215 |
+
pnnx $repo/exp/encoder_jit_trace-pnnx.pt
|
| 216 |
+
pnnx $repo/exp/decoder_jit_trace-pnnx.pt
|
| 217 |
+
pnnx $repo/exp/joiner_jit_trace-pnnx.pt
|
| 218 |
+
|
| 219 |
+
python3 ./pruned_transducer_stateless7_streaming/streaming-ncnn-decode.py \
|
| 220 |
+
--tokens $repo/data/lang_char_bpe/tokens.txt \
|
| 221 |
+
--encoder-param-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.param \
|
| 222 |
+
--encoder-bin-filename $repo/exp/encoder_jit_trace-pnnx.ncnn.bin \
|
| 223 |
+
--decoder-param-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.param \
|
| 224 |
+
--decoder-bin-filename $repo/exp/decoder_jit_trace-pnnx.ncnn.bin \
|
| 225 |
+
--joiner-param-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.param \
|
| 226 |
+
--joiner-bin-filename $repo/exp/joiner_jit_trace-pnnx.ncnn.bin \
|
| 227 |
+
$repo/test_wavs/0.wav
|
| 228 |
+
|
| 229 |
+
rm -rf $repo
|
| 230 |
+
log "--------------------------------------------------------------------------"
|
.github/scripts/test-onnx-export.sh
ADDED
|
@@ -0,0 +1,466 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -e
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/librispeech/ASR
|
| 12 |
+
|
| 13 |
+
log "=========================================================================="
|
| 14 |
+
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
|
| 15 |
+
log "Downloading pre-trained model from $repo_url"
|
| 16 |
+
git lfs install
|
| 17 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 18 |
+
repo=$(basename $repo_url)
|
| 19 |
+
|
| 20 |
+
pushd $repo
|
| 21 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 22 |
+
cd exp
|
| 23 |
+
ln -s pretrained.pt epoch-99.pt
|
| 24 |
+
popd
|
| 25 |
+
|
| 26 |
+
log "Export via torch.jit.script()"
|
| 27 |
+
./zipformer/export.py \
|
| 28 |
+
--use-averaged-model 0 \
|
| 29 |
+
--exp-dir $repo/exp \
|
| 30 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 31 |
+
--epoch 99 \
|
| 32 |
+
--avg 1 \
|
| 33 |
+
--jit 1
|
| 34 |
+
|
| 35 |
+
log "Test export to ONNX format"
|
| 36 |
+
./zipformer/export-onnx.py \
|
| 37 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 38 |
+
--use-averaged-model 0 \
|
| 39 |
+
--epoch 99 \
|
| 40 |
+
--avg 1 \
|
| 41 |
+
--exp-dir $repo/exp \
|
| 42 |
+
--num-encoder-layers "2,2,3,4,3,2" \
|
| 43 |
+
--downsampling-factor "1,2,4,8,4,2" \
|
| 44 |
+
--feedforward-dim "512,768,1024,1536,1024,768" \
|
| 45 |
+
--num-heads "4,4,4,8,4,4" \
|
| 46 |
+
--encoder-dim "192,256,384,512,384,256" \
|
| 47 |
+
--query-head-dim 32 \
|
| 48 |
+
--value-head-dim 12 \
|
| 49 |
+
--pos-head-dim 4 \
|
| 50 |
+
--pos-dim 48 \
|
| 51 |
+
--encoder-unmasked-dim "192,192,256,256,256,192" \
|
| 52 |
+
--cnn-module-kernel "31,31,15,15,15,31" \
|
| 53 |
+
--decoder-dim 512 \
|
| 54 |
+
--joiner-dim 512 \
|
| 55 |
+
--causal False \
|
| 56 |
+
--chunk-size "16,32,64,-1" \
|
| 57 |
+
--left-context-frames "64,128,256,-1"
|
| 58 |
+
|
| 59 |
+
ls -lh $repo/exp
|
| 60 |
+
|
| 61 |
+
log "Run onnx_check.py"
|
| 62 |
+
|
| 63 |
+
./zipformer/onnx_check.py \
|
| 64 |
+
--jit-filename $repo/exp/jit_script.pt \
|
| 65 |
+
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 66 |
+
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 67 |
+
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
|
| 68 |
+
|
| 69 |
+
log "Run onnx_pretrained.py"
|
| 70 |
+
|
| 71 |
+
./zipformer/onnx_pretrained.py \
|
| 72 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 73 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 74 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
| 75 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 76 |
+
$repo/test_wavs/1089-134686-0001.wav
|
| 77 |
+
|
| 78 |
+
rm -rf $repo
|
| 79 |
+
|
| 80 |
+
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17
|
| 81 |
+
log "Downloading pre-trained model from $repo_url"
|
| 82 |
+
git lfs install
|
| 83 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 84 |
+
repo=$(basename $repo_url)
|
| 85 |
+
|
| 86 |
+
pushd $repo
|
| 87 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 88 |
+
|
| 89 |
+
cd exp
|
| 90 |
+
ln -s pretrained.pt epoch-99.pt
|
| 91 |
+
popd
|
| 92 |
+
|
| 93 |
+
log "Test export streaming model to ONNX format"
|
| 94 |
+
./zipformer/export-onnx-streaming.py \
|
| 95 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 96 |
+
--use-averaged-model 0 \
|
| 97 |
+
--epoch 99 \
|
| 98 |
+
--avg 1 \
|
| 99 |
+
--exp-dir $repo/exp \
|
| 100 |
+
--num-encoder-layers "2,2,3,4,3,2" \
|
| 101 |
+
--downsampling-factor "1,2,4,8,4,2" \
|
| 102 |
+
--feedforward-dim "512,768,1024,1536,1024,768" \
|
| 103 |
+
--num-heads "4,4,4,8,4,4" \
|
| 104 |
+
--encoder-dim "192,256,384,512,384,256" \
|
| 105 |
+
--query-head-dim 32 \
|
| 106 |
+
--value-head-dim 12 \
|
| 107 |
+
--pos-head-dim 4 \
|
| 108 |
+
--pos-dim 48 \
|
| 109 |
+
--encoder-unmasked-dim "192,192,256,256,256,192" \
|
| 110 |
+
--cnn-module-kernel "31,31,15,15,15,31" \
|
| 111 |
+
--decoder-dim 512 \
|
| 112 |
+
--joiner-dim 512 \
|
| 113 |
+
--causal True \
|
| 114 |
+
--chunk-size 16 \
|
| 115 |
+
--left-context-frames 64
|
| 116 |
+
|
| 117 |
+
ls -lh $repo/exp
|
| 118 |
+
|
| 119 |
+
log "Run onnx_pretrained-streaming.py"
|
| 120 |
+
|
| 121 |
+
./zipformer/onnx_pretrained-streaming.py \
|
| 122 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1-chunk-16-left-64.onnx \
|
| 123 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1-chunk-16-left-64.onnx \
|
| 124 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1-chunk-16-left-64.onnx \
|
| 125 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 126 |
+
$repo/test_wavs/1089-134686-0001.wav
|
| 127 |
+
|
| 128 |
+
rm -rf $repo
|
| 129 |
+
|
| 130 |
+
log "--------------------------------------------------------------------------"
|
| 131 |
+
|
| 132 |
+
log "=========================================================================="
|
| 133 |
+
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
|
| 134 |
+
log "Downloading pre-trained model from $repo_url"
|
| 135 |
+
git lfs install
|
| 136 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 137 |
+
repo=$(basename $repo_url)
|
| 138 |
+
|
| 139 |
+
pushd $repo
|
| 140 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 141 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 142 |
+
cd exp
|
| 143 |
+
ln -s pretrained.pt epoch-99.pt
|
| 144 |
+
popd
|
| 145 |
+
|
| 146 |
+
log "Export via torch.jit.trace()"
|
| 147 |
+
|
| 148 |
+
./pruned_transducer_stateless7_streaming/jit_trace_export.py \
|
| 149 |
+
--bpe-model $repo/data/lang_bpe_500/bpe.model \
|
| 150 |
+
--use-averaged-model 0 \
|
| 151 |
+
--epoch 99 \
|
| 152 |
+
--avg 1 \
|
| 153 |
+
--decode-chunk-len 32 \
|
| 154 |
+
--exp-dir $repo/exp/
|
| 155 |
+
|
| 156 |
+
log "Test exporting to ONNX format"
|
| 157 |
+
|
| 158 |
+
./pruned_transducer_stateless7_streaming/export-onnx.py \
|
| 159 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 160 |
+
--use-averaged-model 0 \
|
| 161 |
+
--epoch 99 \
|
| 162 |
+
--avg 1 \
|
| 163 |
+
--decode-chunk-len 32 \
|
| 164 |
+
--exp-dir $repo/exp/
|
| 165 |
+
|
| 166 |
+
ls -lh $repo/exp
|
| 167 |
+
|
| 168 |
+
log "Run onnx_check.py"
|
| 169 |
+
|
| 170 |
+
./pruned_transducer_stateless7_streaming/onnx_check.py \
|
| 171 |
+
--jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
|
| 172 |
+
--jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
|
| 173 |
+
--jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
|
| 174 |
+
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 175 |
+
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 176 |
+
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
|
| 177 |
+
|
| 178 |
+
log "Run onnx_pretrained.py"
|
| 179 |
+
|
| 180 |
+
./pruned_transducer_stateless7_streaming/onnx_pretrained.py \
|
| 181 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 182 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 183 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
| 184 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 185 |
+
$repo/test_wavs/1089-134686-0001.wav
|
| 186 |
+
|
| 187 |
+
rm -rf $repo
|
| 188 |
+
log "--------------------------------------------------------------------------"
|
| 189 |
+
|
| 190 |
+
log "=========================================================================="
|
| 191 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
|
| 192 |
+
log "Downloading pre-trained model from $repo_url"
|
| 193 |
+
git lfs install
|
| 194 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 195 |
+
repo=$(basename $repo_url)
|
| 196 |
+
|
| 197 |
+
pushd $repo
|
| 198 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 199 |
+
git lfs pull --include "exp/pretrained-iter-1224000-avg-14.pt"
|
| 200 |
+
|
| 201 |
+
cd exp
|
| 202 |
+
ln -s pretrained-iter-1224000-avg-14.pt epoch-9999.pt
|
| 203 |
+
popd
|
| 204 |
+
|
| 205 |
+
log "Export via torch.jit.script()"
|
| 206 |
+
|
| 207 |
+
./pruned_transducer_stateless3/export.py \
|
| 208 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 209 |
+
--epoch 9999 \
|
| 210 |
+
--avg 1 \
|
| 211 |
+
--exp-dir $repo/exp/ \
|
| 212 |
+
--jit 1
|
| 213 |
+
|
| 214 |
+
log "Test exporting to ONNX format"
|
| 215 |
+
|
| 216 |
+
./pruned_transducer_stateless3/export-onnx.py \
|
| 217 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 218 |
+
--epoch 9999 \
|
| 219 |
+
--avg 1 \
|
| 220 |
+
--exp-dir $repo/exp/
|
| 221 |
+
|
| 222 |
+
ls -lh $repo/exp
|
| 223 |
+
|
| 224 |
+
log "Run onnx_check.py"
|
| 225 |
+
|
| 226 |
+
./pruned_transducer_stateless3/onnx_check.py \
|
| 227 |
+
--jit-filename $repo/exp/cpu_jit.pt \
|
| 228 |
+
--onnx-encoder-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
|
| 229 |
+
--onnx-decoder-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
|
| 230 |
+
--onnx-joiner-filename $repo/exp/joiner-epoch-9999-avg-1.onnx
|
| 231 |
+
|
| 232 |
+
log "Run onnx_pretrained.py"
|
| 233 |
+
|
| 234 |
+
./pruned_transducer_stateless3/onnx_pretrained.py \
|
| 235 |
+
--encoder-model-filename $repo/exp/encoder-epoch-9999-avg-1.onnx \
|
| 236 |
+
--decoder-model-filename $repo/exp/decoder-epoch-9999-avg-1.onnx \
|
| 237 |
+
--joiner-model-filename $repo/exp/joiner-epoch-9999-avg-1.onnx \
|
| 238 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 239 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 240 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 241 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 242 |
+
|
| 243 |
+
rm -rf $repo
|
| 244 |
+
log "--------------------------------------------------------------------------"
|
| 245 |
+
|
| 246 |
+
log "=========================================================================="
|
| 247 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13
|
| 248 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 249 |
+
repo=$(basename $repo_url)
|
| 250 |
+
|
| 251 |
+
pushd $repo
|
| 252 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 253 |
+
git lfs pull --include "exp/pretrained-epoch-39-avg-7.pt"
|
| 254 |
+
|
| 255 |
+
cd exp
|
| 256 |
+
ln -s pretrained-epoch-39-avg-7.pt epoch-99.pt
|
| 257 |
+
popd
|
| 258 |
+
|
| 259 |
+
log "Export via torch.jit.script()"
|
| 260 |
+
|
| 261 |
+
./pruned_transducer_stateless5/export.py \
|
| 262 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 263 |
+
--epoch 99 \
|
| 264 |
+
--avg 1 \
|
| 265 |
+
--use-averaged-model 0 \
|
| 266 |
+
--exp-dir $repo/exp \
|
| 267 |
+
--num-encoder-layers 18 \
|
| 268 |
+
--dim-feedforward 2048 \
|
| 269 |
+
--nhead 8 \
|
| 270 |
+
--encoder-dim 512 \
|
| 271 |
+
--decoder-dim 512 \
|
| 272 |
+
--joiner-dim 512 \
|
| 273 |
+
--jit 1
|
| 274 |
+
|
| 275 |
+
log "Test exporting to ONNX format"
|
| 276 |
+
|
| 277 |
+
./pruned_transducer_stateless5/export-onnx.py \
|
| 278 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 279 |
+
--epoch 99 \
|
| 280 |
+
--avg 1 \
|
| 281 |
+
--use-averaged-model 0 \
|
| 282 |
+
--exp-dir $repo/exp \
|
| 283 |
+
--num-encoder-layers 18 \
|
| 284 |
+
--dim-feedforward 2048 \
|
| 285 |
+
--nhead 8 \
|
| 286 |
+
--encoder-dim 512 \
|
| 287 |
+
--decoder-dim 512 \
|
| 288 |
+
--joiner-dim 512
|
| 289 |
+
|
| 290 |
+
ls -lh $repo/exp
|
| 291 |
+
|
| 292 |
+
log "Run onnx_check.py"
|
| 293 |
+
|
| 294 |
+
./pruned_transducer_stateless5/onnx_check.py \
|
| 295 |
+
--jit-filename $repo/exp/cpu_jit.pt \
|
| 296 |
+
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 297 |
+
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 298 |
+
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
|
| 299 |
+
|
| 300 |
+
log "Run onnx_pretrained.py"
|
| 301 |
+
|
| 302 |
+
./pruned_transducer_stateless5/onnx_pretrained.py \
|
| 303 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 304 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 305 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
| 306 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 307 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 308 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 309 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 310 |
+
|
| 311 |
+
rm -rf $repo
|
| 312 |
+
log "--------------------------------------------------------------------------"
|
| 313 |
+
|
| 314 |
+
log "=========================================================================="
|
| 315 |
+
repo_url=
|
| 316 |
+
|
| 317 |
+
rm -rf $repo
|
| 318 |
+
log "--------------------------------------------------------------------------"
|
| 319 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11
|
| 320 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 321 |
+
repo=$(basename $repo_url)
|
| 322 |
+
|
| 323 |
+
pushd $repo
|
| 324 |
+
git lfs pull --include "exp/pretrained.pt"
|
| 325 |
+
|
| 326 |
+
cd exp
|
| 327 |
+
ln -s pretrained.pt epoch-99.pt
|
| 328 |
+
popd
|
| 329 |
+
|
| 330 |
+
log "Export via torch.jit.script()"
|
| 331 |
+
|
| 332 |
+
./pruned_transducer_stateless7/export.py \
|
| 333 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 334 |
+
--use-averaged-model 0 \
|
| 335 |
+
--epoch 99 \
|
| 336 |
+
--avg 1 \
|
| 337 |
+
--exp-dir $repo/exp \
|
| 338 |
+
--feedforward-dims "1024,1024,2048,2048,1024" \
|
| 339 |
+
--jit 1
|
| 340 |
+
|
| 341 |
+
log "Test exporting to ONNX format"
|
| 342 |
+
|
| 343 |
+
./pruned_transducer_stateless7/export-onnx.py \
|
| 344 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 345 |
+
--use-averaged-model 0 \
|
| 346 |
+
--epoch 99 \
|
| 347 |
+
--avg 1 \
|
| 348 |
+
--exp-dir $repo/exp \
|
| 349 |
+
--feedforward-dims "1024,1024,2048,2048,1024"
|
| 350 |
+
|
| 351 |
+
ls -lh $repo/exp
|
| 352 |
+
|
| 353 |
+
log "Run onnx_check.py"
|
| 354 |
+
|
| 355 |
+
./pruned_transducer_stateless7/onnx_check.py \
|
| 356 |
+
--jit-filename $repo/exp/cpu_jit.pt \
|
| 357 |
+
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 358 |
+
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 359 |
+
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
|
| 360 |
+
|
| 361 |
+
log "Run onnx_pretrained.py"
|
| 362 |
+
|
| 363 |
+
./pruned_transducer_stateless7/onnx_pretrained.py \
|
| 364 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 365 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 366 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
| 367 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 368 |
+
$repo/test_wavs/1089-134686-0001.wav \
|
| 369 |
+
$repo/test_wavs/1221-135766-0001.wav \
|
| 370 |
+
$repo/test_wavs/1221-135766-0002.wav
|
| 371 |
+
|
| 372 |
+
log "=========================================================================="
|
| 373 |
+
repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conv-emformer-transducer-stateless2-2022-07-05
|
| 374 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 375 |
+
repo=$(basename $repo_url)
|
| 376 |
+
|
| 377 |
+
pushd $repo
|
| 378 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 379 |
+
git lfs pull --include "exp/pretrained-epoch-30-avg-10-averaged.pt"
|
| 380 |
+
|
| 381 |
+
cd exp
|
| 382 |
+
ln -s pretrained-epoch-30-avg-10-averaged.pt epoch-99.pt
|
| 383 |
+
popd
|
| 384 |
+
|
| 385 |
+
log "Test exporting to ONNX format"
|
| 386 |
+
|
| 387 |
+
./conv_emformer_transducer_stateless2/export-onnx.py \
|
| 388 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 389 |
+
--use-averaged-model 0 \
|
| 390 |
+
--epoch 99 \
|
| 391 |
+
--avg 1 \
|
| 392 |
+
--exp-dir $repo/exp \
|
| 393 |
+
--num-encoder-layers 12 \
|
| 394 |
+
--chunk-length 32 \
|
| 395 |
+
--cnn-module-kernel 31 \
|
| 396 |
+
--left-context-length 32 \
|
| 397 |
+
--right-context-length 8 \
|
| 398 |
+
--memory-size 32
|
| 399 |
+
|
| 400 |
+
log "Run onnx_pretrained.py"
|
| 401 |
+
|
| 402 |
+
./conv_emformer_transducer_stateless2/onnx_pretrained.py \
|
| 403 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 404 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 405 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
| 406 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 407 |
+
$repo/test_wavs/1221-135766-0001.wav
|
| 408 |
+
|
| 409 |
+
rm -rf $repo
|
| 410 |
+
log "--------------------------------------------------------------------------"
|
| 411 |
+
|
| 412 |
+
log "=========================================================================="
|
| 413 |
+
repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
|
| 414 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone $repo_url
|
| 415 |
+
repo=$(basename $repo_url)
|
| 416 |
+
|
| 417 |
+
pushd $repo
|
| 418 |
+
git lfs pull --include "data/lang_bpe_500/bpe.model"
|
| 419 |
+
git lfs pull --include "exp/pretrained-iter-468000-avg-16.pt"
|
| 420 |
+
|
| 421 |
+
cd exp
|
| 422 |
+
ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
|
| 423 |
+
popd
|
| 424 |
+
|
| 425 |
+
log "Export via torch.jit.trace()"
|
| 426 |
+
|
| 427 |
+
./lstm_transducer_stateless2/export.py \
|
| 428 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 429 |
+
--use-averaged-model 0 \
|
| 430 |
+
--epoch 99 \
|
| 431 |
+
--avg 1 \
|
| 432 |
+
--exp-dir $repo/exp/ \
|
| 433 |
+
--jit-trace 1
|
| 434 |
+
|
| 435 |
+
log "Test exporting to ONNX format"
|
| 436 |
+
|
| 437 |
+
./lstm_transducer_stateless2/export-onnx.py \
|
| 438 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 439 |
+
--use-averaged-model 0 \
|
| 440 |
+
--epoch 99 \
|
| 441 |
+
--avg 1 \
|
| 442 |
+
--exp-dir $repo/exp
|
| 443 |
+
|
| 444 |
+
ls -lh $repo/exp
|
| 445 |
+
|
| 446 |
+
log "Run onnx_check.py"
|
| 447 |
+
|
| 448 |
+
./lstm_transducer_stateless2/onnx_check.py \
|
| 449 |
+
--jit-encoder-filename $repo/exp/encoder_jit_trace.pt \
|
| 450 |
+
--jit-decoder-filename $repo/exp/decoder_jit_trace.pt \
|
| 451 |
+
--jit-joiner-filename $repo/exp/joiner_jit_trace.pt \
|
| 452 |
+
--onnx-encoder-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 453 |
+
--onnx-decoder-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 454 |
+
--onnx-joiner-filename $repo/exp/joiner-epoch-99-avg-1.onnx
|
| 455 |
+
|
| 456 |
+
log "Run onnx_pretrained.py"
|
| 457 |
+
|
| 458 |
+
./lstm_transducer_stateless2/onnx_pretrained.py \
|
| 459 |
+
--encoder-model-filename $repo/exp/encoder-epoch-99-avg-1.onnx \
|
| 460 |
+
--decoder-model-filename $repo/exp/decoder-epoch-99-avg-1.onnx \
|
| 461 |
+
--joiner-model-filename $repo/exp/joiner-epoch-99-avg-1.onnx \
|
| 462 |
+
--tokens $repo/data/lang_bpe_500/tokens.txt \
|
| 463 |
+
$repo/test_wavs/1221-135766-0001.wav
|
| 464 |
+
|
| 465 |
+
rm -rf $repo
|
| 466 |
+
log "--------------------------------------------------------------------------"
|
.github/scripts/wenetspeech/ASR/run_rknn.sh
ADDED
|
@@ -0,0 +1,196 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
python3 -m pip install kaldi-native-fbank soundfile librosa
|
| 6 |
+
|
| 7 |
+
log() {
|
| 8 |
+
# This function is from espnet
|
| 9 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 10 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 11 |
+
}
|
| 12 |
+
|
| 13 |
+
cd egs/wenetspeech/ASR
|
| 14 |
+
|
| 15 |
+
#https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-small-chinese
|
| 16 |
+
function export_2025_03_02() {
|
| 17 |
+
d=exp_2025_03_02
|
| 18 |
+
mkdir $d
|
| 19 |
+
pushd $d
|
| 20 |
+
curl -SL -O https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/data/lang_char/tokens.txt
|
| 21 |
+
curl -SL -O https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/exp/pretrained.pt
|
| 22 |
+
mv pretrained.pt epoch-99.pt
|
| 23 |
+
|
| 24 |
+
curl -SL -o 0.wav https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/test_wavs/DEV_T0000000000.wav
|
| 25 |
+
curl -SL -o 1.wav https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/test_wavs/DEV_T0000000001.wav
|
| 26 |
+
curl -SL -o 2.wav https://huggingface.co/k2-fsa/icefall-asr-zipformer-wenetspeech-streaming-small/resolve/main/test_wavs/DEV_T0000000002.wav
|
| 27 |
+
ls -lh
|
| 28 |
+
popd
|
| 29 |
+
|
| 30 |
+
./zipformer/export-onnx-streaming.py \
|
| 31 |
+
--dynamic-batch 0 \
|
| 32 |
+
--enable-int8-quantization 0 \
|
| 33 |
+
--tokens $d/tokens.txt \
|
| 34 |
+
--use-averaged-model 0 \
|
| 35 |
+
--epoch 99 \
|
| 36 |
+
--avg 1 \
|
| 37 |
+
--exp-dir $d \
|
| 38 |
+
--use-ctc 0 \
|
| 39 |
+
--use-transducer 1 \
|
| 40 |
+
\
|
| 41 |
+
--num-encoder-layers 2,2,2,2,2,2 \
|
| 42 |
+
--feedforward-dim 512,768,768,768,768,768 \
|
| 43 |
+
--encoder-dim 192,256,256,256,256,256 \
|
| 44 |
+
--encoder-unmasked-dim 192,192,192,192,192,192 \
|
| 45 |
+
\
|
| 46 |
+
--chunk-size 32 \
|
| 47 |
+
--left-context-frames 128 \
|
| 48 |
+
--causal 1
|
| 49 |
+
|
| 50 |
+
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
|
| 51 |
+
dst=sherpa-onnx-$platform-streaming-zipformer-small-zh-2025-03-02
|
| 52 |
+
mkdir -p $dst
|
| 53 |
+
|
| 54 |
+
./zipformer/export_rknn_transducer_streaming.py \
|
| 55 |
+
--in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 56 |
+
--in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 57 |
+
--in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 58 |
+
--out-encoder $dst/encoder.rknn \
|
| 59 |
+
--out-decoder $dst/decoder.rknn \
|
| 60 |
+
--out-joiner $dst/joiner.rknn \
|
| 61 |
+
--target-platform $platform
|
| 62 |
+
|
| 63 |
+
cp $d/tokens.txt $dst
|
| 64 |
+
mkdir $dst/test_wavs
|
| 65 |
+
cp $d/*.wav $dst/test_wavs
|
| 66 |
+
|
| 67 |
+
tar cjvf $dst.tar.bz2 $dst
|
| 68 |
+
ls -lh $dst.tar.bz2
|
| 69 |
+
mv $dst.tar.bz2 /icefall/
|
| 70 |
+
ls -lh $dst/
|
| 71 |
+
echo "---"
|
| 72 |
+
|
| 73 |
+
rm -rf $dst
|
| 74 |
+
done
|
| 75 |
+
rm -rf $d
|
| 76 |
+
}
|
| 77 |
+
|
| 78 |
+
# https://k2-fsa.github.io/sherpa/onnx/pretrained_models/online-transducer/zipformer-transducer-models.html#k2-fsa-icefall-asr-zipformer-wenetspeech-streaming-large-chinese
|
| 79 |
+
function export_2025_03_03() {
|
| 80 |
+
d=exp_2025_03_03
|
| 81 |
+
mkdir $d
|
| 82 |
+
pushd $d
|
| 83 |
+
curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt
|
| 84 |
+
curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/pretrained.pt
|
| 85 |
+
mv pretrained.pt epoch-99.pt
|
| 86 |
+
|
| 87 |
+
curl -SL -o 0.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000000.wav
|
| 88 |
+
curl -SL -o 1.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000001.wav
|
| 89 |
+
curl -SL -o 2.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000002.wav
|
| 90 |
+
ls -lh
|
| 91 |
+
popd
|
| 92 |
+
|
| 93 |
+
./zipformer/export-onnx-streaming.py \
|
| 94 |
+
--dynamic-batch 0 \
|
| 95 |
+
--enable-int8-quantization 0 \
|
| 96 |
+
--tokens $d/tokens.txt \
|
| 97 |
+
--use-averaged-model 0 \
|
| 98 |
+
--epoch 99 \
|
| 99 |
+
--avg 1 \
|
| 100 |
+
--exp-dir $d \
|
| 101 |
+
--use-ctc 0 \
|
| 102 |
+
--use-transducer 1 \
|
| 103 |
+
\
|
| 104 |
+
--chunk-size 32 \
|
| 105 |
+
--left-context-frames 128 \
|
| 106 |
+
--causal 1
|
| 107 |
+
|
| 108 |
+
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
|
| 109 |
+
dst=sherpa-onnx-$platform-streaming-zipformer-zh-2025-03-03
|
| 110 |
+
mkdir -p $dst
|
| 111 |
+
|
| 112 |
+
./zipformer/export_rknn_transducer_streaming.py \
|
| 113 |
+
--in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 114 |
+
--in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 115 |
+
--in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 116 |
+
--out-encoder $dst/encoder.rknn \
|
| 117 |
+
--out-decoder $dst/decoder.rknn \
|
| 118 |
+
--out-joiner $dst/joiner.rknn \
|
| 119 |
+
--target-platform $platform
|
| 120 |
+
|
| 121 |
+
cp $d/tokens.txt $dst
|
| 122 |
+
mkdir $dst/test_wavs
|
| 123 |
+
cp $d/*.wav $dst/test_wavs
|
| 124 |
+
|
| 125 |
+
tar cjvf $dst.tar.bz2 $dst
|
| 126 |
+
ls -lh $dst.tar.bz2
|
| 127 |
+
mv $dst.tar.bz2 /icefall/
|
| 128 |
+
ls -lh $dst/
|
| 129 |
+
echo "---"
|
| 130 |
+
ls -lh $dst.tar.bz2
|
| 131 |
+
|
| 132 |
+
rm -rf $dst
|
| 133 |
+
done
|
| 134 |
+
rm -rf $d
|
| 135 |
+
}
|
| 136 |
+
|
| 137 |
+
function export_2023_06_15() {
|
| 138 |
+
d=exp_2023_06_15
|
| 139 |
+
mkdir $d
|
| 140 |
+
pushd $d
|
| 141 |
+
curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/data/lang_char/tokens.txt
|
| 142 |
+
curl -SL -O https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/exp/pretrained.pt
|
| 143 |
+
mv pretrained.pt epoch-99.pt
|
| 144 |
+
|
| 145 |
+
curl -SL -o 0.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000000.wav
|
| 146 |
+
curl -SL -o 1.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000001.wav
|
| 147 |
+
curl -SL -o 2.wav https://huggingface.co/pkufool/icefall-asr-zipformer-streaming-wenetspeech-20230615/resolve/main/test_wavs/DEV_T0000000002.wav
|
| 148 |
+
ls -lh
|
| 149 |
+
popd
|
| 150 |
+
|
| 151 |
+
./zipformer/export-onnx-streaming.py \
|
| 152 |
+
--dynamic-batch 0 \
|
| 153 |
+
--enable-int8-quantization 0 \
|
| 154 |
+
--tokens $d/tokens.txt \
|
| 155 |
+
--use-averaged-model 0 \
|
| 156 |
+
--epoch 99 \
|
| 157 |
+
--avg 1 \
|
| 158 |
+
--exp-dir $d \
|
| 159 |
+
--use-ctc 0 \
|
| 160 |
+
--use-transducer 1 \
|
| 161 |
+
\
|
| 162 |
+
--chunk-size 32 \
|
| 163 |
+
--left-context-frames 128 \
|
| 164 |
+
--causal 1
|
| 165 |
+
|
| 166 |
+
for platform in rk3562 rk3566 rk3568 rk3576 rk3588; do
|
| 167 |
+
dst=sherpa-onnx-$platform-streaming-zipformer-zh-2023-06-15
|
| 168 |
+
mkdir -p $dst
|
| 169 |
+
|
| 170 |
+
./zipformer/export_rknn_transducer_streaming.py \
|
| 171 |
+
--in-encoder $d/encoder-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 172 |
+
--in-decoder $d/decoder-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 173 |
+
--in-joiner $d/joiner-epoch-99-avg-1-chunk-32-left-128.onnx \
|
| 174 |
+
--out-encoder $dst/encoder.rknn \
|
| 175 |
+
--out-decoder $dst/decoder.rknn \
|
| 176 |
+
--out-joiner $dst/joiner.rknn \
|
| 177 |
+
--target-platform $platform
|
| 178 |
+
|
| 179 |
+
cp $d/tokens.txt $dst
|
| 180 |
+
mkdir $dst/test_wavs
|
| 181 |
+
cp $d/*.wav $dst/test_wavs
|
| 182 |
+
|
| 183 |
+
tar cjvf $dst.tar.bz2 $dst
|
| 184 |
+
ls -lh $dst.tar.bz2
|
| 185 |
+
mv $dst.tar.bz2 /icefall/
|
| 186 |
+
ls -lh $dst/
|
| 187 |
+
echo "---"
|
| 188 |
+
ls -lh $dst.tar.bz2
|
| 189 |
+
|
| 190 |
+
rm -rf $dst
|
| 191 |
+
done
|
| 192 |
+
}
|
| 193 |
+
|
| 194 |
+
export_2025_03_02
|
| 195 |
+
export_2025_03_03
|
| 196 |
+
export_2023_06_15
|
.github/scripts/yesno/ASR/run.sh
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env bash
|
| 2 |
+
|
| 3 |
+
set -ex
|
| 4 |
+
|
| 5 |
+
log() {
|
| 6 |
+
# This function is from espnet
|
| 7 |
+
local fname=${BASH_SOURCE[1]##*/}
|
| 8 |
+
echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
|
| 9 |
+
}
|
| 10 |
+
|
| 11 |
+
cd egs/yesno/ASR
|
| 12 |
+
|
| 13 |
+
log "data preparation"
|
| 14 |
+
./prepare.sh
|
| 15 |
+
|
| 16 |
+
log "training"
|
| 17 |
+
python3 ./tdnn/train.py
|
| 18 |
+
|
| 19 |
+
log "decoding"
|
| 20 |
+
python3 ./tdnn/decode.py
|
| 21 |
+
|
| 22 |
+
log "export to pretrained.pt"
|
| 23 |
+
|
| 24 |
+
python3 ./tdnn/export.py --epoch 14 --avg 2
|
| 25 |
+
|
| 26 |
+
python3 ./tdnn/pretrained.py \
|
| 27 |
+
--checkpoint ./tdnn/exp/pretrained.pt \
|
| 28 |
+
--HLG ./data/lang_phone/HLG.pt \
|
| 29 |
+
--words-file ./data/lang_phone/words.txt \
|
| 30 |
+
download/waves_yesno/0_0_0_1_0_0_0_1.wav \
|
| 31 |
+
download/waves_yesno/0_0_1_0_0_0_1_0.wav
|
| 32 |
+
|
| 33 |
+
log "Test exporting to torchscript"
|
| 34 |
+
python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
|
| 35 |
+
|
| 36 |
+
python3 ./tdnn/jit_pretrained.py \
|
| 37 |
+
--nn-model ./tdnn/exp/cpu_jit.pt \
|
| 38 |
+
--HLG ./data/lang_phone/HLG.pt \
|
| 39 |
+
--words-file ./data/lang_phone/words.txt \
|
| 40 |
+
download/waves_yesno/0_0_0_1_0_0_0_1.wav \
|
| 41 |
+
download/waves_yesno/0_0_1_0_0_0_1_0.wav
|
| 42 |
+
|
| 43 |
+
log "Test exporting to onnx"
|
| 44 |
+
python3 ./tdnn/export_onnx.py --epoch 14 --avg 2
|
| 45 |
+
|
| 46 |
+
log "Test float32 model"
|
| 47 |
+
python3 ./tdnn/onnx_pretrained.py \
|
| 48 |
+
--nn-model ./tdnn/exp/model-epoch-14-avg-2.onnx \
|
| 49 |
+
--HLG ./data/lang_phone/HLG.pt \
|
| 50 |
+
--words-file ./data/lang_phone/words.txt \
|
| 51 |
+
download/waves_yesno/0_0_0_1_0_0_0_1.wav \
|
| 52 |
+
download/waves_yesno/0_0_1_0_0_0_1_0.wav
|
| 53 |
+
|
| 54 |
+
log "Test int8 model"
|
| 55 |
+
python3 ./tdnn/onnx_pretrained.py \
|
| 56 |
+
--nn-model ./tdnn/exp/model-epoch-14-avg-2.int8.onnx \
|
| 57 |
+
--HLG ./data/lang_phone/HLG.pt \
|
| 58 |
+
--words-file ./data/lang_phone/words.txt \
|
| 59 |
+
download/waves_yesno/0_0_0_1_0_0_0_1.wav \
|
| 60 |
+
download/waves_yesno/0_0_1_0_0_0_1_0.wav
|
| 61 |
+
|
| 62 |
+
log "Test decoding with H"
|
| 63 |
+
python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
|
| 64 |
+
|
| 65 |
+
python3 ./tdnn/jit_pretrained_decode_with_H.py \
|
| 66 |
+
--nn-model ./tdnn/exp/cpu_jit.pt \
|
| 67 |
+
--H ./data/lang_phone/H.fst \
|
| 68 |
+
--tokens ./data/lang_phone/tokens.txt \
|
| 69 |
+
./download/waves_yesno/0_0_0_1_0_0_0_1.wav \
|
| 70 |
+
./download/waves_yesno/0_0_1_0_0_0_1_0.wav \
|
| 71 |
+
./download/waves_yesno/0_0_1_0_0_1_1_1.wav
|
| 72 |
+
|
| 73 |
+
log "Test decoding with HL"
|
| 74 |
+
python3 ./tdnn/export.py --epoch 14 --avg 2 --jit 1
|
| 75 |
+
|
| 76 |
+
python3 ./tdnn/jit_pretrained_decode_with_HL.py \
|
| 77 |
+
--nn-model ./tdnn/exp/cpu_jit.pt \
|
| 78 |
+
--HL ./data/lang_phone/HL.fst \
|
| 79 |
+
--words ./data/lang_phone/words.txt \
|
| 80 |
+
./download/waves_yesno/0_0_0_1_0_0_0_1.wav \
|
| 81 |
+
./download/waves_yesno/0_0_1_0_0_0_1_0.wav \
|
| 82 |
+
./download/waves_yesno/0_0_1_0_0_1_1_1.wav
|
| 83 |
+
|
| 84 |
+
log "Show generated files"
|
| 85 |
+
ls -lh tdnn/exp
|
| 86 |
+
ls -lh data/lang_phone
|
.github/workflows/aishell.yml
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: aishell
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- master
|
| 7 |
+
|
| 8 |
+
pull_request:
|
| 9 |
+
branches:
|
| 10 |
+
- master
|
| 11 |
+
|
| 12 |
+
workflow_dispatch:
|
| 13 |
+
|
| 14 |
+
concurrency:
|
| 15 |
+
group: aishell-${{ github.ref }}
|
| 16 |
+
cancel-in-progress: true
|
| 17 |
+
|
| 18 |
+
jobs:
|
| 19 |
+
generate_build_matrix:
|
| 20 |
+
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
|
| 21 |
+
|
| 22 |
+
# see https://github.com/pytorch/pytorch/pull/50633
|
| 23 |
+
runs-on: ubuntu-latest
|
| 24 |
+
outputs:
|
| 25 |
+
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
| 26 |
+
steps:
|
| 27 |
+
- uses: actions/checkout@v4
|
| 28 |
+
with:
|
| 29 |
+
fetch-depth: 0
|
| 30 |
+
- name: Generating build matrix
|
| 31 |
+
id: set-matrix
|
| 32 |
+
run: |
|
| 33 |
+
# outputting for debugging purposes
|
| 34 |
+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
|
| 35 |
+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
|
| 36 |
+
echo "::set-output name=matrix::${MATRIX}"
|
| 37 |
+
aishell:
|
| 38 |
+
needs: generate_build_matrix
|
| 39 |
+
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
|
| 40 |
+
runs-on: ubuntu-latest
|
| 41 |
+
strategy:
|
| 42 |
+
fail-fast: false
|
| 43 |
+
matrix:
|
| 44 |
+
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
| 45 |
+
|
| 46 |
+
steps:
|
| 47 |
+
- uses: actions/checkout@v4
|
| 48 |
+
with:
|
| 49 |
+
fetch-depth: 0
|
| 50 |
+
|
| 51 |
+
- name: Free space
|
| 52 |
+
shell: bash
|
| 53 |
+
run: |
|
| 54 |
+
df -h
|
| 55 |
+
rm -rf /opt/hostedtoolcache
|
| 56 |
+
df -h
|
| 57 |
+
echo "pwd: $PWD"
|
| 58 |
+
echo "github.workspace ${{ github.workspace }}"
|
| 59 |
+
|
| 60 |
+
- name: Run aishell tests
|
| 61 |
+
uses: addnab/docker-run-action@v3
|
| 62 |
+
with:
|
| 63 |
+
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
|
| 64 |
+
options: |
|
| 65 |
+
--volume ${{ github.workspace }}/:/icefall
|
| 66 |
+
shell: bash
|
| 67 |
+
run: |
|
| 68 |
+
export PYTHONPATH=/icefall:$PYTHONPATH
|
| 69 |
+
cd /icefall
|
| 70 |
+
git config --global --add safe.directory /icefall
|
| 71 |
+
|
| 72 |
+
.github/scripts/aishell/ASR/run.sh
|
.github/workflows/audioset.yml
ADDED
|
@@ -0,0 +1,137 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: audioset
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- master
|
| 7 |
+
|
| 8 |
+
pull_request:
|
| 9 |
+
branches:
|
| 10 |
+
- master
|
| 11 |
+
|
| 12 |
+
workflow_dispatch:
|
| 13 |
+
|
| 14 |
+
concurrency:
|
| 15 |
+
group: audioset-${{ github.ref }}
|
| 16 |
+
cancel-in-progress: true
|
| 17 |
+
|
| 18 |
+
jobs:
|
| 19 |
+
generate_build_matrix:
|
| 20 |
+
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
|
| 21 |
+
# see https://github.com/pytorch/pytorch/pull/50633
|
| 22 |
+
runs-on: ubuntu-latest
|
| 23 |
+
outputs:
|
| 24 |
+
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
| 25 |
+
steps:
|
| 26 |
+
- uses: actions/checkout@v4
|
| 27 |
+
with:
|
| 28 |
+
fetch-depth: 0
|
| 29 |
+
- name: Generating build matrix
|
| 30 |
+
id: set-matrix
|
| 31 |
+
run: |
|
| 32 |
+
# outputting for debugging purposes
|
| 33 |
+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
|
| 34 |
+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
|
| 35 |
+
echo "::set-output name=matrix::${MATRIX}"
|
| 36 |
+
|
| 37 |
+
audioset:
|
| 38 |
+
needs: generate_build_matrix
|
| 39 |
+
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
|
| 40 |
+
runs-on: ubuntu-latest
|
| 41 |
+
strategy:
|
| 42 |
+
fail-fast: false
|
| 43 |
+
matrix:
|
| 44 |
+
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
| 45 |
+
|
| 46 |
+
steps:
|
| 47 |
+
- uses: actions/checkout@v4
|
| 48 |
+
with:
|
| 49 |
+
fetch-depth: 0
|
| 50 |
+
|
| 51 |
+
- name: Free space
|
| 52 |
+
shell: bash
|
| 53 |
+
run: |
|
| 54 |
+
ls -lh
|
| 55 |
+
df -h
|
| 56 |
+
rm -rf /opt/hostedtoolcache
|
| 57 |
+
df -h
|
| 58 |
+
echo "pwd: $PWD"
|
| 59 |
+
echo "github.workspace ${{ github.workspace }}"
|
| 60 |
+
|
| 61 |
+
- name: Run tests
|
| 62 |
+
uses: addnab/docker-run-action@v3
|
| 63 |
+
with:
|
| 64 |
+
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
|
| 65 |
+
options: |
|
| 66 |
+
--volume ${{ github.workspace }}/:/icefall
|
| 67 |
+
shell: bash
|
| 68 |
+
run: |
|
| 69 |
+
export PYTHONPATH=/icefall:$PYTHONPATH
|
| 70 |
+
cd /icefall
|
| 71 |
+
git config --global --add safe.directory /icefall
|
| 72 |
+
|
| 73 |
+
.github/scripts/audioset/AT/run.sh
|
| 74 |
+
|
| 75 |
+
- name: Show model files
|
| 76 |
+
shell: bash
|
| 77 |
+
run: |
|
| 78 |
+
sudo chown -R runner ./model-onnx
|
| 79 |
+
ls -lh ./model-onnx
|
| 80 |
+
chmod -x ./model-onnx/class_labels_indices.csv
|
| 81 |
+
|
| 82 |
+
echo "----------"
|
| 83 |
+
ls -lh ./model-onnx/*
|
| 84 |
+
|
| 85 |
+
- name: Upload model to huggingface
|
| 86 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
|
| 87 |
+
env:
|
| 88 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 89 |
+
uses: nick-fields/retry@v3
|
| 90 |
+
with:
|
| 91 |
+
max_attempts: 20
|
| 92 |
+
timeout_seconds: 200
|
| 93 |
+
shell: bash
|
| 94 |
+
command: |
|
| 95 |
+
git config --global user.email "csukuangfj@gmail.com"
|
| 96 |
+
git config --global user.name "Fangjun Kuang"
|
| 97 |
+
|
| 98 |
+
rm -rf huggingface
|
| 99 |
+
export GIT_LFS_SKIP_SMUDGE=1
|
| 100 |
+
|
| 101 |
+
git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 huggingface
|
| 102 |
+
cd huggingface
|
| 103 |
+
git fetch
|
| 104 |
+
git pull
|
| 105 |
+
git merge -m "merge remote" --ff origin main
|
| 106 |
+
cp ../model-onnx/*.onnx ./
|
| 107 |
+
cp ../model-onnx/*.csv ./
|
| 108 |
+
cp -a ../model-onnx/test_wavs ./
|
| 109 |
+
ls -lh
|
| 110 |
+
git add .
|
| 111 |
+
git status
|
| 112 |
+
git commit -m "update models"
|
| 113 |
+
git status
|
| 114 |
+
|
| 115 |
+
git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-audio-tagging-2024-04-09 main || true
|
| 116 |
+
rm -rf huggingface
|
| 117 |
+
|
| 118 |
+
- name: Prepare for release
|
| 119 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
|
| 120 |
+
shell: bash
|
| 121 |
+
run: |
|
| 122 |
+
d=sherpa-onnx-zipformer-audio-tagging-2024-04-09
|
| 123 |
+
mv ./model-onnx $d
|
| 124 |
+
tar cjvf ${d}.tar.bz2 $d
|
| 125 |
+
ls -lh
|
| 126 |
+
|
| 127 |
+
- name: Release exported onnx models
|
| 128 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
|
| 129 |
+
uses: svenstaro/upload-release-action@v2
|
| 130 |
+
with:
|
| 131 |
+
file_glob: true
|
| 132 |
+
overwrite: true
|
| 133 |
+
file: sherpa-onnx-*.tar.bz2
|
| 134 |
+
repo_name: k2-fsa/sherpa-onnx
|
| 135 |
+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
| 136 |
+
tag: audio-tagging-models
|
| 137 |
+
|
.github/workflows/baker_zh.yml
ADDED
|
@@ -0,0 +1,152 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: baker_zh
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- master
|
| 7 |
+
- baker-matcha-2
|
| 8 |
+
|
| 9 |
+
pull_request:
|
| 10 |
+
branches:
|
| 11 |
+
- master
|
| 12 |
+
|
| 13 |
+
workflow_dispatch:
|
| 14 |
+
|
| 15 |
+
concurrency:
|
| 16 |
+
group: baker-zh-${{ github.ref }}
|
| 17 |
+
cancel-in-progress: true
|
| 18 |
+
|
| 19 |
+
jobs:
|
| 20 |
+
generate_build_matrix:
|
| 21 |
+
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
|
| 22 |
+
# see https://github.com/pytorch/pytorch/pull/50633
|
| 23 |
+
runs-on: ubuntu-latest
|
| 24 |
+
outputs:
|
| 25 |
+
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
| 26 |
+
steps:
|
| 27 |
+
- uses: actions/checkout@v4
|
| 28 |
+
with:
|
| 29 |
+
fetch-depth: 0
|
| 30 |
+
- name: Generating build matrix
|
| 31 |
+
id: set-matrix
|
| 32 |
+
run: |
|
| 33 |
+
# outputting for debugging purposes
|
| 34 |
+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
|
| 35 |
+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
|
| 36 |
+
echo "::set-output name=matrix::${MATRIX}"
|
| 37 |
+
|
| 38 |
+
baker_zh:
|
| 39 |
+
needs: generate_build_matrix
|
| 40 |
+
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
|
| 41 |
+
runs-on: ubuntu-latest
|
| 42 |
+
strategy:
|
| 43 |
+
fail-fast: false
|
| 44 |
+
matrix:
|
| 45 |
+
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
| 46 |
+
|
| 47 |
+
steps:
|
| 48 |
+
- uses: actions/checkout@v4
|
| 49 |
+
with:
|
| 50 |
+
fetch-depth: 0
|
| 51 |
+
|
| 52 |
+
- name: Free space
|
| 53 |
+
shell: bash
|
| 54 |
+
run: |
|
| 55 |
+
ls -lh
|
| 56 |
+
df -h
|
| 57 |
+
rm -rf /opt/hostedtoolcache
|
| 58 |
+
df -h
|
| 59 |
+
echo "pwd: $PWD"
|
| 60 |
+
echo "github.workspace ${{ github.workspace }}"
|
| 61 |
+
|
| 62 |
+
- name: Run tests
|
| 63 |
+
uses: addnab/docker-run-action@v3
|
| 64 |
+
with:
|
| 65 |
+
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
|
| 66 |
+
options: |
|
| 67 |
+
--volume ${{ github.workspace }}/:/icefall
|
| 68 |
+
shell: bash
|
| 69 |
+
run: |
|
| 70 |
+
export PYTHONPATH=/icefall:$PYTHONPATH
|
| 71 |
+
cd /icefall
|
| 72 |
+
|
| 73 |
+
pip install onnx==1.17.0
|
| 74 |
+
|
| 75 |
+
pip list
|
| 76 |
+
|
| 77 |
+
git config --global --add safe.directory /icefall
|
| 78 |
+
|
| 79 |
+
.github/scripts/baker_zh/TTS/run-matcha.sh
|
| 80 |
+
|
| 81 |
+
- name: display files
|
| 82 |
+
shell: bash
|
| 83 |
+
run: |
|
| 84 |
+
ls -lh
|
| 85 |
+
|
| 86 |
+
- uses: actions/upload-artifact@v4
|
| 87 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 88 |
+
with:
|
| 89 |
+
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
|
| 90 |
+
path: ./*.wav
|
| 91 |
+
|
| 92 |
+
- uses: actions/upload-artifact@v4
|
| 93 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 94 |
+
with:
|
| 95 |
+
name: step-2
|
| 96 |
+
path: ./model-steps-2.onnx
|
| 97 |
+
|
| 98 |
+
- uses: actions/upload-artifact@v4
|
| 99 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 100 |
+
with:
|
| 101 |
+
name: step-3
|
| 102 |
+
path: ./model-steps-3.onnx
|
| 103 |
+
|
| 104 |
+
- uses: actions/upload-artifact@v4
|
| 105 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 106 |
+
with:
|
| 107 |
+
name: step-4
|
| 108 |
+
path: ./model-steps-4.onnx
|
| 109 |
+
|
| 110 |
+
- uses: actions/upload-artifact@v4
|
| 111 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 112 |
+
with:
|
| 113 |
+
name: step-5
|
| 114 |
+
path: ./model-steps-5.onnx
|
| 115 |
+
|
| 116 |
+
- uses: actions/upload-artifact@v4
|
| 117 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 118 |
+
with:
|
| 119 |
+
name: step-6
|
| 120 |
+
path: ./model-steps-6.onnx
|
| 121 |
+
|
| 122 |
+
- name: Upload models to huggingface
|
| 123 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
|
| 124 |
+
shell: bash
|
| 125 |
+
env:
|
| 126 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 127 |
+
run: |
|
| 128 |
+
d=matcha-icefall-zh-baker
|
| 129 |
+
|
| 130 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/$d hf
|
| 131 |
+
cp -av $d/* hf/
|
| 132 |
+
|
| 133 |
+
pushd hf
|
| 134 |
+
git add .
|
| 135 |
+
|
| 136 |
+
git config --global user.name "csukuangfj"
|
| 137 |
+
git config --global user.email "csukuangfj@gmail.com"
|
| 138 |
+
git config --global lfs.allowincompletepush true
|
| 139 |
+
|
| 140 |
+
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
|
| 141 |
+
popd
|
| 142 |
+
|
| 143 |
+
- name: Release exported onnx models
|
| 144 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
|
| 145 |
+
uses: svenstaro/upload-release-action@v2
|
| 146 |
+
with:
|
| 147 |
+
file_glob: true
|
| 148 |
+
overwrite: true
|
| 149 |
+
file: matcha-icefall-*.tar.bz2
|
| 150 |
+
repo_name: k2-fsa/sherpa-onnx
|
| 151 |
+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
| 152 |
+
tag: tts-models
|
.github/workflows/build-cpu-docker.yml
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: build-cpu-docker
|
| 2 |
+
on:
|
| 3 |
+
workflow_dispatch:
|
| 4 |
+
|
| 5 |
+
concurrency:
|
| 6 |
+
group: build-cpu-docker-${{ github.ref }}
|
| 7 |
+
cancel-in-progress: true
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
generate_build_matrix:
|
| 11 |
+
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
|
| 12 |
+
# see https://github.com/pytorch/pytorch/pull/50633
|
| 13 |
+
runs-on: ubuntu-latest
|
| 14 |
+
outputs:
|
| 15 |
+
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
| 16 |
+
steps:
|
| 17 |
+
- uses: actions/checkout@v4
|
| 18 |
+
with:
|
| 19 |
+
fetch-depth: 0
|
| 20 |
+
- name: Generating build matrix
|
| 21 |
+
id: set-matrix
|
| 22 |
+
run: |
|
| 23 |
+
# outputting for debugging purposes
|
| 24 |
+
python ./.github/scripts/docker/generate_build_matrix.py
|
| 25 |
+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py)
|
| 26 |
+
echo "::set-output name=matrix::${MATRIX}"
|
| 27 |
+
build-cpu-docker:
|
| 28 |
+
needs: generate_build_matrix
|
| 29 |
+
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
|
| 30 |
+
runs-on: ubuntu-latest
|
| 31 |
+
strategy:
|
| 32 |
+
fail-fast: false
|
| 33 |
+
matrix:
|
| 34 |
+
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
| 35 |
+
|
| 36 |
+
steps:
|
| 37 |
+
# refer to https://github.com/actions/checkout
|
| 38 |
+
- uses: actions/checkout@v4
|
| 39 |
+
with:
|
| 40 |
+
fetch-depth: 0
|
| 41 |
+
|
| 42 |
+
- name: Free space
|
| 43 |
+
shell: bash
|
| 44 |
+
run: |
|
| 45 |
+
df -h
|
| 46 |
+
rm -rf /opt/hostedtoolcache
|
| 47 |
+
df -h
|
| 48 |
+
|
| 49 |
+
- name: 'Login to GitHub Container Registry'
|
| 50 |
+
uses: docker/login-action@v2
|
| 51 |
+
with:
|
| 52 |
+
registry: ghcr.io
|
| 53 |
+
username: ${{ github.actor }}
|
| 54 |
+
password: ${{ secrets.GITHUB_TOKEN }}
|
| 55 |
+
|
| 56 |
+
- name: Build docker Image
|
| 57 |
+
shell: bash
|
| 58 |
+
run: |
|
| 59 |
+
cd .github/scripts/docker
|
| 60 |
+
torch_version=${{ matrix.torch-version }}
|
| 61 |
+
torchaudio_version=${{ matrix.torchaudio-version }}
|
| 62 |
+
|
| 63 |
+
echo "torch_version: $torch_version"
|
| 64 |
+
echo "torchaudio_version: $torchaudio_version"
|
| 65 |
+
|
| 66 |
+
version=${{ matrix.version }}
|
| 67 |
+
|
| 68 |
+
tag=ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v$version
|
| 69 |
+
echo "tag: $tag"
|
| 70 |
+
|
| 71 |
+
docker build \
|
| 72 |
+
-t $tag \
|
| 73 |
+
--build-arg PYTHON_VERSION=${{ matrix.python-version }} \
|
| 74 |
+
--build-arg TORCH_VERSION=$torch_version \
|
| 75 |
+
--build-arg TORCHAUDIO_VERSION=$torchaudio_version \
|
| 76 |
+
--build-arg K2_VERSION=${{ matrix.k2-version }} \
|
| 77 |
+
--build-arg KALDIFEAT_VERSION=${{ matrix.kaldifeat-version }} \
|
| 78 |
+
.
|
| 79 |
+
|
| 80 |
+
docker image ls
|
| 81 |
+
docker push $tag
|
.github/workflows/build-doc.yml
ADDED
|
@@ -0,0 +1,74 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2022 Xiaomi Corp. (author: Fangjun Kuang)
|
| 2 |
+
|
| 3 |
+
# See ../../LICENSE for clarification regarding multiple authors
|
| 4 |
+
#
|
| 5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
+
# you may not use this file except in compliance with the License.
|
| 7 |
+
# You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
+
# See the License for the specific language governing permissions and
|
| 15 |
+
# limitations under the License.
|
| 16 |
+
|
| 17 |
+
# refer to https://github.com/actions/starter-workflows/pull/47/files
|
| 18 |
+
|
| 19 |
+
# You can access it at https://k2-fsa.github.io/icefall/
|
| 20 |
+
name: Generate doc
|
| 21 |
+
on:
|
| 22 |
+
push:
|
| 23 |
+
branches:
|
| 24 |
+
- master
|
| 25 |
+
- doc
|
| 26 |
+
pull_request:
|
| 27 |
+
types: [labeled]
|
| 28 |
+
|
| 29 |
+
workflow_dispatch:
|
| 30 |
+
|
| 31 |
+
concurrency:
|
| 32 |
+
group: build_doc-${{ github.ref }}
|
| 33 |
+
cancel-in-progress: true
|
| 34 |
+
|
| 35 |
+
jobs:
|
| 36 |
+
build-doc:
|
| 37 |
+
# if: github.event.label.name == 'doc' || github.event_name == 'push'
|
| 38 |
+
runs-on: ${{ matrix.os }}
|
| 39 |
+
strategy:
|
| 40 |
+
fail-fast: false
|
| 41 |
+
matrix:
|
| 42 |
+
os: [ubuntu-latest]
|
| 43 |
+
python-version: ["3.8"]
|
| 44 |
+
steps:
|
| 45 |
+
# refer to https://github.com/actions/checkout
|
| 46 |
+
- uses: actions/checkout@v4
|
| 47 |
+
with:
|
| 48 |
+
fetch-depth: 0
|
| 49 |
+
|
| 50 |
+
- name: Setup Python ${{ matrix.python-version }}
|
| 51 |
+
uses: actions/setup-python@v2
|
| 52 |
+
with:
|
| 53 |
+
python-version: ${{ matrix.python-version }}
|
| 54 |
+
|
| 55 |
+
- name: Display Python version
|
| 56 |
+
run: python -c "import sys; print(sys.version)"
|
| 57 |
+
|
| 58 |
+
- name: Build doc
|
| 59 |
+
shell: bash
|
| 60 |
+
run: |
|
| 61 |
+
.github/scripts/generate-piper-phonemize-page.py
|
| 62 |
+
cd docs
|
| 63 |
+
python3 -m pip install -r ./requirements.txt
|
| 64 |
+
make html
|
| 65 |
+
touch build/html/.nojekyll
|
| 66 |
+
|
| 67 |
+
cp -v ../piper_phonemize.html ./build/html/
|
| 68 |
+
|
| 69 |
+
- name: Deploy
|
| 70 |
+
uses: peaceiris/actions-gh-pages@v3
|
| 71 |
+
with:
|
| 72 |
+
github_token: ${{ secrets.GITHUB_TOKEN }}
|
| 73 |
+
publish_dir: ./docs/build/html
|
| 74 |
+
publish_branch: gh-pages
|
.github/workflows/build-docker-image.yml
ADDED
|
@@ -0,0 +1,84 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# see also
|
| 2 |
+
# https://docs.github.com/en/actions/publishing-packages/publishing-docker-images#publishing-images-to-github-packages
|
| 3 |
+
name: Build docker image
|
| 4 |
+
on:
|
| 5 |
+
workflow_dispatch:
|
| 6 |
+
|
| 7 |
+
concurrency:
|
| 8 |
+
group: build_docker-${{ github.ref }}
|
| 9 |
+
cancel-in-progress: true
|
| 10 |
+
|
| 11 |
+
jobs:
|
| 12 |
+
build-docker-image:
|
| 13 |
+
name: ${{ matrix.image }}
|
| 14 |
+
runs-on: ${{ matrix.os }}
|
| 15 |
+
strategy:
|
| 16 |
+
fail-fast: false
|
| 17 |
+
matrix:
|
| 18 |
+
os: [ubuntu-latest]
|
| 19 |
+
image: ["torch2.4.1-cuda12.4", "torch2.4.1-cuda12.1", "torch2.4.1-cuda11.8", "torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
|
| 20 |
+
|
| 21 |
+
steps:
|
| 22 |
+
# refer to https://github.com/actions/checkout
|
| 23 |
+
- uses: actions/checkout@v2
|
| 24 |
+
with:
|
| 25 |
+
fetch-depth: 0
|
| 26 |
+
|
| 27 |
+
- name: Rename
|
| 28 |
+
shell: bash
|
| 29 |
+
run: |
|
| 30 |
+
image=${{ matrix.image }}
|
| 31 |
+
mv -v ./docker/$image.dockerfile ./Dockerfile
|
| 32 |
+
|
| 33 |
+
- name: Free space
|
| 34 |
+
shell: bash
|
| 35 |
+
run: |
|
| 36 |
+
df -h
|
| 37 |
+
rm -rf /opt/hostedtoolcache
|
| 38 |
+
df -h
|
| 39 |
+
|
| 40 |
+
- name: Free more space
|
| 41 |
+
shell: bash
|
| 42 |
+
run: |
|
| 43 |
+
# https://github.com/orgs/community/discussions/25678
|
| 44 |
+
cd /opt
|
| 45 |
+
find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
|
| 46 |
+
|
| 47 |
+
sudo rm -rf /usr/share/dotnet
|
| 48 |
+
sudo rm -rf "/usr/local/share/boost"
|
| 49 |
+
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
|
| 50 |
+
|
| 51 |
+
- name: Free Disk Space (Ubuntu)
|
| 52 |
+
uses: jlumbroso/free-disk-space@main
|
| 53 |
+
with:
|
| 54 |
+
# this might remove tools that are actually needed,
|
| 55 |
+
# if set to "true" but frees about 6 GB
|
| 56 |
+
tool-cache: false
|
| 57 |
+
|
| 58 |
+
# all of these default to true, but feel free to set to
|
| 59 |
+
# "false" if necessary for your workflow
|
| 60 |
+
android: true
|
| 61 |
+
dotnet: true
|
| 62 |
+
haskell: true
|
| 63 |
+
large-packages: true
|
| 64 |
+
docker-images: false
|
| 65 |
+
swap-storage: true
|
| 66 |
+
|
| 67 |
+
- name: Check space
|
| 68 |
+
shell: bash
|
| 69 |
+
run: |
|
| 70 |
+
df -h
|
| 71 |
+
|
| 72 |
+
- name: Log in to Docker Hub
|
| 73 |
+
uses: docker/login-action@v2
|
| 74 |
+
with:
|
| 75 |
+
username: ${{ secrets.DOCKER_USERNAME }}
|
| 76 |
+
password: ${{ secrets.DOCKER_PASSWORD }}
|
| 77 |
+
|
| 78 |
+
- name: Build and push
|
| 79 |
+
uses: docker/build-push-action@v4
|
| 80 |
+
with:
|
| 81 |
+
context: .
|
| 82 |
+
file: ./Dockerfile
|
| 83 |
+
push: true
|
| 84 |
+
tags: k2fsa/icefall:${{ matrix.image }}
|
.github/workflows/ksponspeech.yml
ADDED
|
@@ -0,0 +1,167 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: ksponspeech
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- ksponspeech
|
| 7 |
+
|
| 8 |
+
workflow_dispatch:
|
| 9 |
+
|
| 10 |
+
jobs:
|
| 11 |
+
ksponspeech:
|
| 12 |
+
runs-on: ${{ matrix.os }}
|
| 13 |
+
strategy:
|
| 14 |
+
matrix:
|
| 15 |
+
os: [ubuntu-latest]
|
| 16 |
+
python-version: [3.8]
|
| 17 |
+
fail-fast: false
|
| 18 |
+
|
| 19 |
+
steps:
|
| 20 |
+
- uses: actions/checkout@v4
|
| 21 |
+
with:
|
| 22 |
+
fetch-depth: 0
|
| 23 |
+
|
| 24 |
+
- name: Setup Python ${{ matrix.python-version }}
|
| 25 |
+
uses: actions/setup-python@v5
|
| 26 |
+
with:
|
| 27 |
+
python-version: ${{ matrix.python-version }}
|
| 28 |
+
cache: 'pip'
|
| 29 |
+
cache-dependency-path: '**/requirements-ci.txt'
|
| 30 |
+
|
| 31 |
+
- name: Install Python dependencies
|
| 32 |
+
run: |
|
| 33 |
+
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
| 34 |
+
pip uninstall -y protobuf
|
| 35 |
+
pip install --no-binary protobuf protobuf==3.20.*
|
| 36 |
+
|
| 37 |
+
- name: Cache kaldifeat
|
| 38 |
+
id: my-cache
|
| 39 |
+
uses: actions/cache@v2
|
| 40 |
+
with:
|
| 41 |
+
path: |
|
| 42 |
+
~/tmp/kaldifeat
|
| 43 |
+
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
|
| 44 |
+
|
| 45 |
+
- name: Install kaldifeat
|
| 46 |
+
if: steps.my-cache.outputs.cache-hit != 'true'
|
| 47 |
+
shell: bash
|
| 48 |
+
run: |
|
| 49 |
+
.github/scripts/install-kaldifeat.sh
|
| 50 |
+
|
| 51 |
+
- name: Test
|
| 52 |
+
shell: bash
|
| 53 |
+
run: |
|
| 54 |
+
export PYTHONPATH=$PWD:$PYTHONPATH
|
| 55 |
+
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
| 56 |
+
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
| 57 |
+
|
| 58 |
+
.github/scripts/ksponspeech/ASR/run.sh
|
| 59 |
+
|
| 60 |
+
- name: Show model files (2024-06-24)
|
| 61 |
+
shell: bash
|
| 62 |
+
run: |
|
| 63 |
+
src=/tmp/model-2024-06-24
|
| 64 |
+
ls -lh $src
|
| 65 |
+
|
| 66 |
+
- name: Show model files (2024-06-16)
|
| 67 |
+
shell: bash
|
| 68 |
+
run: |
|
| 69 |
+
src=/tmp/model-2024-06-16
|
| 70 |
+
ls -lh $src
|
| 71 |
+
|
| 72 |
+
- name: Upload model to huggingface (2024-06-24)
|
| 73 |
+
env:
|
| 74 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 75 |
+
uses: nick-fields/retry@v3
|
| 76 |
+
with:
|
| 77 |
+
max_attempts: 20
|
| 78 |
+
timeout_seconds: 200
|
| 79 |
+
shell: bash
|
| 80 |
+
command: |
|
| 81 |
+
src=/tmp/model-2024-06-24
|
| 82 |
+
git config --global user.email "csukuangfj@gmail.com"
|
| 83 |
+
git config --global user.name "Fangjun Kuang"
|
| 84 |
+
|
| 85 |
+
rm -rf hf
|
| 86 |
+
export GIT_LFS_SKIP_SMUDGE=1
|
| 87 |
+
export GIT_CLONE_PROTECTION_ACTIVE=false
|
| 88 |
+
|
| 89 |
+
git clone https://huggingface.co/k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24 hf
|
| 90 |
+
cd hf
|
| 91 |
+
git fetch
|
| 92 |
+
git pull
|
| 93 |
+
git merge -m "merge remote" --ff origin main
|
| 94 |
+
cp -av $src/* ./
|
| 95 |
+
ls -lh
|
| 96 |
+
git lfs track "bpe.model"
|
| 97 |
+
git lfs track "*.onnx"
|
| 98 |
+
git add .
|
| 99 |
+
git status
|
| 100 |
+
git commit -m "update models"
|
| 101 |
+
git status
|
| 102 |
+
|
| 103 |
+
git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-zipformer-korean-2024-06-24 main || true
|
| 104 |
+
rm -rf hf
|
| 105 |
+
|
| 106 |
+
- name: Upload model to huggingface (2024-06-16)
|
| 107 |
+
env:
|
| 108 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 109 |
+
uses: nick-fields/retry@v3
|
| 110 |
+
with:
|
| 111 |
+
max_attempts: 20
|
| 112 |
+
timeout_seconds: 200
|
| 113 |
+
shell: bash
|
| 114 |
+
command: |
|
| 115 |
+
src=/tmp/model-2024-06-16
|
| 116 |
+
git config --global user.email "csukuangfj@gmail.com"
|
| 117 |
+
git config --global user.name "Fangjun Kuang"
|
| 118 |
+
|
| 119 |
+
rm -rf hf
|
| 120 |
+
export GIT_LFS_SKIP_SMUDGE=1
|
| 121 |
+
export GIT_CLONE_PROTECTION_ACTIVE=false
|
| 122 |
+
|
| 123 |
+
git clone https://huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 hf
|
| 124 |
+
cd hf
|
| 125 |
+
git fetch
|
| 126 |
+
git pull
|
| 127 |
+
git merge -m "merge remote" --ff origin main
|
| 128 |
+
cp -v $src/* ./
|
| 129 |
+
ls -lh
|
| 130 |
+
git lfs track "bpe.model"
|
| 131 |
+
git lfs track "*.onnx"
|
| 132 |
+
cp -av test_wavs $src/
|
| 133 |
+
git add .
|
| 134 |
+
git status
|
| 135 |
+
git commit -m "update models"
|
| 136 |
+
git status
|
| 137 |
+
|
| 138 |
+
git push https://csukuangfj:$HF_TOKEN@huggingface.co/k2-fsa/sherpa-onnx-streaming-zipformer-korean-2024-06-16 main || true
|
| 139 |
+
rm -rf hf
|
| 140 |
+
|
| 141 |
+
- name: Prepare for release (2024-06-16)
|
| 142 |
+
shell: bash
|
| 143 |
+
run: |
|
| 144 |
+
src=/tmp/model-2024-06-16
|
| 145 |
+
d=sherpa-onnx-streaming-zipformer-korean-2024-06-16
|
| 146 |
+
mv $src ./$d
|
| 147 |
+
tar cjvf ${d}.tar.bz2 $d
|
| 148 |
+
ls -lh
|
| 149 |
+
|
| 150 |
+
- name: Prepare for release (2024-06-24)
|
| 151 |
+
shell: bash
|
| 152 |
+
run: |
|
| 153 |
+
src=/tmp/model-2024-06-24
|
| 154 |
+
d=sherpa-onnx-zipformer-korean-2024-06-24
|
| 155 |
+
mv $src ./$d
|
| 156 |
+
tar cjvf ${d}.tar.bz2 $d
|
| 157 |
+
ls -lh
|
| 158 |
+
|
| 159 |
+
- name: Release exported onnx models
|
| 160 |
+
uses: svenstaro/upload-release-action@v2
|
| 161 |
+
with:
|
| 162 |
+
file_glob: true
|
| 163 |
+
overwrite: true
|
| 164 |
+
file: sherpa-onnx-*.tar.bz2
|
| 165 |
+
repo_name: k2-fsa/sherpa-onnx
|
| 166 |
+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
| 167 |
+
tag: asr-models
|
.github/workflows/librispeech.yml
ADDED
|
@@ -0,0 +1,72 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: librispeech
|
| 2 |
+
on:
|
| 3 |
+
push:
|
| 4 |
+
branches:
|
| 5 |
+
- master
|
| 6 |
+
|
| 7 |
+
pull_request:
|
| 8 |
+
branches:
|
| 9 |
+
- master
|
| 10 |
+
|
| 11 |
+
workflow_dispatch:
|
| 12 |
+
|
| 13 |
+
concurrency:
|
| 14 |
+
group: librispeech-${{ github.ref }}
|
| 15 |
+
cancel-in-progress: true
|
| 16 |
+
|
| 17 |
+
jobs:
|
| 18 |
+
generate_build_matrix:
|
| 19 |
+
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
|
| 20 |
+
# see https://github.com/pytorch/pytorch/pull/50633
|
| 21 |
+
runs-on: ubuntu-latest
|
| 22 |
+
outputs:
|
| 23 |
+
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
| 24 |
+
steps:
|
| 25 |
+
- uses: actions/checkout@v4
|
| 26 |
+
with:
|
| 27 |
+
fetch-depth: 0
|
| 28 |
+
- name: Generating build matrix
|
| 29 |
+
id: set-matrix
|
| 30 |
+
run: |
|
| 31 |
+
# outputting for debugging purposes
|
| 32 |
+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
|
| 33 |
+
# MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
|
| 34 |
+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10" --min-torch-version "2.6.0")
|
| 35 |
+
echo "::set-output name=matrix::${MATRIX}"
|
| 36 |
+
librispeech:
|
| 37 |
+
needs: generate_build_matrix
|
| 38 |
+
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
|
| 39 |
+
runs-on: ubuntu-latest
|
| 40 |
+
strategy:
|
| 41 |
+
fail-fast: false
|
| 42 |
+
matrix:
|
| 43 |
+
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
| 44 |
+
|
| 45 |
+
steps:
|
| 46 |
+
# refer to https://github.com/actions/checkout
|
| 47 |
+
- uses: actions/checkout@v4
|
| 48 |
+
with:
|
| 49 |
+
fetch-depth: 0
|
| 50 |
+
|
| 51 |
+
- name: Free space
|
| 52 |
+
shell: bash
|
| 53 |
+
run: |
|
| 54 |
+
df -h
|
| 55 |
+
rm -rf /opt/hostedtoolcache
|
| 56 |
+
df -h
|
| 57 |
+
echo "pwd: $PWD"
|
| 58 |
+
echo "github.workspace ${{ github.workspace }}"
|
| 59 |
+
|
| 60 |
+
- name: Test zipformer/train.py with LibriSpeech
|
| 61 |
+
uses: addnab/docker-run-action@v3
|
| 62 |
+
with:
|
| 63 |
+
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
|
| 64 |
+
options: |
|
| 65 |
+
--volume ${{ github.workspace }}/:/icefall
|
| 66 |
+
shell: bash
|
| 67 |
+
run: |
|
| 68 |
+
export PYTHONPATH=/icefall:$PYTHONPATH
|
| 69 |
+
cd /icefall
|
| 70 |
+
git config --global --add safe.directory /icefall
|
| 71 |
+
|
| 72 |
+
.github/scripts/librispeech/ASR/run.sh
|
.github/workflows/ljspeech.yml
ADDED
|
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: ljspeech
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- master
|
| 7 |
+
|
| 8 |
+
pull_request:
|
| 9 |
+
branches:
|
| 10 |
+
- master
|
| 11 |
+
|
| 12 |
+
workflow_dispatch:
|
| 13 |
+
|
| 14 |
+
concurrency:
|
| 15 |
+
group: ljspeech-${{ github.ref }}
|
| 16 |
+
cancel-in-progress: true
|
| 17 |
+
|
| 18 |
+
jobs:
|
| 19 |
+
generate_build_matrix:
|
| 20 |
+
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
|
| 21 |
+
# see https://github.com/pytorch/pytorch/pull/50633
|
| 22 |
+
runs-on: ubuntu-latest
|
| 23 |
+
outputs:
|
| 24 |
+
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
| 25 |
+
steps:
|
| 26 |
+
- uses: actions/checkout@v4
|
| 27 |
+
with:
|
| 28 |
+
fetch-depth: 0
|
| 29 |
+
- name: Generating build matrix
|
| 30 |
+
id: set-matrix
|
| 31 |
+
run: |
|
| 32 |
+
# outputting for debugging purposes
|
| 33 |
+
python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10"
|
| 34 |
+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --python-version "3.10")
|
| 35 |
+
echo "::set-output name=matrix::${MATRIX}"
|
| 36 |
+
|
| 37 |
+
ljspeech:
|
| 38 |
+
needs: generate_build_matrix
|
| 39 |
+
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
|
| 40 |
+
runs-on: ubuntu-latest
|
| 41 |
+
strategy:
|
| 42 |
+
fail-fast: false
|
| 43 |
+
matrix:
|
| 44 |
+
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
| 45 |
+
|
| 46 |
+
steps:
|
| 47 |
+
- uses: actions/checkout@v4
|
| 48 |
+
with:
|
| 49 |
+
fetch-depth: 0
|
| 50 |
+
|
| 51 |
+
- name: Free space
|
| 52 |
+
shell: bash
|
| 53 |
+
run: |
|
| 54 |
+
ls -lh
|
| 55 |
+
df -h
|
| 56 |
+
rm -rf /opt/hostedtoolcache
|
| 57 |
+
df -h
|
| 58 |
+
echo "pwd: $PWD"
|
| 59 |
+
echo "github.workspace ${{ github.workspace }}"
|
| 60 |
+
|
| 61 |
+
- name: Run tests
|
| 62 |
+
uses: addnab/docker-run-action@v3
|
| 63 |
+
with:
|
| 64 |
+
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
|
| 65 |
+
options: |
|
| 66 |
+
--volume ${{ github.workspace }}/:/icefall
|
| 67 |
+
shell: bash
|
| 68 |
+
run: |
|
| 69 |
+
export PYTHONPATH=/icefall:$PYTHONPATH
|
| 70 |
+
cd /icefall
|
| 71 |
+
git config --global --add safe.directory /icefall
|
| 72 |
+
|
| 73 |
+
pip install "matplotlib<=3.9.4"
|
| 74 |
+
|
| 75 |
+
pip list
|
| 76 |
+
|
| 77 |
+
.github/scripts/ljspeech/TTS/run-matcha.sh
|
| 78 |
+
.github/scripts/ljspeech/TTS/run.sh
|
| 79 |
+
|
| 80 |
+
- name: display files
|
| 81 |
+
shell: bash
|
| 82 |
+
run: |
|
| 83 |
+
ls -lh
|
| 84 |
+
|
| 85 |
+
- uses: actions/upload-artifact@v4
|
| 86 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 87 |
+
with:
|
| 88 |
+
name: generated-test-files-${{ matrix.python-version }}-${{ matrix.torch-version }}
|
| 89 |
+
path: ./*.wav
|
| 90 |
+
|
| 91 |
+
- name: Release exported onnx models
|
| 92 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0' && github.event_name == 'push'
|
| 93 |
+
uses: svenstaro/upload-release-action@v2
|
| 94 |
+
with:
|
| 95 |
+
file_glob: true
|
| 96 |
+
overwrite: true
|
| 97 |
+
file: vits-icefall-*.tar.bz2
|
| 98 |
+
repo_name: k2-fsa/sherpa-onnx
|
| 99 |
+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
| 100 |
+
tag: tts-models
|
| 101 |
+
|
| 102 |
+
- uses: actions/upload-artifact@v4
|
| 103 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 104 |
+
with:
|
| 105 |
+
name: step-2
|
| 106 |
+
path: ./model-steps-2.onnx
|
| 107 |
+
|
| 108 |
+
- uses: actions/upload-artifact@v4
|
| 109 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 110 |
+
with:
|
| 111 |
+
name: step-3
|
| 112 |
+
path: ./model-steps-3.onnx
|
| 113 |
+
|
| 114 |
+
- uses: actions/upload-artifact@v4
|
| 115 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 116 |
+
with:
|
| 117 |
+
name: step-4
|
| 118 |
+
path: ./model-steps-4.onnx
|
| 119 |
+
|
| 120 |
+
- uses: actions/upload-artifact@v4
|
| 121 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 122 |
+
with:
|
| 123 |
+
name: step-5
|
| 124 |
+
path: ./model-steps-5.onnx
|
| 125 |
+
|
| 126 |
+
- uses: actions/upload-artifact@v4
|
| 127 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 128 |
+
with:
|
| 129 |
+
name: step-6
|
| 130 |
+
path: ./model-steps-6.onnx
|
| 131 |
+
|
| 132 |
+
- name: Upload models to huggingface
|
| 133 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 134 |
+
shell: bash
|
| 135 |
+
env:
|
| 136 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 137 |
+
run: |
|
| 138 |
+
d=matcha-icefall-en_US-ljspeech
|
| 139 |
+
|
| 140 |
+
GIT_LFS_SKIP_SMUDGE=1 git clone https://huggingface.co/csukuangfj/$d hf
|
| 141 |
+
cp -av $d/* hf/
|
| 142 |
+
|
| 143 |
+
pushd hf
|
| 144 |
+
|
| 145 |
+
git lfs track "cmn_dict"
|
| 146 |
+
git lfs track "ru_dict"
|
| 147 |
+
|
| 148 |
+
git add .
|
| 149 |
+
|
| 150 |
+
git config --global user.name "csukuangfj"
|
| 151 |
+
git config --global user.email "csukuangfj@gmail.com"
|
| 152 |
+
git config --global lfs.allowincompletepush true
|
| 153 |
+
|
| 154 |
+
git commit -m "upload model" && git push https://csukuangfj:${HF_TOKEN}@huggingface.co/csukuangfj/$d main || true
|
| 155 |
+
popd
|
| 156 |
+
|
| 157 |
+
- name: Release exported onnx models
|
| 158 |
+
if: matrix.python-version == '3.10' && matrix.torch-version == '2.3.0'
|
| 159 |
+
uses: svenstaro/upload-release-action@v2
|
| 160 |
+
with:
|
| 161 |
+
file_glob: true
|
| 162 |
+
overwrite: true
|
| 163 |
+
file: matcha-icefall-*.tar.bz2
|
| 164 |
+
repo_name: k2-fsa/sherpa-onnx
|
| 165 |
+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
| 166 |
+
tag: tts-models
|
.github/workflows/multi-zh-hans.yml
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: multi-zh-hans
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- master
|
| 7 |
+
|
| 8 |
+
workflow_dispatch:
|
| 9 |
+
|
| 10 |
+
concurrency:
|
| 11 |
+
group: multi-zh-hans-${{ github.ref }}
|
| 12 |
+
cancel-in-progress: true
|
| 13 |
+
|
| 14 |
+
permissions:
|
| 15 |
+
contents: write
|
| 16 |
+
|
| 17 |
+
jobs:
|
| 18 |
+
generate_build_matrix:
|
| 19 |
+
if: github.repository_owner == 'csukuangfj' || github.repository_owner == 'k2-fsa'
|
| 20 |
+
# see https://github.com/pytorch/pytorch/pull/50633
|
| 21 |
+
runs-on: ubuntu-latest
|
| 22 |
+
outputs:
|
| 23 |
+
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
| 24 |
+
steps:
|
| 25 |
+
- uses: actions/checkout@v4
|
| 26 |
+
with:
|
| 27 |
+
fetch-depth: 0
|
| 28 |
+
- name: Generating build matrix
|
| 29 |
+
id: set-matrix
|
| 30 |
+
run: |
|
| 31 |
+
# outputting for debugging purposes
|
| 32 |
+
python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11"
|
| 33 |
+
MATRIX=$(python ./.github/scripts/docker/generate_build_matrix.py --torch-version "2.7.0" --python-version "3.11")
|
| 34 |
+
echo "::set-output name=matrix::${MATRIX}"
|
| 35 |
+
multi-zh-hans:
|
| 36 |
+
needs: generate_build_matrix
|
| 37 |
+
name: py${{ matrix.python-version }} torch${{ matrix.torch-version }} v${{ matrix.version }}
|
| 38 |
+
runs-on: ubuntu-latest
|
| 39 |
+
strategy:
|
| 40 |
+
fail-fast: false
|
| 41 |
+
matrix:
|
| 42 |
+
${{ fromJson(needs.generate_build_matrix.outputs.matrix) }}
|
| 43 |
+
|
| 44 |
+
steps:
|
| 45 |
+
- uses: actions/checkout@v4
|
| 46 |
+
with:
|
| 47 |
+
fetch-depth: 0
|
| 48 |
+
|
| 49 |
+
- name: Free space
|
| 50 |
+
shell: bash
|
| 51 |
+
run: |
|
| 52 |
+
df -h
|
| 53 |
+
rm -rf /opt/hostedtoolcache
|
| 54 |
+
df -h
|
| 55 |
+
echo "pwd: $PWD"
|
| 56 |
+
echo "github.workspace ${{ github.workspace }}"
|
| 57 |
+
|
| 58 |
+
- name: Test with multi_zh-hans
|
| 59 |
+
uses: addnab/docker-run-action@v3
|
| 60 |
+
with:
|
| 61 |
+
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
|
| 62 |
+
options: |
|
| 63 |
+
--volume ${{ github.workspace }}/:/icefall
|
| 64 |
+
shell: bash
|
| 65 |
+
run: |
|
| 66 |
+
export PYTHONPATH=/icefall:$PYTHONPATH
|
| 67 |
+
export HF_TOKEN=${{ secrets.HF_TOKEN }}
|
| 68 |
+
cd /icefall
|
| 69 |
+
git config --global --add safe.directory /icefall
|
| 70 |
+
|
| 71 |
+
.github/scripts/multi_zh-hans/ASR/run.sh
|
| 72 |
+
|
| 73 |
+
- name: Show models
|
| 74 |
+
shell: bash
|
| 75 |
+
run: |
|
| 76 |
+
ls -lh *.tar.bz2
|
| 77 |
+
|
| 78 |
+
- name: upload model to https://github.com/k2-fsa/sherpa-onnx
|
| 79 |
+
uses: svenstaro/upload-release-action@v2
|
| 80 |
+
with:
|
| 81 |
+
file_glob: true
|
| 82 |
+
file: ./*.tar.bz2
|
| 83 |
+
overwrite: true
|
| 84 |
+
repo_name: k2-fsa/sherpa-onnx
|
| 85 |
+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
| 86 |
+
tag: asr-models
|
.github/workflows/rknn.yml
ADDED
|
@@ -0,0 +1,134 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: rknn
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- master
|
| 7 |
+
- rknn-zipformer2
|
| 8 |
+
|
| 9 |
+
pull_request:
|
| 10 |
+
branches:
|
| 11 |
+
- master
|
| 12 |
+
|
| 13 |
+
workflow_dispatch:
|
| 14 |
+
|
| 15 |
+
concurrency:
|
| 16 |
+
group: rknn-${{ github.ref }}
|
| 17 |
+
cancel-in-progress: true
|
| 18 |
+
|
| 19 |
+
jobs:
|
| 20 |
+
rknn:
|
| 21 |
+
name: RKNN ${{ matrix.recipe }} ${{ matrix.rknn_toolkit2_version }}
|
| 22 |
+
runs-on: ubuntu-latest
|
| 23 |
+
strategy:
|
| 24 |
+
fail-fast: false
|
| 25 |
+
matrix:
|
| 26 |
+
python-version: ["3.10"]
|
| 27 |
+
k2-version: ["1.24.4.dev20241029"]
|
| 28 |
+
kaldifeat-version: ["1.25.5.dev20241029"]
|
| 29 |
+
torch-version: ["2.0.0"]
|
| 30 |
+
torchaudio-version: ["2.0.1"]
|
| 31 |
+
version: ["20241218"]
|
| 32 |
+
# recipe: ["librispeech", "wenetspeech", "multi_zh-hans"]
|
| 33 |
+
recipe: ["librispeech"]
|
| 34 |
+
rknn_toolkit2_version: ["2.2.0", "2.1.0"]
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
steps:
|
| 38 |
+
- uses: actions/checkout@v4
|
| 39 |
+
with:
|
| 40 |
+
fetch-depth: 0
|
| 41 |
+
|
| 42 |
+
- name: Export RKNN model
|
| 43 |
+
uses: addnab/docker-run-action@v3
|
| 44 |
+
with:
|
| 45 |
+
image: ghcr.io/${{ github.repository_owner }}/icefall:cpu-py${{ matrix.python-version }}-torch${{ matrix.torch-version }}-v${{ matrix.version }}
|
| 46 |
+
options: |
|
| 47 |
+
--volume ${{ github.workspace }}/:/icefall
|
| 48 |
+
shell: bash
|
| 49 |
+
run: |
|
| 50 |
+
cat /etc/*release
|
| 51 |
+
lsb_release -a
|
| 52 |
+
uname -a
|
| 53 |
+
python3 --version
|
| 54 |
+
export PYTHONPATH=/icefall:$PYTHONPATH
|
| 55 |
+
cd /icefall
|
| 56 |
+
git config --global --add safe.directory /icefall
|
| 57 |
+
|
| 58 |
+
python3 -m torch.utils.collect_env
|
| 59 |
+
python3 -m k2.version
|
| 60 |
+
pip list
|
| 61 |
+
export rknn_toolkit2_version=${{ matrix.rknn_toolkit2_version }}
|
| 62 |
+
|
| 63 |
+
if [[ $rknn_toolkit2_version == "2.1.0" ]]; then
|
| 64 |
+
# for the folder pruned_transducer_stateless7_streaming
|
| 65 |
+
curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.1.0%2B708089d1-cp310-cp310-linux_x86_64.whl
|
| 66 |
+
else
|
| 67 |
+
# for the folder zipformer/
|
| 68 |
+
curl -SL -O https://huggingface.co/csukuangfj/rknn-toolkit2/resolve/main/rknn_toolkit2-2.2.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl
|
| 69 |
+
fi
|
| 70 |
+
|
| 71 |
+
# Install rknn
|
| 72 |
+
pip install ./*.whl "numpy<=1.26.4"
|
| 73 |
+
pip list | grep rknn
|
| 74 |
+
echo "---"
|
| 75 |
+
pip list
|
| 76 |
+
echo "---"
|
| 77 |
+
|
| 78 |
+
recipe=${{ matrix.recipe }}
|
| 79 |
+
.github/scripts/$recipe/ASR/run_rknn.sh > log-$recipe.txt 2>&1 || true
|
| 80 |
+
|
| 81 |
+
- uses: actions/upload-artifact@v4
|
| 82 |
+
with:
|
| 83 |
+
name: log-${{ matrix.recipe }}-${{ matrix.rknn_toolkit2_version }}
|
| 84 |
+
path: ./log-*.txt
|
| 85 |
+
|
| 86 |
+
- name: Display results
|
| 87 |
+
shell: bash
|
| 88 |
+
run: |
|
| 89 |
+
ls -lh *rk*.tar.bz2 || true
|
| 90 |
+
|
| 91 |
+
- name: Release to GitHub
|
| 92 |
+
uses: svenstaro/upload-release-action@v2
|
| 93 |
+
with:
|
| 94 |
+
file_glob: true
|
| 95 |
+
overwrite: true
|
| 96 |
+
file: sherpa-onnx-*.tar.bz2
|
| 97 |
+
repo_name: k2-fsa/sherpa-onnx
|
| 98 |
+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
| 99 |
+
tag: asr-models
|
| 100 |
+
|
| 101 |
+
- name: Upload model to huggingface
|
| 102 |
+
if: github.event_name == 'push' || github.event_name == 'workflow_dispatch'
|
| 103 |
+
env:
|
| 104 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 105 |
+
uses: nick-fields/retry@v3
|
| 106 |
+
with:
|
| 107 |
+
max_attempts: 20
|
| 108 |
+
timeout_seconds: 200
|
| 109 |
+
shell: bash
|
| 110 |
+
command: |
|
| 111 |
+
git config --global user.email "csukuangfj@gmail.com"
|
| 112 |
+
git config --global user.name "Fangjun Kuang"
|
| 113 |
+
|
| 114 |
+
rm -rf huggingface
|
| 115 |
+
export GIT_LFS_SKIP_SMUDGE=1
|
| 116 |
+
|
| 117 |
+
git clone https://huggingface.co/csukuangfj/sherpa-onnx-rknn-models huggingface
|
| 118 |
+
cd huggingface
|
| 119 |
+
|
| 120 |
+
git fetch
|
| 121 |
+
git pull
|
| 122 |
+
git merge -m "merge remote" --ff origin main
|
| 123 |
+
dst=streaming-asr
|
| 124 |
+
mkdir -p $dst
|
| 125 |
+
cp ../*rk*.tar.bz2 $dst/ || true
|
| 126 |
+
|
| 127 |
+
ls -lh $dst
|
| 128 |
+
git add .
|
| 129 |
+
git status
|
| 130 |
+
git commit -m "update models"
|
| 131 |
+
git status
|
| 132 |
+
|
| 133 |
+
git push https://csukuangfj:$HF_TOKEN@huggingface.co/csukuangfj/sherpa-onnx-rknn-models main || true
|
| 134 |
+
rm -rf huggingface
|
.github/workflows/run-docker-image.yml
ADDED
|
@@ -0,0 +1,144 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: Run docker image
|
| 2 |
+
on:
|
| 3 |
+
workflow_dispatch:
|
| 4 |
+
|
| 5 |
+
concurrency:
|
| 6 |
+
group: run_docker_image-${{ github.ref }}
|
| 7 |
+
cancel-in-progress: true
|
| 8 |
+
|
| 9 |
+
jobs:
|
| 10 |
+
run-docker-image:
|
| 11 |
+
name: ${{ matrix.image }}
|
| 12 |
+
runs-on: ${{ matrix.os }}
|
| 13 |
+
strategy:
|
| 14 |
+
fail-fast: false
|
| 15 |
+
matrix:
|
| 16 |
+
os: [ubuntu-latest]
|
| 17 |
+
image: ["torch2.4.0-cuda12.4", "torch2.4.0-cuda12.1", "torch2.4.0-cuda11.8", "torch2.3.1-cuda12.1", "torch2.3.1-cuda11.8", "torch2.2.2-cuda12.1", "torch2.2.2-cuda11.8", "torch2.2.1-cuda12.1", "torch2.2.1-cuda11.8", "torch2.2.0-cuda12.1", "torch2.2.0-cuda11.8", "torch2.1.0-cuda12.1", "torch2.1.0-cuda11.8", "torch2.0.0-cuda11.7", "torch1.13.0-cuda11.6", "torch1.12.1-cuda11.3", "torch1.9.0-cuda10.2"]
|
| 18 |
+
steps:
|
| 19 |
+
# refer to https://github.com/actions/checkout
|
| 20 |
+
- uses: actions/checkout@v2
|
| 21 |
+
with:
|
| 22 |
+
fetch-depth: 0
|
| 23 |
+
|
| 24 |
+
- name: Free space
|
| 25 |
+
shell: bash
|
| 26 |
+
run: |
|
| 27 |
+
df -h
|
| 28 |
+
rm -rf /opt/hostedtoolcache
|
| 29 |
+
df -h
|
| 30 |
+
|
| 31 |
+
- name: Free more space
|
| 32 |
+
shell: bash
|
| 33 |
+
run: |
|
| 34 |
+
# https://github.com/orgs/community/discussions/25678
|
| 35 |
+
cd /opt
|
| 36 |
+
find . -maxdepth 1 -mindepth 1 '!' -path ./containerd '!' -path ./actionarchivecache '!' -path ./runner '!' -path ./runner-cache -exec rm -rf '{}' ';'
|
| 37 |
+
|
| 38 |
+
sudo rm -rf /usr/share/dotnet
|
| 39 |
+
sudo rm -rf "/usr/local/share/boost"
|
| 40 |
+
sudo rm -rf "$AGENT_TOOLSDIRECTORY"
|
| 41 |
+
|
| 42 |
+
- name: Free Disk Space (Ubuntu)
|
| 43 |
+
uses: jlumbroso/free-disk-space@main
|
| 44 |
+
with:
|
| 45 |
+
# this might remove tools that are actually needed,
|
| 46 |
+
# if set to "true" but frees about 6 GB
|
| 47 |
+
tool-cache: false
|
| 48 |
+
|
| 49 |
+
# all of these default to true, but feel free to set to
|
| 50 |
+
# "false" if necessary for your workflow
|
| 51 |
+
android: true
|
| 52 |
+
dotnet: true
|
| 53 |
+
haskell: true
|
| 54 |
+
large-packages: true
|
| 55 |
+
docker-images: false
|
| 56 |
+
swap-storage: true
|
| 57 |
+
|
| 58 |
+
- name: Check space
|
| 59 |
+
shell: bash
|
| 60 |
+
run: |
|
| 61 |
+
df -h
|
| 62 |
+
|
| 63 |
+
- name: Run the build process with Docker
|
| 64 |
+
uses: addnab/docker-run-action@v3
|
| 65 |
+
with:
|
| 66 |
+
image: k2fsa/icefall:${{ matrix.image }}
|
| 67 |
+
shell: bash
|
| 68 |
+
run: |
|
| 69 |
+
uname -a
|
| 70 |
+
cat /etc/*release
|
| 71 |
+
|
| 72 |
+
find / -name libcuda* 2>/dev/null
|
| 73 |
+
|
| 74 |
+
ls -lh /usr/local/
|
| 75 |
+
ls -lh /usr/local/cuda*
|
| 76 |
+
|
| 77 |
+
nvcc --version
|
| 78 |
+
|
| 79 |
+
ls -lh /usr/local/cuda-*/compat/*
|
| 80 |
+
|
| 81 |
+
# For torch1.9.0-cuda10.2
|
| 82 |
+
export LD_LIBRARY_PATH=/usr/local/cuda-10.2/compat:$LD_LIBRARY_PATH
|
| 83 |
+
|
| 84 |
+
# For torch1.12.1-cuda11.3
|
| 85 |
+
export LD_LIBRARY_PATH=/usr/local/cuda-11.3/compat:$LD_LIBRARY_PATH
|
| 86 |
+
|
| 87 |
+
# For torch2.0.0-cuda11.7
|
| 88 |
+
export LD_LIBRARY_PATH=/usr/local/cuda-11.7/compat:$LD_LIBRARY_PATH
|
| 89 |
+
|
| 90 |
+
# For torch2.1.0-cuda11.8
|
| 91 |
+
export LD_LIBRARY_PATH=/usr/local/cuda-11.8/compat:$LD_LIBRARY_PATH
|
| 92 |
+
|
| 93 |
+
# For torch2.1.0-cuda12.1
|
| 94 |
+
export LD_LIBRARY_PATH=/usr/local/cuda-12.1/compat:$LD_LIBRARY_PATH
|
| 95 |
+
|
| 96 |
+
|
| 97 |
+
which nvcc
|
| 98 |
+
cuda_dir=$(dirname $(which nvcc))
|
| 99 |
+
echo "cuda_dir: $cuda_dir"
|
| 100 |
+
|
| 101 |
+
find $cuda_dir -name libcuda.so*
|
| 102 |
+
echo "--------------------"
|
| 103 |
+
|
| 104 |
+
find / -name libcuda.so* 2>/dev/null
|
| 105 |
+
|
| 106 |
+
# for torch1.13.0-cuda11.6
|
| 107 |
+
if [ -e /opt/conda/lib/stubs/libcuda.so ]; then
|
| 108 |
+
cd /opt/conda/lib/stubs && ln -s libcuda.so libcuda.so.1 && cd -
|
| 109 |
+
export LD_LIBRARY_PATH=/opt/conda/lib/stubs:$LD_LIBRARY_PATH
|
| 110 |
+
fi
|
| 111 |
+
|
| 112 |
+
find / -name libcuda.so* 2>/dev/null
|
| 113 |
+
echo "LD_LIBRARY_PATH: $LD_LIBRARY_PATH"
|
| 114 |
+
|
| 115 |
+
python3 --version
|
| 116 |
+
which python3
|
| 117 |
+
|
| 118 |
+
python3 -m pip list
|
| 119 |
+
|
| 120 |
+
echo "----------torch----------"
|
| 121 |
+
python3 -m torch.utils.collect_env
|
| 122 |
+
|
| 123 |
+
echo "----------k2----------"
|
| 124 |
+
python3 -c "import k2; print(k2.__file__)"
|
| 125 |
+
python3 -c "import k2; print(k2.__dev_version__)"
|
| 126 |
+
python3 -m k2.version
|
| 127 |
+
|
| 128 |
+
echo "----------lhotse----------"
|
| 129 |
+
python3 -c "import lhotse; print(lhotse.__file__)"
|
| 130 |
+
python3 -c "import lhotse; print(lhotse.__version__)"
|
| 131 |
+
|
| 132 |
+
echo "----------kaldifeat----------"
|
| 133 |
+
python3 -c "import kaldifeat; print(kaldifeat.__file__)"
|
| 134 |
+
python3 -c "import kaldifeat; print(kaldifeat.__version__)"
|
| 135 |
+
|
| 136 |
+
echo "Test yesno recipe"
|
| 137 |
+
|
| 138 |
+
cd egs/yesno/ASR
|
| 139 |
+
|
| 140 |
+
./prepare.sh
|
| 141 |
+
|
| 142 |
+
./tdnn/train.py
|
| 143 |
+
|
| 144 |
+
./tdnn/decode.py
|
.github/workflows/run-gigaspeech-2022-05-13.yml
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2021 Fangjun Kuang (csukuangfj@gmail.com)
|
| 2 |
+
|
| 3 |
+
# See ../../LICENSE for clarification regarding multiple authors
|
| 4 |
+
#
|
| 5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
+
# you may not use this file except in compliance with the License.
|
| 7 |
+
# You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
+
# See the License for the specific language governing permissions and
|
| 15 |
+
# limitations under the License.
|
| 16 |
+
|
| 17 |
+
name: run-gigaspeech-2022-05-13
|
| 18 |
+
# stateless transducer + k2 pruned rnnt-loss + reworked conformer
|
| 19 |
+
|
| 20 |
+
on:
|
| 21 |
+
push:
|
| 22 |
+
branches:
|
| 23 |
+
- master
|
| 24 |
+
pull_request:
|
| 25 |
+
types: [labeled]
|
| 26 |
+
|
| 27 |
+
schedule:
|
| 28 |
+
# minute (0-59)
|
| 29 |
+
# hour (0-23)
|
| 30 |
+
# day of the month (1-31)
|
| 31 |
+
# month (1-12)
|
| 32 |
+
# day of the week (0-6)
|
| 33 |
+
# nightly build at 15:50 UTC time every day
|
| 34 |
+
- cron: "50 15 * * *"
|
| 35 |
+
|
| 36 |
+
workflow_dispatch:
|
| 37 |
+
|
| 38 |
+
concurrency:
|
| 39 |
+
group: run_gigaspeech_2022_05_13-${{ github.ref }}
|
| 40 |
+
cancel-in-progress: true
|
| 41 |
+
|
| 42 |
+
jobs:
|
| 43 |
+
run_gigaspeech_2022_05_13:
|
| 44 |
+
if: github.event_name == 'workflow_dispatch' || github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule'
|
| 45 |
+
runs-on: ${{ matrix.os }}
|
| 46 |
+
strategy:
|
| 47 |
+
matrix:
|
| 48 |
+
os: [ubuntu-latest]
|
| 49 |
+
python-version: [3.8]
|
| 50 |
+
|
| 51 |
+
fail-fast: false
|
| 52 |
+
|
| 53 |
+
steps:
|
| 54 |
+
- uses: actions/checkout@v2
|
| 55 |
+
with:
|
| 56 |
+
fetch-depth: 0
|
| 57 |
+
|
| 58 |
+
- name: Setup Python ${{ matrix.python-version }}
|
| 59 |
+
uses: actions/setup-python@v2
|
| 60 |
+
with:
|
| 61 |
+
python-version: ${{ matrix.python-version }}
|
| 62 |
+
cache: 'pip'
|
| 63 |
+
cache-dependency-path: '**/requirements-ci.txt'
|
| 64 |
+
|
| 65 |
+
- name: Install Python dependencies
|
| 66 |
+
run: |
|
| 67 |
+
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
| 68 |
+
pip uninstall -y protobuf
|
| 69 |
+
pip install --no-binary protobuf protobuf==3.20.*
|
| 70 |
+
|
| 71 |
+
- name: Cache kaldifeat
|
| 72 |
+
id: my-cache
|
| 73 |
+
uses: actions/cache@v2
|
| 74 |
+
with:
|
| 75 |
+
path: |
|
| 76 |
+
~/tmp/kaldifeat
|
| 77 |
+
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
|
| 78 |
+
|
| 79 |
+
- name: Install kaldifeat
|
| 80 |
+
if: steps.my-cache.outputs.cache-hit != 'true'
|
| 81 |
+
shell: bash
|
| 82 |
+
run: |
|
| 83 |
+
.github/scripts/install-kaldifeat.sh
|
| 84 |
+
|
| 85 |
+
- name: Download GigaSpeech dev/test dataset
|
| 86 |
+
shell: bash
|
| 87 |
+
run: |
|
| 88 |
+
sudo apt-get install -y -q git-lfs
|
| 89 |
+
|
| 90 |
+
.github/scripts/download-gigaspeech-dev-test-dataset.sh
|
| 91 |
+
|
| 92 |
+
- name: Inference with pre-trained model
|
| 93 |
+
shell: bash
|
| 94 |
+
env:
|
| 95 |
+
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
| 96 |
+
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
| 97 |
+
run: |
|
| 98 |
+
ln -s ~/tmp/giga-dev-dataset-fbank/data egs/gigaspeech/ASR/
|
| 99 |
+
|
| 100 |
+
ls -lh egs/gigaspeech/ASR/data/fbank
|
| 101 |
+
|
| 102 |
+
export PYTHONPATH=$PWD:$PYTHONPATH
|
| 103 |
+
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
| 104 |
+
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
| 105 |
+
|
| 106 |
+
.github/scripts/run-gigaspeech-pruned-transducer-stateless2-2022-05-12.sh
|
| 107 |
+
|
| 108 |
+
- name: Display decoding results for gigaspeech pruned_transducer_stateless2
|
| 109 |
+
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
|
| 110 |
+
shell: bash
|
| 111 |
+
run: |
|
| 112 |
+
cd egs/gigaspeech/ASR/
|
| 113 |
+
tree ./pruned_transducer_stateless2/exp
|
| 114 |
+
|
| 115 |
+
sudo apt-get -qq install tree
|
| 116 |
+
|
| 117 |
+
cd pruned_transducer_stateless2
|
| 118 |
+
echo "results for pruned_transducer_stateless2"
|
| 119 |
+
echo "===greedy search==="
|
| 120 |
+
find exp/greedy_search -name "log-*" -exec grep -n --color "best for dev" {} + | sort -n -k2
|
| 121 |
+
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test" {} + | sort -n -k2
|
| 122 |
+
|
| 123 |
+
- name: Upload decoding results for gigaspeech pruned_transducer_stateless2
|
| 124 |
+
uses: actions/upload-artifact@v4
|
| 125 |
+
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'run-decode'
|
| 126 |
+
with:
|
| 127 |
+
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-gigaspeech-pruned_transducer_stateless2-2022-05-12
|
| 128 |
+
path: egs/gigaspeech/ASR/pruned_transducer_stateless2/exp/
|
.github/workflows/run-gigaspeech-zipformer-2023-10-17.yml
ADDED
|
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2022 Fangjun Kuang (csukuangfj@gmail.com)
|
| 2 |
+
|
| 3 |
+
# See ../../LICENSE for clarification regarding multiple authors
|
| 4 |
+
#
|
| 5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
+
# you may not use this file except in compliance with the License.
|
| 7 |
+
# You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
+
# See the License for the specific language governing permissions and
|
| 15 |
+
# limitations under the License.
|
| 16 |
+
|
| 17 |
+
name: run-gigaspeech-zipformer-2023-10-17
|
| 18 |
+
# zipformer
|
| 19 |
+
|
| 20 |
+
on:
|
| 21 |
+
push:
|
| 22 |
+
branches:
|
| 23 |
+
- master
|
| 24 |
+
|
| 25 |
+
pull_request:
|
| 26 |
+
types: [labeled]
|
| 27 |
+
|
| 28 |
+
schedule:
|
| 29 |
+
# minute (0-59)
|
| 30 |
+
# hour (0-23)
|
| 31 |
+
# day of the month (1-31)
|
| 32 |
+
# month (1-12)
|
| 33 |
+
# day of the week (0-6)
|
| 34 |
+
# nightly build at 15:50 UTC time every day
|
| 35 |
+
- cron: "50 15 * * *"
|
| 36 |
+
|
| 37 |
+
workflow_dispatch:
|
| 38 |
+
|
| 39 |
+
concurrency:
|
| 40 |
+
group: run_gigaspeech_2023_10_17_zipformer-${{ github.ref }}
|
| 41 |
+
cancel-in-progress: true
|
| 42 |
+
|
| 43 |
+
jobs:
|
| 44 |
+
run_gigaspeech_2023_10_17_zipformer:
|
| 45 |
+
if: github.event.label.name == 'zipformer' ||github.event.label.name == 'ready' || github.event.label.name == 'run-decode' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
| 46 |
+
runs-on: ${{ matrix.os }}
|
| 47 |
+
strategy:
|
| 48 |
+
matrix:
|
| 49 |
+
os: [ubuntu-latest]
|
| 50 |
+
python-version: [3.8]
|
| 51 |
+
|
| 52 |
+
fail-fast: false
|
| 53 |
+
|
| 54 |
+
steps:
|
| 55 |
+
- uses: actions/checkout@v2
|
| 56 |
+
with:
|
| 57 |
+
fetch-depth: 0
|
| 58 |
+
|
| 59 |
+
- name: Setup Python ${{ matrix.python-version }}
|
| 60 |
+
uses: actions/setup-python@v2
|
| 61 |
+
with:
|
| 62 |
+
python-version: ${{ matrix.python-version }}
|
| 63 |
+
cache: 'pip'
|
| 64 |
+
cache-dependency-path: '**/requirements-ci.txt'
|
| 65 |
+
|
| 66 |
+
- name: Install Python dependencies
|
| 67 |
+
run: |
|
| 68 |
+
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
| 69 |
+
pip uninstall -y protobuf
|
| 70 |
+
pip install --no-binary protobuf protobuf==3.20.*
|
| 71 |
+
|
| 72 |
+
- name: Cache kaldifeat
|
| 73 |
+
id: my-cache
|
| 74 |
+
uses: actions/cache@v2
|
| 75 |
+
with:
|
| 76 |
+
path: |
|
| 77 |
+
~/tmp/kaldifeat
|
| 78 |
+
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
|
| 79 |
+
|
| 80 |
+
- name: Install kaldifeat
|
| 81 |
+
if: steps.my-cache.outputs.cache-hit != 'true'
|
| 82 |
+
shell: bash
|
| 83 |
+
run: |
|
| 84 |
+
.github/scripts/install-kaldifeat.sh
|
| 85 |
+
|
| 86 |
+
- name: Inference with pre-trained model
|
| 87 |
+
shell: bash
|
| 88 |
+
env:
|
| 89 |
+
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
| 90 |
+
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
| 91 |
+
HF_TOKEN: ${{ secrets.HF_TOKEN }}
|
| 92 |
+
run: |
|
| 93 |
+
sudo apt-get -qq install git-lfs tree
|
| 94 |
+
export PYTHONPATH=$PWD:$PYTHONPATH
|
| 95 |
+
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
| 96 |
+
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
| 97 |
+
|
| 98 |
+
.github/scripts/run-gigaspeech-zipformer-2023-10-17.sh
|
| 99 |
+
|
| 100 |
+
- name: upload model to https://github.com/k2-fsa/sherpa-onnx
|
| 101 |
+
uses: svenstaro/upload-release-action@v2
|
| 102 |
+
with:
|
| 103 |
+
file_glob: true
|
| 104 |
+
file: ./*.tar.bz2
|
| 105 |
+
overwrite: true
|
| 106 |
+
repo_name: k2-fsa/sherpa-onnx
|
| 107 |
+
repo_token: ${{ secrets.UPLOAD_GH_SHERPA_ONNX_TOKEN }}
|
| 108 |
+
tag: asr-models
|
| 109 |
+
|
| 110 |
+
- name: Display decoding results for gigaspeech zipformer
|
| 111 |
+
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch'
|
| 112 |
+
shell: bash
|
| 113 |
+
run: |
|
| 114 |
+
cd egs/gigaspeech/ASR/
|
| 115 |
+
tree ./zipformer/exp
|
| 116 |
+
|
| 117 |
+
cd zipformer
|
| 118 |
+
echo "results for zipformer"
|
| 119 |
+
echo "===greedy search==="
|
| 120 |
+
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
| 121 |
+
find exp/greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
| 122 |
+
|
| 123 |
+
# echo "===fast_beam_search==="
|
| 124 |
+
# find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
| 125 |
+
# find exp/fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
| 126 |
+
#
|
| 127 |
+
# echo "===modified beam search==="
|
| 128 |
+
# find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
| 129 |
+
# find exp/modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
| 130 |
+
|
| 131 |
+
- name: Upload decoding results for gigaspeech zipformer
|
| 132 |
+
uses: actions/upload-artifact@v4
|
| 133 |
+
if: github.event_name == 'schedule' || github.event.label.name == 'run-decode' || github.event_name == 'workflow_dispatch'
|
| 134 |
+
with:
|
| 135 |
+
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-zipformer-2022-11-11
|
| 136 |
+
path: egs/gigaspeech/ASR/zipformer/exp/
|
.github/workflows/run-librispeech-lstm-transducer-stateless2-2022-09-03.yml
ADDED
|
@@ -0,0 +1,165 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: run-librispeech-lstm-transducer2-2022-09-03
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- master
|
| 7 |
+
pull_request:
|
| 8 |
+
types: [labeled]
|
| 9 |
+
|
| 10 |
+
schedule:
|
| 11 |
+
# minute (0-59)
|
| 12 |
+
# hour (0-23)
|
| 13 |
+
# day of the month (1-31)
|
| 14 |
+
# month (1-12)
|
| 15 |
+
# day of the week (0-6)
|
| 16 |
+
# nightly build at 15:50 UTC time every day
|
| 17 |
+
- cron: "50 15 * * *"
|
| 18 |
+
|
| 19 |
+
workflow_dispatch:
|
| 20 |
+
|
| 21 |
+
concurrency:
|
| 22 |
+
group: run_librispeech_lstm_transducer_stateless2_2022_09_03-${{ github.ref }}
|
| 23 |
+
cancel-in-progress: true
|
| 24 |
+
|
| 25 |
+
jobs:
|
| 26 |
+
run_librispeech_lstm_transducer_stateless2_2022_09_03:
|
| 27 |
+
if: github.event.label.name == 'ready' || github.event.label.name == 'LODR' || github.event.label.name == 'shallow-fusion' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
| 28 |
+
runs-on: ${{ matrix.os }}
|
| 29 |
+
strategy:
|
| 30 |
+
matrix:
|
| 31 |
+
os: [ubuntu-latest]
|
| 32 |
+
python-version: [3.8]
|
| 33 |
+
|
| 34 |
+
fail-fast: false
|
| 35 |
+
|
| 36 |
+
steps:
|
| 37 |
+
- uses: actions/checkout@v2
|
| 38 |
+
with:
|
| 39 |
+
fetch-depth: 0
|
| 40 |
+
|
| 41 |
+
- name: Setup Python ${{ matrix.python-version }}
|
| 42 |
+
uses: actions/setup-python@v2
|
| 43 |
+
with:
|
| 44 |
+
python-version: ${{ matrix.python-version }}
|
| 45 |
+
cache: 'pip'
|
| 46 |
+
cache-dependency-path: '**/requirements-ci.txt'
|
| 47 |
+
|
| 48 |
+
- name: Install Python dependencies
|
| 49 |
+
run: |
|
| 50 |
+
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
| 51 |
+
pip uninstall -y protobuf
|
| 52 |
+
pip install --no-binary protobuf protobuf==3.20.*
|
| 53 |
+
|
| 54 |
+
- name: Cache kaldifeat
|
| 55 |
+
id: my-cache
|
| 56 |
+
uses: actions/cache@v2
|
| 57 |
+
with:
|
| 58 |
+
path: |
|
| 59 |
+
~/tmp/kaldifeat
|
| 60 |
+
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
|
| 61 |
+
|
| 62 |
+
- name: Install kaldifeat
|
| 63 |
+
if: steps.my-cache.outputs.cache-hit != 'true'
|
| 64 |
+
shell: bash
|
| 65 |
+
run: |
|
| 66 |
+
.github/scripts/install-kaldifeat.sh
|
| 67 |
+
|
| 68 |
+
- name: Cache LibriSpeech test-clean and test-other datasets
|
| 69 |
+
id: libri-test-clean-and-test-other-data
|
| 70 |
+
uses: actions/cache@v2
|
| 71 |
+
with:
|
| 72 |
+
path: |
|
| 73 |
+
~/tmp/download
|
| 74 |
+
key: cache-libri-test-clean-and-test-other
|
| 75 |
+
|
| 76 |
+
- name: Download LibriSpeech test-clean and test-other
|
| 77 |
+
if: steps.libri-test-clean-and-test-other-data.outputs.cache-hit != 'true'
|
| 78 |
+
shell: bash
|
| 79 |
+
run: |
|
| 80 |
+
.github/scripts/download-librispeech-test-clean-and-test-other-dataset.sh
|
| 81 |
+
|
| 82 |
+
- name: Prepare manifests for LibriSpeech test-clean and test-other
|
| 83 |
+
shell: bash
|
| 84 |
+
run: |
|
| 85 |
+
.github/scripts/prepare-librispeech-test-clean-and-test-other-manifests.sh
|
| 86 |
+
|
| 87 |
+
- name: Cache LibriSpeech test-clean and test-other fbank features
|
| 88 |
+
id: libri-test-clean-and-test-other-fbank
|
| 89 |
+
uses: actions/cache@v2
|
| 90 |
+
with:
|
| 91 |
+
path: |
|
| 92 |
+
~/tmp/fbank-libri
|
| 93 |
+
key: cache-libri-fbank-test-clean-and-test-other-v2
|
| 94 |
+
|
| 95 |
+
- name: Compute fbank for LibriSpeech test-clean and test-other
|
| 96 |
+
if: steps.libri-test-clean-and-test-other-fbank.outputs.cache-hit != 'true'
|
| 97 |
+
shell: bash
|
| 98 |
+
run: |
|
| 99 |
+
.github/scripts/compute-fbank-librispeech-test-clean-and-test-other.sh
|
| 100 |
+
|
| 101 |
+
- name: Inference with pre-trained model
|
| 102 |
+
shell: bash
|
| 103 |
+
env:
|
| 104 |
+
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
| 105 |
+
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
| 106 |
+
run: |
|
| 107 |
+
mkdir -p egs/librispeech/ASR/data
|
| 108 |
+
ln -sfv ~/tmp/fbank-libri egs/librispeech/ASR/data/fbank
|
| 109 |
+
ls -lh egs/librispeech/ASR/data/*
|
| 110 |
+
|
| 111 |
+
sudo apt-get -qq install git-lfs tree
|
| 112 |
+
export PYTHONPATH=$PWD:$PYTHONPATH
|
| 113 |
+
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
| 114 |
+
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
| 115 |
+
|
| 116 |
+
.github/scripts/run-librispeech-lstm-transducer-stateless2-2022-09-03.sh
|
| 117 |
+
|
| 118 |
+
- name: Display decoding results for lstm_transducer_stateless2
|
| 119 |
+
if: github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
| 120 |
+
shell: bash
|
| 121 |
+
run: |
|
| 122 |
+
cd egs/librispeech/ASR
|
| 123 |
+
tree lstm_transducer_stateless2/exp
|
| 124 |
+
cd lstm_transducer_stateless2/exp
|
| 125 |
+
echo "===greedy search==="
|
| 126 |
+
find greedy_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
| 127 |
+
find greedy_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
| 128 |
+
|
| 129 |
+
echo "===fast_beam_search==="
|
| 130 |
+
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
| 131 |
+
find fast_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
| 132 |
+
|
| 133 |
+
# echo "===modified beam search==="
|
| 134 |
+
# find modified_beam_search -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
| 135 |
+
# find modified_beam_search -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
| 136 |
+
|
| 137 |
+
- name: Display decoding results for lstm_transducer_stateless2
|
| 138 |
+
if: github.event.label.name == 'shallow-fusion'
|
| 139 |
+
shell: bash
|
| 140 |
+
run: |
|
| 141 |
+
cd egs/librispeech/ASR
|
| 142 |
+
tree lstm_transducer_stateless2/exp
|
| 143 |
+
cd lstm_transducer_stateless2/exp
|
| 144 |
+
echo "===modified_beam_search_lm_shallow_fusion==="
|
| 145 |
+
echo "===Using RNNLM==="
|
| 146 |
+
find modified_beam_search_lm_shallow_fusion -name "log-*rnn*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
| 147 |
+
find modified_beam_search_lm_shallow_fusion -name "log-*rnn*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
| 148 |
+
|
| 149 |
+
- name: Display decoding results for lstm_transducer_stateless2
|
| 150 |
+
if: github.event.label.name == 'LODR'
|
| 151 |
+
shell: bash
|
| 152 |
+
run: |
|
| 153 |
+
cd egs/librispeech/ASR
|
| 154 |
+
tree lstm_transducer_stateless2/exp
|
| 155 |
+
cd lstm_transducer_stateless2/exp
|
| 156 |
+
echo "===modified_beam_search_rnnlm_LODR==="
|
| 157 |
+
find modified_beam_search_LODR -name "log-*" -exec grep -n --color "best for test-clean" {} + | sort -n -k2
|
| 158 |
+
find modified_beam_search_LODR -name "log-*" -exec grep -n --color "best for test-other" {} + | sort -n -k2
|
| 159 |
+
|
| 160 |
+
- name: Upload decoding results for lstm_transducer_stateless2
|
| 161 |
+
uses: actions/upload-artifact@v4
|
| 162 |
+
if: github.event_name == 'schedule' || github.event.label.name == 'shallow-fusion' || github.event.label.name == 'LODR' || github.event_name == 'workflow_dispatch'
|
| 163 |
+
with:
|
| 164 |
+
name: torch-${{ matrix.torch }}-python-${{ matrix.python-version }}-ubuntu-latest-cpu-lstm_transducer_stateless2-2022-09-03
|
| 165 |
+
path: egs/librispeech/ASR/lstm_transducer_stateless2/exp/
|
.github/workflows/run-multi-corpora-zipformer.yml
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2023 Xiaomi Corp. (author: Zengrui Jin)
|
| 2 |
+
|
| 3 |
+
# See ../../LICENSE for clarification regarding multiple authors
|
| 4 |
+
#
|
| 5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
+
# you may not use this file except in compliance with the License.
|
| 7 |
+
# You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
+
# See the License for the specific language governing permissions and
|
| 15 |
+
# limitations under the License.
|
| 16 |
+
|
| 17 |
+
name: run-multi-corpora-zipformer
|
| 18 |
+
|
| 19 |
+
on:
|
| 20 |
+
push:
|
| 21 |
+
branches:
|
| 22 |
+
- master
|
| 23 |
+
pull_request:
|
| 24 |
+
types: [labeled]
|
| 25 |
+
|
| 26 |
+
workflow_dispatch:
|
| 27 |
+
|
| 28 |
+
concurrency:
|
| 29 |
+
group: run_multi-corpora_zipformer-${{ github.ref }}
|
| 30 |
+
cancel-in-progress: true
|
| 31 |
+
|
| 32 |
+
jobs:
|
| 33 |
+
run_multi-corpora_zipformer:
|
| 34 |
+
      if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'multi-zh_hans' || github.event.label.name == 'zipformer' || github.event.label.name == 'multi-corpora'
|
| 35 |
+
runs-on: ${{ matrix.os }}
|
| 36 |
+
strategy:
|
| 37 |
+
matrix:
|
| 38 |
+
os: [ubuntu-latest]
|
| 39 |
+
python-version: [3.8]
|
| 40 |
+
|
| 41 |
+
fail-fast: false
|
| 42 |
+
|
| 43 |
+
steps:
|
| 44 |
+
- uses: actions/checkout@v2
|
| 45 |
+
with:
|
| 46 |
+
fetch-depth: 0
|
| 47 |
+
|
| 48 |
+
- name: Setup Python ${{ matrix.python-version }}
|
| 49 |
+
uses: actions/setup-python@v2
|
| 50 |
+
with:
|
| 51 |
+
python-version: ${{ matrix.python-version }}
|
| 52 |
+
cache: 'pip'
|
| 53 |
+
cache-dependency-path: '**/requirements-ci.txt'
|
| 54 |
+
|
| 55 |
+
- name: Install Python dependencies
|
| 56 |
+
run: |
|
| 57 |
+
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
| 58 |
+
pip uninstall -y protobuf
|
| 59 |
+
pip install --no-binary protobuf protobuf==3.20.*
|
| 60 |
+
|
| 61 |
+
- name: Cache kaldifeat
|
| 62 |
+
id: my-cache
|
| 63 |
+
uses: actions/cache@v2
|
| 64 |
+
with:
|
| 65 |
+
path: |
|
| 66 |
+
~/tmp/kaldifeat
|
| 67 |
+
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
|
| 68 |
+
|
| 69 |
+
- name: Install kaldifeat
|
| 70 |
+
if: steps.my-cache.outputs.cache-hit != 'true'
|
| 71 |
+
shell: bash
|
| 72 |
+
run: |
|
| 73 |
+
.github/scripts/install-kaldifeat.sh
|
| 74 |
+
|
| 75 |
+
- name: Inference with pre-trained model
|
| 76 |
+
shell: bash
|
| 77 |
+
env:
|
| 78 |
+
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
| 79 |
+
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
| 80 |
+
run: |
|
| 81 |
+
sudo apt-get -qq install git-lfs tree
|
| 82 |
+
export PYTHONPATH=$PWD:$PYTHONPATH
|
| 83 |
+
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
| 84 |
+
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
| 85 |
+
|
| 86 |
+
.github/scripts/run-multi-corpora-zipformer.sh
|
.github/workflows/run-ptb-rnn-lm.yml
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
name: run-ptb-rnn-lm-training
|
| 2 |
+
|
| 3 |
+
on:
|
| 4 |
+
push:
|
| 5 |
+
branches:
|
| 6 |
+
- master
|
| 7 |
+
pull_request:
|
| 8 |
+
types: [labeled]
|
| 9 |
+
|
| 10 |
+
schedule:
|
| 11 |
+
# minute (0-59)
|
| 12 |
+
# hour (0-23)
|
| 13 |
+
# day of the month (1-31)
|
| 14 |
+
# month (1-12)
|
| 15 |
+
# day of the week (0-6)
|
| 16 |
+
# nightly build at 15:50 UTC time every day
|
| 17 |
+
- cron: "50 15 * * *"
|
| 18 |
+
|
| 19 |
+
workflow_dispatch:
|
| 20 |
+
|
| 21 |
+
concurrency:
|
| 22 |
+
group: run_ptb_rnn_lm_training-${{ github.ref }}
|
| 23 |
+
cancel-in-progress: true
|
| 24 |
+
|
| 25 |
+
jobs:
|
| 26 |
+
run_ptb_rnn_lm_training:
|
| 27 |
+
      if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
| 28 |
+
runs-on: ${{ matrix.os }}
|
| 29 |
+
strategy:
|
| 30 |
+
matrix:
|
| 31 |
+
os: [ubuntu-latest]
|
| 32 |
+
python-version: ["3.8"]
|
| 33 |
+
|
| 34 |
+
fail-fast: false
|
| 35 |
+
|
| 36 |
+
steps:
|
| 37 |
+
- uses: actions/checkout@v2
|
| 38 |
+
with:
|
| 39 |
+
fetch-depth: 0
|
| 40 |
+
|
| 41 |
+
- name: Setup Python ${{ matrix.python-version }}
|
| 42 |
+
uses: actions/setup-python@v2
|
| 43 |
+
with:
|
| 44 |
+
python-version: ${{ matrix.python-version }}
|
| 45 |
+
cache: 'pip'
|
| 46 |
+
cache-dependency-path: '**/requirements-ci.txt'
|
| 47 |
+
|
| 48 |
+
- name: Install Python dependencies
|
| 49 |
+
run: |
|
| 50 |
+
grep -v '^#' ./requirements-ci.txt | grep -v kaldifst | xargs -n 1 -L 1 pip install
|
| 51 |
+
pip uninstall -y protobuf
|
| 52 |
+
pip install --no-binary protobuf protobuf==3.20.*
|
| 53 |
+
|
| 54 |
+
- name: Prepare data
|
| 55 |
+
shell: bash
|
| 56 |
+
run: |
|
| 57 |
+
export PYTHONPATH=$PWD:$PYTHONPATH
|
| 58 |
+
cd egs/ptb/LM
|
| 59 |
+
./prepare.sh
|
| 60 |
+
|
| 61 |
+
- name: Run training
|
| 62 |
+
shell: bash
|
| 63 |
+
run: |
|
| 64 |
+
export PYTHONPATH=$PWD:$PYTHONPATH
|
| 65 |
+
cd egs/ptb/LM
|
| 66 |
+
./train-rnn-lm.sh --world-size 1 --num-epochs 5 --use-epoch 4 --use-avg 2
|
| 67 |
+
|
| 68 |
+
- name: Upload pretrained models
|
| 69 |
+
uses: actions/upload-artifact@v4
|
| 70 |
+
        if: github.event.label.name == 'ready' || github.event.label.name == 'rnnlm' || github.event_name == 'push' || github.event_name == 'schedule' || github.event_name == 'workflow_dispatch'
|
| 71 |
+
with:
|
| 72 |
+
name: python-${{ matrix.python-version }}-ubuntu-rnn-lm-ptb
|
| 73 |
+
path: egs/ptb/LM/my-rnnlm-exp/
|
.github/workflows/run-swbd-conformer-ctc.yml
ADDED
|
@@ -0,0 +1,86 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright 2023 Xiaomi Corp. (author: Zengrui Jin)
|
| 2 |
+
|
| 3 |
+
# See ../../LICENSE for clarification regarding multiple authors
|
| 4 |
+
#
|
| 5 |
+
# Licensed under the Apache License, Version 2.0 (the "License");
|
| 6 |
+
# you may not use this file except in compliance with the License.
|
| 7 |
+
# You may obtain a copy of the License at
|
| 8 |
+
#
|
| 9 |
+
# http://www.apache.org/licenses/LICENSE-2.0
|
| 10 |
+
#
|
| 11 |
+
# Unless required by applicable law or agreed to in writing, software
|
| 12 |
+
# distributed under the License is distributed on an "AS IS" BASIS,
|
| 13 |
+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
| 14 |
+
# See the License for the specific language governing permissions and
|
| 15 |
+
# limitations under the License.
|
| 16 |
+
|
| 17 |
+
name: run-swbd-conformer_ctc
|
| 18 |
+
|
| 19 |
+
on:
|
| 20 |
+
push:
|
| 21 |
+
branches:
|
| 22 |
+
- master
|
| 23 |
+
pull_request:
|
| 24 |
+
types: [labeled]
|
| 25 |
+
|
| 26 |
+
workflow_dispatch:
|
| 27 |
+
|
| 28 |
+
concurrency:
|
| 29 |
+
group: run-swbd-conformer_ctc-${{ github.ref }}
|
| 30 |
+
cancel-in-progress: true
|
| 31 |
+
|
| 32 |
+
jobs:
|
| 33 |
+
run-swbd-conformer_ctc:
|
| 34 |
+
      if: github.event.label.name == 'onnx' || github.event.label.name == 'ready' || github.event_name == 'push' || github.event_name == 'workflow_dispatch' || github.event.label.name == 'swbd'
|
| 35 |
+
runs-on: ${{ matrix.os }}
|
| 36 |
+
strategy:
|
| 37 |
+
matrix:
|
| 38 |
+
os: [ubuntu-latest]
|
| 39 |
+
python-version: [3.8]
|
| 40 |
+
|
| 41 |
+
fail-fast: false
|
| 42 |
+
|
| 43 |
+
steps:
|
| 44 |
+
- uses: actions/checkout@v2
|
| 45 |
+
with:
|
| 46 |
+
fetch-depth: 0
|
| 47 |
+
|
| 48 |
+
- name: Setup Python ${{ matrix.python-version }}
|
| 49 |
+
uses: actions/setup-python@v2
|
| 50 |
+
with:
|
| 51 |
+
python-version: ${{ matrix.python-version }}
|
| 52 |
+
cache: 'pip'
|
| 53 |
+
cache-dependency-path: '**/requirements-ci.txt'
|
| 54 |
+
|
| 55 |
+
- name: Install Python dependencies
|
| 56 |
+
run: |
|
| 57 |
+
grep -v '^#' ./requirements-ci.txt | xargs -n 1 -L 1 pip install
|
| 58 |
+
pip uninstall -y protobuf
|
| 59 |
+
pip install --no-binary protobuf protobuf==3.20.*
|
| 60 |
+
|
| 61 |
+
- name: Cache kaldifeat
|
| 62 |
+
id: my-cache
|
| 63 |
+
uses: actions/cache@v2
|
| 64 |
+
with:
|
| 65 |
+
path: |
|
| 66 |
+
~/tmp/kaldifeat
|
| 67 |
+
key: cache-tmp-${{ matrix.python-version }}-2023-05-22
|
| 68 |
+
|
| 69 |
+
- name: Install kaldifeat
|
| 70 |
+
if: steps.my-cache.outputs.cache-hit != 'true'
|
| 71 |
+
shell: bash
|
| 72 |
+
run: |
|
| 73 |
+
.github/scripts/install-kaldifeat.sh
|
| 74 |
+
|
| 75 |
+
- name: Inference with pre-trained model
|
| 76 |
+
shell: bash
|
| 77 |
+
env:
|
| 78 |
+
GITHUB_EVENT_NAME: ${{ github.event_name }}
|
| 79 |
+
GITHUB_EVENT_LABEL_NAME: ${{ github.event.label.name }}
|
| 80 |
+
run: |
|
| 81 |
+
sudo apt-get -qq install git-lfs tree
|
| 82 |
+
export PYTHONPATH=$PWD:$PYTHONPATH
|
| 83 |
+
export PYTHONPATH=~/tmp/kaldifeat/kaldifeat/python:$PYTHONPATH
|
| 84 |
+
export PYTHONPATH=~/tmp/kaldifeat/build/lib:$PYTHONPATH
|
| 85 |
+
|
| 86 |
+
.github/scripts/run-swbd-conformer-ctc-2023-08-26.sh
|