#!/usr/bin/env bash
#
# The shebang must be the very first line of the file to be honoured.
#
# -e: abort the script as soon as any command exits non-zero.
# -x: echo every command before it runs.
set -ex
|
|
|
|
|
# Print one timestamped log line on stdout, tagged with the call site.
#
# The tag has the form "(<file>:<line>:<function>)", where <file> is the
# basename of BASH_SOURCE[1], <line> is BASH_LINENO[0] and <function> is
# FUNCNAME[1], i.e. they describe the caller of log, not log itself.
#
# Arguments: the message words; they are joined with "$*".
# Outputs:   "<YYYY-mm-dd HH:MM:SS> (<tag>) <message>". The line goes through
#            `echo -e`, so backslash escapes in the message are interpreted.
log() {
  local fname=${BASH_SOURCE[1]##*/}
  local stamp
  stamp=$(date '+%Y-%m-%d %H:%M:%S')
  echo -e "${stamp} (${fname}:${BASH_LINENO[0]}:${FUNCNAME[1]}) $*"
}
|
|
|
|
|
# Every relative path used below (./zipformer, ./local, download/, data/, ...)
# is resolved from this directory, so stop right away if it cannot be entered.
cd egs/librispeech/ASR || exit 1
|
|
|
|
|
# Download the LM files and the LibriSpeech archive from the
# csukuangfj/librispeech-for-ci Hugging Face repo, then build manifests,
# compute fbank features and run prepare.sh stages 5-6.
#
# Globals:   none
# Arguments: none
# Outputs:   directory listings on stdout; files under download/ and data/.
function prepare_data() {
  local base_url=https://huggingface.co/csukuangfj/librispeech-for-ci/resolve/main
  local lm_file

  # Language-model text, lexicon and vocabulary.
  mkdir -p download/lm
  pushd download/lm
  for lm_file in \
    librispeech-lm-norm.txt.gz \
    librispeech-lexicon.txt \
    librispeech-vocab.txt; do
    wget -q "$base_url/$lm_file"
  done
  ls -lh
  gunzip librispeech-lm-norm.txt.gz
  ls -lh
  popd

  # Audio archive; the tarball is removed once it has been unpacked.
  pushd download/
  wget -q "$base_url/LibriSpeech.tar.bz2"
  tar xf LibriSpeech.tar.bz2
  rm LibriSpeech.tar.bz2

  cd LibriSpeech
  # Alias the other training-split names to train-clean-100.
  ln -s train-clean-100 train-clean-360
  # The original linked train-other-500 to itself, which yields a symlink
  # loop; point it at train-clean-100 like the line above.
  ln -s train-clean-100 train-other-500
  # popd returns to the directory that was current before `pushd download/`,
  # so it also undoes the `cd LibriSpeech`.
  popd

  mkdir -p data/manifests

  lhotse prepare librispeech -j 2 \
    -p dev-clean \
    -p dev-other \
    -p test-clean \
    -p test-other \
    -p train-clean-100 \
    download/LibriSpeech data/manifests
  ls -lh data/manifests

  ./local/compute_fbank_librispeech.py \
    --dataset "dev-clean dev-other test-clean test-other train-clean-100" \
    --perturb-speed False
  ls -lh data/fbank

  ./prepare.sh --stage 5 --stop-stage 6
}
|
|
|
|
|
# Run ./zipformer/train.py for a single epoch with small layer/dimension
# settings and --print-diagnostics 1, writing to zipformer/exp-small.
#
# Arguments: none
function run_diagnostics() {
  # Keeping the options in an array avoids fragile backslash continuations.
  local -a train_args=(
    --world-size 1
    --num-epochs 1
    --start-epoch 1
    --use-fp16 0
    --exp-dir zipformer/exp-small
    --causal 0
    --num-encoder-layers 1,1,1,1,1,1
    --feedforward-dim 64,96,96,96,96,96
    --encoder-dim 32,64,64,64,64,64
    --encoder-unmasked-dim 32,32,32,32,32,32
    --base-lr 0.04
    --full-libri 0
    --enable-musan 0
    --max-duration 30
    --print-diagnostics 1
  )

  ./zipformer/train.py "${train_args[@]}"
}
|
|
|
|
|
# Clone the streaming zipformer (small, 2024-03-18) model repo, export a
# streaming CTC ONNX model from it and decode three test wavs with
# onnx_pretrained_ctc_HLG_streaming.py. The clone is deleted afterwards.
#
# Arguments: none
function test_streaming_zipformer_ctc_hlg() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-streaming-zipformer-small-2024-03-18
  local repo exp_dir lang_dir wav
  repo=$(basename "$repo_url")
  exp_dir=$repo/exp-ctc-rnnt-small
  lang_dir=$repo/data/lang_bpe_500

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"

  # Delete the .onnx files that came with the clone; presumably so the model
  # decoded below is the one produced by the export step.
  rm "$exp_dir"/*.onnx
  ls -lh "$exp_dir"

  ./zipformer/export-onnx-streaming-ctc.py \
    --tokens "$lang_dir/tokens.txt" \
    --epoch 30 \
    --avg 3 \
    --exp-dir "$exp_dir" \
    --causal 1 \
    --use-ctc 1 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --num-encoder-layers 2,2,2,2,2,2 \
    --feedforward-dim 512,768,768,768,768,768 \
    --encoder-dim 192,256,256,256,256,256 \
    --encoder-unmasked-dim 192,192,192,192,192,192

  ls -lh "$exp_dir"

  for wav in 0.wav 1.wav 8k.wav; do
    python3 ./zipformer/onnx_pretrained_ctc_HLG_streaming.py \
      --nn-model "$exp_dir/ctc-epoch-30-avg-3-chunk-16-left-128.int8.onnx" \
      --words "$lang_dir/words.txt" \
      --HLG "$lang_dir/HLG.fst" \
      "$repo/test_wavs/$wav"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the pruned-transducer-stateless (2022-03-12) model repo and run
# ./pruned_transducer_stateless/pretrained.py on three test wavs with greedy
# search (--max-sym-per-frame 1, 2, 3) and with three beam-search methods
# (--beam-size 4). The clone is deleted afterwards.
#
# Arguments: none
function test_pruned_transducer_stateless_2022_03_12() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless-2022-03-12
  local repo sym method
  repo=$(basename "$repo_url")
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in fast_beam_search modified_beam_search beam_search; do
    log "$method"

    ./pruned_transducer_stateless/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the pruned-transducer-stateless2 (2022-04-29) model repo, fetch only
# the LFS files needed, and run ./pruned_transducer_stateless2/pretrained.py
# on three test wavs with greedy search (--max-sym-per-frame 1, 2, 3) and
# three beam-search methods (--beam-size 4). The clone is deleted afterwards.
#
# Arguments: none
function test_pruned_transducer_stateless2_2022_04_29() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless2-2022-04-29
  local repo sym method
  repo=$(basename "$repo_url")
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Downloading pre-trained model from $repo_url"
  # Clone without downloading LFS objects; the two needed are pulled below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"

  pushd "$repo"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/pretrained-epoch-38-avg-10.pt"
  popd

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  # Expose the checkpoint under the name passed to --checkpoint below.
  pushd "$repo/exp"
  ln -s pretrained-epoch-38-avg-10.pt pretrained.pt
  popd

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless2/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the pruned-transducer-stateless3 (2022-04-29) model repo, fetch only
# the LFS files needed, and run ./pruned_transducer_stateless3/pretrained.py
# on three test wavs with greedy search (--max-sym-per-frame 1, 2, 3) and
# three beam-search methods (--beam-size 4). The clone is deleted afterwards.
#
# Arguments: none
function test_pruned_transducer_stateless3_2022_04_29() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-04-29
  local repo sym method
  repo=$(basename "$repo_url")
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Downloading pre-trained model from $repo_url"
  # Clone without downloading LFS objects; the two needed are pulled below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"

  pushd "$repo"
  git lfs pull --include "data/lang_bpe_500/bpe.model"
  git lfs pull --include "exp/pretrained-epoch-25-avg-6.pt"
  popd

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  # Expose the checkpoint under the name passed to --checkpoint below.
  pushd "$repo/exp"
  ln -s pretrained-epoch-25-avg-6.pt pretrained.pt
  popd

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless3/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless3/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the pruned-transducer-stateless5 (2022-05-13) model repo and run
# ./pruned_transducer_stateless5/pretrained.py on three test wavs with greedy
# search (--max-sym-per-frame 1, 2, 3) and three beam-search methods
# (--beam-size 4). The clone is deleted afterwards.
#
# Arguments: none
function test_pruned_transducer_stateless5_2022_05_13() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless5-2022-05-13
  local repo sym method
  repo=$(basename "$repo_url")
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )
  # Model-shape options passed to every pretrained.py call. The original
  # repeated them in both loops (after the wav files in the second one);
  # they are now shared and always placed before the positional arguments.
  local -a model_args=(
    --num-encoder-layers 18
    --dim-feedforward 2048
    --nhead 8
    --encoder-dim 512
    --decoder-dim 512
    --joiner-dim 512
  )

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  # Expose the checkpoint under the name passed to --checkpoint below.
  pushd "$repo/exp"
  ln -s pretrained-epoch-39-avg-7.pt pretrained.pt
  popd

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless5/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${model_args[@]}" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless5/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${model_args[@]}" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the pruned-transducer-stateless7 (2022-11-11) model repo, fetch the
# LFS files needed, export with --jit 1, decode with jit_pretrained.py, then
# run pretrained.py with greedy search (--max-sym-per-frame 1, 2, 3) and
# three beam-search methods (--beam-size 4). The clone is deleted afterwards.
#
# Arguments: none
function test_pruned_transducer_stateless7_2022_11_11() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless7-2022-11-11
  local repo lfs_file sym method
  repo=$(basename "$repo_url")
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  # Clone without downloading LFS objects; the needed ones are pulled below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  for lfs_file in \
    "data/lang_bpe_500/bpe.model" \
    "exp/cpu_jit.pt" \
    "exp/pretrained.pt"; do
    git lfs pull --include "$lfs_file"
  done
  # export.py is invoked with --epoch 99 below; give it a matching file name.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless7/export.py \
    --exp-dir "$repo/exp" \
    --use-averaged-model false \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless7/jit_pretrained.py \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    --nn-model-filename "$repo/exp/cpu_jit.pt" \
    "${wavs[@]}"

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless7/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless7/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the pruned-transducer-stateless8 (2022-11-14) model repo, fetch the
# LFS files needed, decode with jit_pretrained.py using the downloaded
# cpu_jit.pt, export with --jit 1 and decode again, then run pretrained.py
# with greedy search (--max-sym-per-frame 1, 2, 3) and three beam-search
# methods (--beam-size 4). The clone is deleted afterwards.
#
# Arguments: none
function test_pruned_transducer_stateless8_2022_11_14() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless8-2022-11-14
  local repo lfs_file sym method
  repo=$(basename "$repo_url")
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )
  # The same jit decoding command runs before and after the export step.
  local -a jit_decode=(
    ./pruned_transducer_stateless8/jit_pretrained.py
    --bpe-model "$repo/data/lang_bpe_500/bpe.model"
    --nn-model-filename "$repo/exp/cpu_jit.pt"
    "${wavs[@]}"
  )

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  # Clone without downloading LFS objects; the needed ones are pulled below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  for lfs_file in \
    "data/lang_bpe_500/bpe.model" \
    "exp/cpu_jit.pt" \
    "exp/pretrained.pt"; do
    git lfs pull --include "$lfs_file"
  done
  # export.py is invoked with --epoch 99 below; give it a matching file name.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Decode with models exported by torch.jit.script()"

  "${jit_decode[@]}"

  log "Export to torchscript model"
  ./pruned_transducer_stateless8/export.py \
    --exp-dir "$repo/exp" \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --use-averaged-model false \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  "${jit_decode[@]}"

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless8/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless8/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the pruned-transducer-stateless7-ctc (2022-12-01) model repo, fetch
# the LFS files needed, export with --jit 1, then decode three test wavs with
# jit_pretrained.py, jit_pretrained_ctc.py, pretrained.py (greedy and beam
# search) and pretrained_ctc.py. The clone is deleted afterwards.
#
# Arguments: none
function test_pruned_transducer_stateless7_ctc_2022_12_01() {
  local repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-ctc-2022-12-01
  local repo lang_dir lfs_file sym method m
  repo=$(basename "$repo_url")
  lang_dir=$repo/data/lang_bpe_500
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Downloading pre-trained model from $repo_url"
  # Clone without downloading LFS objects; the needed ones are pulled below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  for lfs_file in \
    "data/lang_bpe_500/HLG.pt" \
    "data/lang_bpe_500/L.pt" \
    "data/lang_bpe_500/LG.pt" \
    "data/lang_bpe_500/Linv.pt" \
    "data/lang_bpe_500/bpe.model" \
    "data/lm/G_4_gram.pt" \
    "exp/cpu_jit.pt" \
    "exp/pretrained.pt"; do
    git lfs pull --include "$lfs_file"
  done
  # export.py is invoked with --epoch 99 below; give it a matching file name.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless7_ctc/export.py \
    --exp-dir "$repo/exp" \
    --use-averaged-model false \
    --tokens "$lang_dir/tokens.txt" \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless7_ctc/jit_pretrained.py \
    --bpe-model "$lang_dir/bpe.model" \
    --nn-model-filename "$repo/exp/cpu_jit.pt" \
    "${wavs[@]}"

  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc/jit_pretrained_ctc.py \
      --model-filename "$repo/exp/cpu_jit.pt" \
      --words-file "$lang_dir/words.txt" \
      --HLG "$lang_dir/HLG.pt" \
      --bpe-model "$lang_dir/bpe.model" \
      --G "$repo/data/lm/G_4_gram.pt" \
      --method "$m" \
      --sample-rate 16000 \
      "${wavs[@]}"
  done

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless7_ctc/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$lang_dir/tokens.txt" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless7_ctc/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$lang_dir/tokens.txt" \
      "${wavs[@]}"
  done

  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc/pretrained_ctc.py \
      --checkpoint "$repo/exp/pretrained.pt" \
      --words-file "$lang_dir/words.txt" \
      --HLG "$lang_dir/HLG.pt" \
      --bpe-model "$lang_dir/bpe.model" \
      --G "$repo/data/lm/G_4_gram.pt" \
      --method "$m" \
      --sample-rate 16000 \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the zipformer-mmi (2022-12-08) model repo, fetch the LFS files
# needed, export with --jit 1, decode three test wavs with jit_pretrained.py
# and then with pretrained.py for five decoding methods. The clone is deleted
# afterwards.
#
# Arguments: none
function test_zipformer_mmi_2022_12_08() {
  local repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-mmi-2022-12-08
  local repo lang_dir lfs_file method
  repo=$(basename "$repo_url")
  lang_dir=$repo/data/lang_bpe_500
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Downloading pre-trained model from $repo_url"
  # Clone without downloading LFS objects; the needed ones are pulled below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  for lfs_file in \
    "data/lang_bpe_500/3gram.pt" \
    "data/lang_bpe_500/4gram.pt" \
    "data/lang_bpe_500/L.pt" \
    "data/lang_bpe_500/LG.pt" \
    "data/lang_bpe_500/Linv.pt" \
    "data/lang_bpe_500/bpe.model" \
    "exp/cpu_jit.pt" \
    "exp/pretrained.pt"; do
    git lfs pull --include "$lfs_file"
  done
  # export.py is invoked with --epoch 99 below; give it a matching file name.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer_mmi/export.py \
    --exp-dir "$repo/exp" \
    --use-averaged-model false \
    --tokens "$lang_dir/tokens.txt" \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./zipformer_mmi/jit_pretrained.py \
    --bpe-model "$lang_dir/bpe.model" \
    --nn-model-filename "$repo/exp/cpu_jit.pt" \
    --lang-dir "$lang_dir" \
    "${wavs[@]}"

  for method in 1best nbest nbest-rescoring-LG nbest-rescoring-3-gram nbest-rescoring-4-gram; do
    log "$method"

    ./zipformer_mmi/pretrained.py \
      --method "$method" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --lang-dir "$lang_dir" \
      --tokens "$lang_dir/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the pruned-transducer-stateless7-streaming (2022-12-29) model repo,
# fetch the LFS files needed, export with --jit 1 and via jit_trace_export.py,
# decode with both kinds of exported model, then run pretrained.py with greedy
# search (--max-sym-per-frame 1, 2, 3) and three beam-search methods
# (--beam-size 4). Every step uses --decode-chunk-len 32. The clone is deleted
# afterwards.
#
# Arguments: none
function test_pruned_transducer_stateless7_streaming_2022_12_29() {
  local repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-pruned-transducer-stateless7-streaming-2022-12-29
  local repo lang_dir lfs_file sym method
  repo=$(basename "$repo_url")
  lang_dir=$repo/data/lang_bpe_500
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  # Clone without downloading LFS objects; the needed ones are pulled below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo"
  for lfs_file in \
    "data/lang_bpe_500/bpe.model" \
    "exp/cpu_jit.pt" \
    "exp/pretrained.pt" \
    "exp/encoder_jit_trace.pt" \
    "exp/decoder_jit_trace.pt" \
    "exp/joiner_jit_trace.pt"; do
    git lfs pull --include "$lfs_file"
  done
  cd exp
  # The export scripts are invoked with --epoch 99 below; give them a
  # matching file name.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  # popd returns to the directory that was current before `pushd`, so it
  # also undoes the `cd exp`.
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless7_streaming/export.py \
    --exp-dir "$repo/exp" \
    --use-averaged-model false \
    --tokens "$lang_dir/tokens.txt" \
    --decode-chunk-len 32 \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless7_streaming/jit_pretrained.py \
    --bpe-model "$lang_dir/bpe.model" \
    --nn-model-filename "$repo/exp/cpu_jit.pt" \
    --decode-chunk-len 32 \
    "${wavs[@]}"

  log "Export to torchscript model by torch.jit.trace()"
  ./pruned_transducer_stateless7_streaming/jit_trace_export.py \
    --exp-dir "$repo/exp" \
    --use-averaged-model false \
    --bpe-model "$lang_dir/bpe.model" \
    --decode-chunk-len 32 \
    --epoch 99 \
    --avg 1

  log "Decode with models exported by torch.jit.trace()"

  # Only the first test wav is decoded in this step.
  ./pruned_transducer_stateless7_streaming/jit_trace_pretrained.py \
    --bpe-model "$lang_dir/bpe.model" \
    --encoder-model-filename "$repo/exp/encoder_jit_trace.pt" \
    --decoder-model-filename "$repo/exp/decoder_jit_trace.pt" \
    --joiner-model-filename "$repo/exp/joiner_jit_trace.pt" \
    --decode-chunk-len 32 \
    "${wavs[0]}"

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless7_streaming/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$lang_dir/tokens.txt" \
      --decode-chunk-len 32 \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless7_streaming/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$lang_dir/tokens.txt" \
      --decode-chunk-len 32 \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the pruned_transducer_stateless7_ctc_bs (2023-01-29) model repo,
# fetch the LFS files needed, export with --jit 1, then decode three test
# wavs with jit_pretrained.py, jit_pretrained_ctc.py, pretrained.py (greedy
# and beam search) and pretrained_ctc.py. The clone is deleted afterwards.
#
# Arguments: none
function test_pruned_transducer_stateless7_ctc_bs_2023_01_29() {
  local repo_url=https://huggingface.co/yfyeung/icefall-asr-librispeech-pruned_transducer_stateless7_ctc_bs-2023-01-29
  local repo lang_dir lfs_file sym method m
  repo=$(basename "$repo_url")
  lang_dir=$repo/data/lang_bpe_500
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Downloading pre-trained model from $repo_url"
  # Clone without downloading LFS objects; the needed ones are pulled below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  for lfs_file in \
    "data/lang_bpe_500/L.pt" \
    "data/lang_bpe_500/LG.pt" \
    "data/lang_bpe_500/HLG.pt" \
    "data/lang_bpe_500/Linv.pt" \
    "data/lang_bpe_500/bpe.model" \
    "exp/cpu_jit.pt" \
    "exp/pretrained.pt"; do
    git lfs pull --include "$lfs_file"
  done
  # export.py is invoked with --epoch 99 below; give it a matching file name.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless7_ctc_bs/export.py \
    --exp-dir "$repo/exp" \
    --use-averaged-model false \
    --tokens "$lang_dir/tokens.txt" \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless7_ctc_bs/jit_pretrained.py \
    --bpe-model "$lang_dir/bpe.model" \
    --nn-model-filename "$repo/exp/cpu_jit.pt" \
    "${wavs[@]}"

  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc_bs/jit_pretrained_ctc.py \
      --model-filename "$repo/exp/cpu_jit.pt" \
      --words-file "$lang_dir/words.txt" \
      --HLG "$lang_dir/HLG.pt" \
      --bpe-model "$lang_dir/bpe.model" \
      --method "$m" \
      --sample-rate 16000 \
      "${wavs[@]}"
  done

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$lang_dir/tokens.txt" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless7_ctc_bs/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$lang_dir/tokens.txt" \
      "${wavs[@]}"
  done

  for m in ctc-decoding 1best; do
    ./pruned_transducer_stateless7_ctc_bs/pretrained_ctc.py \
      --checkpoint "$repo/exp/pretrained.pt" \
      --words-file "$lang_dir/words.txt" \
      --HLG "$lang_dir/HLG.pt" \
      --bpe-model "$lang_dir/bpe.model" \
      --method "$m" \
      --sample-rate 16000 \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the conformer-ctc3 (2022-11-27) model repo, fetch the LFS files
# needed, decode three test wavs with jit_pretrained.py using the downloaded
# jit_trace.pt, export with --jit-trace 1 and decode again, then decode with
# pretrained.py. Each decode runs for the methods ctc-decoding and 1best.
# The clone is deleted afterwards.
#
# Arguments: none
function test_conformer_ctc3_2022_11_27() {
  local repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-conformer-ctc3-2022-11-27
  local repo lang_dir lfs_file m
  repo=$(basename "$repo_url")
  lang_dir=$repo/data/lang_bpe_500
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )
  # Options common to both jit_pretrained.py passes (before/after export).
  local -a jit_args=(
    --model-filename "$repo/exp/jit_trace.pt"
    --words-file "$lang_dir/words.txt"
    --HLG "$lang_dir/HLG.pt"
    --bpe-model "$lang_dir/bpe.model"
    --G "$repo/data/lm/G_4_gram.pt"
  )

  log "Downloading pre-trained model from $repo_url"
  # Clone without downloading LFS objects; the needed ones are pulled below.
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  for lfs_file in \
    "data/lang_bpe_500/HLG.pt" \
    "data/lang_bpe_500/L.pt" \
    "data/lang_bpe_500/LG.pt" \
    "data/lang_bpe_500/Linv.pt" \
    "data/lang_bpe_500/bpe.model" \
    "data/lm/G_4_gram.pt" \
    "exp/jit_trace.pt" \
    "exp/pretrained.pt"; do
    git lfs pull --include "$lfs_file"
  done
  # export.py is invoked with --epoch 99 below; give it a matching file name.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Decode with models exported by torch.jit.trace()"

  for m in ctc-decoding 1best; do
    ./conformer_ctc3/jit_pretrained.py \
      "${jit_args[@]}" \
      --method "$m" \
      --sample-rate 16000 \
      "${wavs[@]}"
  done

  log "Export to torchscript model"

  ./conformer_ctc3/export.py \
    --exp-dir "$repo/exp" \
    --tokens "$lang_dir/tokens.txt" \
    --jit-trace 1 \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.trace()"

  for m in ctc-decoding 1best; do
    ./conformer_ctc3/jit_pretrained.py \
      "${jit_args[@]}" \
      --method "$m" \
      --sample-rate 16000 \
      "${wavs[@]}"
  done

  for m in ctc-decoding 1best; do
    ./conformer_ctc3/pretrained.py \
      --checkpoint "$repo/exp/pretrained.pt" \
      --words-file "$lang_dir/words.txt" \
      --HLG "$lang_dir/HLG.pt" \
      --tokens "$lang_dir/tokens.txt" \
      --G "$repo/data/lm/G_4_gram.pt" \
      --method "$m" \
      --sample-rate 16000 \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Clone the lstm-transducer-stateless2 (2022-09-03) model repo, export with
# --jit-trace 1, decode three test wavs with jit_pretrained.py, then run
# pretrained.py with greedy search (--max-sym-per-frame 1, 2, 3) and three
# beam-search methods (--beam-size 4). The clone is deleted afterwards.
#
# Globals:   abs_repo (written)
# Arguments: none
function test_lstm_transducer_stateless2_2022_09_03() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-lstm-transducer-stateless2-2022-09-03
  local repo sym method
  repo=$(basename "$repo_url")
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  # NOTE(review): abs_repo is not read anywhere in this function; it is kept
  # as a global, as in the original, in case other code depends on it.
  abs_repo=$(realpath "$repo")

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  # One checkpoint, two names: pretrained.pt for pretrained.py and
  # epoch-99.pt to match the --epoch 99 passed to export.py.
  pushd "$repo/exp"
  ln -s pretrained-iter-468000-avg-16.pt pretrained.pt
  ln -s pretrained-iter-468000-avg-16.pt epoch-99.pt
  popd

  log "Test exporting with torch.jit.trace()"

  ./lstm_transducer_stateless2/export.py \
    --exp-dir "$repo/exp" \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --epoch 99 \
    --avg 1 \
    --use-averaged-model 0 \
    --jit-trace 1

  log "Decode with models exported by torch.jit.trace()"

  ./lstm_transducer_stateless2/jit_pretrained.py \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    --encoder-model-filename "$repo/exp/encoder_jit_trace.pt" \
    --decoder-model-filename "$repo/exp/decoder_jit_trace.pt" \
    --joiner-model-filename "$repo/exp/joiner_jit_trace.pt" \
    "${wavs[@]}"

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./lstm_transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./lstm_transducer_stateless2/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running `rm -rf` on an empty path.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test the pruned_transducer_stateless3 recipe against a pre-trained
# model cloned from Hugging Face.
#
# Steps: clone the model repo, link the released checkpoint under the names
# the scripts are given (pretrained.pt / epoch-99.pt), run export.py twice
# (--jit 1, then --jit-trace 1), decode with the *_jit_trace.pt and
# *_jit_script.pt files, then decode from the checkpoint with greedy search
# (--max-sym-per-frame 1..3) and three beam-search methods.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_pruned_transducer_stateless3_2022_05_13() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-pruned-transducer-stateless3-2022-05-13
  local repo sym method

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")

  # The same three utterances are decoded by every step below.
  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  ln -s pretrained-iter-1224000-avg-14.pt pretrained.pt
  ln -s pretrained-iter-1224000-avg-14.pt epoch-99.pt
  popd

  log "Export to torchscript model"
  ./pruned_transducer_stateless3/export.py \
    --exp-dir "$repo/exp" \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ./pruned_transducer_stateless3/export.py \
    --exp-dir "$repo/exp" \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --epoch 99 \
    --avg 1 \
    --jit-trace 1

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.trace()"

  ./pruned_transducer_stateless3/jit_pretrained.py \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    --encoder-model-filename "$repo/exp/encoder_jit_trace.pt" \
    --decoder-model-filename "$repo/exp/decoder_jit_trace.pt" \
    --joiner-model-filename "$repo/exp/joiner_jit_trace.pt" \
    "${wavs[@]}"

  log "Decode with models exported by torch.jit.script()"

  ./pruned_transducer_stateless3/jit_pretrained.py \
    --bpe-model "$repo/data/lang_bpe_500/bpe.model" \
    --encoder-model-filename "$repo/exp/encoder_jit_script.pt" \
    --decoder-model-filename "$repo/exp/decoder_jit_script.pt" \
    --joiner-model-filename "$repo/exp/joiner_jit_script.pt" \
    "${wavs[@]}"

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless3/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless3/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test pruned_transducer_stateless2/pretrained.py in simulated
# streaming mode against a pre-trained model cloned from Hugging Face.
#
# Steps: clone the model repo, link the released checkpoint as
# pretrained.pt, then decode three test utterances with greedy search
# (--max-sym-per-frame 1..3) and three beam-search methods, always passing
# --simulate-streaming 1 --causal-convolution 1.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_streaming_pruned_transducer_stateless2_20220625() {
  local repo_url=https://huggingface.co/pkufool/icefall_librispeech_streaming_pruned_transducer_stateless2_20220625
  local repo sym method

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")

  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  ln -s pretrained-epoch-24-avg-10.pt pretrained.pt
  popd

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./pruned_transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      --simulate-streaming 1 \
      --causal-convolution 1 \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./pruned_transducer_stateless2/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      --simulate-streaming 1 \
      --causal-convolution 1 \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test the streaming zipformer recipe (export, streaming JIT decode,
# checkpoint decode) against a pre-trained model cloned from Hugging Face.
#
# The repo is cloned with GIT_LFS_SKIP_SMUDGE=1 and only the LFS objects
# listed below are pulled afterwards.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_streaming_zipformer_2023_05_17() {
  local repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-streaming-zipformer-2023-05-17
  local repo method lfs_path

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  # NOTE(review): the include patterns look repo-root relative even though
  # the pull runs from exp/ -- confirm against git-lfs documentation.
  for lfs_path in \
    "data/lang_bpe_500/bpe.model" \
    "data/lang_bpe_500/tokens.txt" \
    "exp/jit_script_chunk_16_left_128.pt" \
    "exp/pretrained.pt"; do
    git lfs pull --include "$lfs_path"
  done
  # export.py below is invoked with --epoch 99, hence the epoch-99.pt alias.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer/export.py \
    --exp-dir "$repo/exp" \
    --use-averaged-model false \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --causal 1 \
    --chunk-size 16 \
    --left-context-frames 128 \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  # Only a single utterance is passed to the streaming decoder.
  ./zipformer/jit_pretrained_streaming.py \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --nn-model-filename "$repo/exp/jit_script_chunk_16_left_128.pt" \
    "$repo/test_wavs/1089-134686-0001.wav"

  for method in greedy_search modified_beam_search fast_beam_search; do
    log "$method"

    ./zipformer/pretrained.py \
      --causal 1 \
      --chunk-size 16 \
      --left-context-frames 128 \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "$repo/test_wavs/1089-134686-0001.wav" \
      "$repo/test_wavs/1221-135766-0001.wav" \
      "$repo/test_wavs/1221-135766-0002.wav"
  done

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test the (non-streaming) zipformer recipe: export with --jit 1,
# decode with jit_pretrained.py, then decode from the checkpoint with three
# search methods, using a pre-trained model cloned from Hugging Face.
#
# NOTE(review): the function name says 2023_05_18 while the repo URL below
# is dated 2023-05-15 -- confirm which date is intended. The name is kept
# because the driver sequence at the bottom of the file calls it.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_zipformer_2023_05_18() {
  local repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-2023-05-15
  local repo method lfs_path

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")

  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  # The clone skipped LFS smudging; pull only the objects used below.
  for lfs_path in \
    "data/lang_bpe_500/bpe.model" \
    "data/lang_bpe_500/tokens.txt" \
    "exp/jit_script.pt" \
    "exp/pretrained.pt"; do
    git lfs pull --include "$lfs_path"
  done
  # export.py below is invoked with --epoch 99, hence the epoch-99.pt alias.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer/export.py \
    --exp-dir "$repo/exp" \
    --use-averaged-model false \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  ./zipformer/jit_pretrained.py \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --nn-model-filename "$repo/exp/jit_script.pt" \
    "${wavs[@]}"

  for method in greedy_search modified_beam_search fast_beam_search; do
    log "$method"

    ./zipformer/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test transducer_stateless2/pretrained.py against a pre-trained
# model cloned from Hugging Face: greedy search with --max-sym-per-frame
# 1..3, then fast_beam_search, modified_beam_search and beam_search.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_transducer_stateless2_torchaudio_2022_04_19() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless2-torchaudio-2022-04-19
  local repo sym method

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")

  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless2/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in fast_beam_search modified_beam_search beam_search; do
    log "$method"

    ./transducer_stateless2/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test the zipformer transducer+CTC recipe against a pre-trained
# model cloned from Hugging Face: export with --jit 1, then run
# jit_pretrained_ctc.py and pretrained_ctc.py with the methods
# "ctc-decoding" and "1best".
#
# The repo is cloned with GIT_LFS_SKIP_SMUDGE=1 and only the LFS objects
# listed below are pulled afterwards.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_zipformer_transducer_ctc_2023_06_13() {
  local repo_url=https://huggingface.co/Zengwei/icefall-asr-librispeech-zipformer-transducer-ctc-2023-06-13
  local repo method lfs_path

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")

  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  pushd "$repo/exp"
  for lfs_path in \
    "data/lang_bpe_500/bpe.model" \
    "data/lang_bpe_500/tokens.txt" \
    "data/lang_bpe_500/HLG.pt" \
    "data/lang_bpe_500/L.pt" \
    "data/lang_bpe_500/LG.pt" \
    "data/lang_bpe_500/Linv.pt" \
    "data/lm/G_4_gram.pt" \
    "exp/jit_script.pt" \
    "exp/pretrained.pt"; do
    git lfs pull --include "$lfs_path"
  done
  # export.py below is invoked with --epoch 99, hence the epoch-99.pt alias.
  ln -s pretrained.pt epoch-99.pt
  ls -lh *.pt
  popd

  log "Export to torchscript model"
  ./zipformer/export.py \
    --exp-dir "$repo/exp" \
    --use-transducer 1 \
    --use-ctc 1 \
    --use-averaged-model false \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --epoch 99 \
    --avg 1 \
    --jit 1

  ls -lh "$repo"/exp/*.pt

  log "Decode with models exported by torch.jit.script()"

  for method in ctc-decoding 1best; do
    ./zipformer/jit_pretrained_ctc.py \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      --model-filename "$repo/exp/jit_script.pt" \
      --HLG "$repo/data/lang_bpe_500/HLG.pt" \
      --words-file "$repo/data/lang_bpe_500/words.txt" \
      --G "$repo/data/lm/G_4_gram.pt" \
      --method "$method" \
      --sample-rate 16000 \
      "${wavs[@]}"
  done

  for method in ctc-decoding 1best; do
    log "$method"

    ./zipformer/pretrained_ctc.py \
      --use-transducer 1 \
      --use-ctc 1 \
      --method "$method" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      --HLG "$repo/data/lang_bpe_500/HLG.pt" \
      --G "$repo/data/lm/G_4_gram.pt" \
      --words-file "$repo/data/lang_bpe_500/words.txt" \
      --sample-rate 16000 \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test transducer_stateless_multi_datasets/pretrained.py against the
# "100h" pre-trained model cloned from Hugging Face: greedy search with
# --max-sym-per-frame 1..3, then three beam-search methods.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_100h_transducer_stateless_multi_datasets_bpe_500_2022_02_21() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-100h-transducer-stateless-multi-datasets-bpe-500-2022-02-21
  local repo sym method

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")

  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test transducer_stateless_multi_datasets/pretrained.py against a
# pre-trained model cloned from Hugging Face: greedy search with
# --max-sym-per-frame 1..3, then three beam-search methods.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_transducer_stateless_multi_datasets_bpe_500_2022_03_01() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-multi-datasets-bpe-500-2022-03-01
  local repo sym method

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")

  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in modified_beam_search beam_search fast_beam_search; do
    log "$method"

    ./transducer_stateless_multi_datasets/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test transducer_stateless/pretrained.py against a pre-trained model
# cloned from Hugging Face: greedy search with --max-sym-per-frame 1..3,
# then fast_beam_search, modified_beam_search and beam_search.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_transducer_stateless_bpe_500_2022_02_07() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-stateless-bpe-500-2022-02-07
  local repo sym method

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")

  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  for sym in 1 2 3; do
    log "Greedy search with --max-sym-per-frame $sym"

    ./transducer_stateless/pretrained.py \
      --method greedy_search \
      --max-sym-per-frame "$sym" \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  for method in fast_beam_search modified_beam_search beam_search; do
    log "$method"

    ./transducer_stateless/pretrained.py \
      --method "$method" \
      --beam-size 4 \
      --checkpoint "$repo/exp/pretrained.pt" \
      --tokens "$repo/data/lang_bpe_500/tokens.txt" \
      "${wavs[@]}"
  done

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test the four zipformer ONNX CTC decoding scripts (greedy search,
# H, HL and HLG) against a model repo cloned from Hugging Face. Every
# script receives the same model.onnx and the same three test wavs.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_zipformer_ctc_en_2023_10_02() {
  local repo_url=https://huggingface.co/csukuangfj/sherpa-onnx-zipformer-ctc-en-2023-10-02
  local repo

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")

  local -a wavs=(
    "$repo/test_wavs/0.wav"
    "$repo/test_wavs/1.wav"
    "$repo/test_wavs/2.wav"
  )

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  log "CTC greedy search"

  ./zipformer/onnx_pretrained_ctc.py \
    --nn-model "$repo/model.onnx" \
    --tokens "$repo/tokens.txt" \
    "${wavs[@]}"

  log "CTC H decoding"

  ./zipformer/onnx_pretrained_ctc_H.py \
    --nn-model "$repo/model.onnx" \
    --tokens "$repo/tokens.txt" \
    --H "$repo/H.fst" \
    "${wavs[@]}"

  log "CTC HL decoding"

  ./zipformer/onnx_pretrained_ctc_HL.py \
    --nn-model "$repo/model.onnx" \
    --words "$repo/words.txt" \
    --HL "$repo/HL.fst" \
    "${wavs[@]}"

  log "CTC HLG decoding"

  ./zipformer/onnx_pretrained_ctc_HLG.py \
    --nn-model "$repo/model.onnx" \
    --words "$repo/words.txt" \
    --HLG "$repo/HLG.fst" \
    "${wavs[@]}"

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test the conformer_ctc recipe against a pre-trained model cloned
# from Hugging Face.
#
# Steps: clone without LFS smudging and pull only the needed LFS objects;
# decode from the checkpoint with "ctc-decoding" and "1best"; export with
# --jit 1; run local/prepare_lang_fst.py; then decode with the
# jit_pretrained_decode_with_{H,HL,HLG}.py scripts.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_conformer_ctc_jit_bpe_500_2021_11_09() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-conformer-ctc-jit-bpe-500-2021-11-09
  local repo lfs_path

  log "Downloading pre-trained model from $repo_url"
  GIT_LFS_SKIP_SMUDGE=1 git clone "$repo_url"
  repo=$(basename "$repo_url")

  local -a wavs=(
    "$repo/test_wavs/1089-134686-0001.wav"
    "$repo/test_wavs/1221-135766-0001.wav"
    "$repo/test_wavs/1221-135766-0002.wav"
  )

  pushd "$repo"

  for lfs_path in \
    "exp/pretrained.pt" \
    "data/lang_bpe_500/HLG.pt" \
    "data/lang_bpe_500/L.pt" \
    "data/lang_bpe_500/L_disambig.pt" \
    "data/lang_bpe_500/Linv.pt" \
    "data/lang_bpe_500/bpe.model" \
    "data/lang_bpe_500/lexicon.txt" \
    "data/lang_bpe_500/lexicon_disambig.txt" \
    "data/lang_bpe_500/tokens.txt" \
    "data/lang_bpe_500/words.txt" \
    "data/lm/G_3_gram.fst.txt"; do
    git lfs pull --include "$lfs_path"
  done

  popd

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  log "CTC decoding"

  ./conformer_ctc/pretrained.py \
    --method ctc-decoding \
    --num-classes 500 \
    --checkpoint "$repo/exp/pretrained.pt" \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    "${wavs[@]}"

  log "HLG decoding"

  ./conformer_ctc/pretrained.py \
    --method 1best \
    --num-classes 500 \
    --checkpoint "$repo/exp/pretrained.pt" \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --words-file "$repo/data/lang_bpe_500/words.txt" \
    --HLG "$repo/data/lang_bpe_500/HLG.pt" \
    "${wavs[@]}"

  log "CTC decoding on CPU with kaldi decoders using OpenFst"

  log "Exporting model with torchscript"

  # export.py below is invoked with --epoch 99, hence the epoch-99.pt alias.
  pushd "$repo/exp"
  ln -s pretrained.pt epoch-99.pt
  popd

  ./conformer_ctc/export.py \
    --epoch 99 \
    --avg 1 \
    --exp-dir "$repo/exp" \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    --jit 1

  ls -lh "$repo/exp"

  log "Generating H.fst, HL.fst"

  ./local/prepare_lang_fst.py --lang-dir "$repo/data/lang_bpe_500" --ngram-G "$repo/data/lm/G_3_gram.fst.txt"

  ls -lh "$repo/data/lang_bpe_500"

  log "Decoding with H on CPU with OpenFst"

  ./conformer_ctc/jit_pretrained_decode_with_H.py \
    --nn-model "$repo/exp/cpu_jit.pt" \
    --H "$repo/data/lang_bpe_500/H.fst" \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    "${wavs[@]}"

  log "Decoding with HL on CPU with OpenFst"

  ./conformer_ctc/jit_pretrained_decode_with_HL.py \
    --nn-model "$repo/exp/cpu_jit.pt" \
    --HL "$repo/data/lang_bpe_500/HL.fst" \
    --words "$repo/data/lang_bpe_500/words.txt" \
    "${wavs[@]}"

  log "Decoding with HLG on CPU with OpenFst"

  ./conformer_ctc/jit_pretrained_decode_with_HLG.py \
    --nn-model "$repo/exp/cpu_jit.pt" \
    --HLG "$repo/data/lang_bpe_500/HLG.fst" \
    --words "$repo/data/lang_bpe_500/words.txt" \
    "${wavs[@]}"

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Smoke-test transducer/pretrained.py with beam search (--beam-size 4) on
# three test utterances, using a pre-trained model cloned from Hugging Face.
#
# Arguments: none
# Side effects: creates and finally removes the cloned repo directory in the
#               current working directory.
function test_transducer_bpe_500_2021_12_23() {
  local repo_url=https://huggingface.co/csukuangfj/icefall-asr-librispeech-transducer-bpe-500-2021-12-23
  local repo

  log "Downloading pre-trained model from $repo_url"
  git lfs install
  git clone "$repo_url"
  repo=$(basename "$repo_url")

  log "Display test files"
  tree "$repo/"
  ls -lh "$repo"/test_wavs/*.wav

  log "Beam search decoding"

  ./transducer/pretrained.py \
    --method beam_search \
    --beam-size 4 \
    --checkpoint "$repo/exp/pretrained.pt" \
    --tokens "$repo/data/lang_bpe_500/tokens.txt" \
    "$repo/test_wavs/1089-134686-0001.wav" \
    "$repo/test_wavs/1221-135766-0001.wav" \
    "$repo/test_wavs/1221-135766-0002.wav"

  # ${repo:?} aborts instead of running a bare `rm -rf` if repo is empty.
  rm -rf -- "${repo:?}"
}
|
|
|
|
|
# Driver: prepare the data once, run the short diagnostics training job,
# then run each model test in order. With `set -e` active (top of file),
# the first failing step aborts the remainder of the sequence.
prepare_data
run_diagnostics
test_streaming_zipformer_ctc_hlg
test_pruned_transducer_stateless_2022_03_12
test_pruned_transducer_stateless2_2022_04_29
test_pruned_transducer_stateless3_2022_04_29
test_pruned_transducer_stateless5_2022_05_13
test_pruned_transducer_stateless7_2022_11_11
test_pruned_transducer_stateless8_2022_11_14
test_pruned_transducer_stateless7_ctc_2022_12_01
test_zipformer_mmi_2022_12_08
test_pruned_transducer_stateless7_streaming_2022_12_29
test_pruned_transducer_stateless7_ctc_bs_2023_01_29
test_conformer_ctc3_2022_11_27
test_lstm_transducer_stateless2_2022_09_03
test_pruned_transducer_stateless3_2022_05_13
test_streaming_pruned_transducer_stateless2_20220625
test_streaming_zipformer_2023_05_17
test_zipformer_2023_05_18
test_transducer_stateless2_torchaudio_2022_04_19
test_zipformer_transducer_ctc_2023_06_13
test_100h_transducer_stateless_multi_datasets_bpe_500_2022_02_21
test_transducer_stateless_multi_datasets_bpe_500_2022_03_01
test_transducer_stateless_bpe_500_2022_02_07
test_zipformer_ctc_en_2023_10_02
# NOTE(review): test_conformer_ctc_jit_bpe_500_2021_11_09 is defined above
# but is not invoked in this sequence -- confirm whether that is intentional
# before enabling it.
test_transducer_bpe_500_2021_12_23
|
|
|