#!/usr/bin/env bash

# NOTE(review): presumably selects the pure-Python protobuf backend for the
# Python tools launched by the stages below - confirm why this is required.
export PROTOCOL_BUFFERS_PYTHON_IMPLEMENTATION=python

# Abort on a failing command, on expansion of an unset variable, and on a
# failure in any stage of a pipeline.
set -euo pipefail

# Forwarded to `lhotse prepare timit -p` in stage 1.
num_phones=39

# Forwarded to `lhotse prepare timit -j` in stage 1.
nj=15

# A stage N runs only when stage <= N <= stop_stage.
stage=-1
stop_stage=100

# Root of the downloaded artefacts; the stages below read $dl_dir/lm,
# $dl_dir/timit and $dl_dir/musan.
dl_dir="$PWD/download"
# Not referenced in the visible part of this script.
splits_dir="$PWD/splits_dir"

# NOTE(review): assumed to let command-line flags override the variables
# defined above - verify against shared/parse_options.sh.
. shared/parse_options.sh || exit 1

# Output directory used by the stages below.
mkdir -p data
|
|
#######################################
# Print a timestamped message tagged with the caller's location.
# Arguments: $* - message text (backslash escapes are expanded by `echo -e`)
# Outputs:   "YYYY-MM-DD HH:MM:SS (file:line:function) message" on stdout
#######################################
log() {
  # Index 1 of BASH_SOURCE/FUNCNAME is the caller of log(); BASH_LINENO[0] is
  # the line from which log() was invoked.
  local caller_file=${BASH_SOURCE[1]##*/}
  local origin="${caller_file}:${BASH_LINENO[0]}:${FUNCNAME[1]}"
  echo -e "$(date '+%Y-%m-%d %H:%M:%S') (${origin}) $*"
}

log "dl_dir: $dl_dir"
|
|
#######################################
# Fetch the TIMIT LM repository and its git-lfs payload.
# Safe to re-run: an existing checkout (detected by its .git directory) is
# not cloned again, because `git clone` refuses a non-empty destination and
# would abort the script under `set -e`.
# Requires git-lfs, since `git lfs pull` is invoked.
# Arguments: $1 - directory that holds (or will hold) the checkout
# Returns:   non-zero if mkdir, git clone or git lfs pull fails
#######################################
download_timit_lm() {
  local lm_dir=$1

  if [ ! -d "$lm_dir/.git" ]; then
    mkdir -p "$lm_dir"
    git clone https://huggingface.co/luomingshuang/timit_lm "$lm_dir"
  fi

  # -C runs the command inside the checkout without changing this shell's cwd.
  git -C "$lm_dir" lfs pull
}

if [ "$stage" -le -1 ] && [ "$stop_stage" -ge -1 ]; then
  log "Stage -1: Download LM"
  download_timit_lm "$dl_dir/lm"
fi
|
|
#######################################
# Download a corpus with lhotse unless it is already present.
# The corpus counts as present when <root>/<corpus> is a directory; `-d`
# follows symlinks, so a link to an existing copy also skips the download.
# Arguments: $1 - corpus name understood by `lhotse download`
#            $2 - download root directory
# Returns:   non-zero if `lhotse download` fails
#######################################
download_corpus_if_missing() {
  local corpus=$1
  local root=$2

  # Quoting matters: unquoted, a root containing whitespace makes the test
  # itself error out, which silently skips the download.
  if [ ! -d "$root/$corpus" ]; then
    lhotse download "$corpus" "$root"
  fi
}

if [ "$stage" -le 0 ] && [ "$stop_stage" -ge 0 ]; then
  log "Stage 0: Download data"
  download_corpus_if_missing timit "$dl_dir"
  download_corpus_if_missing musan "$dl_dir"
fi
|
|
# Stage 1: run `lhotse prepare timit` on $dl_dir/timit/data, writing into
# data/manifests. Expansions are quoted so that a download root containing
# whitespace is passed as a single argument.
if [ "$stage" -le 1 ] && [ "$stop_stage" -ge 1 ]; then
  log "Stage 1: Prepare timit manifest"
  mkdir -p data/manifests
  lhotse prepare timit \
    -p "$num_phones" \
    -j "$nj" \
    "$dl_dir/timit/data" \
    data/manifests
fi
|
|
# Stage 2: run `lhotse prepare musan` on $dl_dir/musan, writing into
# data/manifests. The path is quoted so that a download root containing
# whitespace is passed as a single argument.
if [ "$stage" -le 2 ] && [ "$stop_stage" -ge 2 ]; then
  log "Stage 2: Prepare musan manifest"
  mkdir -p data/manifests
  lhotse prepare musan "$dl_dir/musan" data/manifests
fi
|
|
# Stage 3: create data/fbank and run the TIMIT fbank script.
if [ "$stage" -le 3 ] && [ "$stop_stage" -ge 3 ]; then
  log "Stage 3: Compute fbank for timit"
  mkdir -p data/fbank
  ./local/compute_fbank_timit.py
fi
|
|
# Stage 4: create data/fbank and run the MUSAN fbank script.
if [ "$stage" -le 4 ] && [ "$stop_stage" -ge 4 ]; then
  log "Stage 4: Compute fbank for musan"
  mkdir -p data/fbank
  ./local/compute_fbank_musan.py
fi
|
|
# Stage 5: populate data/lang_phone. prepare_lexicon.py runs on every
# invocation; prepare_lang.py runs only while L_disambig.pt is absent.
if [ "$stage" -le 5 ] && [ "$stop_stage" -ge 5 ]; then
  log "Stage 5: Prepare phone based lang"
  lang_dir=data/lang_phone
  mkdir -p "$lang_dir"

  ./local/prepare_lexicon.py --manifests-dir data/manifests --lang-dir "$lang_dir"

  [ -f "$lang_dir/L_disambig.pt" ] || ./local/prepare_lang.py --lang-dir "$lang_dir"
fi
|
|
#######################################
# Run kaldilm on an ARPA file and store its stdout as the G text file.
# Nothing is done when the output already exists. The result is written to a
# temporary sibling file and renamed only on success, so a failed run never
# leaves an empty or partial output that a later run would mistake for a
# finished one.
# Arguments: $1 - value for --max-order
#            $2 - input ARPA file
#            $3 - symbol table passed to --read-symbol-table
#            $4 - output file
# Returns:   0 on success or when the output exists; otherwise the exit
#            status of the failed kaldilm invocation
#######################################
build_ngram_G() {
  local order=$1
  local arpa=$2
  local symbols=$3
  local out_fst=$4
  local tmp_out="${out_fst}.tmp.$$"
  local rc=0

  if [ -f "$out_fst" ]; then
    return 0
  fi

  # `|| rc=$?` captures the status without tripping `set -e`, so the
  # temporary file can be cleaned up before the failure is propagated.
  python3 -m kaldilm \
    --read-symbol-table="$symbols" \
    --disambig-symbol='#0' \
    --max-order="$order" \
    "$arpa" > "$tmp_out" || rc=$?

  if [ "$rc" -ne 0 ]; then
    rm -f -- "$tmp_out"
    return "$rc"
  fi

  mv -- "$tmp_out" "$out_fst"
}

if [ "$stage" -le 6 ] && [ "$stop_stage" -ge 6 ]; then
  log "Stage 6: Prepare G"
  mkdir -p data/lm
  build_ngram_G 3 "$dl_dir/lm/lm_3_gram.arpa" \
    data/lang_phone/words.txt data/lm/G_3_gram.fst.txt
  build_ngram_G 4 "$dl_dir/lm/lm_4_gram.arpa" \
    data/lang_phone/words.txt data/lm/G_4_gram.fst.txt
fi
|
|
# Stage 7: run the HLG compilation script on data/lang_phone.
if [ "$stage" -le 7 ] && [ "$stop_stage" -ge 7 ]; then
  log "Stage 7: Compile HLG"
  ./local/compile_hlg.py --lang-dir data/lang_phone
fi
|
|