# Hosting-page header captured together with this file (not part of the config):
#   ppg / train.yaml
#   wanghappy's picture
#   Upload folder using huggingface_hub
#   9f72363 verified
# Top-level training scalars.
# NOTE(review): presumably the number of batches accumulated per optimizer step — confirm against the trainer.
accum_grad: 2
# Relative path to the CMVN statistics file; `is_json_cmvn` further down in this file is set to true.
cmvn_file: exp/stream_wenet_giga/global_cmvn
# Data-pipeline settings.
# The nesting below is reconstructed from a listing whose indentation had been
# stripped: every key that carried no value is treated as a mapping that owns
# the scalar keys following it. Keys are alphabetically sorted at each level,
# which is what fixes most placements. Without the nesting, `emb_type`,
# `feat_type`, `h5_path` and `perb_h5_path` would be duplicate keys.
dataset_conf:
  batch_conf:
    batch_size: 8
    batch_type: static
  # Embedding source for the cv split (path contains `/dev/`).
  cv_emb_conf:
    emb_type: ivector
    feat_type: fbank
    h5_path: /apdcephfs/share_1316500/nenali/corpus/M2Met_beam/dev/data/embedding/ivector/python/ivector_python_dev_fbank.h5
    perb_h5_path: /apdcephfs/share_1316500/nenali/corpus/M2Met_beam/dev/data/embedding/ivector/python/ivector_python_dev_fbank_perb.h5
  fbank_conf:
    dither: 0.1
    # NOTE(review): frame_length / frame_shift look like milliseconds — confirm against the feature extractor.
    frame_length: 25
    frame_shift: 10
    # Same value as the top-level `input_dim` (80).
    num_mel_bins: 80
  filter_conf:
    max_length: 1500
    min_length: 10
    token_max_length: 1500
    token_min_length: 10
  resample_conf:
    resample_rate: 16000
  shuffle: true
  shuffle_conf:
    shuffle_size: 1500
  sort: true
  sort_conf:
    sort_size: 500
  spec_aug: true
  spec_aug_conf:
    max_f: 30
    max_t: 40
    num_f_mask: 2
    num_t_mask: 2
  speed_perturb: false
  # Embedding source for the test split; unlike cv/train it has no `perb_h5_path`.
  test_emb_conf:
    emb_type: ivector
    feat_type: fbank
    h5_path: /apdcephfs/share_1316500/nenali/corpus/M2Met_beam/test/data/embedding/ivector/python/ivector_python_test_fbank.h5
  # Embedding source for the train split.
  train_emb_conf:
    emb_type: ivector
    feat_type: fbank
    h5_path: /apdcephfs/share_1316500/nenali/corpus/M2Met_beam/train/data/embedding/ivector/python/ivector_python_train_fbank.h5
    perb_h5_path: /apdcephfs/share_1316500/nenali/corpus/M2Met_beam/train/data/embedding/ivector/python/ivector_python_train_fbank_perb.h5
  # NOTE(review): `use_emb`, `use_seg`, `wav_aug` and `wav_aug_conf` are placed at
  # the dataset_conf level by analogy with `spec_aug` / `spec_aug_conf`; sort order
  # alone would also allow them under `train_emb_conf`. Verify against the loader.
  use_emb: false
  use_seg: false
  wav_aug: true
  wav_aug_conf:
    musan_path: /apdcephfs/share_1149801/speech_user/nenali/private_nenali/corpus/wav_h5/musan_new.h5
    rir_path: /apdcephfs/share_1149801/speech_user/nenali/private_nenali/corpus/wav_h5/rir_new.h5
# Decoder selection and its hyperparameters.
# The hyperparameters are nested under `decoder_conf`; left flat, keys such as
# `attention_heads`, `dropout_rate`, `linear_units`, `num_blocks` and
# `positional_dropout_rate` would collide with the encoder's keys of the same name.
decoder: transformer
decoder_conf:
  attention_heads: 4
  dropout_rate: 0.1
  linear_units: 2048
  num_blocks: 6
  positional_dropout_rate: 0.1
  self_attention_dropout_rate: 0.0
  src_attention_dropout_rate: 0.0
# Encoder selection and its hyperparameters.
# The hyperparameters are nested under `encoder_conf`; left flat they would
# duplicate the decoder's keys (`attention_heads`, `dropout_rate`, ...) and the
# dataset-level `use_emb`.
encoder: conformer
encoder_conf:
  activation_type: swish
  attention_dropout_rate: 0.0
  attention_heads: 4
  causal: true
  cnn_module_kernel: 15
  cnn_module_norm: layer_norm
  dropout_rate: 0.1
  emb_dim: 100
  input_layer: conv2d
  linear_units: 2048
  normalize_before: true
  num_blocks: 7
  output_size: 256
  pos_enc_layer_type: rel_pos
  positional_dropout_rate: 0.1
  selfattention_layer_type: rel_selfattn
  use_cnn_module: true
  use_dynamic_chunk: true
  # Set to false here, matching the `use_emb: false` in the dataset section.
  use_emb: false
# Top-level training-loop scalars.
# NOTE(review): presumably a gradient-norm clipping threshold — confirm against the trainer.
grad_clip: 5
# Same value as `num_mel_bins` (80) in the fbank settings of this file.
input_dim: 80
# Flags the format of the file named by `cmvn_file`.
is_json_cmvn: true
# NOTE(review): unit (batches vs. steps) is not visible here — confirm.
log_interval: 400
max_epoch: 100
# Model-level loss settings.
# Nesting reconstructed from a listing whose indentation had been stripped.
model_conf:
  ctc_weight: 0.0
  length_normalized_loss: false
  loss_margin: 0.2
  lsm_weight: 0.1
  ppg_loss: softmax
  ppg_weight: 1.0
  # `pooling_type`, `spk_dim` and `spk_num` must belong to `sv_conf`: keys are
  # alphabetically sorted at every level, and these three sort before `sv_conf`.
  sv_conf:
    pooling_type: stats
    spk_dim: 128
    spk_num: 339
  # NOTE(review): `sv_loss`, `sv_weight` and `use_sv` are placed beside
  # `ppg_loss` / `ppg_weight` at the model_conf level; sort order would equally
  # allow them inside `sv_conf`. Verify against the model constructor.
  sv_loss: softmax
  sv_weight: 0.5
  use_sv: false
# Optimizer and learning-rate scheduler.
# `lr` is nested under `optim_conf` and `warmup_steps` under `scheduler_conf`;
# both parent keys carried no value of their own in the flattened listing.
optim: adam
optim_conf:
  lr: 0.001
# Top-level key; it sorts between `optim_conf` and `scheduler`.
# NOTE(review): presumably the size of the output (PPG) layer — confirm against the model.
output_dim: 600
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 2000