File size: 2,839 Bytes
9f72363
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# Top-level training settings.
# accum_grad: presumably the number of batches whose gradients are accumulated
# before each optimizer step — TODO confirm against the training script.
accum_grad: 2
# Location of the CMVN statistics file. This is a relative path; what it is
# resolved against is not visible in this file.
cmvn_file: exp/stream_wenet_giga/global_cmvn
# Data-pipeline configuration: batching, feature extraction, length filtering,
# shuffling/sorting, augmentation and per-split embedding sources.
# NOTE(review): every *_path below is an absolute path on a specific cluster
# filesystem (/apdcephfs/...), so this file is not portable as-is.
dataset_conf:
  # Batching: batch_type is "static" with batch_size 8.
  batch_conf:
    batch_size: 8
    batch_type: static
  # Embedding source for the "cv" split (paths point at the M2Met_beam "dev"
  # directory). perb_h5_path is presumably the perturbed-audio variant —
  # TODO confirm.
  cv_emb_conf:
    emb_type: ivector
    feat_type: fbank
    h5_path: /apdcephfs/share_1316500/nenali/corpus/M2Met_beam/dev/data/embedding/ivector/python/ivector_python_dev_fbank.h5
    perb_h5_path: /apdcephfs/share_1316500/nenali/corpus/M2Met_beam/dev/data/embedding/ivector/python/ivector_python_dev_fbank_perb.h5
  # Filterbank feature settings. num_mel_bins (80) equals the top-level
  # input_dim. frame_length / frame_shift are presumably in milliseconds —
  # TODO confirm.
  fbank_conf:
    dither: 0.1
    frame_length: 25
    frame_shift: 10
    num_mel_bins: 80
  # Length bounds used to filter samples. Units are not visible here
  # (presumably frames for *_length and tokens for token_*_length) — verify
  # against the data loader.
  filter_conf:
    max_length: 1500
    min_length: 10
    token_max_length: 1500
    token_min_length: 10
  # Target sample rate for resampling (presumably Hz).
  resample_conf:
    resample_rate: 16000
  # Shuffling and sorting are both enabled; the *_size values are presumably
  # buffer sizes in samples — TODO confirm.
  shuffle: true
  shuffle_conf:
    shuffle_size: 1500
  sort: true
  sort_conf:
    sort_size: 500
  # Spectrogram augmentation is enabled. Presumably max_f / max_t are maximum
  # mask widths and num_*_mask the number of masks per axis — TODO confirm.
  spec_aug: true
  spec_aug_conf:
    max_f: 30
    max_t: 40
    num_f_mask: 2
    num_t_mask: 2
  # Speed perturbation is disabled for this run.
  speed_perturb: false
  # Embedding source for the "test" split. Unlike the cv and train sections,
  # no perb_h5_path is given here.
  test_emb_conf:
    emb_type: ivector
    feat_type: fbank
    h5_path: /apdcephfs/share_1316500/nenali/corpus/M2Met_beam/test/data/embedding/ivector/python/ivector_python_test_fbank.h5
  # Embedding source for the "train" split.
  train_emb_conf:
    emb_type: ivector
    feat_type: fbank
    h5_path: /apdcephfs/share_1316500/nenali/corpus/M2Met_beam/train/data/embedding/ivector/python/ivector_python_train_fbank.h5
    perb_h5_path: /apdcephfs/share_1316500/nenali/corpus/M2Met_beam/train/data/embedding/ivector/python/ivector_python_train_fbank_perb.h5
  # NOTE(review): use_emb is false, so the *_emb_conf sections above are
  # presumably ignored in this run — confirm against the data loader.
  use_emb: false
  use_seg: false
  # Waveform-level augmentation is enabled. The file names suggest MUSAN noise
  # and room-impulse-response data stored as HDF5 — TODO confirm.
  wav_aug: true
  wav_aug_conf:
    musan_path: /apdcephfs/share_1149801/speech_user/nenali/private_nenali/corpus/wav_h5/musan_new.h5
    rir_path: /apdcephfs/share_1149801/speech_user/nenali/private_nenali/corpus/wav_h5/rir_new.h5
# Decoder type selector and its hyperparameters.
decoder: transformer
decoder_conf:
  attention_heads: 4
  dropout_rate: 0.1
  linear_units: 2048
  num_blocks: 6
  positional_dropout_rate: 0.1
  # Dropout on both attention types (self and source) is set to 0.0.
  self_attention_dropout_rate: 0.0
  src_attention_dropout_rate: 0.0
# Encoder type selector and its hyperparameters.
encoder: conformer
encoder_conf:
  activation_type: swish
  attention_dropout_rate: 0.0
  attention_heads: 4
  # causal + use_dynamic_chunk (below) are both enabled; presumably this
  # configures the encoder for streaming/chunk-based inference — TODO confirm.
  causal: true
  cnn_module_kernel: 15
  cnn_module_norm: layer_norm
  dropout_rate: 0.1
  # Presumably the dimensionality of the auxiliary embedding (ivector) input;
  # only relevant when use_emb is true — TODO confirm.
  emb_dim: 100
  input_layer: conv2d
  linear_units: 2048
  normalize_before: true
  num_blocks: 7
  output_size: 256
  # Relative positional encoding paired with the relative self-attention
  # layer type selected below.
  pos_enc_layer_type: rel_pos
  positional_dropout_rate: 0.1
  selfattention_layer_type: rel_selfattn
  use_cnn_module: true
  use_dynamic_chunk: true
  # Same value as dataset_conf.use_emb (false).
  use_emb: false
# Miscellaneous top-level training settings.
# grad_clip: presumably the gradient-norm clipping threshold — TODO confirm.
grad_clip: 5
# Input feature dimension; equals dataset_conf.fbank_conf.num_mel_bins (80).
input_dim: 80
# Presumably indicates that cmvn_file is stored in JSON format — TODO confirm.
is_json_cmvn: true
# log_interval is presumably counted in training steps — TODO confirm.
log_interval: 400
max_epoch: 100
# Loss / objective configuration for the model.
model_conf:
  # NOTE(review): ctc_weight is 0.0, which presumably disables the CTC loss
  # term entirely — confirm this is intended.
  ctc_weight: 0.0
  length_normalized_loss: false
  loss_margin: 0.2
  # Presumably the label-smoothing weight — TODO confirm.
  lsm_weight: 0.1
  ppg_loss: softmax
  ppg_weight: 1.0
  # Settings under sv_conf (names suggest a speaker-related auxiliary task).
  # NOTE(review): use_sv is false, so the remaining sv_* values are presumably
  # unused in this run — confirm.
  sv_conf:
    pooling_type: stats
    spk_dim: 128
    spk_num: 339
    sv_loss: softmax
    sv_weight: 0.5
    use_sv: false
# Optimizer selection and its settings.
optim: adam
optim_conf:
  lr: 0.001
# Model output dimension. What the 600 classes represent is not visible in
# this file.
output_dim: 600
# Learning-rate scheduler selection and its settings.
scheduler: warmuplr
scheduler_conf:
  warmup_steps: 2000