jiangdongwei committed on
Commit
84e3c56
·
1 Parent(s): 8f4d327

add more info in README

Browse files
Files changed (1) hide show
  1. README.md +213 -0
README.md CHANGED
@@ -32,3 +32,216 @@
32
  |---|---|
33
  |exp/vad_train_asr_transformer_raw/decode_rnn_vad_model_valid.acc.ave/ihm_dev/result.txt|0.9294|
34
  |exp/vad_train_asr_transformer_raw/decode_rnn_vad_model_valid.acc.ave/ihm_eval/result.txt|0.9479|
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
32
  |---|---|
33
  |exp/vad_train_asr_transformer_raw/decode_rnn_vad_model_valid.acc.ave/ihm_dev/result.txt|0.9294|
34
  |exp/vad_train_asr_transformer_raw/decode_rnn_vad_model_valid.acc.ave/ihm_eval/result.txt|0.9479|
35
+
36
+
37
+ ## VAD config
38
+
39
+ <details><summary>expand</summary>
40
+
41
+ ```
42
+ config: conf/tuning/train_vad_rnn.yaml
43
+ print_config: false
44
+ log_level: INFO
45
+ dry_run: false
46
+ iterator_type: sequence
47
+ output_dir: exp/vad_train_vad_rnn_raw
48
+ ngpu: 1
49
+ seed: 0
50
+ num_workers: 3
51
+ num_att_plot: 3
52
+ dist_backend: nccl
53
+ dist_init_method: env://
54
+ dist_world_size: null
55
+ dist_rank: null
56
+ local_rank: 0
57
+ dist_master_addr: null
58
+ dist_master_port: null
59
+ dist_launcher: null
60
+ multiprocessing_distributed: false
61
+ unused_parameters: false
62
+ sharded_ddp: false
63
+ cudnn_enabled: true
64
+ cudnn_benchmark: false
65
+ cudnn_deterministic: true
66
+ collect_stats: false
67
+ write_collected_feats: false
68
+ max_epoch: 2
69
+ patience: null
70
+ val_scheduler_criterion:
71
+ - valid
72
+ - loss
73
+ early_stopping_criterion:
74
+ - valid
75
+ - loss
76
+ - min
77
+ best_model_criterion:
78
+ - - valid
79
+ - acc
80
+ - max
81
+ keep_nbest_models: 5
82
+ nbest_averaging_interval: 0
83
+ grad_clip: 5.0
84
+ grad_clip_type: 2.0
85
+ grad_noise: false
86
+ accum_grad: 1
87
+ no_forward_run: false
88
+ resume: true
89
+ train_dtype: float32
90
+ use_amp: false
91
+ log_interval: null
92
+ use_matplotlib: true
93
+ use_tensorboard: true
94
+ create_graph_in_tensorboard: false
95
+ use_wandb: false
96
+ wandb_project: null
97
+ wandb_id: null
98
+ wandb_entity: null
99
+ wandb_name: null
100
+ wandb_model_log_interval: -1
101
+ detect_anomaly: false
102
+ pretrain_path: null
103
+ init_param: []
104
+ ignore_init_mismatch: false
105
+ freeze_param: []
106
+ num_iters_per_epoch: null
107
+ batch_size: 20
108
+ valid_batch_size: null
109
+ batch_bins: 14000000
110
+ valid_batch_bins: null
111
+ train_shape_file:
112
+ - exp/vad_stats_raw/train/speech_shape
113
+ - exp/vad_stats_raw/train/text_shape
114
+ valid_shape_file:
115
+ - exp/vad_stats_raw/valid/speech_shape
116
+ - exp/vad_stats_raw/valid/text_shape
117
+ batch_type: numel
118
+ valid_batch_type: null
119
+ fold_length:
120
+ - 80000
121
+ - 150
122
+ sort_in_batch: descending
123
+ sort_batch: descending
124
+ multiple_iterator: false
125
+ chunk_length: 500
126
+ chunk_shift_ratio: 0.5
127
+ num_cache_chunks: 1024
128
+ chunk_excluded_key_prefixes: []
129
+ train_data_path_and_name_and_type:
130
+ - - dump/raw/ihm_train/wav.scp
131
+ - speech
132
+ - sound
133
+ - - dump/raw/ihm_train/text
134
+ - text
135
+ - text
136
+ valid_data_path_and_name_and_type:
137
+ - - dump/raw/ihm_dev/wav.scp
138
+ - speech
139
+ - sound
140
+ - - dump/raw/ihm_dev/text
141
+ - text
142
+ - text
143
+ allow_variable_data_keys: false
144
+ max_cache_size: 0.0
145
+ max_cache_fd: 32
146
+ valid_max_cache_size: null
147
+ exclude_weight_decay: false
148
+ exclude_weight_decay_conf: {}
149
+ optim: adam
150
+ optim_conf:
151
+ lr: 0.003
152
+ scheduler: warmuplr
153
+ scheduler_conf:
154
+ warmup_steps: 25000
155
+ pre_postencoder_norm: false
156
+ init: null
157
+ input_size: null
158
+ use_preprocessor: true
159
+ speech_volume_normalize: null
160
+ rir_scp: null
161
+ rir_apply_prob: 1.0
162
+ noise_scp: null
163
+ noise_apply_prob: 1.0
164
+ noise_db_range: '13_15'
165
+ short_noise_thres: 0.5
166
+ segment_length: 10.0
167
+ frontend: default
168
+ frontend_conf:
169
+ n_fft: 512
170
+ win_length: 400
171
+ hop_length: 160
172
+ fs: 16k
173
+ specaug: specaug
174
+ specaug_conf:
175
+ apply_time_warp: true
176
+ time_warp_window: 5
177
+ time_warp_mode: bicubic
178
+ apply_freq_mask: true
179
+ freq_mask_width_range:
180
+ - 0
181
+ - 30
182
+ num_freq_mask: 2
183
+ apply_time_mask: true
184
+ time_mask_width_range:
185
+ - 0
186
+ - 40
187
+ num_time_mask: 2
188
+ normalize: global_mvn
189
+ normalize_conf:
190
+ stats_file: exp/vad_stats_raw/train/feats_stats.npz
191
+ model: espnet
192
+ model_conf:
193
+ length_normalized_loss: false
194
+ preencoder: null
195
+ preencoder_conf: {}
196
+ encoder: rnn
197
+ encoder_conf:
198
+ rnn_type: gru
199
+ bidirectional: true
200
+ use_projection: true
201
+ num_layers: 4
202
+ hidden_size: 320
203
+ output_size: 320
204
+ dropout: 0.2
205
+ subsample:
206
+ - 1
207
+ - 1
208
+ - 1
209
+ - 1
210
+ required:
211
+ - output_dir
212
+ version: '202304'
213
+ distributed: false
214
+ ```
215
+
216
+ </details>
217
+
218
+
219
+
220
+ ### Citing ESPnet
221
+
222
+ ```bibtex
223
+ @inproceedings{watanabe2018espnet,
224
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
225
+ title={{ESPnet}: End-to-End Speech Processing Toolkit},
226
+ year={2018},
227
+ booktitle={Proceedings of Interspeech},
228
+ pages={2207--2211},
229
+ doi={10.21437/Interspeech.2018-1456},
230
+ url={http://dx.doi.org/10.21437/Interspeech.2018-1456}
231
+ }
232
+
233
+
234
+ ```
235
+
236
+ Or cite the arXiv preprint:
237
+
238
+ ```bibtex
239
+ @misc{watanabe2018espnet,
240
+ title={ESPnet: End-to-End Speech Processing Toolkit},
241
+ author={Shinji Watanabe and Takaaki Hori and Shigeki Karita and Tomoki Hayashi and Jiro Nishitoba and Yuya Unno and Nelson Yalta and Jahn Heymann and Matthew Wiesner and Nanxin Chen and Adithya Renduchintala and Tsubasa Ochiai},
242
+ year={2018},
243
+ eprint={1804.00015},
244
+ archivePrefix={arXiv},
245
+ primaryClass={cs.CL}
246
+ }
247
+ ```