yuekai committed on
Commit
4aec593
·
1 Parent(s): 85bcaa2

Upload config.yaml

Browse files
Files changed (1) hide show
  1. config.yaml +295 -0
config.yaml ADDED
@@ -0,0 +1,295 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ config: conf/tuning/train_asr_hubert_conformer.yaml
2
+ print_config: false
3
+ log_level: INFO
4
+ dry_run: false
5
+ iterator_type: sequence
6
+ output_dir: exp/asr_train_asr_hubert_conformer_raw_bpe100_init_paramexpexp_hubert_large_ll60k_weighted_perturbasr_train_asr_conformer7_hubert_960hr_large_raw_en_bpe5000_sp26epoch.pth:::decoder.output_layer,decoder.embed.0,ctc.ctc_lo_sp
7
+ ngpu: 1
8
+ seed: 0
9
+ num_workers: 1
10
+ num_att_plot: 3
11
+ dist_backend: nccl
12
+ dist_init_method: env://
13
+ dist_world_size: null
14
+ dist_rank: null
15
+ local_rank: null
16
+ dist_master_addr: null
17
+ dist_master_port: null
18
+ dist_launcher: null
19
+ multiprocessing_distributed: false
20
+ unused_parameters: false
21
+ sharded_ddp: false
22
+ cudnn_enabled: true
23
+ cudnn_benchmark: false
24
+ cudnn_deterministic: true
25
+ collect_stats: false
26
+ write_collected_feats: false
27
+ max_epoch: 500
28
+ patience: null
29
+ val_scheduler_criterion:
30
+ - valid
31
+ - loss
32
+ early_stopping_criterion:
33
+ - valid
34
+ - loss
35
+ - min
36
+ best_model_criterion:
37
+ - - valid
38
+ - acc
39
+ - max
40
+ keep_nbest_models: 10
41
+ grad_clip: 5.0
42
+ grad_clip_type: 2.0
43
+ grad_noise: false
44
+ accum_grad: 1
45
+ no_forward_run: false
46
+ resume: true
47
+ train_dtype: float32
48
+ use_amp: false
49
+ log_interval: null
50
+ use_tensorboard: true
51
+ use_wandb: false
52
+ wandb_project: null
53
+ wandb_id: null
54
+ wandb_entity: null
55
+ wandb_name: null
56
+ wandb_model_log_interval: -1
57
+ detect_anomaly: false
58
+ pretrain_path: null
59
+ init_param:
60
+ - exp/exp_hubert_large_ll60k_weighted_perturb/asr_train_asr_conformer7_hubert_960hr_large_raw_en_bpe5000_sp/26epoch.pth:::decoder.output_layer,decoder.embed.0,ctc.ctc_lo
61
+ ignore_init_mismatch: false
62
+ freeze_param:
63
+ - frontend.upstream
64
+ num_iters_per_epoch: null
65
+ batch_size: 20
66
+ valid_batch_size: null
67
+ batch_bins: 1000000
68
+ valid_batch_bins: null
69
+ train_shape_file:
70
+ - exp/asr_stats_raw_bpe100_sp/train/speech_shape
71
+ - exp/asr_stats_raw_bpe100_sp/train/text_shape.bpe
72
+ valid_shape_file:
73
+ - exp/asr_stats_raw_bpe100_sp/valid/speech_shape
74
+ - exp/asr_stats_raw_bpe100_sp/valid/text_shape.bpe
75
+ batch_type: folded
76
+ valid_batch_type: null
77
+ fold_length:
78
+ - 80000
79
+ - 150
80
+ sort_in_batch: descending
81
+ sort_batch: descending
82
+ multiple_iterator: false
83
+ chunk_length: 500
84
+ chunk_shift_ratio: 0.5
85
+ num_cache_chunks: 1024
86
+ train_data_path_and_name_and_type:
87
+ - - dump/raw/train_sp/wav.scp
88
+ - speech
89
+ - sound
90
+ - - dump/raw/train_sp/text
91
+ - text
92
+ - text
93
+ valid_data_path_and_name_and_type:
94
+ - - dump/raw/dev/wav.scp
95
+ - speech
96
+ - sound
97
+ - - dump/raw/dev/text
98
+ - text
99
+ - text
100
+ allow_variable_data_keys: false
101
+ max_cache_size: 0.0
102
+ max_cache_fd: 32
103
+ valid_max_cache_size: null
104
+ optim: adam
105
+ optim_conf:
106
+ lr: 0.0002
107
+ scheduler: warmuplr
108
+ scheduler_conf:
109
+ warmup_steps: 2000
110
+ token_list:
111
+ - <blank>
112
+ - <unk>
113
+ - DECREASEBRIGHTNESS
114
+ - INCREASEBRIGHTNESS
115
+ - SETLIGHTBRIGHTNESS
116
+ - SETLIGHTCOLOR
117
+ - SWITCHLIGHTOFF
118
+ - SWITCHLIGHTON
119
+ - ▁
120
+ - ▁the
121
+ - ▁lights
122
+ - ▁to
123
+ - e
124
+ - ▁in
125
+ - ▁turn
126
+ - i
127
+ - s
128
+ - l
129
+ - d
130
+ - t
131
+ - ▁please
132
+ - o
133
+ - ▁room
134
+ - ▁light
135
+ - ke
136
+ - ▁brightness
137
+ - ▁i
138
+ - ▁off
139
+ - a
140
+ - ▁be
141
+ - ▁on
142
+ - m
143
+ - ▁ma
144
+ - nt
145
+ - ▁wa
146
+ - r
147
+ - ▁change
148
+ - u
149
+ - ▁set
150
+ - re
151
+ - ▁you
152
+ - y
153
+ - ▁can
154
+ - ▁li
155
+ - g
156
+ - ing
157
+ - ▁down
158
+ - ▁pink
159
+ - p
160
+ - ▁two
161
+ - v
162
+ - ▁lighting
163
+ - ▁of
164
+ - w
165
+ - ▁red
166
+ - at
167
+ - ting
168
+ - ▁bedroom
169
+ - ▁s
170
+ - ▁la
171
+ - ▁need
172
+ - ▁twenty
173
+ - ▁up
174
+ - ▁it
175
+ - eve
176
+ - ▁me
177
+ - f
178
+ - ou
179
+ - ▁green
180
+ - ld
181
+ - ▁increase
182
+ - ▁brighter
183
+ - ▁blue
184
+ - ▁color
185
+ - ▁bright
186
+ - ▁toilet
187
+ - ▁kitchen
188
+ - ▁dim
189
+ - ry
190
+ - ▁lower
191
+ - ▁bathroom
192
+ - ▁switch
193
+ - all
194
+ - ▁twelve
195
+ - ▁dark
196
+ - ▁basement
197
+ - ▁percent
198
+ - x
199
+ - j
200
+ - k
201
+ - c
202
+ - b
203
+ - n
204
+ - '0'
205
+ - '3'
206
+ - q
207
+ - z
208
+ - '4'
209
+ - h
210
+ - <sos/eos>
211
+ init: null
212
+ input_size: null
213
+ ctc_conf:
214
+ dropout_rate: 0.0
215
+ ctc_type: builtin
216
+ reduce: true
217
+ ignore_nan_grad: true
218
+ model_conf:
219
+ ctc_weight: 0.3
220
+ lsm_weight: 0.1
221
+ length_normalized_loss: false
222
+ extract_feats_in_collect_stats: false
223
+ use_preprocessor: true
224
+ token_type: bpe
225
+ bpemodel: data/token_list/bpe_unigram100/bpe.model
226
+ non_linguistic_symbols: null
227
+ cleaner: null
228
+ g2p: null
229
+ speech_volume_normalize: null
230
+ rir_scp: null
231
+ rir_apply_prob: 1.0
232
+ noise_scp: null
233
+ noise_apply_prob: 1.0
234
+ noise_db_range: '13_15'
235
+ frontend: s3prl
236
+ frontend_conf:
237
+ frontend_conf:
238
+ upstream: hubert_large_ll60k
239
+ download_dir: ./hub
240
+ multilayer_feature: true
241
+ fs: 16k
242
+ specaug: specaug
243
+ specaug_conf:
244
+ apply_time_warp: true
245
+ time_warp_window: 5
246
+ time_warp_mode: bicubic
247
+ apply_freq_mask: true
248
+ freq_mask_width_range:
249
+ - 0
250
+ - 30
251
+ num_freq_mask: 2
252
+ apply_time_mask: true
253
+ time_mask_width_range:
254
+ - 0
255
+ - 40
256
+ num_time_mask: 2
257
+ normalize: utterance_mvn
258
+ normalize_conf: {}
259
+ preencoder: linear
260
+ preencoder_conf:
261
+ input_size: 1024
262
+ output_size: 80
263
+ encoder: conformer
264
+ encoder_conf:
265
+ output_size: 512
266
+ attention_heads: 8
267
+ linear_units: 2048
268
+ num_blocks: 12
269
+ dropout_rate: 0.1
270
+ positional_dropout_rate: 0.1
271
+ attention_dropout_rate: 0.1
272
+ input_layer: conv2d
273
+ normalize_before: true
274
+ macaron_style: true
275
+ pos_enc_layer_type: rel_pos
276
+ selfattention_layer_type: rel_selfattn
277
+ activation_type: swish
278
+ use_cnn_module: true
279
+ cnn_module_kernel: 31
280
+ postencoder: null
281
+ postencoder_conf: {}
282
+ decoder: transformer
283
+ decoder_conf:
284
+ attention_heads: 8
285
+ linear_units: 2048
286
+ num_blocks: 6
287
+ dropout_rate: 0.1
288
+ positional_dropout_rate: 0.1
289
+ self_attention_dropout_rate: 0.1
290
+ src_attention_dropout_rate: 0.1
291
+ required:
292
+ - output_dir
293
+ - token_list
294
+ version: 0.10.3a3
295
+ distributed: false