Text-to-Speech
ONNX
zero-shot
multilingual
File size: 25,311 Bytes
2df45bb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
{
  "_name": null,
  "common": {
    "_name": null,
    "no_progress_bar": false,
    "log_interval": 100,
    "log_format": "simple",
    "log_file": null,
    "aim_repo": null,
    "aim_run_hash": null,
    "tensorboard_logdir": "/checkpoint/mjhwang/experiments/230930-noiseaug_p2v-mls_multilingual_6lang/231005-noiseaug_p2v-mls_multilingual_6lang-alignfix.config_v2.langemb1.vuv_logit1.denoise.ngpu16",
    "wandb_project": null,
    "azureml_logging": false,
    "seed": 1,
    "cpu": false,
    "tpu": false,
    "bf16": false,
    "fp16": false,
    "memory_efficient_fp16": false,
    "fp16_no_flatten_grads": false,
    "fp16_init_scale": 128,
    "fp16_scale_window": null,
    "fp16_scale_tolerance": 0.0,
    "on_cpu_convert_precision": false,
    "min_loss_scale": 0.0001,
    "threshold_loss_scale": null,
    "amp": false,
    "amp_batch_retries": 2,
    "amp_init_scale": 128,
    "amp_scale_window": null,
    "user_dir": null,
    "empty_cache_freq": 0,
    "all_gather_list_size": 9999999,
    "model_parallel_size": 1,
    "quantization_config_path": null,
    "profile": false,
    "reset_logging": false,
    "suppress_crashes": false,
    "use_plasma_view": false,
    "plasma_path": "/tmp/plasma",
    "log_nvidia_smi": false,
    "use_tutel_moe": false
  },
  "common_eval": {
    "_name": null,
    "path": null,
    "post_process": null,
    "quiet": false,
    "model_overrides": "{}",
    "results_path": null,
    "is_moe": false,
    "moe_generation": false
  },
  "distributed_training": {
    "_name": null,
    "distributed_world_size": 16,
    "distributed_num_procs": 8,
    "distributed_rank": 0,
    "distributed_backend": "nccl",
    "distributed_init_method": "tcp://learnfair0791:15129",
    "distributed_port": 15129,
    "device_id": 0,
    "distributed_no_spawn": false,
    "ddp_backend": "legacy_ddp",
    "ddp_comm_hook": "none",
    "bucket_cap_mb": 25,
    "fix_batches_to_gpus": false,
    "find_unused_parameters": true,
    "gradient_as_bucket_view": false,
    "fast_stat_sync": false,
    "heartbeat_timeout": -1,
    "broadcast_buffers": false,
    "slowmo_momentum": null,
    "slowmo_base_algorithm": "localsgd",
    "localsgd_frequency": 3,
    "nprocs_per_node": 8,
    "pipeline_model_parallel": false,
    "pipeline_balance": null,
    "pipeline_devices": null,
    "pipeline_chunks": 0,
    "pipeline_encoder_balance": null,
    "pipeline_encoder_devices": null,
    "pipeline_decoder_balance": null,
    "pipeline_decoder_devices": null,
    "pipeline_checkpoint": "never",
    "zero_sharding": "none",
    "fp16": false,
    "bf16": false,
    "memory_efficient_fp16": false,
    "tpu": false,
    "no_reshard_after_forward": false,
    "fp32_reduce_scatter": false,
    "cpu_offload": false,
    "use_sharded_state": false,
    "not_fsdp_flatten_parameters": false,
    "freeze_up_to_layer": null
  },
  "dataset": {
    "_name": null,
    "num_workers": 0,
    "num_workers_valid": 0,
    "skip_invalid_size_inputs_valid_test": true,
    "max_tokens": 300000,
    "batch_size": null,
    "required_batch_size_multiple": 8,
    "required_seq_len_multiple": 1,
    "dataset_impl": null,
    "data_buffer_size": 10,
    "train_subset": "train_wenet_cmn_9_10,train_wenet_cmn_8_10,train_wenet_cmn_7_10,train_wenet_cmn_6_10,train_wenet_cmn_5_10,train_wenet_cmn_4_10,train_wenet_cmn_3_10,train_wenet_cmn_2_10,train_wenet_cmn_1_10,train_wenet_cmn_0_10,train_mls_en_9_10,train_mls_en_8_10,train_mls_en_7_10,train_mls_en_6_10,train_mls_en_5_10,train_mls_en_4_10,train_mls_en_3_10,train_mls_en_2_10,train_mls_en_1_10,train_mls_en_0_10,train_mls_deu,train_mls_fra,train_mls_spa,train_cv12_cmn,train_mls_ita,train_cv12_cmn_2,train_vl107_cmn",
    "valid_subset": "dev_all",
    "combine_valid_subsets": null,
    "ignore_unused_valid_subsets": false,
    "validate_interval": 1,
    "validate_interval_updates": 5000,
    "validate_after_updates": 0,
    "fixed_validation_seed": null,
    "disable_validation": false,
    "max_tokens_valid": 300000,
    "batch_size_valid": null,
    "max_valid_steps": null,
    "curriculum": 0,
    "gen_subset": "test",
    "num_shards": 1,
    "shard_id": 0,
    "grouped_shuffling": false,
    "update_epoch_batch_itr": false,
    "update_ordered_indices_seed": false
  },
  "optimization": {
    "_name": null,
    "max_epoch": 0,
    "max_update": 500000,
    "stop_time_hours": 0.0,
    "clip_norm": 1.0,
    "clip_norm_type": "l2",
    "sentence_avg": false,
    "update_freq": [
      4
    ],
    "lr": [
      0.0001
    ],
    "stop_min_lr": -1.0,
    "use_bmuf": false,
    "skip_remainder_batch": false
  },
  "checkpoint": {
    "_name": null,
    "save_dir": "/checkpoint/mjhwang/experiments/230930-noiseaug_p2v-mls_multilingual_6lang/231005-noiseaug_p2v-mls_multilingual_6lang-alignfix.config_v2.langemb1.vuv_logit1.denoise.ngpu16",
    "restore_file": "checkpoint_last.pt",
    "continue_once": null,
    "finetune_from_model": null,
    "ignore_suffix": false,
    "reset_dataloader": true,
    "reset_lr_scheduler": false,
    "reset_meters": false,
    "reset_optimizer": false,
    "optimizer_overrides": "{}",
    "save_interval": 1,
    "save_interval_updates": 10000,
    "keep_interval_updates": 1,
    "keep_interval_updates_pattern": -1,
    "keep_last_epochs": -1,
    "keep_best_checkpoints": 10,
    "no_save": false,
    "no_epoch_checkpoints": true,
    "no_last_checkpoints": false,
    "no_best_checkpoints": false,
    "no_save_optimizer_state": false,
    "no_save_optimizer_state_on_training_finished": false,
    "synchronize_checkpoints_before_copy": false,
    "symlink_best_and_last_checkpoints": false,
    "best_checkpoint_metric": "mse_loss",
    "maximize_best_checkpoint_metric": false,
    "patience": 20,
    "checkpoint_suffix": "",
    "checkpoint_shard_count": 1,
    "load_checkpoint_on_all_dp_ranks": false,
    "write_checkpoints_asynchronously": false,
    "s3_upload_path": null,
    "replication_count": 1,
    "model_parallel_size": 1
  },
  "bmuf": {
    "_name": null,
    "block_lr": 1.0,
    "block_momentum": 0.875,
    "global_sync_iter": 50,
    "warmup_iterations": 500,
    "use_nbm": false,
    "average_sync": false,
    "distributed_world_size": 16
  },
  "generation": {
    "_name": null,
    "beam": 5,
    "beam_mt": 0,
    "nbest": 1,
    "max_len_a": 0.0,
    "max_len_b": 200,
    "max_len_a_mt": 0.0,
    "max_len_b_mt": 200,
    "min_len": 1,
    "match_source_len": false,
    "unnormalized": false,
    "no_early_stop": false,
    "no_beamable_mm": false,
    "lenpen": 1.0,
    "lenpen_mt": 1.0,
    "unkpen": 0.0,
    "blankpen": 0.0,
    "replace_unk": null,
    "sacrebleu": false,
    "score_reference": false,
    "prefix_size": 0,
    "no_repeat_ngram_size": 0,
    "sampling": false,
    "sampling_topk": -1,
    "sampling_topp": -1.0,
    "constraints": null,
    "temperature": 1.0,
    "diverse_beam_groups": -1,
    "diverse_beam_strength": 0.5,
    "diversity_rate": -1.0,
    "print_alignment": null,
    "print_step": false,
    "lm_path": null,
    "lm_weight": 0.0,
    "iter_decode_eos_penalty": 0.0,
    "iter_decode_max_iter": 10,
    "iter_decode_force_max_iter": false,
    "iter_decode_with_beam": 1,
    "iter_decode_with_external_reranker": false,
    "retain_iter_history": false,
    "retain_dropout": false,
    "retain_dropout_modules": null,
    "decoding_format": null,
    "no_seed_provided": false,
    "eos_token": null
  },
  "eval_lm": {
    "_name": null,
    "output_word_probs": false,
    "output_word_stats": false,
    "context_window": 0,
    "softmax_batch": 9223372036854775807,
    "stats_path": null,
    "max_valid_steps": null
  },
  "interactive": {
    "_name": null,
    "buffer_size": 0,
    "input": "-"
  },
  "model": {
    "no_progress_bar": false,
    "log_interval": 100,
    "log_format": "simple",
    "log_file": null,
    "aim_repo": null,
    "aim_run_hash": null,
    "tensorboard_logdir": "/checkpoint/mjhwang/experiments/230930-noiseaug_p2v-mls_multilingual_6lang/231005-noiseaug_p2v-mls_multilingual_6lang-alignfix.config_v2.langemb1.vuv_logit1.denoise.ngpu16",
    "wandb_project": null,
    "azureml_logging": false,
    "seed": 1,
    "cpu": false,
    "tpu": false,
    "bf16": false,
    "fp16": false,
    "memory_efficient_fp16": false,
    "fp16_no_flatten_grads": false,
    "fp16_init_scale": 128,
    "fp16_scale_window": null,
    "fp16_scale_tolerance": 0.0,
    "on_cpu_convert_precision": false,
    "min_loss_scale": 0.0001,
    "threshold_loss_scale": null,
    "amp": false,
    "amp_batch_retries": 2,
    "amp_init_scale": 128,
    "amp_scale_window": null,
    "user_dir": null,
    "empty_cache_freq": 0,
    "all_gather_list_size": 9999999,
    "model_parallel_size": 1,
    "quantization_config_path": null,
    "profile": false,
    "reset_logging": false,
    "suppress_crashes": false,
    "use_plasma_view": false,
    "plasma_path": "/tmp/plasma",
    "log_nvidia_smi": false,
    "use_tutel_moe": false,
    "tokenizer": null,
    "bpe": null,
    "optimizer": "adam",
    "lr_scheduler": "fixed",
    "simul_type": null,
    "criterion": "nar_prosody2vec",
    "scoring": "bleu",
    "task": "prosody2vec",
    "num_workers": 0,
    "num_workers_valid": 0,
    "skip_invalid_size_inputs_valid_test": true,
    "max_tokens": 300000,
    "batch_size": null,
    "required_batch_size_multiple": 8,
    "required_seq_len_multiple": 1,
    "dataset_impl": null,
    "data_buffer_size": 10,
    "train_subset": "train_wenet_cmn_9_10,train_wenet_cmn_8_10,train_wenet_cmn_7_10,train_wenet_cmn_6_10,train_wenet_cmn_5_10,train_wenet_cmn_4_10,train_wenet_cmn_3_10,train_wenet_cmn_2_10,train_wenet_cmn_1_10,train_wenet_cmn_0_10,train_mls_en_9_10,train_mls_en_8_10,train_mls_en_7_10,train_mls_en_6_10,train_mls_en_5_10,train_mls_en_4_10,train_mls_en_3_10,train_mls_en_2_10,train_mls_en_1_10,train_mls_en_0_10,train_mls_deu,train_mls_fra,train_mls_spa,train_cv12_cmn,train_mls_ita,train_cv12_cmn_2,train_vl107_cmn",
    "valid_subset": "dev_all",
    "combine_valid_subsets": null,
    "ignore_unused_valid_subsets": false,
    "validate_interval": 1,
    "validate_interval_updates": 5000,
    "validate_after_updates": 0,
    "fixed_validation_seed": null,
    "disable_validation": false,
    "max_tokens_valid": "300000",
    "batch_size_valid": null,
    "max_valid_steps": null,
    "curriculum": 0,
    "gen_subset": "test",
    "num_shards": 1,
    "shard_id": 0,
    "grouped_shuffling": false,
    "update_epoch_batch_itr": false,
    "update_ordered_indices_seed": false,
    "distributed_world_size": 16,
    "distributed_num_procs": 8,
    "distributed_rank": 0,
    "distributed_backend": "nccl",
    "distributed_init_method": null,
    "distributed_port": 15129,
    "device_id": 0,
    "distributed_no_spawn": false,
    "ddp_backend": "legacy_ddp",
    "ddp_comm_hook": "none",
    "bucket_cap_mb": 25,
    "fix_batches_to_gpus": false,
    "find_unused_parameters": true,
    "gradient_as_bucket_view": false,
    "fast_stat_sync": false,
    "heartbeat_timeout": -1,
    "broadcast_buffers": false,
    "slowmo_momentum": null,
    "slowmo_base_algorithm": "localsgd",
    "localsgd_frequency": 3,
    "nprocs_per_node": 8,
    "pipeline_model_parallel": false,
    "pipeline_balance": null,
    "pipeline_devices": null,
    "pipeline_chunks": 0,
    "pipeline_encoder_balance": null,
    "pipeline_encoder_devices": null,
    "pipeline_decoder_balance": null,
    "pipeline_decoder_devices": null,
    "pipeline_checkpoint": "never",
    "zero_sharding": "none",
    "no_reshard_after_forward": false,
    "fp32_reduce_scatter": false,
    "cpu_offload": false,
    "use_sharded_state": false,
    "not_fsdp_flatten_parameters": false,
    "freeze_up_to_layer": null,
    "arch": "nar_p2v",
    "max_epoch": 0,
    "max_update": 500000,
    "stop_time_hours": 0,
    "clip_norm": 1.0,
    "clip_norm_type": "l2",
    "sentence_avg": false,
    "update_freq": [
      4
    ],
    "lr": [
      0.0001
    ],
    "stop_min_lr": -1.0,
    "use_bmuf": false,
    "skip_remainder_batch": false,
    "save_dir": "/checkpoint/mjhwang/experiments/230930-noiseaug_p2v-mls_multilingual_6lang/231005-noiseaug_p2v-mls_multilingual_6lang-alignfix.config_v2.langemb1.vuv_logit1.denoise.ngpu16",
    "restore_file": "checkpoint_last.pt",
    "continue_once": null,
    "finetune_from_model": null,
    "ignore_suffix": false,
    "reset_dataloader": true,
    "reset_lr_scheduler": false,
    "reset_meters": false,
    "reset_optimizer": false,
    "optimizer_overrides": "{}",
    "save_interval": 1,
    "save_interval_updates": 10000,
    "keep_interval_updates": 1,
    "keep_interval_updates_pattern": -1,
    "keep_last_epochs": -1,
    "keep_best_checkpoints": 10,
    "no_save": false,
    "no_epoch_checkpoints": true,
    "no_last_checkpoints": false,
    "no_best_checkpoints": false,
    "no_save_optimizer_state": false,
    "no_save_optimizer_state_on_training_finished": false,
    "synchronize_checkpoints_before_copy": false,
    "symlink_best_and_last_checkpoints": false,
    "best_checkpoint_metric": "mse_loss",
    "maximize_best_checkpoint_metric": false,
    "patience": 20,
    "checkpoint_suffix": "",
    "checkpoint_shard_count": 1,
    "load_checkpoint_on_all_dp_ranks": false,
    "write_checkpoints_asynchronously": false,
    "s3_upload_path": null,
    "replication_count": 1,
    "store_ema": false,
    "ema_decay": 0.9999,
    "ema_start_update": 0,
    "ema_seed_model": null,
    "ema_update_freq": 1,
    "ema_fp32": false,
    "load_prosody_encoder_from": null,
    "freeze_prosody_encoder": false,
    "unit_encoder_arch": "daft_exprt_encoder",
    "prosody_encoder_arch": "ecapa_tdnn2",
    "decoder_arch": "daft_exprt_decoder",
    "data": "/large_experiments/seamless/ust/mjhwang/data/denoise_prosody2vec/mls_multilingual_6lang_xlsr_10k_noiseaug",
    "config_yaml": "config_v2.yaml",
    "max_source_positions": 300000,
    "max_target_positions": 300000,
    "n_frames_per_step": 1,
    "eos_prob_threshold": 0.5,
    "eval_inference": true,
    "eval_tb_nsample": 8,
    "eval_bleu": false,
    "vocoder": "griffin_lim",
    "spec_bwd_max_iter": 8,
    "jit_data_offloading": true,
    "jit_data_root": "/scratch/slurm_tmpdir/${SLURM_JOB_ID}",
    "adam_betas": "(0.9, 0.98)",
    "adam_eps": 1e-08,
    "weight_decay": 0.0,
    "use_old_adam": false,
    "fp16_adam_stats": false,
    "block_wise": false,
    "force_anneal": null,
    "lr_shrink": 0.1,
    "warmup_updates": 1000,
    "ctc_weight": 0.0,
    "forward_sum_weight": 1.0,
    "bin_loss_start_ratio": 0.1,
    "bin_loss_warmup_steps": 6000,
    "film_regul_weight": 0.001,
    "pros_consist_weight": 0.0,
    "denoise_target": true,
    "snr_threshold": 2000000000000000.0,
    "pad": 1,
    "eos": 2,
    "unk": 3,
    "use_spkr_emb": 0,
    "use_lang_emb": 1,
    "prosody_embed_dim": 512,
    "use_ucmvn": 0,
    "use_spec_augment": 1,
    "use_prosody_layernorm": 1,
    "var_pred_hidden_dim": 512,
    "var_pred_kernel_size": 5,
    "var_pred_n_bins": -1,
    "add_variance_parallel": 1,
    "use_film_decoder": 1,
    "predict_var_vuv": 1,
    "predict_vuv_logit": 1,
    "predict_frm_f0_vuv": 0,
    "no_seed_provided": false,
    "speaker_embed_dim": 192,
    "use_utterance_speaker_embed": false,
    "lang_embed_dim": 64,
    "_name": "nar_p2v",
    "lang_to_id": {
      "cmn": 0,
      "deu": 1,
      "eng": 2,
      "fra": 3,
      "ita": 4,
      "spa": 5
    },
    "pitch_min": 0.0,
    "pitch_max": 6.858574643755327,
    "energy_min": 0.0,
    "energy_max": 6.360039234161377,
    "speaker_emb_path": null,
    "input_feat_per_channel": 80,
    "input_channels": 1,
    "speaker_to_id": null,
    "dropout": 0.2,
    "fft_hidden_dim": 1024,
    "fft_kernel_size": 9,
    "attention_dropout": 0.0,
    "encoder_layers": 4,
    "encoder_embed_dim": 256,
    "encoder_attention_heads": 2,
    "output_frame_dim": 80,
    "prosody_channels": [
      512,
      512,
      512,
      512,
      1536
    ],
    "prosody_kernel_sizes": [
      5,
      3,
      3,
      3,
      1
    ],
    "prosody_dilations": [
      1,
      2,
      3,
      4,
      1
    ],
    "prosody_attention_channels": 128,
    "prosody_res2net_scale": 8,
    "prosody_se_channels": 128,
    "prosody_global_context": true,
    "prosody_groups": [
      1,
      1,
      1,
      1,
      1
    ],
    "decoder_layers": 4,
    "decoder_embed_dim": 256,
    "decoder_attention_heads": 2,
    "var_pred_dropout": 0.5,
    "add_postnet": true,
    "postnet_dropout": 0.5,
    "postnet_layers": 5,
    "postnet_conv_dim": 512,
    "postnet_conv_kernel_size": 5,
    "upsampling": "gaussian"
  },
  "task": {
    "no_progress_bar": false,
    "log_interval": 100,
    "log_format": "simple",
    "log_file": null,
    "aim_repo": null,
    "aim_run_hash": null,
    "tensorboard_logdir": "/checkpoint/mjhwang/experiments/230930-noiseaug_p2v-mls_multilingual_6lang/231005-noiseaug_p2v-mls_multilingual_6lang-alignfix.config_v2.langemb1.vuv_logit1.denoise.ngpu16",
    "wandb_project": null,
    "azureml_logging": false,
    "seed": 1,
    "cpu": false,
    "tpu": false,
    "bf16": false,
    "fp16": false,
    "memory_efficient_fp16": false,
    "fp16_no_flatten_grads": false,
    "fp16_init_scale": 128,
    "fp16_scale_window": null,
    "fp16_scale_tolerance": 0.0,
    "on_cpu_convert_precision": false,
    "min_loss_scale": 0.0001,
    "threshold_loss_scale": null,
    "amp": false,
    "amp_batch_retries": 2,
    "amp_init_scale": 128,
    "amp_scale_window": null,
    "user_dir": null,
    "empty_cache_freq": 0,
    "all_gather_list_size": 9999999,
    "model_parallel_size": 1,
    "quantization_config_path": null,
    "profile": false,
    "reset_logging": false,
    "suppress_crashes": false,
    "use_plasma_view": false,
    "plasma_path": "/tmp/plasma",
    "log_nvidia_smi": false,
    "use_tutel_moe": false,
    "tokenizer": null,
    "bpe": null,
    "optimizer": "adam",
    "lr_scheduler": "fixed",
    "simul_type": null,
    "criterion": "nar_prosody2vec",
    "scoring": "bleu",
    "task": "prosody2vec",
    "num_workers": 0,
    "num_workers_valid": 0,
    "skip_invalid_size_inputs_valid_test": true,
    "max_tokens": 300000,
    "batch_size": null,
    "required_batch_size_multiple": 8,
    "required_seq_len_multiple": 1,
    "dataset_impl": null,
    "data_buffer_size": 10,
    "train_subset": "train_wenet_cmn_9_10,train_wenet_cmn_8_10,train_wenet_cmn_7_10,train_wenet_cmn_6_10,train_wenet_cmn_5_10,train_wenet_cmn_4_10,train_wenet_cmn_3_10,train_wenet_cmn_2_10,train_wenet_cmn_1_10,train_wenet_cmn_0_10,train_mls_en_9_10,train_mls_en_8_10,train_mls_en_7_10,train_mls_en_6_10,train_mls_en_5_10,train_mls_en_4_10,train_mls_en_3_10,train_mls_en_2_10,train_mls_en_1_10,train_mls_en_0_10,train_mls_deu,train_mls_fra,train_mls_spa,train_cv12_cmn,train_mls_ita,train_cv12_cmn_2,train_vl107_cmn",
    "valid_subset": "dev_all",
    "combine_valid_subsets": null,
    "ignore_unused_valid_subsets": false,
    "validate_interval": 1,
    "validate_interval_updates": 5000,
    "validate_after_updates": 0,
    "fixed_validation_seed": null,
    "disable_validation": false,
    "max_tokens_valid": "300000",
    "batch_size_valid": null,
    "max_valid_steps": null,
    "curriculum": 0,
    "gen_subset": "test",
    "num_shards": 1,
    "shard_id": 0,
    "grouped_shuffling": false,
    "update_epoch_batch_itr": false,
    "update_ordered_indices_seed": false,
    "distributed_world_size": 16,
    "distributed_num_procs": 8,
    "distributed_rank": 0,
    "distributed_backend": "nccl",
    "distributed_init_method": null,
    "distributed_port": 15129,
    "device_id": 0,
    "distributed_no_spawn": false,
    "ddp_backend": "legacy_ddp",
    "ddp_comm_hook": "none",
    "bucket_cap_mb": 25,
    "fix_batches_to_gpus": false,
    "find_unused_parameters": true,
    "gradient_as_bucket_view": false,
    "fast_stat_sync": false,
    "heartbeat_timeout": -1,
    "broadcast_buffers": false,
    "slowmo_momentum": null,
    "slowmo_base_algorithm": "localsgd",
    "localsgd_frequency": 3,
    "nprocs_per_node": 8,
    "pipeline_model_parallel": false,
    "pipeline_balance": null,
    "pipeline_devices": null,
    "pipeline_chunks": 0,
    "pipeline_encoder_balance": null,
    "pipeline_encoder_devices": null,
    "pipeline_decoder_balance": null,
    "pipeline_decoder_devices": null,
    "pipeline_checkpoint": "never",
    "zero_sharding": "none",
    "no_reshard_after_forward": false,
    "fp32_reduce_scatter": false,
    "cpu_offload": false,
    "use_sharded_state": false,
    "not_fsdp_flatten_parameters": false,
    "freeze_up_to_layer": null,
    "arch": "nar_p2v",
    "max_epoch": 0,
    "max_update": 500000,
    "stop_time_hours": 0,
    "clip_norm": 1.0,
    "clip_norm_type": "l2",
    "sentence_avg": false,
    "update_freq": [
      4
    ],
    "lr": [
      0.0001
    ],
    "stop_min_lr": -1.0,
    "use_bmuf": false,
    "skip_remainder_batch": false,
    "save_dir": "/checkpoint/mjhwang/experiments/230930-noiseaug_p2v-mls_multilingual_6lang/231005-noiseaug_p2v-mls_multilingual_6lang-alignfix.config_v2.langemb1.vuv_logit1.denoise.ngpu16",
    "restore_file": "checkpoint_last.pt",
    "continue_once": null,
    "finetune_from_model": null,
    "ignore_suffix": false,
    "reset_dataloader": true,
    "reset_lr_scheduler": false,
    "reset_meters": false,
    "reset_optimizer": false,
    "optimizer_overrides": "{}",
    "save_interval": 1,
    "save_interval_updates": 10000,
    "keep_interval_updates": 1,
    "keep_interval_updates_pattern": -1,
    "keep_last_epochs": -1,
    "keep_best_checkpoints": 10,
    "no_save": false,
    "no_epoch_checkpoints": true,
    "no_last_checkpoints": false,
    "no_best_checkpoints": false,
    "no_save_optimizer_state": false,
    "no_save_optimizer_state_on_training_finished": false,
    "synchronize_checkpoints_before_copy": false,
    "symlink_best_and_last_checkpoints": false,
    "best_checkpoint_metric": "mse_loss",
    "maximize_best_checkpoint_metric": false,
    "patience": 20,
    "checkpoint_suffix": "",
    "checkpoint_shard_count": 1,
    "load_checkpoint_on_all_dp_ranks": false,
    "write_checkpoints_asynchronously": false,
    "s3_upload_path": null,
    "replication_count": 1,
    "store_ema": false,
    "ema_decay": 0.9999,
    "ema_start_update": 0,
    "ema_seed_model": null,
    "ema_update_freq": 1,
    "ema_fp32": false,
    "load_prosody_encoder_from": null,
    "freeze_prosody_encoder": false,
    "unit_encoder_arch": "daft_exprt_encoder",
    "prosody_encoder_arch": "ecapa_tdnn2",
    "decoder_arch": "daft_exprt_decoder",
    "data": "/large_experiments/seamless/ust/mjhwang/data/denoise_prosody2vec/mls_multilingual_6lang_xlsr_10k_noiseaug",
    "config_yaml": "config_v2.yaml",
    "max_source_positions": 300000,
    "max_target_positions": 300000,
    "n_frames_per_step": 1,
    "eos_prob_threshold": 0.5,
    "eval_inference": true,
    "eval_tb_nsample": 8,
    "eval_bleu": false,
    "vocoder": "griffin_lim",
    "spec_bwd_max_iter": 8,
    "jit_data_offloading": true,
    "jit_data_root": "/scratch/slurm_tmpdir/${SLURM_JOB_ID}",
    "adam_betas": "(0.9, 0.98)",
    "adam_eps": 1e-08,
    "weight_decay": 0.0,
    "use_old_adam": false,
    "fp16_adam_stats": false,
    "block_wise": false,
    "force_anneal": null,
    "lr_shrink": 0.1,
    "warmup_updates": 1000,
    "ctc_weight": 0.0,
    "forward_sum_weight": 1.0,
    "bin_loss_start_ratio": 0.1,
    "bin_loss_warmup_steps": 6000,
    "film_regul_weight": 0.001,
    "pros_consist_weight": 0.0,
    "denoise_target": true,
    "snr_threshold": 2000000000000000.0,
    "pad": 1,
    "eos": 2,
    "unk": 3,
    "use_spkr_emb": 0,
    "use_lang_emb": 1,
    "prosody_embed_dim": 512,
    "use_ucmvn": 0,
    "use_spec_augment": 1,
    "use_prosody_layernorm": 1,
    "var_pred_hidden_dim": 512,
    "var_pred_kernel_size": 5,
    "var_pred_n_bins": -1,
    "add_variance_parallel": 1,
    "use_film_decoder": 1,
    "predict_var_vuv": 1,
    "predict_vuv_logit": 1,
    "predict_frm_f0_vuv": 0,
    "no_seed_provided": false,
    "speaker_embed_dim": 192,
    "use_utterance_speaker_embed": false,
    "lang_embed_dim": 64,
    "_name": "prosody2vec"
  },
  "criterion": {
    "_name": "nar_prosody2vec",
    "ctc_weight": 0.0,
    "forward_sum_weight": 1.0,
    "bin_loss_start_ratio": 0.1,
    "bin_loss_warmup_steps": 6000,
    "film_regul_weight": 0.001,
    "pros_consist_weight": 0.0,
    "denoise_target": true,
    "snr_threshold": 2000000000000000.0
  },
  "optimizer": {
    "_name": "adam",
    "adam_betas": "(0.9, 0.98)",
    "adam_eps": 1e-08,
    "weight_decay": 0.0,
    "use_old_adam": false,
    "fp16_adam_stats": false,
    "tpu": false,
    "lr": [
      0.0001
    ],
    "block_wise": false
  },
  "lr_scheduler": {
    "_name": "fixed",
    "force_anneal": null,
    "lr_shrink": 0.1,
    "warmup_updates": 1000,
    "lr": [
      0.0001
    ]
  },
  "scoring": {
    "_name": "bleu",
    "pad": 1,
    "eos": 2,
    "unk": 3
  },
  "bpe": null,
  "tokenizer": null,
  "ema": {
    "_name": null,
    "store_ema": false,
    "ema_decay": 0.9999,
    "ema_start_update": 0,
    "ema_seed_model": null,
    "ema_update_freq": 1,
    "ema_fp32": false
  },
  "simul_type": null
}