---
# Training-loop configuration.
train:
  seed: 1234                 # global RNG seed for reproducibility
  epochs: 100
  batch_size: 6
  gradient_accumulation: 4   # optimizer steps every N batches
  save_every_n_epoch: 1
  precision: 32              # NOTE(review): train uses 32 while model.precision is "fp16" — confirm which consumer reads which
  gradient_clip: 1.0
  # LR schedule: warmup from lr_init toward lr, then decay toward lr_end
  # (exact schedule shape depends on the consumer — not visible here).
  optimizer:
    lr: 0.01
    lr_init: 0.00001
    lr_end: 0.0001
    warmup_steps: 2000
    decay_steps: 40000
# Dataset / dataloader settings.
data:
  max_eval_sample: 8   # cap on evaluation samples
  max_sec: 40          # presumably max utterance length in seconds — TODO confirm
  num_workers: 1
  pad_val: 1024        # padding token/value; matches model.n_codes — TODO confirm intent
# Model, checkpointing, and decoding configuration.
model:
  # --- checkpoints and paths ---
  saving_path: "ckpt/"
  resume_checkpoint: null
  vocoder_config_path: "quantizer/new_ckpt/config.json"
  vocoder_ckpt_path: "quantizer/new_ckpt/g_00600000"
  # NOTE(review): absolute, user-specific paths — parameterize or override per machine.
  datadir: "/home/liweiche/GigaSpeech/wavs"
  metapath: "/home/liweiche/GigaSpeech/train2.json"
  val_metapath: "/home/liweiche/GigaSpeech/dev2.json"
  sampledir: "logs/"
  pretrained_path: null
  # --- optimization ---
  lr: 0.0001
  batch_size: 200.0          # NOTE(review): float batch size looks unintentional — confirm consumer expects 200.0 vs 200
  train_bucket_size: 8192
  training_step: 800000
  optim_flat_percent: 0.0
  warmup_step: 50
  adam_beta1: 0.9
  adam_beta2: 0.98
  # --- transformer dimensions ---
  ffd_size: 3072
  hidden_size: 768
  enc_nlayers: 6
  dec_nlayers: 6
  nheads: 12
  # --- autoregressive sub-module dimensions ---
  ar_layer: 4
  ar_ffd_size: 1024
  ar_hidden_size: 256
  ar_nheads: 4
  # --- regularization / misc ---
  aligner_softmax_temp: 1.0
  layer_norm_eps: 0.00001
  speaker_embed_dropout: 0.05
  label_smoothing: 0.0
  # --- trainer settings (Lightning-style keys — TODO confirm trainer consumes these) ---
  val_check_interval: 5000
  check_val_every_n_epoch: 1
  precision: "fp16"
  nworkers: 16
  distributed: true
  accelerator: "ddp"
  version: null
  accumulate_grad_batches: 1
  # --- sampling / decoding ---
  use_repetition_token: true
  use_repetition_gating: false
  repetition_penalty: 1.0
  sampling_temperature: 1.0
  top_k: -1                  # -1 presumably disables top-k — TODO confirm
  min_top_k: 3
  top_p: 0.8
  sample_num: 4
  length_penalty_max_length: 15000
  length_penalty_max_prob: 0.95
  max_input_length: 2048
  max_output_length: 2000
  # --- audio / vocabulary ---
  sample_rate: 16000
  n_codes: 1024
  n_cluster_groups: 1
  phone_context_window: 4
  phoneset_size: 1000
# Inference-time overrides.
inference:
  top_k: 5