hydra:
  run:
    dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}/${now:%Y-%m-%d}/${now:%H-%M-%S}

datasets:
  name: Emilia_ZH_EN
  batch_size_per_gpu: 38400
  batch_size_type: frame
  max_samples: 64
  num_workers: 16

optim:
  epochs: 15
  learning_rate: 7.5e-5
  num_warmup_updates: 20000
  grad_accumulation_steps: 1
  max_grad_norm: 1.0
  bnb_optimizer: False

model:
  name: E2TTS_Base
  tokenizer: pinyin
  tokenizer_path: None
  arch:
    dim: 1024
    depth: 24
    heads: 16
    ff_mult: 4
  mel_spec:
    target_sample_rate: 24000
    n_mel_channels: 100
    hop_length: 256
    win_length: 1024
    n_fft: 1024
    mel_spec_type: vocos
  vocoder:
    is_local: False
    local_path: None

ckpts:
  logger: wandb
  save_per_updates: 50000
  last_per_steps: 5000
  save_dir: ckpts/${model.name}_${model.mel_spec.mel_spec_type}_${model.tokenizer}_${datasets.name}