| dataset: | |
| target_sample_rate: 24000 | |
| n_mel_channels: 100 | |
| hop_length: 256 | |
| win_length: 1024 | |
| n_fft: 1024 | |
| mel_spec_type: vocos | |
| tokenizer: pinyin | |
| tokenizer_path: data/HFDatasetNew_pinyin/vocab.txt | |
| name: /apdcephfs_cq10/share_1297902/user/nenali/project/chukewang/data/Emilia-Dataset | |
| type: HFDatasetNew | |
| cache_dir: /apdcephfs_cq10/share_1297902/user/nenali/project/chukewang/data/Emilia-Dataset-Cache | |
| duration_path: scripts/duration.json | |
| valid_text_path: scripts/valid.json | |
| training: | |
| exp_name: F5TTS_PPG_ONLY | |
| learning_rate: 7.5e-05 | |
| batch_size_per_gpu: 4096 | |
| batch_size_type: frame | |
| max_samples: 64 | |
| grad_accumulation_steps: 4 | |
| max_grad_norm: 1.0 | |
| epochs: 11 | |
| num_warmup_updates: 20000 | |
| save_per_updates: 20000 | |
| last_per_steps: 5000 | |
| checkpoint_path: ckpts/ppg_only_cq | |
| combined_cond_drop_prob: | |
| - 1 | |
| - 0 | |
| - 0 | |
| - 0 | |
| repeat_space_token: false | |
| mix_condition: false | |
| model: | |
| wandb_resume_id: None | |
| cls: DiT | |
| cfg: | |
| DiT: | |
| dim: 1024 | |
| depth: 22 | |
| heads: 16 | |
| ff_mult: 2 | |
| text_dim: 512 | |
| conv_layers: 4 | |
| UNetT: | |
| dim: 1024 | |
| depth: 24 | |
| heads: 16 | |
| ff_mult: 4 | |
| ppg_input: true | |
| ppg: | |
| model_path: pretrained_models/ppg/33.pt | |
| config: pretrained_models/ppg/train.yaml | |
| frame_length: 20 | |
| mel_frame_shift: 10 | |
| dim: 256 | |
| output_type: map | |
| map: | |
| map_mix_ratio: 1.0 | |
| global_phn_center_path: pretrained_models/ppg/7layer_20ms_33pt/phn_center.npy | |
| para_softmax_path: pretrained_models/ppg/7layer_20ms_33pt/ce_layer.pkl | |