---
# Training configuration for the F5TTS_PPG_ONLY experiment.
# NOTE(review): the original file was collapsed onto a single line (invalid
# YAML); the nesting below is reconstructed from key order and semantics —
# confirm section boundaries against the config loader.

# Audio / mel-spectrogram front-end and dataset locations.
dataset:
  target_sample_rate: 24000
  n_mel_channels: 100
  hop_length: 256
  win_length: 1024
  n_fft: 1024
  mel_spec_type: vocos
  tokenizer: pinyin
  tokenizer_path: data/HFDatasetNew_pinyin/vocab.txt
  name: /apdcephfs_cq10/share_1297902/user/nenali/project/chukewang/data/Emilia-Dataset
  type: HFDatasetNew
  cache_dir: /apdcephfs_cq10/share_1297902/user/nenali/project/chukewang/data/Emilia-Dataset-Cache
  duration_path: scripts/duration.json
  valid_text_path: scripts/valid.json

# Optimizer / scheduler / checkpointing hyperparameters.
training:
  exp_name: F5TTS_PPG_ONLY
  learning_rate: 7.5e-05
  batch_size_per_gpu: 4096  # measured in frames, per batch_size_type below
  batch_size_type: frame
  max_samples: 64
  grad_accumulation_steps: 4
  max_grad_norm: 1.0
  epochs: 11
  num_warmup_updates: 20000
  save_per_updates: 20000
  last_per_steps: 5000
  checkpoint_path: ckpts/ppg_only_cq
  # Per-condition dropout probabilities; 1 drops that condition always.
  combined_cond_drop_prob:
    - 1
    - 0
    - 0
    - 0
  repeat_space_token: false
  mix_condition: false

# Model architecture selection and per-architecture hyperparameters.
model:
  # NOTE(review): plain `None` is loaded as the *string* "None", not YAML
  # null. If the trainer expects a real null here, change this to `null` —
  # left as-is because the consumer may compare against the string.
  wandb_resume_id: None
  cls: DiT
  cfg:
    DiT:
      dim: 1024
      depth: 22
      heads: 16
      ff_mult: 2
      text_dim: 512
      conv_layers: 4
    UNetT:
      dim: 1024
      depth: 24
      heads: 16
      ff_mult: 4
  # NOTE(review): assumed to be a model-level flag (followed cfg in the
  # flattened dump) — confirm nesting against the loader.
  ppg_input: true

# PPG (phonetic posteriorgram) extractor settings.
# NOTE(review): assumed top-level section like dataset/training/model —
# confirm it is not expected under `model` by the loader.
ppg:
  model_path: pretrained_models/ppg/33.pt
  config: pretrained_models/ppg/train.yaml
  frame_length: 20
  mel_frame_shift: 10
  dim: 256
  output_type: map
  map:
    map_mix_ratio: 1.0
    global_phn_center_path: pretrained_models/ppg/7layer_20ms_33pt/phn_center.npy
    para_softmax_path: pretrained_models/ppg/7layer_20ms_33pt/ce_layer.pkl