# ppg / config.yaml
# wanghappy's picture
# Upload folder using huggingface_hub
# 7a54974 verified
# Dataset and audio feature-extraction settings.
# The nesting below was restored: every key had been flattened to column 0,
# which left `dataset:` as null and spilled its settings to the top level.
dataset:
  # Mel-spectrogram parameters.
  target_sample_rate: 24000  # presumably Hz -- confirm
  n_mel_channels: 100
  hop_length: 256  # presumably in samples -- confirm
  win_length: 1024
  n_fft: 1024
  mel_spec_type: vocos

  # Text tokenisation.
  tokenizer: pinyin
  tokenizer_path: data/HFDatasetNew_pinyin/vocab.txt

  # Dataset location. `name` and `cache_dir` are absolute, site-specific
  # paths; adjust them for the environment this config runs in.
  name: /apdcephfs_cq10/share_1297902/user/nenali/project/chukewang/data/Emilia-Dataset
  type: HFDatasetNew
  cache_dir: /apdcephfs_cq10/share_1297902/user/nenali/project/chukewang/data/Emilia-Dataset-Cache

  # Auxiliary JSON files, given as relative paths.
  duration_path: scripts/duration.json
  valid_text_path: scripts/valid.json
# Optimisation, batching and checkpointing settings.
# The nesting below was restored: every key had been flattened to column 0,
# which left `training:` as null and spilled its settings to the top level.
training:
  exp_name: F5TTS_PPG_ONLY
  learning_rate: 7.5e-05

  # NOTE(review): with `batch_size_type: frame`, `batch_size_per_gpu` is
  # presumably counted in frames rather than utterances -- confirm.
  batch_size_per_gpu: 4096
  batch_size_type: frame
  max_samples: 64
  grad_accumulation_steps: 4
  max_grad_norm: 1.0

  # Schedule and checkpoint cadence.
  epochs: 11
  num_warmup_updates: 20000
  save_per_updates: 20000
  last_per_steps: 5000
  checkpoint_path: ckpts/ppg_only_cq

  # NOTE(review): the meaning of each of the four positions is defined by
  # the training code, not by this file -- confirm before editing.
  combined_cond_drop_prob:
    - 1
    - 0
    - 0
    - 0
  repeat_space_token: false
  mix_condition: false
# Model selection, per-architecture hyper-parameters and PPG settings.
# The nesting below was restored: with every key flattened to column 0,
# `dim`, `depth`, `heads` and `ff_mult` were duplicate keys, so most parsers
# silently kept only the last value of each.
# NOTE(review): the hierarchy is reconstructed from key order; verify it
# against the code that reads this file.
model:
  # Was the bare word `None`, which YAML loads as the string "None" (a
  # truthy value). `null` is YAML's actual null.
  wandb_resume_id: null

  # `cls` matches one of the keys under `cfg`; presumably it selects which
  # architecture entry is used -- confirm.
  cls: DiT
  cfg:
    DiT:
      dim: 1024
      depth: 22
      heads: 16
      ff_mult: 2
      text_dim: 512
      conv_layers: 4
    UNetT:
      dim: 1024
      depth: 24
      heads: 16
      ff_mult: 4

  # NOTE(review): `ppg_input` and `ppg` are assumed to belong under `model`;
  # they may instead be top-level keys -- confirm against the loader.
  ppg_input: true
  ppg:
    model_path: pretrained_models/ppg/33.pt
    config: pretrained_models/ppg/train.yaml
    # Presumably milliseconds (the paths below mention "20ms") -- confirm.
    frame_length: 20
    mel_frame_shift: 10
    dim: 256
    # `output_type` names the sub-section that follows (`map`).
    output_type: map
    map:
      map_mix_ratio: 1.0
      global_phn_center_path: pretrained_models/ppg/7layer_20ms_33pt/phn_center.npy
      para_softmax_path: pretrained_models/ppg/7layer_20ms_33pt/ce_layer.pkl