# SoulX-Singer / config.yaml
# Source-page metadata (not part of the configuration):
#   uploader avatar: williamchangtw
#   commit b645b2d - "Duplicate from Soul-AILab/SoulX-Singer"
#   size: 579 Bytes
# Inference settings.
infer:
  n_steps: 32
  # NOTE(review): presumably the classifier-free guidance scale - confirm
  # against the code that reads this key.
  cfg: 3

# Audio / mel-spectrogram settings.
audio:
  hop_size: 480
  sample_rate: 24000
  # NOTE(review): unit is not stated here (frames vs. samples) - confirm.
  max_length: 36000
  n_fft: 1920  # same value as win_size
  num_mels: 128  # same value as model.flow_matching.mel_dim
  win_size: 1920
  fmin: 0
  fmax: 12000  # half of sample_rate
  # NOTE(review): mel_var / mel_mean look like normalization statistics for
  # the mel features - confirm how the consumer applies them.
  mel_var: 8.14
  mel_mean: -4.92

# Model hyperparameters, split into two sub-sections. Both sub-sections
# define their own num_layers, so they must stay nested: flattened into one
# mapping the key would be duplicated and the last value would silently win.
model:
  encoder:
    vocab_size: 3000
    text_dim: 512
    pitch_dim: 512
    type_dim: 512
    f0_bin: 361
    f0_dim: 512
    num_layers: 4
  flow_matching:
    mel_dim: 128  # same value as audio.num_mels
    hidden_size: 1024
    num_layers: 22
    num_heads: 16
    cfg_drop_prob: 0.2
    use_embedding: false
    cond_codebook_size: 512
    cond_scale_factor: 1
    # Written with an explicit decimal point: YAML 1.1 resolvers (e.g. PyYAML)
    # load a bare `1e-5` as a string rather than a float.
    sigma: 1.0e-5
    time_scheduler: cos