---
# Model and diffusion-scheduler configuration.
#
# NOTE(review): this file was recovered from a copy whose indentation had been
# flattened and which carried file-viewer residue (a size banner, a commit
# hash with a line-number gutter, and a trailing "|"). The residue has been
# removed and the nesting below is a reconstruction based on key order and
# key names. Confirm the structure against the code that loads this file.

# Unquoted on purpose: YAML loads this as the float 1.0, not the string "1.0".
version: 1.0
system: 'base'

model:
  # NOTE(review): assumes the five keys below all belong to cls_embedding.
  cls_embedding:
    speaker_dim: 256
    feature_dim: 512
    content_dim: 768
    content_hidden: 256
    use_pitch: false

  # NOTE(review): key names match the arguments of diffusers' UNet2DModel;
  # presumably this mapping is forwarded to it. Verify against the loader.
  unet:
    sample_size: [128, 256]
    # NOTE(review): 257 looks like content_hidden (256) + 1. TODO confirm.
    in_channels: 257
    out_channels: 1
    layers_per_block: 2
    # One entry per resolution level; the two block-type lists below have the
    # same length (4).
    block_out_channels: [128, 256, 256, 512]
    down_block_types:
      - 'DownBlock2D'
      - 'DownBlock2D'
      - 'AttnDownBlock2D'
      - 'AttnDownBlock2D'
    up_block_types:
      - 'AttnUpBlock2D'
      - 'AttnUpBlock2D'
      - 'UpBlock2D'
      - 'UpBlock2D'
    attention_head_dim: 32
    class_embed_type: 'identity'

# NOTE(review): placed at top level; it may instead belong under `model`.
# Confirm against the loader.
scheduler:
  num_train_steps: 1000
  beta_schedule: 'linear'
  beta_start: 0.0001
  beta_end: 0.02
  num_infer_steps: 50
  rescale_betas_zero_snr: true
  timestep_spacing: 'trailing'
  clip_sample: false
  prediction_type: 'v_prediction'

# NOTE(review): what these two values are applied to is not visible in this
# file, and they may belong under `scheduler` or `model` rather than at top
# level. Confirm against the loader.
scale: 2.75
shift: 5.80