# Hydra-style defaults list. Entries are composed in order, so the values
# in this file (_self_) are applied after, and override, base_pytorch_algo.
defaults:
  - base_pytorch_algo
  - _self_

# Learning-rate / optimizer settings (presumably consumed by the optimizer
# setup of the algorithm class - confirm). lr is not set here; it is
# interpolated from experiment.training.lr.
lr: ${experiment.training.lr}
betas: [0.9, 0.95]
# Written with an explicit mantissa dot so that YAML 1.1 loaders (e.g. plain
# PyYAML) also resolve the value as a float instead of a string.
weight_decay: 5.0e-2
lr_scheduler:
  name: constant_with_warmup
  num_warmup_steps: 1000

# Both flags are interpolated from the dataset config, so the algorithm and
# the dataset cannot disagree on them.
load_video_latent: ${dataset.load_video_latent}
load_prompt_embed: ${dataset.load_prompt_embed}

# Diffusion-forcing options.
# NOTE(review): clean_hist_prob is presumably a probability in [0, 1] that
# applies to the rand_history mode - confirm against the consuming code.
diffusion_forcing:
  enabled: true
  mode: rand_history
  clean_hist_prob: 0.5

# Video shape; all three values are interpolated from the dataset config.
n_frames: ${dataset.n_frames}
height: ${dataset.height}
width: ${dataset.width}

# Diffusion / sampling settings.
num_train_timesteps: 1000
diffusion_type: "continuous"
sample_solver: unipc
sample_steps: 40
sample_shift: 3.0

# Guidance settings. neg_prompt is deliberately an explicit empty string
# (not null).
lang_guidance: 3.0
neg_prompt: ""
hist_guidance: 2.0
sliding_hist: 1

gradient_checkpointing_rate: 1.0
# NOTE(review): same value as text_encoder.text_len (512) in this file;
# presumably the two should be kept in sync - confirm.
max_text_tokens: 512

# Logging settings. fps is interpolated from the dataset config.
# NOTE(review): loss_freq / video_freq are presumably intervals measured in
# training steps - confirm.
logging:
  loss_freq: 1
  video_freq: 1000
  video_type: grid
  fps: ${dataset.fps}

# Serving settings.
serving:
  port: 6688

# Text encoder settings; ckpt_path is a relative path to a .pth file.
# NOTE(review): text_len equals the top-level max_text_tokens (512) in this
# file; presumably the two should be kept in sync - confirm.
text_encoder:
  text_len: 512
  text_dim: 4096
  compile: false
  name: google/umt5-xxl
  ckpt_path: data/ckpts/Wan2.1-T2V-1.3B/models_t5_umt5-xxl-enc-bf16.pth

# VAE settings; ckpt_path is a relative path to a .pth file.
# z_dim is referenced by model.in_dim and model.out_dim through
# ${algorithm.vae.z_dim}, so changing it here changes both.
vae:
  ckpt_path: data/ckpts/Wan2.1-T2V-1.3B/Wan2.1_VAE.pth
  compile: false
  z_dim: 16
  stride: [4, 8, 8]
  # mean and std each hold 16 entries, matching z_dim.
  # NOTE(review): presumably per-latent-channel normalisation statistics -
  # confirm against the consuming code.
  mean:
    [-0.7571, -0.7089, -0.9113, 0.1075, -0.1745, 0.9653, -0.1517, 1.5508,
     0.4134, -0.0715, 0.5517, -0.3632, -0.1922, -0.9497, 0.2503, -0.2921]
  std:
    [2.8184, 1.4541, 2.3275, 2.6558, 1.2196, 1.7708, 2.6052, 2.0743,
     3.2687, 2.1526, 2.8652, 1.5579, 1.6382, 1.1253, 2.8251, 1.9160]

# Model settings.
# ckpt_path has no file extension, unlike the vae / text_encoder ckpt_path
# values (presumably a checkpoint directory - confirm).
model:
  ckpt_path: data/ckpts/Wan2.1-T2V-1.3B
  tuned_ckpt_path: null
  compile: false
  model_type: t2v
  patch_size: [1, 2, 2]
  # in_dim and out_dim both follow vae.z_dim via interpolation. The
  # absolute path shows this file is mounted under the `algorithm` key.
  in_dim: ${algorithm.vae.z_dim}
  dim: 1536
  ffn_dim: 8960
  freq_dim: 256
  out_dim: ${algorithm.vae.z_dim}
  num_heads: 12
  num_layers: 30
  window_size: [-1, -1]
  # Canonical lowercase booleans (was `True`); the parsed value is unchanged.
  qk_norm: true
  cross_attn_norm: true
  # Written with an explicit mantissa dot so that YAML 1.1 loaders (e.g.
  # plain PyYAML) also resolve the value as a float instead of a string.
  eps: 1.0e-6