# NOTE(review): the three lines below are web-scrape residue from a GitHub
# commit header, preserved as comments so the file parses as YAML.
# kiwhansong's picture
# add demo
# 142a1ac
# Hydra defaults list: compose the shared base config first, then apply this
# file's own keys (`_self_` last means values below override the base).
defaults:
  - base_pytorch_algo # inherits from configurations/algorithm/base_algo.yaml
  - _self_
# Optimizer hyperparameters (values resolved via OmegaConf interpolation).
lr: ${experiment.training.lr}
betas: [0.9, 0.95]
# Written as a plain decimal: bare `5e-2` (no dot before `e`) is resolved as a
# *string*, not a float, by YAML 1.1 loaders such as plain PyYAML.
weight_decay: 0.05
lr_scheduler:
  name: constant_with_warmup
  num_warmup_steps: 1000
load_video_latent: ${dataset.load_video_latent} # if true, load latent from disk instead of using video vae
load_prompt_embed: ${dataset.load_prompt_embed} # if true, load prompt embedding from disk instead of running language model online
# Diffusion-forcing training scheme (per-frame independent noise levels).
diffusion_forcing:
  enabled: true
  mode: rand_history # independent, rand_history
  # probability of giving first frame image condition when finetuning
  # image-to-video, overriding diffusion forcing's noise level for first frame
  clean_hist_prob: 0.5
# Data dimensions, pulled from the dataset config via interpolation.
n_frames: ${dataset.n_frames}
height: ${dataset.height}
width: ${dataset.width}
# Diffusion training / sampling settings.
num_train_timesteps: 1000
diffusion_type: "continuous" # or "discrete"
sample_solver: unipc
sample_steps: 40
sample_shift: 3.0
lang_guidance: 3.0 # guidance scale for the text prompt
neg_prompt: ""
hist_guidance: 2.0 # guidance scale for history conditioning
sliding_hist: 1 # latent frames used as history when extending videos; original comment said "2 latent frames" but the value is 1 — TODO confirm intended value
gradient_checkpointing_rate: 1.0 # gradient checkpointing blocks as a ratio of total blocks
max_text_tokens: 512
# Training-time logging cadence and video preview format.
logging:
  loss_freq: 1
  video_freq: 1000
  video_type: grid # grid or single
  fps: ${dataset.fps}
# Demo-serving endpoint.
serving:
  port: 6688
# UMT5-XXL text encoder used to embed prompts.
text_encoder:
  text_len: 512
  text_dim: 4096
  compile: false
  name: google/umt5-xxl
  ckpt_path: data/ckpts/Wan2.1-T2V-1.3B/models_t5_umt5-xxl-enc-bf16.pth
# Wan2.1 video VAE. `mean`/`std` are per-channel latent statistics
# (z_dim = 16 entries each) used to normalize latents.
vae:
  ckpt_path: data/ckpts/Wan2.1-T2V-1.3B/Wan2.1_VAE.pth
  compile: false
  z_dim: 16
  stride: [4, 8, 8] # presumably (temporal, height, width) downsampling factors — TODO confirm against VAE implementation
  mean: [-0.7571, -0.7089, -0.9113, 0.1075, -0.1745, 0.9653, -0.1517, 1.5508, 0.4134, -0.0715, 0.5517, -0.3632, -0.1922, -0.9497, 0.2503, -0.2921]
  std: [2.8184, 1.4541, 2.3275, 2.6558, 1.2196, 1.7708, 2.6052, 2.0743, 3.2687, 2.1526, 2.8652, 1.5579, 1.6382, 1.1253, 2.8251, 1.9160]
# Diffusion-transformer backbone architecture and checkpoint paths.
model:
  ckpt_path: data/ckpts/Wan2.1-T2V-1.3B
  tuned_ckpt_path: null # optional fine-tuned checkpoint path (null = none) — TODO confirm load semantics
  compile: false #true
  model_type: t2v # if i2v, this flag will let the model take in CLIP features
  patch_size: [1, 2, 2]
  in_dim: ${algorithm.vae.z_dim}
  dim: 1536
  ffn_dim: 8960
  freq_dim: 256
  out_dim: ${algorithm.vae.z_dim}
  num_heads: 12
  num_layers: 30
  window_size: [-1, -1]
  # canonical lowercase booleans (were `True`; same parsed value, lint-clean)
  qk_norm: true
  cross_attn_norm: true
  eps: 1.0e-6 # dotted mantissa so YAML 1.1 loaders parse a float, not a string