HuMo_local

Running on Zero

App Files Files Community

HuMo_local/humo/configs/inference/generate.yaml

alexnasa

Upload 54 files

295978e verified 3 months ago

raw

history blame contribute delete

2.17 kB

	__object__:
	path: humo.generate
	name: Generator

	dit:
	model:
	__inherit__: humo/configs/models/Wan_14B_I2V.yaml
	__object__:
	path: humo.models.wan_modules.model_humo
	name: WanModel
	insert_audio: True
	zero_vae_path: ./weights/HuMo/zero_vae_129frame.pt
	zero_vae_720p_path: ./weights/HuMo/zero_vae_720p_161frame.pt
	checkpoint_dir: ./weights/HuMo/HuMo-17B
	compile: False
	init_with_meta_device: True
	gradient_checkpoint: True
	fsdp:
	sharding_strategy: _HYBRID_SHARD_ZERO2
	sp_size: 1
	dtype: bfloat16

	vae:
	checkpoint: ./weights/Wan2.1-T2V-1.3B/Wan2.1_VAE.pth
	vae_stride: [ 4, 8, 8 ]
	scaling_factor: 0.9152
	compile: False
	grouping: True
	use_sample: False
	dtype: bfloat16

	text:
	t5_checkpoint: ./weights/Wan2.1-T2V-1.3B/models_t5_umt5-xxl-enc-bf16.pth
	t5_tokenizer: ./weights/Wan2.1-T2V-1.3B/google/umt5-xxl
	dropout: 0.1
	dtype: bfloat16
	fsdp:
	enabled: True
	sharding_strategy: HYBRID_SHARD

	diffusion:
	schedule:
	type: lerp
	T: 1000.0
	sampler:
	type: euler
	prediction_type: v_lerp
	timesteps:
	training:
	type: logitnormal
	loc: 0.0
	scale: 1.0
	sampling:
	type: uniform_trailing
	steps: 50
	shift: 5.0

	audio:
	vocal_separator: ./weights/HuMo/audio_separator/Kim_Vocal_2.onnx
	wav2vec_model: ./weights/whisper-large-v3

	generation:
	mode: "TIA" # TA, TIA
	extract_audio_feat: True
	seed: 666666
	frames: 97
	fps: 25
	height: 480 # 720 480
	width: 832 # 1280 832
	batch_size: 1
	sequence_parallel: 8
	output:
	dir: ./output
	# positive_prompt: ./examples/test_case.json
	sample_neg_prompt: '色调艳丽，过曝，静态，细节模糊不清，字幕，风格，作品，画作，画面，静止，整体发灰，最差质量，低质量，JPEG压缩残留，丑陋的，残缺的，多余的手指，画得不好的手部，画得不好的脸部，畸形的，毁容的，形态畸形的肢体，手指融合，静止不动的画面，杂乱的背景，三条腿，背景人很多，倒着走'
	scale_a: 5.5
	scale_t: 5.0
	step_change: 980