debug: false
seed: 39
root_name: audio_head_animator
exp_name: ${root_name}/inference
mode: train
n_epochs: null
cache_dir: cache
ckpt_dir: ${exp_name}/ckpt
resume_ckpt: null

only_resume_state_dict: False
pretrained_ckpt: null

model:
  module_name: model.head_animation.head_animator
  class_name: HeadAnimatorModule
  pretrained_ckpt: ${pretrained_ckpt}
  using_hybrid_mask: True
  output_dir: ${exp_name}

  face_encoder:
    module_name: model.head_animation.LIA_3d.face_encoder
    class_name: FaceEncoder
    image_size: 512
    image_channel: 3
    block_expansion: 64
    num_down_blocks: 3
    max_features: 512
    reshape_channel: 32
    reshape_depth: 16
    num_resblocks: 6

  motion_encoder:
    module_name: model.head_animation.LIA_3d.motion_encoder
    class_name: MotionEncoder
    latent_dim: 512
    size: ${model.face_encoder.image_size}

  flow_estimator:
    module_name: model.head_animation.LIA_3d.flow_estimator
    class_name: FlowEstimator
    latent_dim: ${model.motion_encoder.latent_dim}
    motion_space: 64

  face_generator:
    module_name: model.head_animation.LIA_3d.face_generator
    class_name: FaceGenerator
    size: ${model.face_encoder.image_size}
    latent_dim: ${model.motion_encoder.latent_dim}
    outputsize: ${data.train_width}
    reshape_channel: ${model.face_encoder.reshape_channel}
    group_norm_channel: 32
    flag_estimate_occlusion_map: True

  discriminator:
    module_name: model.head_animation.LIA.discriminator
    class_name: Discriminator
    size: ${data.train_width}

  vgg_loss:
    module_name: model.head_animation.VASA1.loss
    class_name: VGGLoss

# NOTE(review): top-level placement of `loss` and `optimizer` is assumed —
# the source's indentation was lost in extraction; confirm these are not
# expected to be nested under `model` by the config loader.
loss:
  l_w_recon: 1
  l_w_face_l1: 0
  l_w_vgg: 2
  l_w_gan: 0.2
  l_w_face: 0
  l_w_headpose: 0
  l_w_gaze: 0
  l_w_foreground: 0
  l_w_local: 0

optimizer:
  lr: 0.0001
  discriminator_lr: 0.002
  warmup_steps: 0
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  weight_decay: 0.0
  g_reg_every: 4
  d_reg_every: 16

logger:
  neptune_project: null
  neptune_api_token: null
  wandb:
    enabled: false
    entity: null
    project: "real-time"

callbacks:
  - module_name: lightning.pytorch.callbacks
    class_name: ModelCheckpoint
    dirpath: ${ckpt_dir}
    every_n_train_steps: 2000
    save_top_k: -1

trainer:
  accelerator: gpu
  log_every_n_steps: 1
  val_check_interval: 100000

data:
  debug: False
  train_bs: 12
  accumulate_grad_batches: 1
  n_sample_frames: 1
  past_n: 1
  num_workers: 8
  ref_sample_margin: 10
  train_width: 512
  train_height: 512
  union_bbox_scale: [1.2, 1.4]
  mouth_bbox_scale: 1.5
  eye_bbox_scale: 2.0
  hybrid_face_mask: ${model.using_hybrid_mask}
  flip_aug: True
  filter_hand_videos: true
  random_sample: False
  dataset_file_path: []
  cache_file_path: []
  train_fps: 25
  dataloader: FastVideoDatasetV2

val_data:
  train_bs: 1
  n_sample_frames: 40
  past_n: 2
  num_workers: 6
  ref_sample_margin: ${data.ref_sample_margin}
  train_width: ${data.train_width}
  train_height: ${data.train_height}
  union_bbox_scale: [1.2, 1.4]
  mouth_bbox_scale: ${data.mouth_bbox_scale}
  eye_bbox_scale: ${data.eye_bbox_scale}
  hybrid_face_mask: ${data.hybrid_face_mask}
  flip_aug: False
  filter_hand_videos: ${data.filter_hand_videos}
  random_sample: False
  dataset_file_path: []
  train_fps: ${data.train_fps}
  dataloader: ${data.dataloader}

test_data:
  height: 384
  width: 672
  image_paths_and_scales: []

inference:
  output_dir: inference_outputs/${exp_name}