# This config file is mainly used by img_to_mask.py to obtain face-detection parameters.
# --- Global run settings ---
debug: false
seed: 39
root_name: audio_head_animator
exp_name: ${root_name}/inference
mode: train
# null: epoch count is decided by the consumer — TODO confirm default
n_epochs: null
cache_dir: cache
ckpt_dir: ${exp_name}/ckpt
# Path to a checkpoint to resume training from (null = start fresh)
resume_ckpt: null
# Canonical lowercase boolean (was `False`)
only_resume_state_dict: false
pretrained_ckpt: null
model:
  module_name: model.head_animation.head_animator
  class_name: HeadAnimatorModule
  pretrained_ckpt: ${pretrained_ckpt}
  # Canonical lowercase boolean (was `True`); re-used by data.hybrid_face_mask
  using_hybrid_mask: true
  output_dir: ${exp_name}
  face_encoder:
    module_name: model.head_animation.LIA_3d.face_encoder
    class_name: FaceEncoder
    image_size: 512
    image_channel: 3
    block_expansion: 64
    num_down_blocks: 3
    max_features: 512
    reshape_channel: 32
    reshape_depth: 16
    num_resblocks: 6
  motion_encoder:
    module_name: model.head_animation.LIA_3d.motion_encoder
    class_name: MotionEncoder
    latent_dim: 512
    # Keep in sync with the face encoder input resolution
    size: ${model.face_encoder.image_size}
  flow_estimator:
    module_name: model.head_animation.LIA_3d.flow_estimator
    class_name: FlowEstimator
    latent_dim: ${model.motion_encoder.latent_dim}
    motion_space: 64
  face_generator:
    module_name: model.head_animation.LIA_3d.face_generator
    class_name: FaceGenerator
    size: ${model.face_encoder.image_size}
    latent_dim: ${model.motion_encoder.latent_dim}
    # Output resolution follows the training crop width
    outputsize: ${data.train_width}
    reshape_channel: ${model.face_encoder.reshape_channel}
    group_norm_channel: 32
    # Canonical lowercase boolean (was `True`)
    flag_estimate_occlusion_map: true
  discriminator:
    module_name: model.head_animation.LIA.discriminator
    class_name: Discriminator
    size: ${data.train_width}
  vgg_loss:
    module_name: model.head_animation.VASA1.loss
    class_name: VGGLoss
# Loss weights (l_w_*); a weight of 0 disables that term — TODO confirm in trainer code
loss:
  l_w_recon: 1
  l_w_face_l1: 0
  l_w_vgg: 2
  l_w_gan: 0.2
  l_w_face: 0
  l_w_headpose: 0
  l_w_gaze: 0
  l_w_foreground: 0
  l_w_local: 0
optimizer:
  lr: 0.0001
  # The discriminator uses its own, larger learning rate
  discriminator_lr: 0.002
  warmup_steps: 0
  adam_beta1: 0.9
  adam_beta2: 0.999
  adam_epsilon: 1.0e-08
  weight_decay: 0.0
  # Regularization intervals — presumably StyleGAN-style lazy regularization; verify against trainer
  g_reg_every: 4
  d_reg_every: 16
logger:
  # Neptune disabled unless both project and API token are provided
  neptune_project: null
  neptune_api_token: null
  wandb:
    enabled: false
    entity: null
    project: "real-time"
# Lightning callbacks, instantiated from module_name/class_name entries
callbacks:
  - module_name: lightning.pytorch.callbacks
    class_name: ModelCheckpoint
    dirpath: ${ckpt_dir}
    every_n_train_steps: 2000
    # -1 keeps every checkpoint
    save_top_k: -1
trainer:
  accelerator: gpu
  log_every_n_steps: 1
  val_check_interval: 100000
data:
  # Canonical lowercase booleans throughout (were `False`/`True`)
  debug: false
  train_bs: 12
  accumulate_grad_batches: 1
  n_sample_frames: 1
  past_n: 1
  num_workers: 8
  ref_sample_margin: 10
  train_width: 512
  train_height: 512
  # [min, max] random scale range for the union bounding box — TODO confirm semantics
  union_bbox_scale: [1.2, 1.4]
  mouth_bbox_scale: 1.5
  eye_bbox_scale: 2.0
  hybrid_face_mask: ${model.using_hybrid_mask}
  flip_aug: true
  filter_hand_videos: true
  random_sample: false
  # Empty lists: dataset paths are expected to be filled in per-run
  dataset_file_path: []
  cache_file_path: []
  train_fps: 25
  dataloader: FastVideoDatasetV2
# Validation data: mirrors `data` via interpolation except where overridden below
val_data:
  train_bs: 1
  n_sample_frames: 40
  past_n: 2
  num_workers: 6
  ref_sample_margin: ${data.ref_sample_margin}
  train_width: ${data.train_width}
  train_height: ${data.train_height}
  union_bbox_scale: [1.2, 1.4]
  mouth_bbox_scale: ${data.mouth_bbox_scale}
  eye_bbox_scale: ${data.eye_bbox_scale}
  hybrid_face_mask: ${data.hybrid_face_mask}
  # Canonical lowercase booleans (were `False`); no flip augmentation at validation
  flip_aug: false
  filter_hand_videos: ${data.filter_hand_videos}
  random_sample: false
  dataset_file_path: []
  train_fps: ${data.train_fps}
  dataloader: ${data.dataloader}
test_data:
  height: 384
  width: 672
  # List of [image_path, scale] pairs — presumably filled in at inference time; verify consumer
  image_paths_and_scales: []
inference:
  output_dir: inference_outputs/${exp_name}