| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | from dataclasses import dataclass |
| | from pathlib import Path |
| | from typing import Optional, Literal, Tuple |
| | import tyro |
| | import importlib |
| | from vhap.util.log import get_logger |
| | logger = get_logger(__name__) |
| |
|
| |
|
def import_module(module_name: str):
    """Resolve a dotted path such as ``pkg.mod.ClassName`` to the named attribute.

    Despite the name, this returns the final attribute (typically a class)
    of the module given by everything before the last dot.
    """
    qualifier, attribute = module_name.rsplit(".", 1)
    return getattr(importlib.import_module(qualifier), attribute)
| |
|
| |
|
class Config:
    """Base class giving config objects dict-style read access to their attributes."""

    def __getitem__(self, __name: str):
        # Guard clause: surface a class-qualified message for unknown keys.
        # Deliberately raises AttributeError (not KeyError) to match attribute access.
        if not hasattr(self, __name):
            raise AttributeError(f"{self.__class__.__name__} has no attribute '{__name}'")
        return getattr(self, __name)
| |
|
| |
|
@dataclass()
class DataConfig(Config):
    """Configuration of the dataset: paths, cameras, and image preprocessing."""
    # NOTE(review): default is '' (str) despite the Path annotation — presumably tyro
    # coerces CLI values to Path; confirm before relying on the default.
    root_folder: Path = ''
    """The root folder for the dataset."""
    sequence: str = ''
    """The sequence name"""
    _target: str = "vhap.data.video_dataset.VideoDataset"
    """The target dataset class"""
    division: Optional[str] = None  # optional dataset division name
    subset: Optional[str] = None  # optional dataset subset name
    calibrated: bool = False
    """Whether the cameras parameters are available"""
    align_cameras_to_axes: bool = True
    """Adjust how cameras distribute in the space with a global rotation"""
    camera_convention_conversion: str = 'opencv->opengl'  # convention conversion applied to cameras
    target_extrinsic_type: Literal['w2c', 'c2w'] = 'w2c'  # world-to-camera or camera-to-world extrinsics
    n_downsample_rgb: Optional[int] = None
    """Load from downsampled RGB images to save data IO time"""
    scale_factor: float = 1.0
    """Further apply a scaling transformation after the downsampling of RGB"""
    background_color: Optional[Literal['white', 'black']] = 'white'  # background color used when compositing
    use_alpha_map: bool = False  # whether to load alpha maps
    use_landmark: bool = True  # whether to load landmarks
    landmark_source: Optional[Literal['face-alignment', 'star']] = "star"  # which landmark detector's output to use
| |
|
| |
|
@dataclass()
class ModelConfig(Config):
    """Configuration of the FLAME model: parameter dimensions, mesh edits, and texture."""
    n_shape: int = 300  # number of shape coefficients
    n_expr: int = 100  # number of expression coefficients
    n_tex: int = 100  # number of PCA texture coefficients

    use_static_offset: bool = False
    """Optimize static offsets on top of FLAME vertices in the canonical space"""
    use_dynamic_offset: bool = False
    """Optimize dynamic offsets on top of the FLAME vertices in the canonical space"""
    add_teeth: bool = True
    """Add teeth to the FLAME model"""
    remove_lip_inside: bool = False
    """Remove the inner part of the lips from the FLAME model"""

    tex_resolution: int = 2048
    """The resolution of the extra texture map"""
    tex_painted: bool = True
    """Use a painted texture map instead the pca texture space as the base texture map"""
    tex_extra: bool = True
    """Optimize an extra texture map as the base texture map or the residual texture map"""

    tex_clusters: tuple[str, ...] = ("skin", "hair", "boundary", "lips_tight", "sclerae", "irises")
    """Regions that are supposed to share a similar color inside"""
    residual_tex: bool = True
    """Use the extra texture map as a residual component on top of the base texture"""
    occluded: tuple[str, ...] = ()
    """The regions that are occluded by the hair or garments"""

    flame_params_path: Optional[Path] = None  # optional path to precomputed FLAME parameters
| |
|
| |
|
@dataclass()
class RenderConfig(Config):
    """Configuration of the differentiable renderer: backend, backgrounds, and lighting."""
    backend: Literal['nvdiffrast', 'pytorch3d'] = 'nvdiffrast'
    """The rendering backend"""
    use_opengl: bool = False
    """Use OpenGL for NVDiffRast"""
    background_train: Literal['white', 'black', 'target'] = 'target'
    """Background color/image for training"""
    disturb_rate_fg: Optional[float] = 0.5
    """The rate of disturbance for the foreground"""
    disturb_rate_bg: Optional[float] = 0.5
    """The rate of disturbance for the background. 0.6 best for multi-view, 0.3 best for single-view"""
    background_eval: Literal['white', 'black', 'target'] = 'target'
    """Background color/image for evaluation"""
    lighting_type: Literal['constant', 'front', 'front-range', 'SH'] = 'SH'
    """The type of lighting"""
    lighting_space: Literal['world', 'camera'] = 'world'
    """The space of lighting"""
| |
|
| |
|
@dataclass()
class LearningRateConfig(Config):
    """Per-parameter-group learning rates for the tracking optimizer."""
    base: float = 5e-3
    """shape, texture, rotation, eyes, neck, jaw"""
    translation: float = 1e-3  # global translation
    expr: float = 5e-2  # expression parameters
    static_offset: float = 5e-4  # static vertex offsets
    dynamic_offset: float = 5e-4  # dynamic vertex offsets
    camera: float = 5e-3  # camera parameters
    light: float = 5e-3  # lighting parameters
| |
|
| |
|
@dataclass()
class LossWeightConfig(Config):
    """Weights for the tracking losses and regularizers.

    Optional[float] weights set to None presumably disable the corresponding
    term — confirm against the loss-assembly code.
    """
    landmark: Optional[float] = 10.  # weight of the landmark loss
    always_enable_jawline_landmarks: bool = True
    """Always enable the landmark loss for the jawline landmarks. Ignore disable_jawline_landmarks in stages."""

    photo: Optional[float] = 30.  # weight of the photometric loss

    reg_shape: float = 3e-1  # regularization of shape coefficients
    reg_expr: float = 3e-2  # regularization of expression coefficients
    reg_tex_pca: float = 1e-4  # regularization of PCA texture coefficients

    reg_tex_res: Optional[float] = None
    """Regularize the residual texture map"""
    reg_tex_res_clusters: Optional[float] = 1e1
    """Regularize the residual texture map inside each texture cluster"""
    reg_tex_res_for: tuple[str, ...] = ("sclerae", "teeth")
    """Regularize the residual texture map for the clusters specified"""
    reg_tex_tv: Optional[float] = 1e4
    """Regularize the total variation of the texture map"""

    reg_light: Optional[float] = None
    """Regularize lighting parameters"""
    reg_diffuse: Optional[float] = 1e2
    """Regularize lighting parameters by the diffuse term"""

    reg_offset: Optional[float] = 3e2
    """Regularize the norm of offsets"""
    reg_offset_relax_coef: float = 1.
    """The coefficient for relaxing reg_offset for the regions specified"""
    reg_offset_relax_for: tuple[str, ...] = ("hair", "ears")
    """Relax the offset loss for the regions specified"""

    reg_offset_lap: Optional[float] = 1e6
    """Regularize the difference of laplacian coordinate caused by offsets"""
    reg_offset_lap_relax_coef: float = 0.1
    """The coefficient for relaxing reg_offset_lap for the regions specified"""
    reg_offset_lap_relax_for: tuple[str, ...] = ("hair", "ears")
    """Relax the offset loss for the regions specified"""

    reg_offset_rigid: Optional[float] = 3e2
    """Regularize the the offsets to be as-rigid-as-possible"""
    reg_offset_rigid_for: tuple[str, ...] = ("left_ear", "right_ear", "neck", "left_eye", "right_eye", "lips_tight")
    """Regularize the the offsets to be as-rigid-as-possible for the regions specified"""

    reg_offset_dynamic: Optional[float] = 3e5
    """Regularize the dynamic offsets to be temporally smooth"""

    blur_iter: int = 0
    """The number of iterations for blurring vertex weights"""

    # Temporal smoothness weights for pose parameters.
    smooth_trans: float = 3e2
    """global translation"""
    smooth_rot: float = 3e1
    """global rotation"""

    smooth_neck: float = 3e1
    """neck joint"""
    smooth_jaw: float = 1e-1
    """jaw joint"""
    smooth_eyes: float = 0
    """eyes joints"""

    prior_neck: float = 3e-1
    """Regularize the neck joint towards neutral"""
    prior_jaw: float = 3e-1
    """Regularize the jaw joint towards neutral"""
    prior_eyes: float = 3e-2
    """Regularize the eyes joints towards neutral"""
| | |
| |
|
@dataclass()
class LogConfig(Config):
    """Configuration of scalar/media logging during tracking."""
    interval_scalar: Optional[int] = 100
    """The step interval of scalar logging. Using an interval of stage_tracking.num_steps // 5 unless specified."""
    interval_media: Optional[int] = 500
    """The step interval of media logging. Using an interval of stage_tracking.num_steps unless specified."""
    image_format: Literal['jpg', 'png'] = 'jpg'
    """Output image format"""
    view_indices: Tuple[int, ...] = ()
    """Manually specify the view indices for log"""
    max_num_views: int = 3
    """The maximum number of views for log"""
    stack_views_in_rows: bool = True  # presumably stacks logged views row-wise rather than column-wise — confirm in logger code
| |
|
| |
|
@dataclass()
class ExperimentConfig(Config):
    """Configuration of the experiment: output location and optimization mode."""
    output_folder: Path = Path('output/track')  # where tracking results are written
    reuse_landmarks: bool = True  # reuse previously detected landmarks if available — confirm against dataset code
    keyframes: Tuple[int, ...] = tuple()  # frame indices treated as keyframes
    photometric: bool = False
    """enable photometric optimization, otherwise only landmark optimization"""
| |
|
@dataclass()
class StageConfig(Config):
    """Base configuration shared by all optimization stages."""
    disable_jawline_landmarks: bool = False
    """Disable the landmark loss for the jawline landmarks since they are not accurate"""
| |
|
@dataclass()
class StageLmkInitRigidConfig(StageConfig):
    """The stage for initializing the rigid parameters"""
    num_steps: int = 300  # optimization steps for this stage
    optimizable_params: tuple[str, ...] = ("cam", "pose")  # parameter groups optimized in this stage
| |
|
@dataclass()
class StageLmkInitAllConfig(StageConfig):
    """The stage for initializing all the parameters optimizable with landmark loss"""
    num_steps: int = 300  # optimization steps for this stage
    optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr")  # parameter groups optimized in this stage
| |
|
@dataclass()
class StageLmkSequentialTrackingConfig(StageConfig):
    """The stage for sequential tracking with landmark loss"""
    num_steps: int = 50  # optimization steps per frame
    optimizable_params: tuple[str, ...] = ("pose", "joints", "expr")  # parameter groups optimized in this stage
| |
|
@dataclass()
class StageLmkGlobalTrackingConfig(StageConfig):
    """The stage for global tracking with landmark loss"""
    num_epochs: int = 0  # NOTE: this stage is budgeted in epochs, not steps (0 disables it)
    optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr")  # parameter groups optimized in this stage
| |
|
@dataclass()
class PhotometricStageConfig(StageConfig):
    """Base configuration shared by all photometric stages."""
    align_texture_except: tuple[str, ...] = ()
    """Align the inner region of rendered FLAME to the image, except for the regions specified"""
    align_boundary_except: tuple[str, ...] = ("bottomline",)
    """Align the boundary of FLAME to the image, except for the regions specified"""
| |
|
@dataclass()
class StageRgbInitTextureConfig(PhotometricStageConfig):
    """The stage for initializing the texture map with photometric loss"""
    num_steps: int = 500  # optimization steps for this stage
    optimizable_params: tuple[str, ...] = ("cam", "shape", "texture", "lights")  # parameter groups optimized in this stage
    align_texture_except: tuple[str, ...] = ("hair", "boundary", "neck")
    align_boundary_except: tuple[str, ...] = ("hair", "boundary")
| |
|
@dataclass()
class StageRgbInitAllConfig(PhotometricStageConfig):
    """The stage for initializing all the parameters except the offsets with photometric loss"""
    num_steps: int = 500  # optimization steps for this stage
    optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr", "texture", "lights")  # parameter groups optimized in this stage
    disable_jawline_landmarks: bool = True
    align_texture_except: tuple[str, ...] = ("hair", "boundary", "neck")
    align_boundary_except: tuple[str, ...] = ("hair", "bottomline")
| |
|
@dataclass()
class StageRgbInitOffsetConfig(PhotometricStageConfig):
    """The stage for initializing the offsets with photometric loss"""
    num_steps: int = 500  # optimization steps for this stage
    optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr", "texture", "lights", "static_offset")  # parameter groups optimized in this stage
    disable_jawline_landmarks: bool = True
    align_texture_except: tuple[str, ...] = ("hair", "boundary", "neck")
| |
|
@dataclass()
class StageRgbSequentialTrackingConfig(PhotometricStageConfig):
    """The stage for sequential tracking with photometric loss"""
    num_steps: int = 50  # optimization steps per frame
    optimizable_params: tuple[str, ...] = ("pose", "joints", "expr", "texture", "dynamic_offset")  # parameter groups optimized in this stage
    disable_jawline_landmarks: bool = True
| |
|
@dataclass()
class StageRgbGlobalTrackingConfig(PhotometricStageConfig):
    """The stage for global tracking with photometric loss"""
    num_epochs: int = 30  # NOTE: this stage is budgeted in epochs, not steps
    optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr", "texture", "lights", "static_offset", "dynamic_offset")  # parameter groups optimized in this stage
    disable_jawline_landmarks: bool = True
| |
|
@dataclass()
class PipelineConfig(Config):
    """Container of all optimization stages.

    All fields are required (no defaults). BaseTrackingConfig.__post_init__
    iterates self.pipeline.__dict__.values(), so the declaration order here
    presumably matches the execution order — confirm in the tracker.
    """
    lmk_init_rigid: StageLmkInitRigidConfig
    lmk_init_all: StageLmkInitAllConfig
    lmk_sequential_tracking: StageLmkSequentialTrackingConfig
    lmk_global_tracking: StageLmkGlobalTrackingConfig
    rgb_init_texture: StageRgbInitTextureConfig
    rgb_init_all: StageRgbInitAllConfig
    rgb_init_offset: StageRgbInitOffsetConfig
    rgb_sequential_tracking: StageRgbSequentialTrackingConfig
    rgb_global_tracking: StageRgbGlobalTrackingConfig
| |
|
| | |
@dataclass()
class BaseTrackingConfig(Config):
    """Top-level tracking configuration aggregating all sub-configs.

    __post_init__ applies cross-config adjustments: per-sequence occlusion
    overrides, occlusion propagation into photometric stages, and skipping of
    stages before `begin_stage`.
    """
    data: DataConfig
    model: ModelConfig
    render: RenderConfig
    log: LogConfig
    exp: ExperimentConfig
    lr: LearningRateConfig
    w: LossWeightConfig
    pipeline: PipelineConfig

    begin_stage: Optional[str] = None
    """Begin from the specified stage for debugging"""
    begin_frame_idx: int = 0
    """Begin from the specified frame index for debugging"""
    async_func: bool = True
    """Allow asynchronous function calls for speed up"""
    device: Literal['cuda', 'cpu'] = 'cuda'

    def get_occluded(self):
        """Apply a per-sequence override of cfg.model.occluded (table empty by default)."""
        occluded_table = {
        }
        if self.data.sequence in occluded_table:
            logger.info(f"Automatically setting cfg.model.occluded to {occluded_table[self.data.sequence]}")
            self.model.occluded = occluded_table[self.data.sequence]

    def __post_init__(self):
        self.get_occluded()

        # Without any vertex offsets the hair cannot be fitted, so always
        # treat it as occluded.
        if not self.model.use_static_offset and not self.model.use_dynamic_offset:
            self.model.occluded = tuple(list(self.model.occluded) + ['hair'])

        # Propagate the occluded regions into every photometric stage's
        # texture/boundary alignment exclusions.
        for cfg_stage in self.pipeline.__dict__.values():
            if isinstance(cfg_stage, PhotometricStageConfig):
                cfg_stage.align_texture_except = tuple(list(cfg_stage.align_texture_except) + list(self.model.occluded))
                cfg_stage.align_boundary_except = tuple(list(cfg_stage.align_boundary_except) + list(self.model.occluded))

        # Skip every stage that comes before `begin_stage` (matched by
        # lower-cased stage-config class name) by zeroing its budget.
        if self.begin_stage is not None:
            skip = True
            for cfg_stage in self.pipeline.__dict__.values():
                if cfg_stage.__class__.__name__.lower() == self.begin_stage:
                    skip = False
                if skip:
                    # BUGFIX: epoch-based stages (e.g. the global-tracking
                    # stages) expose `num_epochs`, not `num_steps`; previously
                    # `num_steps = 0` only created an unused attribute on them
                    # and they were not actually skipped.
                    if hasattr(cfg_stage, 'num_steps'):
                        cfg_stage.num_steps = 0
                    else:
                        cfg_stage.num_epochs = 0
| |
|
| |
|
if __name__ == "__main__":
    # Parse the config from the CLI, echo it, and persist it as the base config.
    config = tyro.cli(BaseTrackingConfig)
    config_yml = tyro.to_yaml(config)
    print(config_yml)
    from yaml import safe_dump, safe_load
    out_path = Path("configs/vhap_tracking/base_tracking_config.yaml")
    # Robustness fix: create the output directory if it does not exist yet.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, 'w') as file:
        # NOTE(review): config_yml is already a YAML string, so safe_dump wraps
        # it in a second YAML layer (a single scalar). Kept as-is because the
        # loading side presumably safe_loads the file back to the string before
        # handing it to tyro — confirm before flattening to file.write(config_yml).
        safe_dump(config_yml, file)
| |
|