| | |
| | |
| | |
| | |
| | |
| | |
| | |
| |
|
| |
|
| | from dataclasses import dataclass |
| | from pathlib import Path |
| | from typing import Optional, Literal, Tuple |
| | import tyro |
| | import importlib |
| | from vhap.util.log import get_logger |
| | logger = get_logger(__name__) |
| |
|
| |
|
def import_module(module_name: str):
    """Resolve a dotted path such as ``pkg.mod.ClassName`` to the named attribute.

    Despite the name, this returns the final attribute (typically a class)
    of the module given by everything before the last dot.
    """
    qualifier, attribute = module_name.rsplit(".", 1)
    return getattr(importlib.import_module(qualifier), attribute)
| |
|
| |
|
class Config:
    """Base class giving config objects dict-style read access to their attributes."""

    def __getitem__(self, __name: str):
        # Guard clause: surface a class-qualified message for unknown keys.
        # Deliberately raises AttributeError (not KeyError) to match attribute access.
        if not hasattr(self, __name):
            raise AttributeError(f"{self.__class__.__name__} has no attribute '{__name}'")
        return getattr(self, __name)
| |
|
| |
|
@dataclass()
class DataConfig(Config):
    """Configuration of the dataset: paths, cameras, and image preprocessing."""
    # NOTE(review): default is '' (str) despite the Path annotation — presumably tyro
    # coerces CLI values to Path; confirm before relying on the default.
    root_folder: Path = ''
    """The root folder for the dataset."""
    sequence: str = ''
    """The sequence name"""
    _target: str = "vhap.data.video_dataset.VideoDataset"
    """The target dataset class"""
    division: Optional[str] = None  # optional dataset division name
    subset: Optional[str] = None  # optional dataset subset name
    calibrated: bool = False
    """Whether the cameras parameters are available"""
    align_cameras_to_axes: bool = True
    """Adjust how cameras distribute in the space with a global rotation"""
    camera_convention_conversion: str = 'opencv->opengl'  # convention conversion applied to cameras
    target_extrinsic_type: Literal['w2c', 'c2w'] = 'w2c'  # world-to-camera or camera-to-world extrinsics
    n_downsample_rgb: Optional[int] = None
    """Load from downsampled RGB images to save data IO time"""
    scale_factor: float = 1.0
    """Further apply a scaling transformation after the downsampling of RGB"""
    background_color: Optional[Literal['white', 'black']] = 'white'  # background color used when compositing
    use_alpha_map: bool = False  # whether to load alpha maps
    use_landmark: bool = True  # whether to load landmarks
    landmark_source: Optional[Literal['face-alignment', 'star']] = "star"  # which landmark detector's output to use
| |
|
| |
|
@dataclass()
class ModelConfig(Config):
    """Configuration of the FLAME model: parameter dimensions, mesh edits, and texture."""
    n_shape: int = 300  # number of shape coefficients
    n_expr: int = 100  # number of expression coefficients
    n_tex: int = 100  # number of PCA texture coefficients

    use_static_offset: bool = False
    """Optimize static offsets on top of FLAME vertices in the canonical space"""
    use_dynamic_offset: bool = False
    """Optimize dynamic offsets on top of the FLAME vertices in the canonical space"""
    add_teeth: bool = True
    """Add teeth to the FLAME model"""
    remove_lip_inside: bool = False
    """Remove the inner part of the lips from the FLAME model"""

    tex_resolution: int = 2048
    """The resolution of the extra texture map"""
    tex_painted: bool = True
    """Use a painted texture map instead the pca texture space as the base texture map"""
    tex_extra: bool = True
    """Optimize an extra texture map as the base texture map or the residual texture map"""

    tex_clusters: tuple[str, ...] = ("skin", "hair", "boundary", "lips_tight", "sclerae", "irises")
    """Regions that are supposed to share a similar color inside"""
    residual_tex: bool = True
    """Use the extra texture map as a residual component on top of the base texture"""
    occluded: tuple[str, ...] = ()
    """The regions that are occluded by the hair or garments"""

    flame_params_path: Optional[Path] = None  # optional path to precomputed FLAME parameters
| |
|
| |
|
@dataclass()
class RenderConfig(Config):
    """Configuration of the differentiable renderer: backend, backgrounds, and lighting."""
    backend: Literal['nvdiffrast', 'pytorch3d'] = 'nvdiffrast'
    """The rendering backend"""
    use_opengl: bool = False
    """Use OpenGL for NVDiffRast"""
    background_train: Literal['white', 'black', 'target'] = 'target'
    """Background color/image for training"""
    disturb_rate_fg: Optional[float] = 0.5
    """The rate of disturbance for the foreground"""
    disturb_rate_bg: Optional[float] = 0.5
    """The rate of disturbance for the background. 0.6 best for multi-view, 0.3 best for single-view"""
    background_eval: Literal['white', 'black', 'target'] = 'target'
    """Background color/image for evaluation"""
    lighting_type: Literal['constant', 'front', 'front-range', 'SH'] = 'SH'
    """The type of lighting"""
    lighting_space: Literal['world', 'camera'] = 'world'
    """The space of lighting"""
| |
|
| |
|
@dataclass()
class LearningRateConfig(Config):
    """Per-parameter-group learning rates for the tracking optimizer."""
    base: float = 5e-3
    """shape, texture, rotation, eyes, neck, jaw"""
    translation: float = 1e-3  # global translation
    expr: float = 5e-2  # expression parameters
    static_offset: float = 5e-4  # static vertex offsets
    dynamic_offset: float = 5e-4  # dynamic vertex offsets
    camera: float = 5e-3  # camera parameters
    light: float = 5e-3  # lighting parameters
| |
|
| |
|
@dataclass()
class LossWeightConfig(Config):
    """Weights for the tracking losses and regularizers.

    Optional[float] weights set to None presumably disable the corresponding
    term — confirm against the loss-assembly code.
    """
    landmark: Optional[float] = 10.  # weight of the landmark loss
    always_enable_jawline_landmarks: bool = True
    """Always enable the landmark loss for the jawline landmarks. Ignore disable_jawline_landmarks in stages."""

    photo: Optional[float] = 30.  # weight of the photometric loss

    reg_shape: float = 3e-1  # regularization of shape coefficients
    reg_expr: float = 3e-2  # regularization of expression coefficients
    reg_tex_pca: float = 1e-4  # regularization of PCA texture coefficients

    reg_tex_res: Optional[float] = None
    """Regularize the residual texture map"""
    reg_tex_res_clusters: Optional[float] = 1e1
    """Regularize the residual texture map inside each texture cluster"""
    reg_tex_res_for: tuple[str, ...] = ("sclerae", "teeth")
    """Regularize the residual texture map for the clusters specified"""
    reg_tex_tv: Optional[float] = 1e4
    """Regularize the total variation of the texture map"""

    reg_light: Optional[float] = None
    """Regularize lighting parameters"""
    reg_diffuse: Optional[float] = 1e2
    """Regularize lighting parameters by the diffuse term"""

    reg_offset: Optional[float] = 3e2
    """Regularize the norm of offsets"""
    reg_offset_relax_coef: float = 1.
    """The coefficient for relaxing reg_offset for the regions specified"""
    reg_offset_relax_for: tuple[str, ...] = ("hair", "ears")
    """Relax the offset loss for the regions specified"""

    reg_offset_lap: Optional[float] = 1e6
    """Regularize the difference of laplacian coordinate caused by offsets"""
    reg_offset_lap_relax_coef: float = 0.1
    """The coefficient for relaxing reg_offset_lap for the regions specified"""
    reg_offset_lap_relax_for: tuple[str, ...] = ("hair", "ears")
    """Relax the offset loss for the regions specified"""

    reg_offset_rigid: Optional[float] = 3e2
    """Regularize the the offsets to be as-rigid-as-possible"""
    reg_offset_rigid_for: tuple[str, ...] = ("left_ear", "right_ear", "neck", "left_eye", "right_eye", "lips_tight")
    """Regularize the the offsets to be as-rigid-as-possible for the regions specified"""

    reg_offset_dynamic: Optional[float] = 3e5
    """Regularize the dynamic offsets to be temporally smooth"""

    blur_iter: int = 0
    """The number of iterations for blurring vertex weights"""

    # Temporal smoothness weights for pose parameters.
    smooth_trans: float = 3e2
    """global translation"""
    smooth_rot: float = 3e1
    """global rotation"""

    smooth_neck: float = 3e1
    """neck joint"""
    smooth_jaw: float = 1e-1
    """jaw joint"""
    smooth_eyes: float = 0
    """eyes joints"""

    prior_neck: float = 3e-1
    """Regularize the neck joint towards neutral"""
    prior_jaw: float = 3e-1
    """Regularize the jaw joint towards neutral"""
    prior_eyes: float = 3e-2
    """Regularize the eyes joints towards neutral"""
| | |
| |
|
@dataclass()
class LogConfig(Config):
    """Configuration of scalar/media logging during tracking."""
    interval_scalar: Optional[int] = 100
    """The step interval of scalar logging. Using an interval of stage_tracking.num_steps // 5 unless specified."""
    interval_media: Optional[int] = 500
    """The step interval of media logging. Using an interval of stage_tracking.num_steps unless specified."""
    image_format: Literal['jpg', 'png'] = 'jpg'
    """Output image format"""
    view_indices: Tuple[int, ...] = ()
    """Manually specify the view indices for log"""
    max_num_views: int = 3
    """The maximum number of views for log"""
    stack_views_in_rows: bool = True  # presumably stacks logged views row-wise rather than column-wise — confirm in logger code
| |
|
| |
|
@dataclass()
class ExperimentConfig(Config):
    """Configuration of the experiment: output location and optimization mode."""
    output_folder: Path = Path('output/track')  # where tracking results are written
    reuse_landmarks: bool = True  # reuse previously detected landmarks if available — confirm against dataset code
    keyframes: Tuple[int, ...] = tuple()  # frame indices treated as keyframes
    photometric: bool = False
    """enable photometric optimization, otherwise only landmark optimization"""
| |
|
@dataclass()
class StageConfig(Config):
    """Base configuration shared by all optimization stages."""
    disable_jawline_landmarks: bool = False
    """Disable the landmark loss for the jawline landmarks since they are not accurate"""
| |
|
@dataclass()
class StageLmkInitRigidConfig(StageConfig):
    """The stage for initializing the rigid parameters"""
    num_steps: int = 300  # optimization steps for this stage
    optimizable_params: tuple[str, ...] = ("cam", "pose")  # parameter groups optimized in this stage
| |
|
@dataclass()
class StageLmkInitAllConfig(StageConfig):
    """The stage for initializing all the parameters optimizable with landmark loss"""
    num_steps: int = 300  # optimization steps for this stage
    optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr")  # parameter groups optimized in this stage
| |
|
@dataclass()
class StageLmkSequentialTrackingConfig(StageConfig):
    """The stage for sequential tracking with landmark loss"""
    num_steps: int = 50  # optimization steps per frame
    optimizable_params: tuple[str, ...] = ("pose", "joints", "expr")  # parameter groups optimized in this stage
| |
|
@dataclass()
class StageLmkGlobalTrackingConfig(StageConfig):
    """The stage for global tracking with landmark loss"""
    num_epochs: int = 0  # NOTE: this stage is budgeted in epochs, not steps (0 disables it)
    optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr")  # parameter groups optimized in this stage
| |
|
@dataclass()
class PhotometricStageConfig(StageConfig):
    """Base configuration shared by all photometric stages."""
    align_texture_except: tuple[str, ...] = ()
    """Align the inner region of rendered FLAME to the image, except for the regions specified"""
    align_boundary_except: tuple[str, ...] = ("bottomline",)
    """Align the boundary of FLAME to the image, except for the regions specified"""
| |
|
@dataclass()
class StageRgbInitTextureConfig(PhotometricStageConfig):
    """The stage for initializing the texture map with photometric loss"""
    num_steps: int = 500  # optimization steps for this stage
    optimizable_params: tuple[str, ...] = ("cam", "shape", "texture", "lights")  # parameter groups optimized in this stage
    align_texture_except: tuple[str, ...] = ("hair", "boundary", "neck")
    align_boundary_except: tuple[str, ...] = ("hair", "boundary")
| |
|
@dataclass()
class StageRgbInitAllConfig(PhotometricStageConfig):
    """The stage for initializing all the parameters except the offsets with photometric loss"""
    num_steps: int = 500  # optimization steps for this stage
    optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr", "texture", "lights")  # parameter groups optimized in this stage
    disable_jawline_landmarks: bool = True
    align_texture_except: tuple[str, ...] = ("hair", "boundary", "neck")
    align_boundary_except: tuple[str, ...] = ("hair", "bottomline")
| |
|
@dataclass()
class StageRgbInitOffsetConfig(PhotometricStageConfig):
    """The stage for initializing the offsets with photometric loss"""
    num_steps: int = 500  # optimization steps for this stage
    optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr", "texture", "lights", "static_offset")  # parameter groups optimized in this stage
    disable_jawline_landmarks: bool = True
    align_texture_except: tuple[str, ...] = ("hair", "boundary", "neck")
| |
|
@dataclass()
class StageRgbSequentialTrackingConfig(PhotometricStageConfig):
    """The stage for sequential tracking with photometric loss"""
    num_steps: int = 50  # optimization steps per frame
    optimizable_params: tuple[str, ...] = ("pose", "joints", "expr", "texture", "dynamic_offset")  # parameter groups optimized in this stage
    disable_jawline_landmarks: bool = True
| |
|
@dataclass()
class StageRgbGlobalTrackingConfig(PhotometricStageConfig):
    """The stage for global tracking with photometric loss"""
    num_epochs: int = 30  # NOTE: this stage is budgeted in epochs, not steps
    optimizable_params: tuple[str, ...] = ("cam", "pose", "shape", "joints", "expr", "texture", "lights", "static_offset", "dynamic_offset")  # parameter groups optimized in this stage
    disable_jawline_landmarks: bool = True
| |
|
@dataclass()
class PipelineConfig(Config):
    """Container of all optimization stages.

    All fields are required (no defaults). BaseTrackingConfig.__post_init__
    iterates self.pipeline.__dict__.values(), so the declaration order here
    presumably matches the execution order — confirm in the tracker.
    """
    lmk_init_rigid: StageLmkInitRigidConfig
    lmk_init_all: StageLmkInitAllConfig
    lmk_sequential_tracking: StageLmkSequentialTrackingConfig
    lmk_global_tracking: StageLmkGlobalTrackingConfig
    rgb_init_texture: StageRgbInitTextureConfig
    rgb_init_all: StageRgbInitAllConfig
    rgb_init_offset: StageRgbInitOffsetConfig
    rgb_sequential_tracking: StageRgbSequentialTrackingConfig
    rgb_global_tracking: StageRgbGlobalTrackingConfig
| |
|
| | |
@dataclass()
class BaseTrackingConfig(Config):
    """Top-level tracking configuration aggregating all sub-configs.

    __post_init__ applies cross-config adjustments: per-sequence occlusion
    overrides, occlusion propagation into photometric stages, and skipping of
    stages before `begin_stage`.
    """
    data: DataConfig
    model: ModelConfig
    render: RenderConfig
    log: LogConfig
    exp: ExperimentConfig
    lr: LearningRateConfig
    w: LossWeightConfig
    pipeline: PipelineConfig

    begin_stage: Optional[str] = None
    """Begin from the specified stage for debugging"""
    begin_frame_idx: int = 0
    """Begin from the specified frame index for debugging"""
    async_func: bool = True
    """Allow asynchronous function calls for speed up"""
    device: Literal['cuda', 'cpu'] = 'cuda'

    def get_occluded(self):
        """Apply a per-sequence override of cfg.model.occluded (table empty by default)."""
        occluded_table = {
        }
        if self.data.sequence in occluded_table:
            logger.info(f"Automatically setting cfg.model.occluded to {occluded_table[self.data.sequence]}")
            self.model.occluded = occluded_table[self.data.sequence]

    def __post_init__(self):
        self.get_occluded()

        # Without any vertex offsets the hair cannot be fitted, so always
        # treat it as occluded.
        if not self.model.use_static_offset and not self.model.use_dynamic_offset:
            self.model.occluded = tuple(list(self.model.occluded) + ['hair'])

        # Propagate the occluded regions into every photometric stage's
        # texture/boundary alignment exclusions.
        for cfg_stage in self.pipeline.__dict__.values():
            if isinstance(cfg_stage, PhotometricStageConfig):
                cfg_stage.align_texture_except = tuple(list(cfg_stage.align_texture_except) + list(self.model.occluded))
                cfg_stage.align_boundary_except = tuple(list(cfg_stage.align_boundary_except) + list(self.model.occluded))

        # Skip every stage that comes before `begin_stage` (matched by
        # lower-cased stage-config class name) by zeroing its budget.
        if self.begin_stage is not None:
            skip = True
            for cfg_stage in self.pipeline.__dict__.values():
                if cfg_stage.__class__.__name__.lower() == self.begin_stage:
                    skip = False
                if skip:
                    # BUGFIX: epoch-based stages (e.g. the global-tracking
                    # stages) expose `num_epochs`, not `num_steps`; previously
                    # `num_steps = 0` only created an unused attribute on them
                    # and they were not actually skipped.
                    if hasattr(cfg_stage, 'num_steps'):
                        cfg_stage.num_steps = 0
                    else:
                        cfg_stage.num_epochs = 0
| |
|
| |
|
if __name__ == "__main__":
    # Parse the config from the CLI, echo it, and persist it as the base config.
    config = tyro.cli(BaseTrackingConfig)
    config_yml = tyro.to_yaml(config)
    print(config_yml)
    from yaml import safe_dump, safe_load
    out_path = Path("configs/vhap_tracking/base_tracking_config.yaml")
    # Robustness fix: create the output directory if it does not exist yet.
    out_path.parent.mkdir(parents=True, exist_ok=True)
    with open(out_path, 'w') as file:
        # NOTE(review): config_yml is already a YAML string, so safe_dump wraps
        # it in a second YAML layer (a single scalar). Kept as-is because the
        # loading side presumably safe_loads the file back to the string before
        # handing it to tyro — confirm before flattening to file.write(config_yml).
        safe_dump(config_yml, file)
| |
|