|
|
"""Contains params for backbone. |
|
|
|
|
|
For licensing see accompanying LICENSE file. |
|
|
Copyright (C) 2025 Apple Inc. All Rights Reserved. |
|
|
""" |
|
|
|
|
|
import dataclasses |
|
|
from typing import Literal |
|
|
|
|
|
import sharp.utils.math as math_utils |
|
|
from sharp.models.blocks import NormLayerName, UpsamplingMode |
|
|
from sharp.models.presets import ViTPreset |
|
|
from sharp.utils.color_space import ColorSpace |
|
|
|
|
|
# Fixed-length tuple of five ints; used as the type of the `dims_decoder` fields.
DimsDecoder = tuple[int, int, int, int, int]
# Accepted values for `GaussianDecoderParams.image_encoder_type`.
DPTImageEncoderType = Literal["skip_conv", "skip_conv_kernel2"]

# Accepted values for `InitializerParams.color_option`.
ColorInitOption = Literal["none", "first_layer", "all_layers"]
# Accepted values for the `*_layer_depth_option` fields of `InitializerParams`.
DepthInitOption = Literal["surface_min", "surface_max", "base_depth", "linear_disparity"]
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class AlignmentParams:
    """Configuration values for depth alignment.

    Every field carries a default, so ``AlignmentParams()`` is a valid instance.
    """

    kernel_size: int = 16
    stride: int = 1
    # NOTE(review): presumably disables training of the alignment module -- confirm
    # against the code that consumes this flag.
    frozen: bool = False

    steps: int = 4

    # One of the activation names declared by `math_utils.ActivationType`.
    activation_type: math_utils.ActivationType = "exp"

    # NOTE(review): looks like a switch for feeding depth-decoder features into the
    # alignment -- confirm with the consumer.
    depth_decoder_features: bool = False

    base_width: int = 16
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class DeltaFactor:
    """Per-attribute multipliers applied to deltas before the activation.

    Scaling a delta down acts like a selectively reduced learning rate for
    that attribute.
    """

    # Position-related factors default to the smallest values.
    xy: float = 0.001
    z: float = 0.001
    color: float = 0.1
    # A factor of 1.0 leaves the corresponding delta unscaled.
    opacity: float = 1.0
    scale: float = 1.0
    quaternion: float = 1.0
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class InitializerParams:
    """Configuration values for the initializer.

    Every field carries a default, so ``InitializerParams()`` is a valid instance.
    """

    scale_factor: float = 1.0

    disparity_factor: float = 1.0

    stride: int = 2

    num_layers: int = 2

    # Depth option for the first layer and for the remaining layers; both must be
    # one of the `DepthInitOption` names.
    first_layer_depth_option: DepthInitOption = "surface_min"
    rest_layer_depth_option: DepthInitOption = "surface_min"

    # One of the `ColorInitOption` names.
    color_option: ColorInitOption = "all_layers"

    # NOTE(review): presumably the depth used by the "base_depth" option -- confirm.
    base_depth: float = 10.0

    # NOTE(review): presumably detaches gradients on the feature input -- confirm.
    feature_input_stop_grad: bool = False

    normalize_depth: bool = True

    # Inpainting-related switches, all off by default.
    output_inpainted_layer_only: bool = False

    set_uninpainted_opacity_to_zero: bool = False

    concat_inpainting_mask: bool = False
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class MonodepthParams:
    """Configuration values for the monodepth network.

    Every field carries a default, so ``MonodepthParams()`` is a valid instance.
    """

    # ViT presets for the two encoders; both default to the same preset name.
    patch_encoder_preset: ViTPreset = "dinov2l16_384"
    image_encoder_preset: ViTPreset = "dinov2l16_384"

    # Optional checkpoint location; ``None`` means no URI is configured here.
    checkpoint_uri: str | None = None
    # Per-component unfreeze switches, all off by default.
    unfreeze_patch_encoder: bool = False
    unfreeze_image_encoder: bool = False
    unfreeze_decoder: bool = False
    unfreeze_head: bool = False
    unfreeze_norm_layers: bool = False
    grad_checkpointing: bool = False
    use_patch_overlap: bool = True
    # Five decoder dimensions (see `DimsDecoder`).
    dims_decoder: DimsDecoder = (256, 256, 256, 256, 256)
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class MonodepthAdaptorParams:
    """Configuration values for the monodepth network feature adaptor."""

    # Encoder features are enabled by default, decoder features are not.
    encoder_features: bool = True
    decoder_features: bool = False
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class GaussianDecoderParams:
    """Backbone configuration values; every field carries a default."""

    dim_in: int = 5
    dim_out: int = 32

    # One of the normalization layer names declared by `NormLayerName`.
    norm_type: NormLayerName = "group_norm"

    # NOTE(review): presumably only relevant when `norm_type` is "group_norm" -- confirm.
    norm_num_groups: int = 8

    stride: int = 2

    # ViT presets for the two encoders; both default to the same preset name.
    patch_encoder_preset: ViTPreset = "dinov2l16_384"
    image_encoder_preset: ViTPreset = "dinov2l16_384"

    # Five decoder dimensions (see `DimsDecoder`).
    dims_decoder: DimsDecoder = (128, 128, 128, 128, 128)

    use_depth_input: bool = True

    grad_checkpointing: bool = False

    # One of the modes declared by `UpsamplingMode`.
    upsampling_mode: UpsamplingMode = "transposed_conv"

    # One of the `DPTImageEncoderType` names.
    image_encoder_type: DPTImageEncoderType = "skip_conv_kernel2"
|
|
|
|
|
|
|
|
@dataclasses.dataclass
class PredictorParams:
    """Predictor configuration values; every field carries a default.

    The nested parameter groups are created through ``default_factory``, so each
    ``PredictorParams`` instance receives its own freshly built sub-objects.
    """

    # Nested parameter groups, one per sub-module.
    initializer: InitializerParams = dataclasses.field(default_factory=InitializerParams)
    monodepth: MonodepthParams = dataclasses.field(default_factory=MonodepthParams)
    monodepth_adaptor: MonodepthAdaptorParams = dataclasses.field(
        default_factory=MonodepthAdaptorParams
    )
    gaussian_decoder: GaussianDecoderParams = dataclasses.field(
        default_factory=GaussianDecoderParams
    )

    depth_alignment: AlignmentParams = dataclasses.field(default_factory=AlignmentParams)

    delta_factor: DeltaFactor = dataclasses.field(default_factory=DeltaFactor)

    # NOTE(review): presumably upper/lower bounds on the predicted scale -- confirm.
    max_scale: float = 10.0

    min_scale: float = 0.0

    # One of the normalization layer names declared by `NormLayerName`.
    norm_type: NormLayerName = "group_norm"

    norm_num_groups: int = 8

    use_predicted_mean: bool = False

    # Activation names as declared by `math_utils.ActivationType`.
    color_activation_type: math_utils.ActivationType = "sigmoid"
    opacity_activation_type: math_utils.ActivationType = "sigmoid"

    # One of the names declared by `ColorSpace`.
    color_space: ColorSpace = "linearRGB"

    low_pass_filter_eps: float = 1e-2

    num_monodepth_layers: int = 2

    sorting_monodepth: bool = False

    base_scale_on_predicted_mean: bool = True
|
|
|