# Sam3d_OBJ / checkpoints /ss_generator.yaml
# SumitMathur8956's picture
# Add SAM 3D checkpoints
# ee0b416 verified
# Model configuration with two sub-trees under `module`: a condition embedder
# and a generator. Each `_target_` value is a dotted import path; presumably
# the file is consumed by a Hydra-style `instantiate` call -- confirm.
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been stripped. It relies on the alphabetical key order
# inside each mapping and on the parameter grouping implied by each
# `_target_`. Verify against the upstream checkpoint config before relying
# on it.
module:
  condition_embedder:
    backbone:
      _target_: sam3d_objects.model.backbone.dit.embedder.embedder_fuser.EmbedderFuser
      # One entry: the group [pointmap, rgb_pointmap] paired with 1.0.
      # NOTE(review): grouping of this nested list is inferred -- confirm.
      drop_modalities_weight:
      - - - pointmap
          - rgb_pointmap
        - 1.0
      dropout_prob: 0.1
      # Each entry is a pair: an embedder spec followed by a list of
      # two-element [name, variant] pairs (presumably the inputs that
      # embedder is applied to; verify against EmbedderFuser).
      embedder_list:
      - - _target_: sam3d_objects.model.backbone.dit.embedder.dino.Dino
          dino_model: dinov2_vitl14_reg
          input_size: 518
          normalize_images: true
        - - - image
            - cropped
          - - rgb_image
            - full
      - - _target_: sam3d_objects.model.backbone.dit.embedder.dino.Dino
          dino_model: dinov2_vitl14_reg
          input_size: 518
          normalize_images: true
        - - - mask
            - cropped
          - - rgb_image_mask
            - full
      - - _target_: sam3d_objects.model.backbone.dit.embedder.pointmap.PointPatchEmbed
          embed_dim: 512
          input_size: 256
          patch_size: 8
          remap_output: linear
        - - - pointmap
            - cropped
          - - rgb_pointmap
            - full
      force_drop_modalities: null
      freeze: true
      projection_net_hidden_dim_multiplier: 4.0
      use_pos_embedding: learned
  generator:
    backbone:
      _target_: sam3d_objects.model.backbone.generator.shortcut.model.ShortCut
      batch_mode: true
      cfg_modalities:
      - shape
      inference_steps: 2
      # Per-key weights; `_target_` here points at a `make_dict` helper, so
      # the remaining keys are presumably passed through as dict entries.
      loss_weights:
        6drotation_normalized: 0.1
        _target_: sam3d_objects.config.utils.make_dict
        scale: 0.1
        shape: 0
        translation: 1.0
        translation_scale: 0.0
      ratio_cfg_samples_in_self_consistency_target: 0.25
      rescale_t: 1
      reverse_fn:
        _target_: sam3d_objects.model.backbone.generator.classifier_free_guidance.ClassifierFreeGuidanceWithExternalUnconditionalProbability
        backbone:
          _target_: sam3d_objects.model.backbone.tdfy_dit.models.mot_sparse_structure_flow.SparseStructureFlowTdfyWrapper
          cond_channels: 1024
          condition_embedder: null
          force_zeros_cond: true
          freeze_d_time_embedder: true
          freeze_shared_parameters: true
          in_channels: 8
          is_shortcut_model: true
          # One `Latent` spec per named latent, each with its own
          # position embedder.
          # NOTE(review): `token_len` is placed under `pos_embedder`; the
          # flattened key order would also allow it on the parent `Latent`
          # mapping -- confirm against the Latent / LearntPositionEmbedder
          # signatures.
          latent_mapping:
            6drotation_normalized:
              _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.Latent
              in_channels: 6
              model_channels: 1024
              pos_embedder:
                _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.LearntPositionEmbedder
                model_channels: 1024
                token_len: 1
            scale:
              _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.Latent
              in_channels: 3
              model_channels: 1024
              pos_embedder:
                _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.LearntPositionEmbedder
                model_channels: 1024
                token_len: 1
            shape:
              _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.Latent
              in_channels: 8
              model_channels: 1024
              pos_embedder:
                _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.ShapePositionEmbedder
                model_channels: 1024
                patch_size: 1
                resolution: 16
            translation:
              _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.Latent
              in_channels: 3
              model_channels: 1024
              pos_embedder:
                _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.LearntPositionEmbedder
                model_channels: 1024
                token_len: 1
            translation_scale:
              _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.Latent
              in_channels: 1
              model_channels: 1024
              pos_embedder:
                _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.LearntPositionEmbedder
                model_channels: 1024
                token_len: 1
          # Maps a group key to the list of latent names in that group.
          latent_share_transformer:
            6drotation_normalized:
            - 6drotation_normalized
            - translation
            - scale
            - translation_scale
          mlp_ratio: 4
          model_channels: 1024
          num_blocks: 24
          num_heads: 16
          out_channels: 8
          patch_size: 1
          pe_mode: ape
          qk_rms_norm: true
          resolution: 16
          use_checkpoint: false
          use_fp16: false
        interval:
        - 0
        - 500
        p_unconditional: 0.1
        strength: 2.0
        unconditional_handling: add_flag
      self_consistency_cfg_strength: 2.0
      self_consistency_prob: 0.25
      shortcut_loss_weight: 1.0
      sigma_min: 0.0
      time_scale: 1000.0
      # `_partial_: true` alongside `_target_`: presumably yields a partially
      # applied `lognorm_sampler` with `mean`/`std` pre-bound -- confirm.
      training_time_sampler_fn:
        _partial_: true
        _target_: sam3d_objects.model.backbone.generator.flow_matching.model.lognorm_sampler
        mean: -1.0
        std: 1.0