# Source file: sam-3d-objects/checkpoints/slat_generator.yaml
# Uploader avatar: jetjodh's picture
# Commit message: Upload 16 files
# Commit: f29e43c (verified)
# Structured-latent generator configuration.
#
# Keys named `_target_` hold dotted import paths and `_partial_` marks a
# partially-applied callable; this matches the Hydra `instantiate` convention
# (NOTE(review): confirm the loader actually used by the project).
#
# NOTE(review): the nesting below was reconstructed from a copy whose
# indentation had been flattened. Depth was inferred from the sequence markers
# and from the alphabetical key order inside each mapping; verify against the
# upstream file before relying on it.
module:
  # Conditioning branch: fuses the outputs of the embedders listed below.
  condition_embedder:
    backbone:
      _target_: sam3d_objects.model.backbone.dit.embedder.embedder_fuser.EmbedderFuser
      # Each entry is a two-element list:
      #   1. an embedder spec (a mapping with its own `_target_`), and
      #   2. a list of two-element string lists.
      # NOTE(review): the string pairs look like (input key, crop/full group)
      # — confirm against EmbedderFuser.
      embedder_list:
        # Embedder applied to the image inputs.
        - - _target_: sam3d_objects.model.backbone.dit.embedder.dino.Dino
            dino_model: dinov2_vitl14_reg
            input_size: 518
            normalize_images: true
            prenorm_features: true
          - - - image
              - cropped
            - - rgb_image
              - full
        # Embedder applied to the mask inputs (same Dino settings as above).
        - - _target_: sam3d_objects.model.backbone.dit.embedder.dino.Dino
            dino_model: dinov2_vitl14_reg
            input_size: 518
            normalize_images: true
            prenorm_features: true
          - - - mask
              - cropped
            - - rgb_image_mask
              - full
      projection_net_hidden_dim_multiplier: 4.0
      use_pos_embedding: learned

  # Generative branch: a flow-matching model whose reverse function is a
  # classifier-free-guidance wrapper around the structured-latent flow model.
  generator:
    backbone:
      _target_: sam3d_objects.model.backbone.generator.flow_matching.model.FlowMatching
      inference_steps: 12
      reverse_fn:
        _target_: sam3d_objects.model.backbone.generator.classifier_free_guidance.ClassifierFreeGuidance
        backbone:
          _target_: sam3d_objects.model.backbone.tdfy_dit.models.structured_latent_flow.SLatFlowModelTdfyWrapper
          # Same value as `model_channels` below.
          cond_channels: 1024
          # Explicitly null here; a condition embedder is configured
          # separately under `module.condition_embedder`.
          condition_embedder: null
          force_zeros_cond: true
          in_channels: 8
          io_block_channels:
            - 128
          mlp_ratio: 4
          model_channels: 1024
          num_blocks: 24
          num_heads: 16
          num_io_res_blocks: 2
          out_channels: 8
          patch_size: 2
          pe_mode: ape
          qk_rms_norm: true
          resolution: 64
          use_fp16: true
        # NOTE(review): both guidance knobs are 0.0; presumably this disables
        # condition dropout and guidance scaling — confirm.
        p_unconditional: 0.0
        strength: 0.0
        unconditional_handling: add_flag
      sigma_min: 0.0
      time_scale: 1000.0
      # Partially-applied sampler; `mean` and `std` are bound here and the
      # remaining arguments are supplied by the caller.
      training_time_sampler_fn:
        _partial_: true
        _target_: sam3d_objects.model.backbone.generator.flow_matching.model.lognorm_sampler
        mean: -1.0
        std: 1.0