apozz
/

sam-3d-objects-safetensors

Model card Files Files and versions

xet

Community

apozz committed on Feb 17

Commit

9bc54ed

verified ·

1 Parent(s): 94d3272

Upload ss_generator.yaml with huggingface_hub

Browse files

Files changed (1) hide show

ss_generator.yaml +141 -0

ss_generator.yaml ADDED Viewed

	@@ -0,0 +1,141 @@

+module:
+  condition_embedder:  # NOTE(review): _target_ keys look like Hydra-style instantiate config — confirm
+    backbone:
+      _target_: sam3d_objects.model.backbone.dit.embedder.embedder_fuser.EmbedderFuser
+      drop_modalities_weight:  # one entry: a pair of (list of names, 1.0)
+      - - - pointmap
+          - rgb_pointmap
+        - 1.0
+      dropout_prob: 0.1
+      embedder_list:  # three entries; each is a pair of (embedder config, list of two-item name pairs)
+      - - _target_: sam3d_objects.model.backbone.dit.embedder.dino.Dino
+          dino_model: dinov2_vitl14_reg
+          input_size: 518
+          normalize_images: true
+        - - - image
+            - cropped
+          - - rgb_image
+            - full
+      - - _target_: sam3d_objects.model.backbone.dit.embedder.dino.Dino  # same settings as the first Dino entry, paired with the mask names
+          dino_model: dinov2_vitl14_reg
+          input_size: 518
+          normalize_images: true
+        - - - mask
+            - cropped
+          - - rgb_image_mask
+            - full
+      - - _target_: sam3d_objects.model.backbone.dit.embedder.pointmap.PointPatchEmbed
+          embed_dim: 512
+          input_size: 256
+          patch_size: 8
+          remap_output: linear
+        - - - pointmap  # same two names that drop_modalities_weight lists above
+            - cropped
+          - - rgb_pointmap
+            - full
+      force_drop_modalities: null
+      freeze: true
+      projection_net_hidden_dim_multiplier: 4.0
+      use_pos_embedding: learned
+  generator:  # NOTE(review): _target_/_partial_ keys look like Hydra-style instantiate config — confirm
+    backbone:
+      _target_: sam3d_objects.model.backbone.generator.shortcut.model.ShortCut
+      batch_mode: true
+      cfg_modalities:
+      - shape
+      inference_steps: 2
+      loss_weights:  # keys mirror the five latent_mapping names defined further down
+        6drotation_normalized: 0.1
+        _target_: sam3d_objects.config.utils.make_dict  # NOTE(review): presumably builds a dict from the sibling keys — confirm
+        scale: 0.1
+        shape: 0  # written as integer 0, while the sibling weights are floats
+        translation: 1.0
+        translation_scale: 0.0
+      ratio_cfg_samples_in_self_consistency_target: 0.25
+      rescale_t: 1
+      reverse_fn:
+        _target_: sam3d_objects.model.backbone.generator.classifier_free_guidance.ClassifierFreeGuidanceWithExternalUnconditionalProbability
+        backbone:
+          _target_: sam3d_objects.model.backbone.tdfy_dit.models.mot_sparse_structure_flow.SparseStructureFlowTdfyWrapper
+          cond_channels: 1024
+          condition_embedder: null  # null here; an embedder is configured separately under module.condition_embedder
+          force_zeros_cond: true
+          freeze_d_time_embedder: true
+          freeze_shared_parameters: true
+          in_channels: 8  # same value as latent_mapping.shape.in_channels and out_channels below
+          is_shortcut_model: true
+          latent_mapping:  # five entries, each a Latent with model_channels 1024 and its own pos_embedder
+            6drotation_normalized:
+              _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.Latent
+              in_channels: 6
+              model_channels: 1024
+              pos_embedder:
+                _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.LearntPositionEmbedder
+                model_channels: 1024
+                token_len: 1
+            scale:
+              _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.Latent
+              in_channels: 3
+              model_channels: 1024
+              pos_embedder:
+                _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.LearntPositionEmbedder
+                model_channels: 1024
+                token_len: 1
+            shape:  # the only entry using ShapePositionEmbedder; the others use LearntPositionEmbedder
+              _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.Latent
+              in_channels: 8
+              model_channels: 1024
+              pos_embedder:
+                _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.ShapePositionEmbedder
+                model_channels: 1024
+                patch_size: 1
+                resolution: 16
+            translation:
+              _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.Latent
+              in_channels: 3
+              model_channels: 1024
+              pos_embedder:
+                _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.LearntPositionEmbedder
+                model_channels: 1024
+                token_len: 1
+            translation_scale:
+              _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.Latent
+              in_channels: 1
+              model_channels: 1024
+              pos_embedder:
+                _target_: sam3d_objects.model.backbone.tdfy_dit.models.mm_latent.LearntPositionEmbedder
+                model_channels: 1024
+                token_len: 1
+          latent_share_transformer:  # single group keyed by 6drotation_normalized; shape is not listed in it
+            6drotation_normalized:
+            - 6drotation_normalized
+            - translation
+            - scale
+            - translation_scale
+          mlp_ratio: 4
+          model_channels: 1024  # matches model_channels of every latent_mapping entry
+          num_blocks: 24
+          num_heads: 16
+          out_channels: 8
+          patch_size: 1  # same value as the ShapePositionEmbedder patch_size above
+          pe_mode: ape
+          qk_rms_norm: true
+          resolution: 16  # same value as the ShapePositionEmbedder resolution above
+          use_checkpoint: false
+          use_fp16: false
+        interval:  # two-element list; NOTE(review): presumably on the same scale as time_scale (1000.0) — confirm
+        - 0
+        - 500
+        p_unconditional: 0.1
+        strength: 2.0
+        unconditional_handling: add_flag
+      self_consistency_cfg_strength: 2.0
+      self_consistency_prob: 0.25
+      shortcut_loss_weight: 1.0
+      sigma_min: 0.0
+      time_scale: 1000.0
+      training_time_sampler_fn:
+        _partial_: true  # NOTE(review): under Hydra this yields a partial of the target with mean/std pre-bound — confirm loader
+        _target_: sam3d_objects.model.backbone.generator.flow_matching.model.lognorm_sampler
+        mean: -1.0
+        std: 1.0