danelcsb committed on
Commit
34509a7
·
verified ·
1 Parent(s): a1fce3a

Upload model

Browse files
Files changed (2) hide show
  1. config.json +26 -38
  2. model.safetensors +2 -2
config.json CHANGED
@@ -1,28 +1,13 @@
1
  {
2
- "_bb_feat_sizes": [
3
- [
4
- 256,
5
- 256
6
- ],
7
- [
8
- 128,
9
- 128
10
- ],
11
- [
12
- 64,
13
- 64
14
- ]
15
- ],
16
  "add_all_frames_to_correct_as_cond": false,
17
- "add_tpos_enc_to_object_pointers": false,
18
  "architectures": [
19
  "Sam2Model"
20
  ],
21
  "backbone_stride": 16,
22
- "binarize_mask_from_pts_for_mem_enc": false,
23
- "compile_image_encoder": false,
24
- "directly_add_no_memory_embedding": true,
25
- "fixed_no_object_pointer": true,
26
  "image_encoder_config": {
27
  "backbone_channel_list": [
28
  768,
@@ -30,6 +15,20 @@
30
  192,
31
  96
32
  ],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  "dim_mul": 2.0,
34
  "drop_path_rate": 0.0,
35
  "fpn_hidden_size": 256,
@@ -54,6 +53,7 @@
54
  "layer_norm_eps": 1e-06,
55
  "model_type": "",
56
  "num_channels": 3,
 
57
  "num_heads": 1,
58
  "patch_kernel_size": 7,
59
  "patch_padding": 3,
@@ -95,15 +95,12 @@
95
  "iou_prediction_use_sigmoid": true,
96
  "model_type": "",
97
  "num_multimask_outputs": 3,
98
- "pred_obj_scores": true,
99
- "pred_obj_scores_mlp": true,
100
  "two_way_transformer_activation": "relu",
101
  "two_way_transformer_attention_downsample_rate": 2,
102
  "two_way_transformer_depth": 2,
103
  "two_way_transformer_embedding_dim": 256,
104
  "two_way_transformer_mlp_dim": 2048,
105
  "two_way_transformer_num_heads": 8,
106
- "use_high_resolution_features": true,
107
  "use_multimask_token_for_object_pointer": true
108
  },
109
  "max_cond_frames_in_attn": -1,
@@ -111,9 +108,7 @@
111
  "memory_attention_config": {
112
  "apply_pe_at_cross_attn_keys": true,
113
  "apply_pe_at_cross_attn_queries": false,
114
- "apply_pe_at_input": true,
115
  "apply_pe_at_self_attn": false,
116
- "batch_first": true,
117
  "dim_feedforward": 2048,
118
  "dropout": 0.1,
119
  "hidden_act": "relu",
@@ -140,7 +135,6 @@
140
  "mask_downsampler_total_stride": 16,
141
  "memory_fuser_embed_dim": 256,
142
  "memory_fuser_hidden_act": "gelu",
143
- "memory_fuser_input_projection": false,
144
  "memory_fuser_kernel_size": 7,
145
  "memory_fuser_layer_scale_init_value": 1e-06,
146
  "memory_fuser_num_layers": 2,
@@ -155,13 +149,11 @@
155
  "multimask_min_pt_num": 0,
156
  "multimask_output_for_tracking": true,
157
  "multimask_output_in_sam": true,
158
- "no_obj_embed_spatial": true,
159
  "non_overlap_masks_for_mem_enc": false,
160
  "num_maskmem": 7,
161
- "only_object_pointers_in_the_past_for_eval": true,
162
- "pred_obj_scores": true,
163
- "pred_obj_scores_mlp": true,
164
- "proj_tpos_enc_in_object_pointers": true,
165
  "prompt_encoder_config": {
166
  "hidden_act": "gelu",
167
  "hidden_size": 256,
@@ -173,15 +165,11 @@
173
  "patch_size": 16,
174
  "scale": 1
175
  },
176
- "sam_mask_decoder_extra_args": null,
177
- "sigmoid_bias_for_mem_enc": -10,
178
- "sigmoid_scale_for_mem_enc": 20,
179
- "soft_no_object_pointer": false,
180
  "torch_dtype": "float32",
181
- "transformers_version": "4.50.0.dev0",
182
  "use_mask_input_as_output_without_sam": true,
183
- "use_mlp_for_object_pointer_proj": true,
184
  "use_multimask_token_for_object_pointer": true,
185
- "use_object_pointers_in_encoder": true,
186
- "use_signed_tpos_enc_to_object_pointers": true
187
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "add_all_frames_to_correct_as_cond": false,
 
3
  "architectures": [
4
  "Sam2Model"
5
  ],
6
  "backbone_stride": 16,
7
+ "binarize_mask_from_pts_for_mem_enc": true,
8
+ "enable_occlusion_spatial_embedding": true,
9
+ "enable_temporal_pos_encoding_for_object_pointers": true,
10
+ "fill_hole_area": 8,
11
  "image_encoder_config": {
12
  "backbone_channel_list": [
13
  768,
 
15
  192,
16
  96
17
  ],
18
+ "backbone_feature_sizes": [
19
+ [
20
+ 256,
21
+ 256
22
+ ],
23
+ [
24
+ 128,
25
+ 128
26
+ ],
27
+ [
28
+ 64,
29
+ 64
30
+ ]
31
+ ],
32
  "dim_mul": 2.0,
33
  "drop_path_rate": 0.0,
34
  "fpn_hidden_size": 256,
 
53
  "layer_norm_eps": 1e-06,
54
  "model_type": "",
55
  "num_channels": 3,
56
+ "num_feature_levels": 3,
57
  "num_heads": 1,
58
  "patch_kernel_size": 7,
59
  "patch_padding": 3,
 
95
  "iou_prediction_use_sigmoid": true,
96
  "model_type": "",
97
  "num_multimask_outputs": 3,
 
 
98
  "two_way_transformer_activation": "relu",
99
  "two_way_transformer_attention_downsample_rate": 2,
100
  "two_way_transformer_depth": 2,
101
  "two_way_transformer_embedding_dim": 256,
102
  "two_way_transformer_mlp_dim": 2048,
103
  "two_way_transformer_num_heads": 8,
 
104
  "use_multimask_token_for_object_pointer": true
105
  },
106
  "max_cond_frames_in_attn": -1,
 
108
  "memory_attention_config": {
109
  "apply_pe_at_cross_attn_keys": true,
110
  "apply_pe_at_cross_attn_queries": false,
 
111
  "apply_pe_at_self_attn": false,
 
112
  "dim_feedforward": 2048,
113
  "dropout": 0.1,
114
  "hidden_act": "relu",
 
135
  "mask_downsampler_total_stride": 16,
136
  "memory_fuser_embed_dim": 256,
137
  "memory_fuser_hidden_act": "gelu",
 
138
  "memory_fuser_kernel_size": 7,
139
  "memory_fuser_layer_scale_init_value": 1e-06,
140
  "memory_fuser_num_layers": 2,
 
149
  "multimask_min_pt_num": 0,
150
  "multimask_output_for_tracking": true,
151
  "multimask_output_in_sam": true,
152
+ "non_overlap_masks": false,
153
  "non_overlap_masks_for_mem_enc": false,
154
  "num_maskmem": 7,
155
+ "preserve_temporal_direction_in_object_pointers": true,
156
+ "project_temporal_pos_encoding_in_object_pointers": true,
 
 
157
  "prompt_encoder_config": {
158
  "hidden_act": "gelu",
159
  "hidden_size": 256,
 
165
  "patch_size": 16,
166
  "scale": 1
167
  },
168
+ "sigmoid_bias_for_mem_enc": -10.0,
169
+ "sigmoid_scale_for_mem_enc": 20.0,
 
 
170
  "torch_dtype": "float32",
171
+ "transformers_version": "4.53.0.dev0",
172
  "use_mask_input_as_output_without_sam": true,
 
173
  "use_multimask_token_for_object_pointer": true,
174
+ "use_object_pointers_in_encoder": true
 
175
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c5644fa49b4c724cb7ff6fe267e31484dc27f64a28d15f26800ca6335d4be640
3
- size 155906128
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:030a362dae8f614f7aa8afc76097ecacd3a22b00baf76f8f1d92819a5eedf3f0
3
+ size 155906184