prasadsachin committed on
Commit
7b932f9
·
verified ·
1 Parent(s): 30419f2

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: keras-hub
3
+ ---
4
+ This is a [`SAM3PromptableConcept` model](https://keras.io/api/keras_hub/models/sam3_promptable_concept) uploaded using the KerasHub library. It can be used with the JAX, TensorFlow, and PyTorch backends.
5
+ Model config:
6
+ * **name:** sam3_promptable_concept_backbone
7
+ * **trainable:** True
8
+ * **dtype:** {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}
9
+ * **vision_encoder:** {'module': 'keras_hub.src.models.sam3.sam3_vision_encoder', 'class_name': 'SAM3VisionEncoder', 'config': {'name': 'sam3_vision_encoder', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}, 'image_shape': [1008, 1008, 3], 'patch_size': 14, 'num_layers': 32, 'hidden_dim': 1024, 'intermediate_dim': 4736, 'num_heads': 16, 'fpn_hidden_dim': 256, 'fpn_scale_factors': [4.0, 2.0, 1.0, 0.5], 'pretrain_image_shape': [336, 336, 3], 'hidden_activation': 'gelu', 'rope_theta': 10000.0, 'window_size': 24, 'global_attn_indexes': [7, 15, 23, 31], 'attention_dropout_rate': 0.0, 'hidden_dropout_rate': 0.0, 'layer_norm_epsilon': 1e-06}, 'registered_name': 'keras_hub>SAM3VisionEncoder'}
10
+ * **text_encoder:** {'module': 'keras_hub.src.models.sam3.sam3_text_encoder', 'class_name': 'SAM3TextEncoder', 'config': {'name': 'sam3_text_encoder', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}, 'vocabulary_size': 49408, 'embedding_dim': 1024, 'hidden_dim': 1024, 'num_layers': 24, 'num_heads': 16, 'intermediate_dim': 4096, 'intermediate_activation': 'gelu', 'max_sequence_length': 32, 'layer_norm_epsilon': 1e-05}, 'registered_name': 'keras_hub>SAM3TextEncoder'}
11
+ * **geometry_encoder:** {'module': 'keras_hub.src.models.sam3.sam3_geometry_encoder', 'class_name': 'SAM3GeometryEncoder', 'config': {'name': 'sam3_geometry_encoder', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}, 'num_layers': 3, 'hidden_dim': 256, 'intermediate_dim': 2048, 'num_heads': 8, 'roi_size': 7, 'hidden_activation': 'relu', 'dropout_rate': 0.0, 'layer_norm_epsilon': 1e-06}, 'registered_name': 'keras_hub>SAM3GeometryEncoder'}
12
+ * **detr_encoder:** {'module': 'keras_hub.src.models.sam3.sam3_detr_encoder', 'class_name': 'SAM3DetrEncoder', 'config': {'name': 'sam3_detr_encoder', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}, 'num_layers': 6, 'hidden_dim': 256, 'intermediate_dim': 2048, 'num_heads': 8, 'hidden_activation': 'relu', 'dropout_rate': 0.1, 'layer_norm_epsilon': 1e-06}, 'registered_name': 'keras_hub>SAM3DetrEncoder'}
13
+ * **detr_decoder:** {'module': 'keras_hub.src.models.sam3.sam3_detr_decoder', 'class_name': 'SAM3DetrDecoder', 'config': {'name': 'sam3_detr_decoder', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}, 'image_shape': [1008, 1008, 3], 'patch_size': 14, 'num_layers': 6, 'hidden_dim': 256, 'intermediate_dim': 2048, 'num_heads': 8, 'num_queries': 200, 'hidden_activation': 'relu', 'dropout_rate': 0.1, 'layer_norm_epsilon': 1e-06}, 'registered_name': 'keras_hub>SAM3DetrDecoder'}
14
+ * **mask_decoder:** {'module': 'keras_hub.src.models.sam3.sam3_mask_decoder', 'class_name': 'SAM3MaskDecoder', 'config': {'name': 'sam3_mask_decoder', 'trainable': True, 'dtype': {'module': 'keras', 'class_name': 'DTypePolicy', 'config': {'name': 'float32'}, 'registered_name': None}, 'num_upsampling_stages': 3, 'hidden_dim': 256, 'num_heads': 8, 'dropout_rate': 0.0, 'layer_norm_epsilon': 1e-06}, 'registered_name': 'keras_hub>SAM3MaskDecoder'}
15
+
16
+ This model card has been generated automatically and should be completed by the model author. See [Model Cards documentation](https://huggingface.co/docs/hub/model-cards) for more information.
assets/tokenizer/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
assets/tokenizer/vocabulary.json ADDED
The diff for this file is too large to render. See raw diff
 
config.json ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.sam3.sam3_pc_backbone",
3
+ "class_name": "SAM3PromptableConceptBackbone",
4
+ "config": {
5
+ "name": "sam3_promptable_concept_backbone",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "float32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "vision_encoder": {
16
+ "module": "keras_hub.src.models.sam3.sam3_vision_encoder",
17
+ "class_name": "SAM3VisionEncoder",
18
+ "config": {
19
+ "name": "sam3_vision_encoder",
20
+ "trainable": true,
21
+ "dtype": {
22
+ "module": "keras",
23
+ "class_name": "DTypePolicy",
24
+ "config": {
25
+ "name": "float32"
26
+ },
27
+ "registered_name": null
28
+ },
29
+ "image_shape": [
30
+ 1008,
31
+ 1008,
32
+ 3
33
+ ],
34
+ "patch_size": 14,
35
+ "num_layers": 32,
36
+ "hidden_dim": 1024,
37
+ "intermediate_dim": 4736,
38
+ "num_heads": 16,
39
+ "fpn_hidden_dim": 256,
40
+ "fpn_scale_factors": [
41
+ 4.0,
42
+ 2.0,
43
+ 1.0,
44
+ 0.5
45
+ ],
46
+ "pretrain_image_shape": [
47
+ 336,
48
+ 336,
49
+ 3
50
+ ],
51
+ "hidden_activation": "gelu",
52
+ "rope_theta": 10000.0,
53
+ "window_size": 24,
54
+ "global_attn_indexes": [
55
+ 7,
56
+ 15,
57
+ 23,
58
+ 31
59
+ ],
60
+ "attention_dropout_rate": 0.0,
61
+ "hidden_dropout_rate": 0.0,
62
+ "layer_norm_epsilon": 1e-06
63
+ },
64
+ "registered_name": "keras_hub>SAM3VisionEncoder"
65
+ },
66
+ "text_encoder": {
67
+ "module": "keras_hub.src.models.sam3.sam3_text_encoder",
68
+ "class_name": "SAM3TextEncoder",
69
+ "config": {
70
+ "name": "sam3_text_encoder",
71
+ "trainable": true,
72
+ "dtype": {
73
+ "module": "keras",
74
+ "class_name": "DTypePolicy",
75
+ "config": {
76
+ "name": "float32"
77
+ },
78
+ "registered_name": null
79
+ },
80
+ "vocabulary_size": 49408,
81
+ "embedding_dim": 1024,
82
+ "hidden_dim": 1024,
83
+ "num_layers": 24,
84
+ "num_heads": 16,
85
+ "intermediate_dim": 4096,
86
+ "intermediate_activation": "gelu",
87
+ "max_sequence_length": 32,
88
+ "layer_norm_epsilon": 1e-05
89
+ },
90
+ "registered_name": "keras_hub>SAM3TextEncoder"
91
+ },
92
+ "geometry_encoder": {
93
+ "module": "keras_hub.src.models.sam3.sam3_geometry_encoder",
94
+ "class_name": "SAM3GeometryEncoder",
95
+ "config": {
96
+ "name": "sam3_geometry_encoder",
97
+ "trainable": true,
98
+ "dtype": {
99
+ "module": "keras",
100
+ "class_name": "DTypePolicy",
101
+ "config": {
102
+ "name": "float32"
103
+ },
104
+ "registered_name": null
105
+ },
106
+ "num_layers": 3,
107
+ "hidden_dim": 256,
108
+ "intermediate_dim": 2048,
109
+ "num_heads": 8,
110
+ "roi_size": 7,
111
+ "hidden_activation": "relu",
112
+ "dropout_rate": 0.0,
113
+ "layer_norm_epsilon": 1e-06
114
+ },
115
+ "registered_name": "keras_hub>SAM3GeometryEncoder"
116
+ },
117
+ "detr_encoder": {
118
+ "module": "keras_hub.src.models.sam3.sam3_detr_encoder",
119
+ "class_name": "SAM3DetrEncoder",
120
+ "config": {
121
+ "name": "sam3_detr_encoder",
122
+ "trainable": true,
123
+ "dtype": {
124
+ "module": "keras",
125
+ "class_name": "DTypePolicy",
126
+ "config": {
127
+ "name": "float32"
128
+ },
129
+ "registered_name": null
130
+ },
131
+ "num_layers": 6,
132
+ "hidden_dim": 256,
133
+ "intermediate_dim": 2048,
134
+ "num_heads": 8,
135
+ "hidden_activation": "relu",
136
+ "dropout_rate": 0.1,
137
+ "layer_norm_epsilon": 1e-06
138
+ },
139
+ "registered_name": "keras_hub>SAM3DetrEncoder"
140
+ },
141
+ "detr_decoder": {
142
+ "module": "keras_hub.src.models.sam3.sam3_detr_decoder",
143
+ "class_name": "SAM3DetrDecoder",
144
+ "config": {
145
+ "name": "sam3_detr_decoder",
146
+ "trainable": true,
147
+ "dtype": {
148
+ "module": "keras",
149
+ "class_name": "DTypePolicy",
150
+ "config": {
151
+ "name": "float32"
152
+ },
153
+ "registered_name": null
154
+ },
155
+ "image_shape": [
156
+ 1008,
157
+ 1008,
158
+ 3
159
+ ],
160
+ "patch_size": 14,
161
+ "num_layers": 6,
162
+ "hidden_dim": 256,
163
+ "intermediate_dim": 2048,
164
+ "num_heads": 8,
165
+ "num_queries": 200,
166
+ "hidden_activation": "relu",
167
+ "dropout_rate": 0.1,
168
+ "layer_norm_epsilon": 1e-06
169
+ },
170
+ "registered_name": "keras_hub>SAM3DetrDecoder"
171
+ },
172
+ "mask_decoder": {
173
+ "module": "keras_hub.src.models.sam3.sam3_mask_decoder",
174
+ "class_name": "SAM3MaskDecoder",
175
+ "config": {
176
+ "name": "sam3_mask_decoder",
177
+ "trainable": true,
178
+ "dtype": {
179
+ "module": "keras",
180
+ "class_name": "DTypePolicy",
181
+ "config": {
182
+ "name": "float32"
183
+ },
184
+ "registered_name": null
185
+ },
186
+ "num_upsampling_stages": 3,
187
+ "hidden_dim": 256,
188
+ "num_heads": 8,
189
+ "dropout_rate": 0.0,
190
+ "layer_norm_epsilon": 1e-06
191
+ },
192
+ "registered_name": "keras_hub>SAM3MaskDecoder"
193
+ }
194
+ },
195
+ "registered_name": "keras_hub>SAM3PromptableConceptBackbone"
196
+ }
image_converter.json ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.sam3.sam3_image_converter",
3
+ "class_name": "SAM3ImageConverter",
4
+ "config": {
5
+ "name": "sam3_image_converter",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "float32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "image_size": [
16
+ 1008,
17
+ 1008
18
+ ],
19
+ "scale": [
20
+ 0.00784313725490196,
21
+ 0.00784313725490196,
22
+ 0.00784313725490196
23
+ ],
24
+ "offset": [
25
+ -1.0,
26
+ -1.0,
27
+ -1.0
28
+ ],
29
+ "interpolation": "bilinear",
30
+ "antialias": true,
31
+ "crop_to_aspect_ratio": false,
32
+ "pad_to_aspect_ratio": false,
33
+ "bounding_box_format": "yxyx"
34
+ },
35
+ "registered_name": "keras_hub>SAM3ImageConverter"
36
+ }
metadata.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "keras_version": "3.13.2",
3
+ "keras_hub_version": "0.26.0.dev0",
4
+ "parameter_count": 849288502,
5
+ "date_saved": "2026-01-30@22:36:53",
6
+ "tasks": [
7
+ "ImageSegmenter"
8
+ ]
9
+ }
model.weights.h5 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd36085331939d1c89b8714f16882f7dff33bc150d3b8d1f4e890112ccab2073
3
+ size 3400207368
preprocessor.json ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.sam3.sam3_pc_image_segmenter_preprocessor",
3
+ "class_name": "SAM3PromptableConceptImageSegmenterPreprocessor",
4
+ "config": {
5
+ "name": "sam3_promptable_concept_image_segmenter_preprocessor",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "float32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "tokenizer": {
16
+ "module": "keras_hub.src.models.sam3.sam3_tokenizer",
17
+ "class_name": "SAM3Tokenizer",
18
+ "config": {
19
+ "name": "sam3_tokenizer",
20
+ "trainable": true,
21
+ "dtype": {
22
+ "module": "keras",
23
+ "class_name": "DTypePolicy",
24
+ "config": {
25
+ "name": "int32"
26
+ },
27
+ "registered_name": null
28
+ },
29
+ "config_file": "tokenizer.json",
30
+ "sequence_length": null,
31
+ "add_prefix_space": false
32
+ },
33
+ "registered_name": "keras_hub>SAM3Tokenizer"
34
+ },
35
+ "image_converter": {
36
+ "module": "keras_hub.src.models.sam3.sam3_image_converter",
37
+ "class_name": "SAM3ImageConverter",
38
+ "config": {
39
+ "name": "sam3_image_converter",
40
+ "trainable": true,
41
+ "dtype": {
42
+ "module": "keras",
43
+ "class_name": "DTypePolicy",
44
+ "config": {
45
+ "name": "float32"
46
+ },
47
+ "registered_name": null
48
+ },
49
+ "image_size": [
50
+ 1008,
51
+ 1008
52
+ ],
53
+ "scale": [
54
+ 0.00784313725490196,
55
+ 0.00784313725490196,
56
+ 0.00784313725490196
57
+ ],
58
+ "offset": [
59
+ -1.0,
60
+ -1.0,
61
+ -1.0
62
+ ],
63
+ "interpolation": "bilinear",
64
+ "antialias": true,
65
+ "crop_to_aspect_ratio": false,
66
+ "pad_to_aspect_ratio": false,
67
+ "bounding_box_format": "yxyx"
68
+ },
69
+ "registered_name": "keras_hub>SAM3ImageConverter"
70
+ },
71
+ "config_file": "preprocessor.json",
72
+ "sequence_length": 32,
73
+ "add_start_token": true,
74
+ "add_end_token": true
75
+ },
76
+ "registered_name": "keras_hub>SAM3PromptableConceptImageSegmenterPreprocessor"
77
+ }
tokenizer.json ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "module": "keras_hub.src.models.sam3.sam3_tokenizer",
3
+ "class_name": "SAM3Tokenizer",
4
+ "config": {
5
+ "name": "sam3_tokenizer",
6
+ "trainable": true,
7
+ "dtype": {
8
+ "module": "keras",
9
+ "class_name": "DTypePolicy",
10
+ "config": {
11
+ "name": "int32"
12
+ },
13
+ "registered_name": null
14
+ },
15
+ "config_file": "tokenizer.json",
16
+ "sequence_length": null,
17
+ "add_prefix_space": false
18
+ },
19
+ "registered_name": "keras_hub>SAM3Tokenizer"
20
+ }