Xenova HF Staff committed on
Commit
36fb679
·
verified ·
1 Parent(s): 454b597

Upload folder using huggingface_hub

Browse files
Files changed (36) hide show
  1. .gitattributes +16 -0
  2. config.json +239 -0
  3. onnx/prompt_encoder_mask_decoder.onnx +3 -0
  4. onnx/prompt_encoder_mask_decoder.onnx_data +3 -0
  5. onnx/prompt_encoder_mask_decoder_bnb4.onnx +3 -0
  6. onnx/prompt_encoder_mask_decoder_bnb4.onnx_data +3 -0
  7. onnx/prompt_encoder_mask_decoder_fp16.onnx +3 -0
  8. onnx/prompt_encoder_mask_decoder_fp16.onnx_data +3 -0
  9. onnx/prompt_encoder_mask_decoder_int8.onnx +3 -0
  10. onnx/prompt_encoder_mask_decoder_int8.onnx_data +3 -0
  11. onnx/prompt_encoder_mask_decoder_q4.onnx +3 -0
  12. onnx/prompt_encoder_mask_decoder_q4.onnx_data +3 -0
  13. onnx/prompt_encoder_mask_decoder_q4f16.onnx +3 -0
  14. onnx/prompt_encoder_mask_decoder_q4f16.onnx_data +3 -0
  15. onnx/prompt_encoder_mask_decoder_quantized.onnx +3 -0
  16. onnx/prompt_encoder_mask_decoder_quantized.onnx_data +3 -0
  17. onnx/prompt_encoder_mask_decoder_uint8.onnx +3 -0
  18. onnx/prompt_encoder_mask_decoder_uint8.onnx_data +3 -0
  19. onnx/vision_encoder.onnx +3 -0
  20. onnx/vision_encoder.onnx_data +3 -0
  21. onnx/vision_encoder_bnb4.onnx +3 -0
  22. onnx/vision_encoder_bnb4.onnx_data +3 -0
  23. onnx/vision_encoder_fp16.onnx +3 -0
  24. onnx/vision_encoder_fp16.onnx_data +3 -0
  25. onnx/vision_encoder_int8.onnx +3 -0
  26. onnx/vision_encoder_int8.onnx_data +3 -0
  27. onnx/vision_encoder_q4.onnx +3 -0
  28. onnx/vision_encoder_q4.onnx_data +3 -0
  29. onnx/vision_encoder_q4f16.onnx +3 -0
  30. onnx/vision_encoder_q4f16.onnx_data +3 -0
  31. onnx/vision_encoder_quantized.onnx +3 -0
  32. onnx/vision_encoder_quantized.onnx_data +3 -0
  33. onnx/vision_encoder_uint8.onnx +3 -0
  34. onnx/vision_encoder_uint8.onnx_data +3 -0
  35. preprocessor_config.json +39 -0
  36. processor_config.json +44 -0
.gitattributes CHANGED
@@ -33,3 +33,19 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ onnx/prompt_encoder_mask_decoder.onnx_data filter=lfs diff=lfs merge=lfs -text
37
+ onnx/prompt_encoder_mask_decoder_bnb4.onnx_data filter=lfs diff=lfs merge=lfs -text
38
+ onnx/prompt_encoder_mask_decoder_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
39
+ onnx/prompt_encoder_mask_decoder_int8.onnx_data filter=lfs diff=lfs merge=lfs -text
40
+ onnx/prompt_encoder_mask_decoder_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
41
+ onnx/prompt_encoder_mask_decoder_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
42
+ onnx/prompt_encoder_mask_decoder_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text
43
+ onnx/prompt_encoder_mask_decoder_uint8.onnx_data filter=lfs diff=lfs merge=lfs -text
44
+ onnx/vision_encoder.onnx_data filter=lfs diff=lfs merge=lfs -text
45
+ onnx/vision_encoder_bnb4.onnx_data filter=lfs diff=lfs merge=lfs -text
46
+ onnx/vision_encoder_fp16.onnx_data filter=lfs diff=lfs merge=lfs -text
47
+ onnx/vision_encoder_int8.onnx_data filter=lfs diff=lfs merge=lfs -text
48
+ onnx/vision_encoder_q4.onnx_data filter=lfs diff=lfs merge=lfs -text
49
+ onnx/vision_encoder_q4f16.onnx_data filter=lfs diff=lfs merge=lfs -text
50
+ onnx/vision_encoder_quantized.onnx_data filter=lfs diff=lfs merge=lfs -text
51
+ onnx/vision_encoder_uint8.onnx_data filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,239 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Sam2Model"
4
+ ],
5
+ "dtype": "float32",
6
+ "enable_occlusion_spatial_embedding": false,
7
+ "enable_temporal_pos_encoding_for_object_pointers": false,
8
+ "image_size": 1024,
9
+ "initializer_range": 0.02,
10
+ "mask_decoder_config": {
11
+ "attention_downsample_rate": 2,
12
+ "dtype": "float32",
13
+ "dynamic_multimask_stability_delta": 0.05,
14
+ "dynamic_multimask_stability_thresh": 0.98,
15
+ "dynamic_multimask_via_stability": true,
16
+ "hidden_act": "gelu",
17
+ "hidden_size": 256,
18
+ "iou_head_depth": 3,
19
+ "iou_head_hidden_dim": 256,
20
+ "mlp_dim": 2048,
21
+ "model_type": "",
22
+ "num_attention_heads": 8,
23
+ "num_hidden_layers": 2,
24
+ "num_multimask_outputs": 3
25
+ },
26
+ "mask_downsampler_embed_dim": 256,
27
+ "mask_downsampler_hidden_act": "gelu",
28
+ "mask_downsampler_kernel_size": 3,
29
+ "mask_downsampler_padding": 1,
30
+ "mask_downsampler_stride": 2,
31
+ "mask_downsampler_total_stride": 16,
32
+ "max_object_pointers_in_encoder": 16,
33
+ "memory_attention_downsample_rate": 1,
34
+ "memory_attention_dropout": 0.1,
35
+ "memory_attention_feed_forward_hidden_act": "relu",
36
+ "memory_attention_feed_forward_hidden_size": 2048,
37
+ "memory_attention_hidden_size": 256,
38
+ "memory_attention_num_attention_heads": 1,
39
+ "memory_attention_num_layers": 4,
40
+ "memory_attention_rope_dropout": 0.1,
41
+ "memory_attention_rope_feat_sizes": [
42
+ 64,
43
+ 64
44
+ ],
45
+ "memory_attention_rope_theta": 10000,
46
+ "memory_encoder_hidden_size": 256,
47
+ "memory_encoder_output_channels": 64,
48
+ "memory_fuser_embed_dim": 256,
49
+ "memory_fuser_hidden_act": "gelu",
50
+ "memory_fuser_intermediate_dim": 1024,
51
+ "memory_fuser_kernel_size": 7,
52
+ "memory_fuser_layer_scale_init_value": 1e-06,
53
+ "memory_fuser_num_layers": 2,
54
+ "memory_fuser_padding": 3,
55
+ "model_type": "sam2",
56
+ "multimask_max_pt_num": 1,
57
+ "multimask_min_pt_num": 0,
58
+ "multimask_output_for_tracking": true,
59
+ "multimask_output_in_sam": true,
60
+ "num_maskmem": 7,
61
+ "prompt_encoder_config": {
62
+ "dtype": "float32",
63
+ "hidden_act": "gelu",
64
+ "hidden_size": 256,
65
+ "image_size": 1024,
66
+ "layer_norm_eps": 1e-06,
67
+ "mask_input_channels": 16,
68
+ "model_type": "",
69
+ "num_point_embeddings": 4,
70
+ "patch_size": 16,
71
+ "scale": 1
72
+ },
73
+ "sigmoid_bias_for_mem_enc": -10.0,
74
+ "sigmoid_scale_for_mem_enc": 20.0,
75
+ "transformers_version": "5.0.0.dev0",
76
+ "vision_config": {
77
+ "backbone_channel_list": [
78
+ 768,
79
+ 384,
80
+ 192,
81
+ 96
82
+ ],
83
+ "backbone_config": {
84
+ "_name_or_path": "",
85
+ "add_cross_attention": false,
86
+ "architectures": null,
87
+ "bad_words_ids": null,
88
+ "begin_suppress_tokens": null,
89
+ "blocks_per_stage": [
90
+ 1,
91
+ 2,
92
+ 7,
93
+ 2
94
+ ],
95
+ "bos_token_id": null,
96
+ "chunk_size_feed_forward": 0,
97
+ "cross_attention_hidden_size": null,
98
+ "decoder_start_token_id": null,
99
+ "diversity_penalty": 0.0,
100
+ "do_sample": false,
101
+ "dtype": null,
102
+ "early_stopping": false,
103
+ "embed_dim_per_stage": [
104
+ 96,
105
+ 192,
106
+ 384,
107
+ 768
108
+ ],
109
+ "encoder_no_repeat_ngram_size": 0,
110
+ "eos_token_id": null,
111
+ "exponential_decay_length_penalty": null,
112
+ "finetuning_task": null,
113
+ "forced_bos_token_id": null,
114
+ "forced_eos_token_id": null,
115
+ "global_attention_blocks": [
116
+ 5,
117
+ 7,
118
+ 9
119
+ ],
120
+ "hidden_act": "gelu",
121
+ "hidden_size": 96,
122
+ "id2label": {
123
+ "0": "LABEL_0",
124
+ "1": "LABEL_1"
125
+ },
126
+ "image_size": [
127
+ 1024,
128
+ 1024
129
+ ],
130
+ "initializer_range": 0.02,
131
+ "is_decoder": false,
132
+ "is_encoder_decoder": false,
133
+ "label2id": {
134
+ "LABEL_0": 0,
135
+ "LABEL_1": 1
136
+ },
137
+ "layer_norm_eps": 1e-06,
138
+ "length_penalty": 1.0,
139
+ "max_length": 20,
140
+ "min_length": 0,
141
+ "mlp_ratio": 4.0,
142
+ "model_type": "sam2_hiera_det_model",
143
+ "no_repeat_ngram_size": 0,
144
+ "num_attention_heads": 1,
145
+ "num_attention_heads_per_stage": [
146
+ 1,
147
+ 2,
148
+ 4,
149
+ 8
150
+ ],
151
+ "num_beam_groups": 1,
152
+ "num_beams": 1,
153
+ "num_channels": 3,
154
+ "num_query_pool_stages": 3,
155
+ "num_return_sequences": 1,
156
+ "output_attentions": false,
157
+ "output_hidden_states": false,
158
+ "output_scores": false,
159
+ "pad_token_id": null,
160
+ "patch_kernel_size": [
161
+ 7,
162
+ 7
163
+ ],
164
+ "patch_padding": [
165
+ 3,
166
+ 3
167
+ ],
168
+ "patch_stride": [
169
+ 4,
170
+ 4
171
+ ],
172
+ "prefix": null,
173
+ "problem_type": null,
174
+ "pruned_heads": {},
175
+ "query_stride": [
176
+ 2,
177
+ 2
178
+ ],
179
+ "remove_invalid_values": false,
180
+ "repetition_penalty": 1.0,
181
+ "return_dict": true,
182
+ "return_dict_in_generate": false,
183
+ "sep_token_id": null,
184
+ "suppress_tokens": null,
185
+ "task_specific_params": null,
186
+ "temperature": 1.0,
187
+ "tf_legacy_loss": false,
188
+ "tie_encoder_decoder": false,
189
+ "tie_word_embeddings": true,
190
+ "tokenizer_class": null,
191
+ "top_k": 50,
192
+ "top_p": 1.0,
193
+ "torchscript": false,
194
+ "typical_p": 1.0,
195
+ "use_bfloat16": false,
196
+ "window_positional_embedding_background_size": [
197
+ 7,
198
+ 7
199
+ ],
200
+ "window_size_per_stage": [
201
+ 8,
202
+ 4,
203
+ 14,
204
+ 7
205
+ ]
206
+ },
207
+ "backbone_feature_sizes": [
208
+ [
209
+ 256,
210
+ 256
211
+ ],
212
+ [
213
+ 128,
214
+ 128
215
+ ],
216
+ [
217
+ 64,
218
+ 64
219
+ ]
220
+ ],
221
+ "dtype": "float32",
222
+ "fpn_hidden_size": 256,
223
+ "fpn_kernel_size": 1,
224
+ "fpn_padding": 0,
225
+ "fpn_stride": 1,
226
+ "fpn_top_down_levels": [
227
+ 2,
228
+ 3
229
+ ],
230
+ "hidden_act": "gelu",
231
+ "initializer_range": 0.02,
232
+ "layer_norm_eps": 1e-06,
233
+ "model_type": "sam2_vision_model",
234
+ "num_feature_levels": 3
235
+ },
236
+ "transformers.js_config": {
237
+ "use_external_data_format": true
238
+ }
239
+ }
onnx/prompt_encoder_mask_decoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbcdeb3696544e81c7b58f682f857b20c2cf1eb8fae7d5dbf61f8844d879a84b
3
+ size 213114
onnx/prompt_encoder_mask_decoder.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f43b57795ac5b7c71d4ff1a72ffbb3bfe36d54cd911edb7ab5b6a9ca6acd12b9
3
+ size 20958208
onnx/prompt_encoder_mask_decoder_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be00eb5842055b2339f34177a0b4f6cd8d0ceac3bfad76180f979e4be8559235
3
+ size 228219
onnx/prompt_encoder_mask_decoder_bnb4.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c46c58af74fc19a5908ed908e30ab0b282f65473948a541021ad22915a6db64
3
+ size 6870016
onnx/prompt_encoder_mask_decoder_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:080e4508e5df765a6d7d959d25c1d53755e04c2b5ee213403a386deec03c38ab
3
+ size 229799
onnx/prompt_encoder_mask_decoder_fp16.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe430a918c9e5765f182207c7fce2984e292eefea0f6f4301dbb9e98fec24400
3
+ size 10454016
onnx/prompt_encoder_mask_decoder_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76859b7c543528b957c5b95a120bbf0b916e9ce4ff711e1a2f30bca6d6e5bde9
3
+ size 289885
onnx/prompt_encoder_mask_decoder_int8.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0157c99b78c66cb4104df576fcc24434638ae7c1e446b19e1659a37a3ccb40d
3
+ size 8662016
onnx/prompt_encoder_mask_decoder_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d8d9f4fccdfa8b47be7c44e92ca9e463bc4f082e46d8ae1c95477d85d6fc0bf
3
+ size 226444
onnx/prompt_encoder_mask_decoder_q4.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a01ca7a785d21605a1887e9687e15dc531c04bb0a8d33560df53e0834d7c594a
3
+ size 7130112
onnx/prompt_encoder_mask_decoder_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:03d3b196e4f187d5787a86eed5cf7a8dfbc224520e8b8ebe1029076fe2ee07af
3
+ size 243598
onnx/prompt_encoder_mask_decoder_q4f16.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:17227a1b60a361c0433a8185eac74601d9d644df4a5fd41d07a87ff44e8fb62e
3
+ size 4563968
onnx/prompt_encoder_mask_decoder_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1becce19628b45b79677fdc4070907cc5eebaae1355b03bde56309af1766334d
3
+ size 290416
onnx/prompt_encoder_mask_decoder_quantized.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0157c99b78c66cb4104df576fcc24434638ae7c1e446b19e1659a37a3ccb40d
3
+ size 8662016
onnx/prompt_encoder_mask_decoder_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3baffceb20fabbb1b0f43b15d0b9ef59690b3f25419136c44970d0a4758b0035
3
+ size 290021
onnx/prompt_encoder_mask_decoder_uint8.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:874b5237c3e6418f40f6f09e1c96b7765a0ef27f85d19c781bb711c2199156e4
3
+ size 8662016
onnx/vision_encoder.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed1487d6ef0a449f99c8d0dc401c49335a5ae383533d0aa8c7903b2734a1b893
3
+ size 354238
onnx/vision_encoder.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20eef3b82600c5f51ae366516675074eeb5043c45ab24a8af5f643834066fc3a
3
+ size 134084864
onnx/vision_encoder_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fbf56817f5115b70e0910982074fa817cef6a399b4f31e477eb5aa09412defa
3
+ size 367275
onnx/vision_encoder_bnb4.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c70352a648e474c85fd667c18e799dceb18eb2ab1ef7bd2d3775778d1f6c21e
3
+ size 42085568
onnx/vision_encoder_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5c318d9ce41420758dd1b0cd555fe064de1e1dc5437362b93aac75cb1af7c25a
3
+ size 314925
onnx/vision_encoder_fp16.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c025fccaf314aea6b7ce904db949764e97f34b3e0c127feb4d73cb2e1751af96
3
+ size 67005504
onnx/vision_encoder_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f5a1fd42956634c31397781b1d9569a25eddb4e3124ab53f88138806174941e
3
+ size 440423
onnx/vision_encoder_int8.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c57174725941eeafd0f7ec43e96fd165569514fb154f45ab50a58bf7a6ba90e
3
+ size 52573088
onnx/vision_encoder_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:646acb6b5b52b80448524654d02a26cc550d2ae7701318fcb22e78665cf117aa
3
+ size 366089
onnx/vision_encoder_q4.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f60abfaab485489fcc028afd949e28284523f91d5fdc7eb87bdee94603d15bc6
3
+ size 43758848
onnx/vision_encoder_q4f16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:687c0b8db970682b9d238d20229fa950dde951c31ddc930645e449cc98dd564a
3
+ size 327108
onnx/vision_encoder_q4f16.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad2373ff4c004ea828a5c91f74d01bf7ce0f93a0df85b274bc65052293a5b160
3
+ size 28532736
onnx/vision_encoder_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cacc51b627cdb5549ee4042c668edad7a6131bc3a982c3f01dd9ef620debcbe
3
+ size 441167
onnx/vision_encoder_quantized.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c231954acd1255c6670065e53516f331dad02e2de64bb8a3b3ac77363bd16813
3
+ size 52573088
onnx/vision_encoder_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:200c39f302a8d36eb0196d47fb97c000f82e790cabe25f5d95805c4e0112778b
3
+ size 440595
onnx/vision_encoder_uint8.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c231954acd1255c6670065e53516f331dad02e2de64bb8a3b3ac77363bd16813
3
+ size 52573088
preprocessor_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "data_format": "channels_first",
4
+ "default_to_square": true,
5
+ "device": null,
6
+ "disable_grouping": null,
7
+ "do_center_crop": null,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": null,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_mean": [
14
+ 0.485,
15
+ 0.456,
16
+ 0.406
17
+ ],
18
+ "image_processor_type": "Sam2ImageProcessorFast",
19
+ "image_seq_length": null,
20
+ "image_std": [
21
+ 0.229,
22
+ 0.224,
23
+ 0.225
24
+ ],
25
+ "input_data_format": null,
26
+ "mask_size": {
27
+ "height": 256,
28
+ "width": 256
29
+ },
30
+ "pad_size": null,
31
+ "processor_class": "Sam2VideoProcessor",
32
+ "resample": 2,
33
+ "rescale_factor": 0.00392156862745098,
34
+ "return_tensors": null,
35
+ "size": {
36
+ "height": 1024,
37
+ "width": 1024
38
+ }
39
+ }
processor_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "image_processor": {
3
+ "crop_size": null,
4
+ "data_format": "channels_first",
5
+ "default_to_square": true,
6
+ "device": null,
7
+ "disable_grouping": null,
8
+ "do_center_crop": null,
9
+ "do_convert_rgb": true,
10
+ "do_normalize": true,
11
+ "do_pad": null,
12
+ "do_rescale": true,
13
+ "do_resize": true,
14
+ "image_mean": [
15
+ 0.485,
16
+ 0.456,
17
+ 0.406
18
+ ],
19
+ "image_processor_type": "Sam2ImageProcessorFast",
20
+ "image_seq_length": null,
21
+ "image_std": [
22
+ 0.229,
23
+ 0.224,
24
+ 0.225
25
+ ],
26
+ "input_data_format": null,
27
+ "mask_size": {
28
+ "height": 256,
29
+ "width": 256
30
+ },
31
+ "pad_size": null,
32
+ "processor_class": "Sam2Processor",
33
+ "resample": 2,
34
+ "rescale_factor": 0.00392156862745098,
35
+ "return_tensors": null,
36
+ "size": {
37
+ "height": 1024,
38
+ "width": 1024
39
+ }
40
+ },
41
+ "point_pad_value": -10,
42
+ "processor_class": "Sam2Processor",
43
+ "target_size": 1024
44
+ }