hermeschen-ezcon committed on
Commit
6e5f9bd
·
verified ·
1 Parent(s): 5d04ca9

Upload folder using huggingface_hub

Browse files
Files changed (4) hide show
  1. README.md +1 -1
  2. config.json +12 -15
  3. model.safetensors +1 -1
  4. video_preprocessor_config.json +2 -1
README.md CHANGED
@@ -25,7 +25,7 @@ base_model:
25
  ---
26
 
27
  # EZCon/SmolVLM2-2.2B-Instruct-4bit-mlx
28
- This model was converted to MLX format from [`HuggingFaceTB/SmolVLM2-2.2B-Instruct`]() using mlx-vlm version **0.3.2**.
29
  Refer to the [original model card](https://huggingface.co/HuggingFaceTB/SmolVLM2-2.2B-Instruct) for more details on the model.
30
  ## Use with mlx
31
 
 
25
  ---
26
 
27
  # EZCon/SmolVLM2-2.2B-Instruct-4bit-mlx
28
+ This model was converted to MLX format from [`HuggingFaceTB/SmolVLM2-2.2B-Instruct`](https://huggingface.co/HuggingFaceTB/SmolVLM2-2.2B-Instruct) using mlx-vlm version **0.3.3**.
29
  Refer to the [original model card](https://huggingface.co/HuggingFaceTB/SmolVLM2-2.2B-Instruct) for more details on the model.
30
  ## Use with mlx
31
 
config.json CHANGED
@@ -11,6 +11,7 @@
11
  "decoder_start_token_id": null,
12
  "diversity_penalty": 0.0,
13
  "do_sample": false,
 
14
  "early_stopping": false,
15
  "encoder_no_repeat_ngram_size": 0,
16
  "eos_token_id": null,
@@ -46,11 +47,13 @@
46
  "pruned_heads": {},
47
  "quantization": {
48
  "group_size": 64,
49
- "bits": 4
 
50
  },
51
  "quantization_config": {
52
  "group_size": 64,
53
- "bits": 4
 
54
  },
55
  "remove_invalid_values": false,
56
  "repetition_penalty": 1.0,
@@ -83,7 +86,7 @@
83
  "return_dict": true,
84
  "output_hidden_states": false,
85
  "torchscript": false,
86
- "torch_dtype": "bfloat16",
87
  "pruned_heads": {},
88
  "tie_word_embeddings": false,
89
  "chunk_size_feed_forward": 0,
@@ -118,8 +121,6 @@
118
  "do_sample": false,
119
  "early_stopping": false,
120
  "num_beams": 1,
121
- "num_beam_groups": 1,
122
- "diversity_penalty": 0.0,
123
  "temperature": 1.0,
124
  "top_k": 50,
125
  "top_p": 1.0,
@@ -138,6 +139,8 @@
138
  "exponential_decay_length_penalty": null,
139
  "suppress_tokens": null,
140
  "begin_suppress_tokens": null,
 
 
141
  "_name_or_path": "None",
142
  "_flash_attn_2_enabled": true,
143
  "model_type": "llama",
@@ -224,27 +227,23 @@
224
  }
225
  },
226
  "use_resampler": false,
227
- "tf_legacy_loss": false,
228
- "use_bfloat16": false,
229
  "output_attentions": false
230
  },
231
- "tf_legacy_loss": false,
232
  "tie_encoder_decoder": false,
233
  "tie_word_embeddings": false,
234
  "tokenizer_class": null,
235
  "top_k": 50,
236
  "top_p": 1.0,
237
  "torchscript": false,
238
- "transformers_version": "4.56.0.dev0",
239
  "typical_p": 1.0,
240
- "use_bfloat16": false,
241
  "use_cache": false,
242
  "use_reentrant_checkpointing": false,
243
  "vision_config": {
244
  "return_dict": true,
245
  "output_hidden_states": false,
246
  "torchscript": false,
247
- "torch_dtype": null,
248
  "pruned_heads": {},
249
  "tie_word_embeddings": false,
250
  "chunk_size_feed_forward": 0,
@@ -277,8 +276,6 @@
277
  "do_sample": false,
278
  "early_stopping": false,
279
  "num_beams": 1,
280
- "num_beam_groups": 1,
281
- "diversity_penalty": 0.0,
282
  "temperature": 1.0,
283
  "top_k": 50,
284
  "top_p": 1.0,
@@ -297,6 +294,8 @@
297
  "exponential_decay_length_penalty": null,
298
  "suppress_tokens": null,
299
  "begin_suppress_tokens": null,
 
 
300
  "_name_or_path": "",
301
  "max_image_size": {
302
  "longest_edge": 384
@@ -306,8 +305,6 @@
306
  "longest_edge": 1920
307
  },
308
  "use_base_siglip": false,
309
- "tf_legacy_loss": false,
310
- "use_bfloat16": false,
311
  "hidden_size": 1152,
312
  "intermediate_size": 4304,
313
  "num_hidden_layers": 27,
 
11
  "decoder_start_token_id": null,
12
  "diversity_penalty": 0.0,
13
  "do_sample": false,
14
+ "dtype": "float32",
15
  "early_stopping": false,
16
  "encoder_no_repeat_ngram_size": 0,
17
  "eos_token_id": null,
 
47
  "pruned_heads": {},
48
  "quantization": {
49
  "group_size": 64,
50
+ "bits": 4,
51
+ "mode": "affine"
52
  },
53
  "quantization_config": {
54
  "group_size": 64,
55
+ "bits": 4,
56
+ "mode": "affine"
57
  },
58
  "remove_invalid_values": false,
59
  "repetition_penalty": 1.0,
 
86
  "return_dict": true,
87
  "output_hidden_states": false,
88
  "torchscript": false,
89
+ "dtype": "bfloat16",
90
  "pruned_heads": {},
91
  "tie_word_embeddings": false,
92
  "chunk_size_feed_forward": 0,
 
121
  "do_sample": false,
122
  "early_stopping": false,
123
  "num_beams": 1,
 
 
124
  "temperature": 1.0,
125
  "top_k": 50,
126
  "top_p": 1.0,
 
139
  "exponential_decay_length_penalty": null,
140
  "suppress_tokens": null,
141
  "begin_suppress_tokens": null,
142
+ "num_beam_groups": 1,
143
+ "diversity_penalty": 0.0,
144
  "_name_or_path": "None",
145
  "_flash_attn_2_enabled": true,
146
  "model_type": "llama",
 
227
  }
228
  },
229
  "use_resampler": false,
 
 
230
  "output_attentions": false
231
  },
 
232
  "tie_encoder_decoder": false,
233
  "tie_word_embeddings": false,
234
  "tokenizer_class": null,
235
  "top_k": 50,
236
  "top_p": 1.0,
237
  "torchscript": false,
238
+ "transformers_version": "4.57.0.dev0",
239
  "typical_p": 1.0,
 
240
  "use_cache": false,
241
  "use_reentrant_checkpointing": false,
242
  "vision_config": {
243
  "return_dict": true,
244
  "output_hidden_states": false,
245
  "torchscript": false,
246
+ "dtype": null,
247
  "pruned_heads": {},
248
  "tie_word_embeddings": false,
249
  "chunk_size_feed_forward": 0,
 
276
  "do_sample": false,
277
  "early_stopping": false,
278
  "num_beams": 1,
 
 
279
  "temperature": 1.0,
280
  "top_k": 50,
281
  "top_p": 1.0,
 
294
  "exponential_decay_length_penalty": null,
295
  "suppress_tokens": null,
296
  "begin_suppress_tokens": null,
297
+ "num_beam_groups": 1,
298
+ "diversity_penalty": 0.0,
299
  "_name_or_path": "",
300
  "max_image_size": {
301
  "longest_edge": 384
 
305
  "longest_edge": 1920
306
  },
307
  "use_base_siglip": false,
 
 
308
  "hidden_size": 1152,
309
  "intermediate_size": 4304,
310
  "num_hidden_layers": 27,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c30695d4dcea2dafa67ee9e3a7bfff906da85d92ae61c22539acc80eef2a53bb
3
  size 1857752703
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e407554634bd555aa87388f98b490fd4575463b615c68d7b9e2fb7d30c6add94
3
  size 1857752703
video_preprocessor_config.json CHANGED
@@ -28,13 +28,14 @@
28
  "longest_edge": 384
29
  },
30
  "num_frames": 64,
 
31
  "processor_class": "SmolVLMProcessor",
32
  "resample": 1,
33
  "rescale_factor": 0.00392156862745098,
 
34
  "size": {
35
  "longest_edge": 1536
36
  },
37
- "size_divisor": null,
38
  "video_metadata": null,
39
  "video_processor_type": "SmolVLMVideoProcessor",
40
  "video_sampling": {
 
28
  "longest_edge": 384
29
  },
30
  "num_frames": 64,
31
+ "pad_size": null,
32
  "processor_class": "SmolVLMProcessor",
33
  "resample": 1,
34
  "rescale_factor": 0.00392156862745098,
35
+ "return_metadata": false,
36
  "size": {
37
  "longest_edge": 1536
38
  },
 
39
  "video_metadata": null,
40
  "video_processor_type": "SmolVLMVideoProcessor",
41
  "video_sampling": {