mlx-community
/

FastVLM-0.5B-bf16

Image-Text-to-Text

Model card Files Files and versions

Add vision_config

#3

by pcuenq HF Staff - opened Oct 18, 2025

base: refs/heads/main

←

from: refs/pr/3

Discussion Files changed

Files changed (1) hide show

config.json +62 -1

config.json CHANGED Viewed

@@ -126,5 +126,66 @@
     "use_cache": true,
     "use_mm_proj": true,
     "use_sliding_window": false,
     "vocab_size": 151936
-}

     "use_cache": true,
     "use_mm_proj": true,
     "use_sliding_window": false,
+    "vision_config": {
+        "cls_ratio": 2.0,
+        "down_patch_size": 7,
+        "down_stride": 2,
+        "downsamples": [
+            true,
+            true,
+            true,
+            true,
+            true
+        ],
+        "embed_dims": [
+            96,
+            192,
+            384,
+            768,
+            1536
+        ],
+        "hidden_size": 1024,
+        "image_size": 1024,
+        "intermediate_size": 3072,
+        "layer_scale_init_value": 1e-05,
+        "layers": [
+            2,
+            12,
+            24,
+            4,
+            2
+        ],
+        "mlp_ratios": [
+            4,
+            4,
+            4,
+            4,
+            4
+        ],
+        "num_classes": 1000,
+        "patch_size": 64,
+        "pos_embs_shapes": [
+            null,
+            null,
+            null,
+            [
+                7,
+                7
+            ],
+            [
+                7,
+                7
+            ]
+        ],
+        "projection_dim": 768,
+        "repmixer_kernel_size": 3,
+        "token_mixers": [
+            "repmixer",
+            "repmixer",
+            "repmixer",
+            "attention",
+            "attention"
+        ]
+    },
     "vocab_size": 151936
+}