Replace bf16 with Q4 MLX — consumer-ready quantisation

Files changed (4) hide show

config.json CHANGED Viewed

@@ -57,6 +57,16 @@
     "image_token_id": 258880,
     "initializer_range": 0.02,
     "model_type": "gemma4",
     "text_config": {
         "attention_bias": false,
         "attention_dropout": 0.0,

     "image_token_id": 258880,
     "initializer_range": 0.02,
     "model_type": "gemma4",
+    "quantization": {
+        "group_size": 64,
+        "bits": 4,
+        "mode": "affine"
+    },
+    "quantization_config": {
+        "group_size": 64,
+        "bits": 4,
+        "mode": "affine"
+    },
     "text_config": {
         "attention_bias": false,
         "attention_dropout": 0.0,

model-00002-of-00002.safetensors DELETED Viewed

@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:13f40b9f566d121355dbccb72e37ba5fb9239169788c30ec9b613020d8c0edfa
-size 4725146516

model-00001-of-00002.safetensors → model.safetensors RENAMED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98a147a745501c0374f5c78036ad5c5214b1e0a21bd68ed089efded9dc10e6b6
-size 4569831614

 version https://git-lfs.github.com/spec/v1
+oid sha256:3fa6ebcb3b3f748c4c65d4763e88188b32ecd4617e2d7330d932ef58d7cd59d9
+size 2634553193

model.safetensors.index.json CHANGED Viewed

The diff for this file is too large to render. See raw diff