lthn committed on
Commit
8c306a8
·
verified ·
1 Parent(s): 6067f14

Replace bf16 with Q4 MLX — consumer-ready quantisation

Browse files
config.json CHANGED
@@ -57,6 +57,16 @@
57
  "image_token_id": 258880,
58
  "initializer_range": 0.02,
59
  "model_type": "gemma4",
 
 
 
 
 
 
 
 
 
 
60
  "text_config": {
61
  "attention_bias": false,
62
  "attention_dropout": 0.0,
 
57
  "image_token_id": 258880,
58
  "initializer_range": 0.02,
59
  "model_type": "gemma4",
60
+ "quantization": {
61
+ "group_size": 64,
62
+ "bits": 4,
63
+ "mode": "affine"
64
+ },
65
+ "quantization_config": {
66
+ "group_size": 64,
67
+ "bits": 4,
68
+ "mode": "affine"
69
+ },
70
  "text_config": {
71
  "attention_bias": false,
72
  "attention_dropout": 0.0,
model-00002-of-00002.safetensors DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:13f40b9f566d121355dbccb72e37ba5fb9239169788c30ec9b613020d8c0edfa
3
- size 4725146516
 
 
 
 
model-00001-of-00002.safetensors → model.safetensors RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:98a147a745501c0374f5c78036ad5c5214b1e0a21bd68ed089efded9dc10e6b6
3
- size 4569831614
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3fa6ebcb3b3f748c4c65d4763e88188b32ecd4617e2d7330d932ef58d7cd59d9
3
+ size 2634553193
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff