Firworks committed on
Commit
80fbbcf
·
verified ·
1 Parent(s): 9e8f4bc

Add NVFP4 quantized checkpoint

Browse files
config.json CHANGED
@@ -17,9 +17,11 @@
17
  "num_bits": 4,
18
  "observer": "static_minmax",
19
  "observer_kwargs": {},
 
20
  "strategy": "tensor_group",
21
  "symmetric": true,
22
- "type": "float"
 
23
  },
24
  "output_activations": null,
25
  "targets": [
@@ -33,15 +35,133 @@
33
  "num_bits": 4,
34
  "observer": "static_minmax",
35
  "observer_kwargs": {},
 
36
  "strategy": "tensor_group",
37
  "symmetric": true,
38
- "type": "float"
 
39
  }
40
  }
41
  },
42
  "format": "nvfp4-pack-quantized",
43
  "global_compression_ratio": null,
44
  "ignore": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  "lm_head"
46
  ],
47
  "kv_cache_scheme": null,
 
17
  "num_bits": 4,
18
  "observer": "static_minmax",
19
  "observer_kwargs": {},
20
+ "scale_dtype": "torch.float8_e4m3fn",
21
  "strategy": "tensor_group",
22
  "symmetric": true,
23
+ "type": "float",
24
+ "zp_dtype": null
25
  },
26
  "output_activations": null,
27
  "targets": [
 
35
  "num_bits": 4,
36
  "observer": "static_minmax",
37
  "observer_kwargs": {},
38
+ "scale_dtype": "torch.float8_e4m3fn",
39
  "strategy": "tensor_group",
40
  "symmetric": true,
41
+ "type": "float",
42
+ "zp_dtype": null
43
  }
44
  }
45
  },
46
  "format": "nvfp4-pack-quantized",
47
  "global_compression_ratio": null,
48
  "ignore": [
49
+ "model.visual.blocks.0.attn.qkv",
50
+ "model.visual.blocks.0.attn.proj",
51
+ "model.visual.blocks.0.mlp.linear_fc1",
52
+ "model.visual.blocks.0.mlp.linear_fc2",
53
+ "model.visual.blocks.1.attn.qkv",
54
+ "model.visual.blocks.1.attn.proj",
55
+ "model.visual.blocks.1.mlp.linear_fc1",
56
+ "model.visual.blocks.1.mlp.linear_fc2",
57
+ "model.visual.blocks.2.attn.qkv",
58
+ "model.visual.blocks.2.attn.proj",
59
+ "model.visual.blocks.2.mlp.linear_fc1",
60
+ "model.visual.blocks.2.mlp.linear_fc2",
61
+ "model.visual.blocks.3.attn.qkv",
62
+ "model.visual.blocks.3.attn.proj",
63
+ "model.visual.blocks.3.mlp.linear_fc1",
64
+ "model.visual.blocks.3.mlp.linear_fc2",
65
+ "model.visual.blocks.4.attn.qkv",
66
+ "model.visual.blocks.4.attn.proj",
67
+ "model.visual.blocks.4.mlp.linear_fc1",
68
+ "model.visual.blocks.4.mlp.linear_fc2",
69
+ "model.visual.blocks.5.attn.qkv",
70
+ "model.visual.blocks.5.attn.proj",
71
+ "model.visual.blocks.5.mlp.linear_fc1",
72
+ "model.visual.blocks.5.mlp.linear_fc2",
73
+ "model.visual.blocks.6.attn.qkv",
74
+ "model.visual.blocks.6.attn.proj",
75
+ "model.visual.blocks.6.mlp.linear_fc1",
76
+ "model.visual.blocks.6.mlp.linear_fc2",
77
+ "model.visual.blocks.7.attn.qkv",
78
+ "model.visual.blocks.7.attn.proj",
79
+ "model.visual.blocks.7.mlp.linear_fc1",
80
+ "model.visual.blocks.7.mlp.linear_fc2",
81
+ "model.visual.blocks.8.attn.qkv",
82
+ "model.visual.blocks.8.attn.proj",
83
+ "model.visual.blocks.8.mlp.linear_fc1",
84
+ "model.visual.blocks.8.mlp.linear_fc2",
85
+ "model.visual.blocks.9.attn.qkv",
86
+ "model.visual.blocks.9.attn.proj",
87
+ "model.visual.blocks.9.mlp.linear_fc1",
88
+ "model.visual.blocks.9.mlp.linear_fc2",
89
+ "model.visual.blocks.10.attn.qkv",
90
+ "model.visual.blocks.10.attn.proj",
91
+ "model.visual.blocks.10.mlp.linear_fc1",
92
+ "model.visual.blocks.10.mlp.linear_fc2",
93
+ "model.visual.blocks.11.attn.qkv",
94
+ "model.visual.blocks.11.attn.proj",
95
+ "model.visual.blocks.11.mlp.linear_fc1",
96
+ "model.visual.blocks.11.mlp.linear_fc2",
97
+ "model.visual.blocks.12.attn.qkv",
98
+ "model.visual.blocks.12.attn.proj",
99
+ "model.visual.blocks.12.mlp.linear_fc1",
100
+ "model.visual.blocks.12.mlp.linear_fc2",
101
+ "model.visual.blocks.13.attn.qkv",
102
+ "model.visual.blocks.13.attn.proj",
103
+ "model.visual.blocks.13.mlp.linear_fc1",
104
+ "model.visual.blocks.13.mlp.linear_fc2",
105
+ "model.visual.blocks.14.attn.qkv",
106
+ "model.visual.blocks.14.attn.proj",
107
+ "model.visual.blocks.14.mlp.linear_fc1",
108
+ "model.visual.blocks.14.mlp.linear_fc2",
109
+ "model.visual.blocks.15.attn.qkv",
110
+ "model.visual.blocks.15.attn.proj",
111
+ "model.visual.blocks.15.mlp.linear_fc1",
112
+ "model.visual.blocks.15.mlp.linear_fc2",
113
+ "model.visual.blocks.16.attn.qkv",
114
+ "model.visual.blocks.16.attn.proj",
115
+ "model.visual.blocks.16.mlp.linear_fc1",
116
+ "model.visual.blocks.16.mlp.linear_fc2",
117
+ "model.visual.blocks.17.attn.qkv",
118
+ "model.visual.blocks.17.attn.proj",
119
+ "model.visual.blocks.17.mlp.linear_fc1",
120
+ "model.visual.blocks.17.mlp.linear_fc2",
121
+ "model.visual.blocks.18.attn.qkv",
122
+ "model.visual.blocks.18.attn.proj",
123
+ "model.visual.blocks.18.mlp.linear_fc1",
124
+ "model.visual.blocks.18.mlp.linear_fc2",
125
+ "model.visual.blocks.19.attn.qkv",
126
+ "model.visual.blocks.19.attn.proj",
127
+ "model.visual.blocks.19.mlp.linear_fc1",
128
+ "model.visual.blocks.19.mlp.linear_fc2",
129
+ "model.visual.blocks.20.attn.qkv",
130
+ "model.visual.blocks.20.attn.proj",
131
+ "model.visual.blocks.20.mlp.linear_fc1",
132
+ "model.visual.blocks.20.mlp.linear_fc2",
133
+ "model.visual.blocks.21.attn.qkv",
134
+ "model.visual.blocks.21.attn.proj",
135
+ "model.visual.blocks.21.mlp.linear_fc1",
136
+ "model.visual.blocks.21.mlp.linear_fc2",
137
+ "model.visual.blocks.22.attn.qkv",
138
+ "model.visual.blocks.22.attn.proj",
139
+ "model.visual.blocks.22.mlp.linear_fc1",
140
+ "model.visual.blocks.22.mlp.linear_fc2",
141
+ "model.visual.blocks.23.attn.qkv",
142
+ "model.visual.blocks.23.attn.proj",
143
+ "model.visual.blocks.23.mlp.linear_fc1",
144
+ "model.visual.blocks.23.mlp.linear_fc2",
145
+ "model.visual.blocks.24.attn.qkv",
146
+ "model.visual.blocks.24.attn.proj",
147
+ "model.visual.blocks.24.mlp.linear_fc1",
148
+ "model.visual.blocks.24.mlp.linear_fc2",
149
+ "model.visual.blocks.25.attn.qkv",
150
+ "model.visual.blocks.25.attn.proj",
151
+ "model.visual.blocks.25.mlp.linear_fc1",
152
+ "model.visual.blocks.25.mlp.linear_fc2",
153
+ "model.visual.blocks.26.attn.qkv",
154
+ "model.visual.blocks.26.attn.proj",
155
+ "model.visual.blocks.26.mlp.linear_fc1",
156
+ "model.visual.blocks.26.mlp.linear_fc2",
157
+ "model.visual.merger.linear_fc1",
158
+ "model.visual.merger.linear_fc2",
159
+ "model.visual.deepstack_merger_list.0.linear_fc1",
160
+ "model.visual.deepstack_merger_list.0.linear_fc2",
161
+ "model.visual.deepstack_merger_list.1.linear_fc1",
162
+ "model.visual.deepstack_merger_list.1.linear_fc2",
163
+ "model.visual.deepstack_merger_list.2.linear_fc1",
164
+ "model.visual.deepstack_merger_list.2.linear_fc2",
165
  "lm_head"
166
  ],
167
  "kv_cache_scheme": null,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:377b1046a002e429b8610b3e59ffff9cc4b4b6745b125c326720e3bdccaa5ceb
3
- size 4992901880
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d20daeaa7471227ae3628d0a8a59c66dd718bc6310c1a6388eb0da9a175df2b8
3
+ size 4999482728
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:db0e497163c6207862129267630fb7f887b274a1d454dad3cba5fd1a1ee9377d
3
- size 1735503728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44da7d89e1adc347272621e16c84e8a5ed151fe220021df9cd6ac5911d5b0cba
3
+ size 2550415400
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
 
recipe.yaml CHANGED
@@ -2,6 +2,6 @@ default_stage:
2
  default_modifiers:
3
  QuantizationModifier:
4
  targets: [Linear]
5
- ignore: [lm_head, 're:visual.*', 're:.*vision_tower.*', 're:.*video_tower.*', 're:.*audio_tower.*',
6
  're:.*multi_modal_projector.*']
7
  scheme: NVFP4
 
2
  default_modifiers:
3
  QuantizationModifier:
4
  targets: [Linear]
5
+ ignore: [lm_head, 're:.*visual.*', 're:.*vision_tower.*', 're:.*video_tower.*', 're:.*audio_tower.*',
6
  're:.*multi_modal_projector.*']
7
  scheme: NVFP4
tokenizer.json CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f1298e298f2fe0059aba46f037697a339ccba45a1908780ce8ca14b45582f23
3
- size 11422753
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aeb13307a71acd8fe81861d94ad54ab689df773318809eed3cbe794b4492dae4
3
+ size 11422654