TechCarbasa committed on
Commit
1f8bdb7
·
verified ·
1 Parent(s): b1b082b

Upload /workspace/ComfyUI/models/LLM/Qwen-VL/Qwen3-VL-4B-Instruct-FP8/config.json with huggingface_hub

Browse files
workspace/ComfyUI/models/LLM/Qwen-VL/Qwen3-VL-4B-Instruct-FP8/config.json ADDED
@@ -0,0 +1,323 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3VLForConditionalGeneration"
4
+ ],
5
+ "image_token_id": 151655,
6
+ "model_type": "qwen3_vl",
7
+ "text_config": {
8
+ "attention_bias": false,
9
+ "attention_dropout": 0.0,
10
+ "bos_token_id": 151643,
11
+ "dtype": "bfloat16",
12
+ "eos_token_id": 151645,
13
+ "head_dim": 128,
14
+ "hidden_act": "silu",
15
+ "hidden_size": 2560,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 9728,
18
+ "max_position_embeddings": 262144,
19
+ "model_type": "qwen3_vl_text",
20
+ "num_attention_heads": 32,
21
+ "num_hidden_layers": 36,
22
+ "num_key_value_heads": 8,
23
+ "rms_norm_eps": 1e-06,
24
+ "rope_scaling": {
25
+ "mrope_interleaved": true,
26
+ "mrope_section": [
27
+ 24,
28
+ 20,
29
+ 20
30
+ ],
31
+ "rope_type": "default"
32
+ },
33
+ "rope_theta": 5000000,
34
+ "tie_word_embeddings": true,
35
+ "use_cache": true,
36
+ "vocab_size": 151936
37
+ },
38
+ "tie_word_embeddings": false,
39
+ "transformers_version": "4.57.0.dev0",
40
+ "video_token_id": 151656,
41
+ "vision_config": {
42
+ "deepstack_visual_indexes": [
43
+ 5,
44
+ 11,
45
+ 17
46
+ ],
47
+ "depth": 24,
48
+ "hidden_act": "gelu_pytorch_tanh",
49
+ "hidden_size": 1024,
50
+ "in_channels": 3,
51
+ "initializer_range": 0.02,
52
+ "intermediate_size": 4096,
53
+ "model_type": "qwen3_vl",
54
+ "num_heads": 16,
55
+ "num_position_embeddings": 2304,
56
+ "out_hidden_size": 2560,
57
+ "patch_size": 16,
58
+ "spatial_merge_size": 2,
59
+ "temporal_patch_size": 2
60
+ },
61
+ "vision_end_token_id": 151653,
62
+ "vision_start_token_id": 151652,
63
+ "quantization_config": {
64
+ "activation_scheme": "dynamic",
65
+ "fmt": "e4m3",
66
+ "quant_method": "fp8",
67
+ "ignored_layers": [
68
+ "lm_head",
69
+ "model.visual.merger.linear_fc1",
70
+ "model.visual.merger.linear_fc2",
71
+ "model.visual.merger.norm",
72
+ "model.visual.patch_embed.proj",
73
+ "model.visual.pos_embed",
74
+ "visual.merger.linear_fc1",
75
+ "visual.merger.linear_fc2",
76
+ "visual.merger.norm",
77
+ "visual.patch_embed.proj",
78
+ "visual.pos_embed",
79
+ "model.visual.blocks.0.attn.proj",
80
+ "model.visual.blocks.0.attn.qkv",
81
+ "model.visual.blocks.0.mlp.linear_fc1",
82
+ "model.visual.blocks.0.mlp.linear_fc2",
83
+ "visual.blocks.0.attn.proj",
84
+ "visual.blocks.0.attn.qkv_proj",
85
+ "visual.blocks.0.mlp.linear_fc1",
86
+ "visual.blocks.0.mlp.linear_fc2",
87
+ "model.visual.blocks.1.attn.proj",
88
+ "model.visual.blocks.1.attn.qkv",
89
+ "model.visual.blocks.1.mlp.linear_fc1",
90
+ "model.visual.blocks.1.mlp.linear_fc2",
91
+ "visual.blocks.1.attn.proj",
92
+ "visual.blocks.1.attn.qkv_proj",
93
+ "visual.blocks.1.mlp.linear_fc1",
94
+ "visual.blocks.1.mlp.linear_fc2",
95
+ "model.visual.blocks.2.attn.proj",
96
+ "model.visual.blocks.2.attn.qkv",
97
+ "model.visual.blocks.2.mlp.linear_fc1",
98
+ "model.visual.blocks.2.mlp.linear_fc2",
99
+ "visual.blocks.2.attn.proj",
100
+ "visual.blocks.2.attn.qkv_proj",
101
+ "visual.blocks.2.mlp.linear_fc1",
102
+ "visual.blocks.2.mlp.linear_fc2",
103
+ "model.visual.blocks.3.attn.proj",
104
+ "model.visual.blocks.3.attn.qkv",
105
+ "model.visual.blocks.3.mlp.linear_fc1",
106
+ "model.visual.blocks.3.mlp.linear_fc2",
107
+ "visual.blocks.3.attn.proj",
108
+ "visual.blocks.3.attn.qkv_proj",
109
+ "visual.blocks.3.mlp.linear_fc1",
110
+ "visual.blocks.3.mlp.linear_fc2",
111
+ "model.visual.blocks.4.attn.proj",
112
+ "model.visual.blocks.4.attn.qkv",
113
+ "model.visual.blocks.4.mlp.linear_fc1",
114
+ "model.visual.blocks.4.mlp.linear_fc2",
115
+ "visual.blocks.4.attn.proj",
116
+ "visual.blocks.4.attn.qkv_proj",
117
+ "visual.blocks.4.mlp.linear_fc1",
118
+ "visual.blocks.4.mlp.linear_fc2",
119
+ "model.visual.blocks.5.attn.proj",
120
+ "model.visual.blocks.5.attn.qkv",
121
+ "model.visual.blocks.5.mlp.linear_fc1",
122
+ "model.visual.blocks.5.mlp.linear_fc2",
123
+ "visual.blocks.5.attn.proj",
124
+ "visual.blocks.5.attn.qkv_proj",
125
+ "visual.blocks.5.mlp.linear_fc1",
126
+ "visual.blocks.5.mlp.linear_fc2",
127
+ "model.visual.blocks.6.attn.proj",
128
+ "model.visual.blocks.6.attn.qkv",
129
+ "model.visual.blocks.6.mlp.linear_fc1",
130
+ "model.visual.blocks.6.mlp.linear_fc2",
131
+ "visual.blocks.6.attn.proj",
132
+ "visual.blocks.6.attn.qkv_proj",
133
+ "visual.blocks.6.mlp.linear_fc1",
134
+ "visual.blocks.6.mlp.linear_fc2",
135
+ "model.visual.blocks.7.attn.proj",
136
+ "model.visual.blocks.7.attn.qkv",
137
+ "model.visual.blocks.7.mlp.linear_fc1",
138
+ "model.visual.blocks.7.mlp.linear_fc2",
139
+ "visual.blocks.7.attn.proj",
140
+ "visual.blocks.7.attn.qkv_proj",
141
+ "visual.blocks.7.mlp.linear_fc1",
142
+ "visual.blocks.7.mlp.linear_fc2",
143
+ "model.visual.blocks.8.attn.proj",
144
+ "model.visual.blocks.8.attn.qkv",
145
+ "model.visual.blocks.8.mlp.linear_fc1",
146
+ "model.visual.blocks.8.mlp.linear_fc2",
147
+ "visual.blocks.8.attn.proj",
148
+ "visual.blocks.8.attn.qkv_proj",
149
+ "visual.blocks.8.mlp.linear_fc1",
150
+ "visual.blocks.8.mlp.linear_fc2",
151
+ "model.visual.blocks.9.attn.proj",
152
+ "model.visual.blocks.9.attn.qkv",
153
+ "model.visual.blocks.9.mlp.linear_fc1",
154
+ "model.visual.blocks.9.mlp.linear_fc2",
155
+ "visual.blocks.9.attn.proj",
156
+ "visual.blocks.9.attn.qkv_proj",
157
+ "visual.blocks.9.mlp.linear_fc1",
158
+ "visual.blocks.9.mlp.linear_fc2",
159
+ "model.visual.blocks.10.attn.proj",
160
+ "model.visual.blocks.10.attn.qkv",
161
+ "model.visual.blocks.10.mlp.linear_fc1",
162
+ "model.visual.blocks.10.mlp.linear_fc2",
163
+ "visual.blocks.10.attn.proj",
164
+ "visual.blocks.10.attn.qkv_proj",
165
+ "visual.blocks.10.mlp.linear_fc1",
166
+ "visual.blocks.10.mlp.linear_fc2",
167
+ "model.visual.blocks.11.attn.proj",
168
+ "model.visual.blocks.11.attn.qkv",
169
+ "model.visual.blocks.11.mlp.linear_fc1",
170
+ "model.visual.blocks.11.mlp.linear_fc2",
171
+ "visual.blocks.11.attn.proj",
172
+ "visual.blocks.11.attn.qkv_proj",
173
+ "visual.blocks.11.mlp.linear_fc1",
174
+ "visual.blocks.11.mlp.linear_fc2",
175
+ "model.visual.blocks.12.attn.proj",
176
+ "model.visual.blocks.12.attn.qkv",
177
+ "model.visual.blocks.12.mlp.linear_fc1",
178
+ "model.visual.blocks.12.mlp.linear_fc2",
179
+ "visual.blocks.12.attn.proj",
180
+ "visual.blocks.12.attn.qkv_proj",
181
+ "visual.blocks.12.mlp.linear_fc1",
182
+ "visual.blocks.12.mlp.linear_fc2",
183
+ "model.visual.blocks.13.attn.proj",
184
+ "model.visual.blocks.13.attn.qkv",
185
+ "model.visual.blocks.13.mlp.linear_fc1",
186
+ "model.visual.blocks.13.mlp.linear_fc2",
187
+ "visual.blocks.13.attn.proj",
188
+ "visual.blocks.13.attn.qkv_proj",
189
+ "visual.blocks.13.mlp.linear_fc1",
190
+ "visual.blocks.13.mlp.linear_fc2",
191
+ "model.visual.blocks.14.attn.proj",
192
+ "model.visual.blocks.14.attn.qkv",
193
+ "model.visual.blocks.14.mlp.linear_fc1",
194
+ "model.visual.blocks.14.mlp.linear_fc2",
195
+ "visual.blocks.14.attn.proj",
196
+ "visual.blocks.14.attn.qkv_proj",
197
+ "visual.blocks.14.mlp.linear_fc1",
198
+ "visual.blocks.14.mlp.linear_fc2",
199
+ "model.visual.blocks.15.attn.proj",
200
+ "model.visual.blocks.15.attn.qkv",
201
+ "model.visual.blocks.15.mlp.linear_fc1",
202
+ "model.visual.blocks.15.mlp.linear_fc2",
203
+ "visual.blocks.15.attn.proj",
204
+ "visual.blocks.15.attn.qkv_proj",
205
+ "visual.blocks.15.mlp.linear_fc1",
206
+ "visual.blocks.15.mlp.linear_fc2",
207
+ "model.visual.blocks.16.attn.proj",
208
+ "model.visual.blocks.16.attn.qkv",
209
+ "model.visual.blocks.16.mlp.linear_fc1",
210
+ "model.visual.blocks.16.mlp.linear_fc2",
211
+ "visual.blocks.16.attn.proj",
212
+ "visual.blocks.16.attn.qkv_proj",
213
+ "visual.blocks.16.mlp.linear_fc1",
214
+ "visual.blocks.16.mlp.linear_fc2",
215
+ "model.visual.blocks.17.attn.proj",
216
+ "model.visual.blocks.17.attn.qkv",
217
+ "model.visual.blocks.17.mlp.linear_fc1",
218
+ "model.visual.blocks.17.mlp.linear_fc2",
219
+ "visual.blocks.17.attn.proj",
220
+ "visual.blocks.17.attn.qkv_proj",
221
+ "visual.blocks.17.mlp.linear_fc1",
222
+ "visual.blocks.17.mlp.linear_fc2",
223
+ "model.visual.blocks.18.attn.proj",
224
+ "model.visual.blocks.18.attn.qkv",
225
+ "model.visual.blocks.18.mlp.linear_fc1",
226
+ "model.visual.blocks.18.mlp.linear_fc2",
227
+ "visual.blocks.18.attn.proj",
228
+ "visual.blocks.18.attn.qkv_proj",
229
+ "visual.blocks.18.mlp.linear_fc1",
230
+ "visual.blocks.18.mlp.linear_fc2",
231
+ "model.visual.blocks.19.attn.proj",
232
+ "model.visual.blocks.19.attn.qkv",
233
+ "model.visual.blocks.19.mlp.linear_fc1",
234
+ "model.visual.blocks.19.mlp.linear_fc2",
235
+ "visual.blocks.19.attn.proj",
236
+ "visual.blocks.19.attn.qkv_proj",
237
+ "visual.blocks.19.mlp.linear_fc1",
238
+ "visual.blocks.19.mlp.linear_fc2",
239
+ "model.visual.blocks.20.attn.proj",
240
+ "model.visual.blocks.20.attn.qkv",
241
+ "model.visual.blocks.20.mlp.linear_fc1",
242
+ "model.visual.blocks.20.mlp.linear_fc2",
243
+ "visual.blocks.20.attn.proj",
244
+ "visual.blocks.20.attn.qkv_proj",
245
+ "visual.blocks.20.mlp.linear_fc1",
246
+ "visual.blocks.20.mlp.linear_fc2",
247
+ "model.visual.blocks.21.attn.proj",
248
+ "model.visual.blocks.21.attn.qkv",
249
+ "model.visual.blocks.21.mlp.linear_fc1",
250
+ "model.visual.blocks.21.mlp.linear_fc2",
251
+ "visual.blocks.21.attn.proj",
252
+ "visual.blocks.21.attn.qkv_proj",
253
+ "visual.blocks.21.mlp.linear_fc1",
254
+ "visual.blocks.21.mlp.linear_fc2",
255
+ "model.visual.blocks.22.attn.proj",
256
+ "model.visual.blocks.22.attn.qkv",
257
+ "model.visual.blocks.22.mlp.linear_fc1",
258
+ "model.visual.blocks.22.mlp.linear_fc2",
259
+ "visual.blocks.22.attn.proj",
260
+ "visual.blocks.22.attn.qkv_proj",
261
+ "visual.blocks.22.mlp.linear_fc1",
262
+ "visual.blocks.22.mlp.linear_fc2",
263
+ "model.visual.blocks.23.attn.proj",
264
+ "model.visual.blocks.23.attn.qkv",
265
+ "model.visual.blocks.23.mlp.linear_fc1",
266
+ "model.visual.blocks.23.mlp.linear_fc2",
267
+ "visual.blocks.23.attn.proj",
268
+ "visual.blocks.23.attn.qkv_proj",
269
+ "visual.blocks.23.mlp.linear_fc1",
270
+ "visual.blocks.23.mlp.linear_fc2",
271
+ "model.visual.blocks.24.attn.proj",
272
+ "model.visual.blocks.24.attn.qkv",
273
+ "model.visual.blocks.24.mlp.linear_fc1",
274
+ "model.visual.blocks.24.mlp.linear_fc2",
275
+ "visual.blocks.24.attn.proj",
276
+ "visual.blocks.24.attn.qkv_proj",
277
+ "visual.blocks.24.mlp.linear_fc1",
278
+ "visual.blocks.24.mlp.linear_fc2",
279
+ "model.visual.blocks.25.attn.proj",
280
+ "model.visual.blocks.25.attn.qkv",
281
+ "model.visual.blocks.25.mlp.linear_fc1",
282
+ "model.visual.blocks.25.mlp.linear_fc2",
283
+ "visual.blocks.25.attn.proj",
284
+ "visual.blocks.25.attn.qkv_proj",
285
+ "visual.blocks.25.mlp.linear_fc1",
286
+ "visual.blocks.25.mlp.linear_fc2",
287
+ "model.visual.blocks.26.attn.proj",
288
+ "model.visual.blocks.26.attn.qkv",
289
+ "model.visual.blocks.26.mlp.linear_fc1",
290
+ "model.visual.blocks.26.mlp.linear_fc2",
291
+ "visual.blocks.26.attn.proj",
292
+ "visual.blocks.26.attn.qkv_proj",
293
+ "visual.blocks.26.mlp.linear_fc1",
294
+ "visual.blocks.26.mlp.linear_fc2",
295
+ "model.visual.deepstack_merger_list.0.linear_fc1",
296
+ "model.visual.deepstack_merger_list.0.linear_fc2",
297
+ "model.visual.deepstack_merger_list.0.norm",
298
+ "visual.deepstack_merger_list.0.linear_fc1",
299
+ "visual.deepstack_merger_list.0.linear_fc2",
300
+ "visual.deepstack_merger_list.0.norm",
301
+ "model.visual.deepstack_merger_list.1.linear_fc1",
302
+ "model.visual.deepstack_merger_list.1.linear_fc2",
303
+ "model.visual.deepstack_merger_list.1.norm",
304
+ "visual.deepstack_merger_list.1.linear_fc1",
305
+ "visual.deepstack_merger_list.1.linear_fc2",
306
+ "visual.deepstack_merger_list.1.norm",
307
+ "model.visual.deepstack_merger_list.2.linear_fc1",
308
+ "model.visual.deepstack_merger_list.2.linear_fc2",
309
+ "model.visual.deepstack_merger_list.2.norm",
310
+ "visual.deepstack_merger_list.2.linear_fc1",
311
+ "visual.deepstack_merger_list.2.linear_fc2",
312
+ "visual.deepstack_merger_list.2.norm"
313
+ ],
314
+ "modules_to_not_convert":[
315
+ "lm_head",
316
+ "model.visual"
317
+ ],
318
+ "weight_block_size": [
319
+ 128,
320
+ 128
321
+ ]
322
+ }
323
+ }