woodchen7 committed on
Commit
b7e5cb7
·
verified ·
1 Parent(s): 3925f14

Upload config.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. config.json +189 -0
config.json ADDED
@@ -0,0 +1,189 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen3VLForConditionalGeneration"
4
+ ],
5
+ "dtype": "bfloat16",
6
+ "image_token_id": 151655,
7
+ "model_type": "qwen3_vl",
8
+ "quantization_config": {
9
+ "activation_scheme": "static",
10
+ "ignored_layers": [
11
+ "model.visual.patch_embed.proj",
12
+ "model.lm_head",
13
+ "model.language_model.embed_tokens",
14
+ "model.visual.blocks.0.attn.qkv",
15
+ "model.visual.blocks.0.attn.proj",
16
+ "model.visual.blocks.0.mlp.linear_fc1",
17
+ "model.visual.blocks.0.mlp.linear_fc2",
18
+ "model.visual.blocks.1.attn.qkv",
19
+ "model.visual.blocks.1.attn.proj",
20
+ "model.visual.blocks.1.mlp.linear_fc1",
21
+ "model.visual.blocks.1.mlp.linear_fc2",
22
+ "model.visual.blocks.2.attn.qkv",
23
+ "model.visual.blocks.2.attn.proj",
24
+ "model.visual.blocks.2.mlp.linear_fc1",
25
+ "model.visual.blocks.2.mlp.linear_fc2",
26
+ "model.visual.blocks.3.attn.qkv",
27
+ "model.visual.blocks.3.attn.proj",
28
+ "model.visual.blocks.3.mlp.linear_fc1",
29
+ "model.visual.blocks.3.mlp.linear_fc2",
30
+ "model.visual.blocks.4.attn.qkv",
31
+ "model.visual.blocks.4.attn.proj",
32
+ "model.visual.blocks.4.mlp.linear_fc1",
33
+ "model.visual.blocks.4.mlp.linear_fc2",
34
+ "model.visual.blocks.5.attn.qkv",
35
+ "model.visual.blocks.5.attn.proj",
36
+ "model.visual.blocks.5.mlp.linear_fc1",
37
+ "model.visual.blocks.5.mlp.linear_fc2",
38
+ "model.visual.blocks.6.attn.qkv",
39
+ "model.visual.blocks.6.attn.proj",
40
+ "model.visual.blocks.6.mlp.linear_fc1",
41
+ "model.visual.blocks.6.mlp.linear_fc2",
42
+ "model.visual.blocks.7.attn.qkv",
43
+ "model.visual.blocks.7.attn.proj",
44
+ "model.visual.blocks.7.mlp.linear_fc1",
45
+ "model.visual.blocks.7.mlp.linear_fc2",
46
+ "model.visual.blocks.8.attn.qkv",
47
+ "model.visual.blocks.8.attn.proj",
48
+ "model.visual.blocks.8.mlp.linear_fc1",
49
+ "model.visual.blocks.8.mlp.linear_fc2",
50
+ "model.visual.blocks.9.attn.qkv",
51
+ "model.visual.blocks.9.attn.proj",
52
+ "model.visual.blocks.9.mlp.linear_fc1",
53
+ "model.visual.blocks.9.mlp.linear_fc2",
54
+ "model.visual.blocks.10.attn.qkv",
55
+ "model.visual.blocks.10.attn.proj",
56
+ "model.visual.blocks.10.mlp.linear_fc1",
57
+ "model.visual.blocks.10.mlp.linear_fc2",
58
+ "model.visual.blocks.11.attn.qkv",
59
+ "model.visual.blocks.11.attn.proj",
60
+ "model.visual.blocks.11.mlp.linear_fc1",
61
+ "model.visual.blocks.11.mlp.linear_fc2",
62
+ "model.visual.blocks.12.attn.qkv",
63
+ "model.visual.blocks.12.attn.proj",
64
+ "model.visual.blocks.12.mlp.linear_fc1",
65
+ "model.visual.blocks.12.mlp.linear_fc2",
66
+ "model.visual.blocks.13.attn.qkv",
67
+ "model.visual.blocks.13.attn.proj",
68
+ "model.visual.blocks.13.mlp.linear_fc1",
69
+ "model.visual.blocks.13.mlp.linear_fc2",
70
+ "model.visual.blocks.14.attn.qkv",
71
+ "model.visual.blocks.14.attn.proj",
72
+ "model.visual.blocks.14.mlp.linear_fc1",
73
+ "model.visual.blocks.14.mlp.linear_fc2",
74
+ "model.visual.blocks.15.attn.qkv",
75
+ "model.visual.blocks.15.attn.proj",
76
+ "model.visual.blocks.15.mlp.linear_fc1",
77
+ "model.visual.blocks.15.mlp.linear_fc2",
78
+ "model.visual.blocks.16.attn.qkv",
79
+ "model.visual.blocks.16.attn.proj",
80
+ "model.visual.blocks.16.mlp.linear_fc1",
81
+ "model.visual.blocks.16.mlp.linear_fc2",
82
+ "model.visual.blocks.17.attn.qkv",
83
+ "model.visual.blocks.17.attn.proj",
84
+ "model.visual.blocks.17.mlp.linear_fc1",
85
+ "model.visual.blocks.17.mlp.linear_fc2",
86
+ "model.visual.blocks.18.attn.qkv",
87
+ "model.visual.blocks.18.attn.proj",
88
+ "model.visual.blocks.18.mlp.linear_fc1",
89
+ "model.visual.blocks.18.mlp.linear_fc2",
90
+ "model.visual.blocks.19.attn.qkv",
91
+ "model.visual.blocks.19.attn.proj",
92
+ "model.visual.blocks.19.mlp.linear_fc1",
93
+ "model.visual.blocks.19.mlp.linear_fc2",
94
+ "model.visual.blocks.20.attn.qkv",
95
+ "model.visual.blocks.20.attn.proj",
96
+ "model.visual.blocks.20.mlp.linear_fc1",
97
+ "model.visual.blocks.20.mlp.linear_fc2",
98
+ "model.visual.blocks.21.attn.qkv",
99
+ "model.visual.blocks.21.attn.proj",
100
+ "model.visual.blocks.21.mlp.linear_fc1",
101
+ "model.visual.blocks.21.mlp.linear_fc2",
102
+ "model.visual.blocks.22.attn.qkv",
103
+ "model.visual.blocks.22.attn.proj",
104
+ "model.visual.blocks.22.mlp.linear_fc1",
105
+ "model.visual.blocks.22.mlp.linear_fc2",
106
+ "model.visual.blocks.23.attn.qkv",
107
+ "model.visual.blocks.23.attn.proj",
108
+ "model.visual.blocks.23.mlp.linear_fc1",
109
+ "model.visual.blocks.23.mlp.linear_fc2",
110
+ "model.visual.blocks.24.attn.qkv",
111
+ "model.visual.blocks.24.attn.proj",
112
+ "model.visual.blocks.24.mlp.linear_fc1",
113
+ "model.visual.blocks.24.mlp.linear_fc2",
114
+ "model.visual.blocks.25.attn.qkv",
115
+ "model.visual.blocks.25.attn.proj",
116
+ "model.visual.blocks.25.mlp.linear_fc1",
117
+ "model.visual.blocks.25.mlp.linear_fc2",
118
+ "model.visual.blocks.26.attn.qkv",
119
+ "model.visual.blocks.26.attn.proj",
120
+ "model.visual.blocks.26.mlp.linear_fc1",
121
+ "model.visual.blocks.26.mlp.linear_fc2",
122
+ "model.visual.merger.linear_fc1",
123
+ "model.visual.merger.linear_fc2",
124
+ "model.visual.deepstack_merger_list.0.linear_fc1",
125
+ "model.visual.deepstack_merger_list.0.linear_fc2",
126
+ "model.visual.deepstack_merger_list.1.linear_fc1",
127
+ "model.visual.deepstack_merger_list.1.linear_fc2",
128
+ "model.visual.deepstack_merger_list.2.linear_fc1",
129
+ "model.visual.deepstack_merger_list.2.linear_fc2",
130
+ "lm_head"
131
+ ],
132
+ "quant_method": "fp8"
133
+ },
134
+ "text_config": {
135
+ "attention_bias": false,
136
+ "attention_dropout": 0.0,
137
+ "bos_token_id": 151643,
138
+ "dtype": "bfloat16",
139
+ "eos_token_id": 151645,
140
+ "head_dim": 128,
141
+ "hidden_act": "silu",
142
+ "hidden_size": 4096,
143
+ "initializer_range": 0.02,
144
+ "intermediate_size": 12288,
145
+ "max_position_embeddings": 262144,
146
+ "model_type": "qwen3_vl_text",
147
+ "num_attention_heads": 32,
148
+ "num_hidden_layers": 36,
149
+ "num_key_value_heads": 8,
150
+ "rms_norm_eps": 1e-06,
151
+ "rope_scaling": {
152
+ "mrope_interleaved": true,
153
+ "mrope_section": [
154
+ 24,
155
+ 20,
156
+ 20
157
+ ],
158
+ "rope_type": "default"
159
+ },
160
+ "rope_theta": 5000000,
161
+ "use_cache": true,
162
+ "vocab_size": 151936
163
+ },
164
+ "tie_word_embeddings": false,
165
+ "transformers_version": "4.57.1",
166
+ "video_token_id": 151656,
167
+ "vision_config": {
168
+ "deepstack_visual_indexes": [
169
+ 8,
170
+ 16,
171
+ 24
172
+ ],
173
+ "depth": 27,
174
+ "hidden_act": "gelu_pytorch_tanh",
175
+ "hidden_size": 1152,
176
+ "in_channels": 3,
177
+ "initializer_range": 0.02,
178
+ "intermediate_size": 4304,
179
+ "model_type": "qwen3_vl",
180
+ "num_heads": 16,
181
+ "num_position_embeddings": 2304,
182
+ "out_hidden_size": 4096,
183
+ "patch_size": 16,
184
+ "spatial_merge_size": 2,
185
+ "temporal_patch_size": 2
186
+ },
187
+ "vision_end_token_id": 151653,
188
+ "vision_start_token_id": 151652
189
+ }