xiaomoguhzz committed on
Commit
05bf8b4
·
verified ·
1 Parent(s): cf57e25

write-path test marker

Browse files
Files changed (1) hide show
  1. ckpts/_writetest/config.json +264 -0
ckpts/_writetest/config.json ADDED
@@ -0,0 +1,264 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "LlavaQwen3_5ViTForConditionalGeneration"
4
+ ],
5
+ "bos_token_id": null,
6
+ "dtype": "bfloat16",
7
+ "eos_token_id": 151645,
8
+ "hidden_size": 2560,
9
+ "image_grid_pinpoints": [
10
+ [
11
+ 384,
12
+ 384
13
+ ],
14
+ [
15
+ 384,
16
+ 768
17
+ ],
18
+ [
19
+ 384,
20
+ 1152
21
+ ],
22
+ [
23
+ 384,
24
+ 1536
25
+ ],
26
+ [
27
+ 384,
28
+ 1920
29
+ ],
30
+ [
31
+ 384,
32
+ 2304
33
+ ],
34
+ [
35
+ 768,
36
+ 384
37
+ ],
38
+ [
39
+ 768,
40
+ 768
41
+ ],
42
+ [
43
+ 768,
44
+ 1152
45
+ ],
46
+ [
47
+ 768,
48
+ 1536
49
+ ],
50
+ [
51
+ 768,
52
+ 1920
53
+ ],
54
+ [
55
+ 768,
56
+ 2304
57
+ ],
58
+ [
59
+ 1152,
60
+ 384
61
+ ],
62
+ [
63
+ 1152,
64
+ 768
65
+ ],
66
+ [
67
+ 1152,
68
+ 1152
69
+ ],
70
+ [
71
+ 1152,
72
+ 1536
73
+ ],
74
+ [
75
+ 1152,
76
+ 1920
77
+ ],
78
+ [
79
+ 1152,
80
+ 2304
81
+ ],
82
+ [
83
+ 1536,
84
+ 384
85
+ ],
86
+ [
87
+ 1536,
88
+ 768
89
+ ],
90
+ [
91
+ 1536,
92
+ 1152
93
+ ],
94
+ [
95
+ 1536,
96
+ 1536
97
+ ],
98
+ [
99
+ 1536,
100
+ 1920
101
+ ],
102
+ [
103
+ 1536,
104
+ 2304
105
+ ],
106
+ [
107
+ 1920,
108
+ 384
109
+ ],
110
+ [
111
+ 1920,
112
+ 768
113
+ ],
114
+ [
115
+ 1920,
116
+ 1152
117
+ ],
118
+ [
119
+ 1920,
120
+ 1536
121
+ ],
122
+ [
123
+ 1920,
124
+ 1920
125
+ ],
126
+ [
127
+ 1920,
128
+ 2304
129
+ ],
130
+ [
131
+ 2304,
132
+ 384
133
+ ],
134
+ [
135
+ 2304,
136
+ 768
137
+ ],
138
+ [
139
+ 2304,
140
+ 1152
141
+ ],
142
+ [
143
+ 2304,
144
+ 1536
145
+ ],
146
+ [
147
+ 2304,
148
+ 1920
149
+ ],
150
+ [
151
+ 2304,
152
+ 2304
153
+ ]
154
+ ],
155
+ "image_token_index": 151669,
156
+ "model_type": "llava_qwen3_5vit_qwen3",
157
+ "multimodal_projector_bias": true,
158
+ "pad_token_id": 151643,
159
+ "projector_hidden_act": "gelu",
160
+ "text_config": {
161
+ "_name_or_path": "/share/m2v_intern_v3/wangjunjie09/model_cache/huggingface/Qwen/Qwen3-4B-Instruct-2507",
162
+ "architectures": [
163
+ "Qwen3ForCausalLM"
164
+ ],
165
+ "attention_bias": false,
166
+ "attention_dropout": 0.0,
167
+ "bos_token_id": 151643,
168
+ "dtype": "bfloat16",
169
+ "eos_token_id": 151645,
170
+ "head_dim": 128,
171
+ "hidden_act": "silu",
172
+ "hidden_size": 2560,
173
+ "initializer_range": 0.02,
174
+ "intermediate_size": 9728,
175
+ "layer_types": [
176
+ "full_attention",
177
+ "full_attention",
178
+ "full_attention",
179
+ "full_attention",
180
+ "full_attention",
181
+ "full_attention",
182
+ "full_attention",
183
+ "full_attention",
184
+ "full_attention",
185
+ "full_attention",
186
+ "full_attention",
187
+ "full_attention",
188
+ "full_attention",
189
+ "full_attention",
190
+ "full_attention",
191
+ "full_attention",
192
+ "full_attention",
193
+ "full_attention",
194
+ "full_attention",
195
+ "full_attention",
196
+ "full_attention",
197
+ "full_attention",
198
+ "full_attention",
199
+ "full_attention",
200
+ "full_attention",
201
+ "full_attention",
202
+ "full_attention",
203
+ "full_attention",
204
+ "full_attention",
205
+ "full_attention",
206
+ "full_attention",
207
+ "full_attention",
208
+ "full_attention",
209
+ "full_attention",
210
+ "full_attention",
211
+ "full_attention"
212
+ ],
213
+ "max_position_embeddings": 262144,
214
+ "max_window_layers": 36,
215
+ "model_type": "qwen3",
216
+ "num_attention_heads": 32,
217
+ "num_hidden_layers": 36,
218
+ "num_key_value_heads": 8,
219
+ "pad_token_id": 151643,
220
+ "rms_norm_eps": 1e-06,
221
+ "rope_parameters": {
222
+ "rope_theta": 5000000,
223
+ "rope_type": "default"
224
+ },
225
+ "sliding_window": null,
226
+ "tie_word_embeddings": true,
227
+ "use_cache": false,
228
+ "use_sliding_window": false,
229
+ "vocab_size": 151936
230
+ },
231
+ "tie_word_embeddings": true,
232
+ "transformers_version": "5.5.4",
233
+ "use_cache": false,
234
+ "video_token_index": 151670,
235
+ "vision_aspect_ratio": "anyres_max_9",
236
+ "vision_config": {
237
+ "deepstack_visual_indexes": [],
238
+ "depth": 24,
239
+ "dtype": "bfloat16",
240
+ "hidden_act": "gelu_pytorch_tanh",
241
+ "hidden_size": 1024,
242
+ "image_size": 384,
243
+ "in_channels": 3,
244
+ "initializer_range": 0.02,
245
+ "intermediate_size": 4096,
246
+ "model_type": "qwen3_5",
247
+ "num_heads": 16,
248
+ "num_position_embeddings": 2304,
249
+ "out_hidden_size": 2048,
250
+ "patch_size": 16,
251
+ "spatial_merge_size": 2,
252
+ "temporal_patch_size": 2
253
+ },
254
+ "vision_feature_layer": -1,
255
+ "vision_feature_select_strategy": "full",
256
+ "vit_register_ring": 0,
257
+ "vit_skip_last_blocks": 0,
258
+ "auto_map": {
259
+ "AutoConfig": "modeling_qwen3_5vit_qwen3.LlavaQwen3_5ViTConfig",
260
+ "AutoModel": "modeling_qwen3_5vit_qwen3.LlavaQwen3_5ViTForConditionalGeneration",
261
+ "AutoModelForCausalLM": "modeling_qwen3_5vit_qwen3.LlavaQwen3_5ViTForConditionalGeneration",
262
+ "AutoModelForImageTextToText": "modeling_qwen3_5vit_qwen3.LlavaQwen3_5ViTForConditionalGeneration"
263
+ }
264
+ }