jAmmm6 committed on
Commit
aee8798
·
verified ·
1 Parent(s): 2bfe942

(Trained with Unsloth)

Browse files
config.json CHANGED
@@ -1,248 +1,198 @@
1
  {
2
- "architectures": [
3
- "Qwen2_5_VLForConditionalGeneration"
4
- ],
5
- "attention_dropout": 0.0,
6
- "eos_token_id": 151645,
7
- "hidden_act": "silu",
8
- "hidden_size": 2048,
9
- "image_token_id": 151655,
10
- "initializer_range": 0.02,
11
- "intermediate_size": 11008,
12
- "max_position_embeddings": 128000,
13
- "max_window_layers": 70,
14
- "model_type": "qwen2_5_vl",
15
- "num_attention_heads": 16,
16
- "num_hidden_layers": 36,
17
- "num_key_value_heads": 2,
18
- "pad_token_id": 151654,
19
- "quantization_config": {
20
- "bnb_4bit_compute_dtype": "float16",
21
- "bnb_4bit_quant_type": "nf4",
22
- "bnb_4bit_use_double_quant": true,
23
- "llm_int8_enable_fp32_cpu_offload": false,
24
- "llm_int8_has_fp16_weight": false,
25
- "llm_int8_skip_modules": null,
26
- "llm_int8_threshold": 6.0,
27
- "load_in_4bit": true,
28
- "load_in_8bit": false,
29
- "quant_method": "bitsandbytes"
30
- },
31
- "rms_norm_eps": 1e-06,
32
- "rope_scaling": {
33
- "mrope_section": [
34
- 16,
35
- 24,
36
- 24
37
- ],
38
- "rope_type": "default",
39
- "type": "default"
40
- },
41
- "rope_theta": 1000000.0,
42
- "sliding_window": 32768,
43
- "text_config": {
44
  "architectures": [
45
- "Qwen2_5_VLForConditionalGeneration"
46
  ],
47
  "attention_dropout": 0.0,
48
  "eos_token_id": 151645,
49
  "hidden_act": "silu",
50
  "hidden_size": 2048,
51
- "image_token_id": null,
52
  "initializer_range": 0.02,
53
  "intermediate_size": 11008,
54
- "layer_types": [
55
- "full_attention",
56
- "full_attention",
57
- "full_attention",
58
- "full_attention",
59
- "full_attention",
60
- "full_attention",
61
- "full_attention",
62
- "full_attention",
63
- "full_attention",
64
- "full_attention",
65
- "full_attention",
66
- "full_attention",
67
- "full_attention",
68
- "full_attention",
69
- "full_attention",
70
- "full_attention",
71
- "full_attention",
72
- "full_attention",
73
- "full_attention",
74
- "full_attention",
75
- "full_attention",
76
- "full_attention",
77
- "full_attention",
78
- "full_attention",
79
- "full_attention",
80
- "full_attention",
81
- "full_attention",
82
- "full_attention",
83
- "full_attention",
84
- "full_attention",
85
- "full_attention",
86
- "full_attention",
87
- "full_attention",
88
- "full_attention",
89
- "full_attention",
90
- "full_attention"
91
- ],
92
  "max_position_embeddings": 128000,
93
  "max_window_layers": 70,
94
- "model_type": "qwen2_5_vl_text",
95
  "num_attention_heads": 16,
96
  "num_hidden_layers": 36,
97
  "num_key_value_heads": 2,
98
  "pad_token_id": 151654,
99
- "quantization_config": {
100
- "_load_in_4bit": true,
101
- "_load_in_8bit": false,
102
- "bnb_4bit_compute_dtype": "bfloat16",
103
- "bnb_4bit_quant_storage": "uint8",
104
- "bnb_4bit_quant_type": "nf4",
105
- "bnb_4bit_use_double_quant": true,
106
- "llm_int8_enable_fp32_cpu_offload": false,
107
- "llm_int8_has_fp16_weight": false,
108
- "llm_int8_skip_modules": [
109
- "lm_head",
110
- "multi_modal_projector",
111
- "merger",
112
- "modality_projection",
113
- "model.layers.5.mlp",
114
- "visual.blocks.25.attn",
115
- "visual.merger.mlp",
116
- "visual.blocks.24.attn",
117
- "visual.blocks.29.attn",
118
- "visual.blocks.30.attn",
119
- "visual.blocks.26.attn",
120
- "visual.blocks.22.attn",
121
- "visual.blocks.31.attn",
122
- "visual.blocks.27.attn",
123
- "model.layers.30.mlp",
124
- "visual.blocks.30.mlp",
125
- "visual.blocks.28.attn",
126
- "visual.blocks.29.mlp",
127
- "visual.blocks.25.mlp",
128
- "visual.blocks.21.attn",
129
- "visual.blocks.18.attn",
130
- "visual.blocks.20.attn",
131
- "visual.blocks.26.mlp",
132
- "visual.blocks.16.attn",
133
- "visual.blocks.31.mlp",
134
- "visual.blocks.28.mlp",
135
- "visual.blocks.27.mlp",
136
- "visual.blocks.24.mlp",
137
- "visual.blocks.19.attn",
138
- "visual.blocks.23.mlp",
139
- "visual.blocks.19.mlp",
140
- "visual.blocks.17.attn",
141
- "visual.blocks.20.mlp",
142
- "visual.blocks.23.attn",
143
- "visual.blocks.13.attn",
144
- "visual.blocks.22.mlp",
145
- "visual.blocks.9.mlp",
146
- "visual.blocks.10.mlp",
147
- "visual.blocks.16.mlp",
148
- "visual.blocks.12.attn",
149
- "visual.blocks.18.mlp",
150
- "visual.blocks.21.mlp",
151
- "visual.blocks.6.mlp",
152
- "model.layers.1.mlp",
153
- "visual.blocks.14.attn",
154
- "visual.blocks.11.mlp",
155
- "visual.blocks.11.attn",
156
- "visual.blocks.9.attn",
157
- "model.layers.2.mlp",
158
- "visual.blocks.12.mlp",
159
- "visual.blocks.10.attn",
160
- "visual.blocks.6.attn",
161
- "visual.blocks.13.mlp",
162
- "visual.blocks.8.mlp",
163
- "visual.blocks.14.mlp",
164
- "visual.blocks.7.mlp",
165
- "visual.blocks.5.attn",
166
- "visual.blocks.8.attn",
167
- "visual.blocks.15.mlp",
168
- "visual.blocks.5.mlp",
169
- "visual.blocks.3.mlp",
170
- "visual.blocks.2.mlp",
171
- "visual.blocks.4.mlp",
172
- "visual.blocks.2.attn",
173
- "visual.blocks.7.attn",
174
- "visual.blocks.1.attn",
175
- "visual.blocks.17.mlp",
176
- "visual.blocks.15.attn",
177
- "visual.blocks.4.attn",
178
- "visual.blocks.1.mlp",
179
- "visual.blocks.0.attn",
180
- "visual.blocks.0.mlp",
181
- "visual.blocks.3.attn",
182
- "visual.blocks.31.mlp.down_proj"
183
- ],
184
- "llm_int8_threshold": 6.0,
185
- "load_in_4bit": true,
186
- "load_in_8bit": false,
187
- "quant_method": "bitsandbytes"
188
- },
189
  "rms_norm_eps": 1e-06,
190
  "rope_scaling": {
191
- "mrope_section": [
192
- 16,
193
- 24,
194
- 24
195
- ],
196
- "rope_type": "default",
197
- "type": "default"
198
  },
199
  "rope_theta": 1000000.0,
200
- "sliding_window": null,
201
- "tie_word_embeddings": true,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
202
  "torch_dtype": "float16",
 
203
  "unsloth_fixed": true,
 
204
  "use_cache": true,
205
  "use_sliding_window": false,
206
- "video_token_id": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
207
  "vision_end_token_id": 151653,
208
  "vision_start_token_id": 151652,
209
  "vision_token_id": 151654,
210
  "vocab_size": 151936
211
- },
212
- "torch_dtype": "float16",
213
- "transformers_version": "4.54.0.dev0",
214
- "unsloth_fixed": true,
215
- "unsloth_version": "2025.6.8",
216
- "use_cache": true,
217
- "use_sliding_window": false,
218
- "video_token_id": 151656,
219
- "vision_config": {
220
- "depth": 32,
221
- "fullatt_block_indexes": [
222
- 7,
223
- 15,
224
- 23,
225
- 31
226
- ],
227
- "hidden_act": "silu",
228
- "hidden_size": 1280,
229
- "in_channels": 3,
230
- "in_chans": 3,
231
- "initializer_range": 0.02,
232
- "intermediate_size": 3420,
233
- "model_type": "qwen2_5_vl",
234
- "num_heads": 16,
235
- "out_hidden_size": 2048,
236
- "patch_size": 14,
237
- "spatial_merge_size": 2,
238
- "spatial_patch_size": 14,
239
- "temporal_patch_size": 2,
240
- "tokens_per_second": 2,
241
- "torch_dtype": "float16",
242
- "window_size": 112
243
- },
244
- "vision_end_token_id": 151653,
245
- "vision_start_token_id": 151652,
246
- "vision_token_id": 151654,
247
- "vocab_size": 151936
248
- }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "architectures": [
3
+ "Qwen2_5_VLForConditionalGeneration"
4
  ],
5
  "attention_dropout": 0.0,
6
  "eos_token_id": 151645,
7
  "hidden_act": "silu",
8
  "hidden_size": 2048,
9
+ "image_token_id": 151655,
10
  "initializer_range": 0.02,
11
  "intermediate_size": 11008,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
12
  "max_position_embeddings": 128000,
13
  "max_window_layers": 70,
14
+ "model_type": "qwen2_5_vl",
15
  "num_attention_heads": 16,
16
  "num_hidden_layers": 36,
17
  "num_key_value_heads": 2,
18
  "pad_token_id": 151654,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "rms_norm_eps": 1e-06,
20
  "rope_scaling": {
21
+ "mrope_section": [
22
+ 16,
23
+ 24,
24
+ 24
25
+ ],
26
+ "rope_type": "default",
27
+ "type": "default"
28
  },
29
  "rope_theta": 1000000.0,
30
+ "sliding_window": 32768,
31
+ "text_config": {
32
+ "architectures": [
33
+ "Qwen2_5_VLForConditionalGeneration"
34
+ ],
35
+ "attention_dropout": 0.0,
36
+ "eos_token_id": 151645,
37
+ "hidden_act": "silu",
38
+ "hidden_size": 2048,
39
+ "image_token_id": null,
40
+ "initializer_range": 0.02,
41
+ "intermediate_size": 11008,
42
+ "max_position_embeddings": 128000,
43
+ "max_window_layers": 70,
44
+ "model_type": "qwen2_5_vl_text",
45
+ "num_attention_heads": 16,
46
+ "num_hidden_layers": 36,
47
+ "num_key_value_heads": 2,
48
+ "pad_token_id": 151654,
49
+ "quantization_config": {
50
+ "_load_in_4bit": true,
51
+ "_load_in_8bit": false,
52
+ "bnb_4bit_compute_dtype": "bfloat16",
53
+ "bnb_4bit_quant_storage": "uint8",
54
+ "bnb_4bit_quant_type": "nf4",
55
+ "bnb_4bit_use_double_quant": true,
56
+ "llm_int8_enable_fp32_cpu_offload": false,
57
+ "llm_int8_has_fp16_weight": false,
58
+ "llm_int8_skip_modules": [
59
+ "lm_head",
60
+ "multi_modal_projector",
61
+ "merger",
62
+ "modality_projection",
63
+ "model.layers.5.mlp",
64
+ "visual.blocks.25.attn",
65
+ "visual.merger.mlp",
66
+ "visual.blocks.24.attn",
67
+ "visual.blocks.29.attn",
68
+ "visual.blocks.30.attn",
69
+ "visual.blocks.26.attn",
70
+ "visual.blocks.22.attn",
71
+ "visual.blocks.31.attn",
72
+ "visual.blocks.27.attn",
73
+ "model.layers.30.mlp",
74
+ "visual.blocks.30.mlp",
75
+ "visual.blocks.28.attn",
76
+ "visual.blocks.29.mlp",
77
+ "visual.blocks.25.mlp",
78
+ "visual.blocks.21.attn",
79
+ "visual.blocks.18.attn",
80
+ "visual.blocks.20.attn",
81
+ "visual.blocks.26.mlp",
82
+ "visual.blocks.16.attn",
83
+ "visual.blocks.31.mlp",
84
+ "visual.blocks.28.mlp",
85
+ "visual.blocks.27.mlp",
86
+ "visual.blocks.24.mlp",
87
+ "visual.blocks.19.attn",
88
+ "visual.blocks.23.mlp",
89
+ "visual.blocks.19.mlp",
90
+ "visual.blocks.17.attn",
91
+ "visual.blocks.20.mlp",
92
+ "visual.blocks.23.attn",
93
+ "visual.blocks.13.attn",
94
+ "visual.blocks.22.mlp",
95
+ "visual.blocks.9.mlp",
96
+ "visual.blocks.10.mlp",
97
+ "visual.blocks.16.mlp",
98
+ "visual.blocks.12.attn",
99
+ "visual.blocks.18.mlp",
100
+ "visual.blocks.21.mlp",
101
+ "visual.blocks.6.mlp",
102
+ "model.layers.1.mlp",
103
+ "visual.blocks.14.attn",
104
+ "visual.blocks.11.mlp",
105
+ "visual.blocks.11.attn",
106
+ "visual.blocks.9.attn",
107
+ "model.layers.2.mlp",
108
+ "visual.blocks.12.mlp",
109
+ "visual.blocks.10.attn",
110
+ "visual.blocks.6.attn",
111
+ "visual.blocks.13.mlp",
112
+ "visual.blocks.8.mlp",
113
+ "visual.blocks.14.mlp",
114
+ "visual.blocks.7.mlp",
115
+ "visual.blocks.5.attn",
116
+ "visual.blocks.8.attn",
117
+ "visual.blocks.15.mlp",
118
+ "visual.blocks.5.mlp",
119
+ "visual.blocks.3.mlp",
120
+ "visual.blocks.2.mlp",
121
+ "visual.blocks.4.mlp",
122
+ "visual.blocks.2.attn",
123
+ "visual.blocks.7.attn",
124
+ "visual.blocks.1.attn",
125
+ "visual.blocks.17.mlp",
126
+ "visual.blocks.15.attn",
127
+ "visual.blocks.4.attn",
128
+ "visual.blocks.1.mlp",
129
+ "visual.blocks.0.attn",
130
+ "visual.blocks.0.mlp",
131
+ "visual.blocks.3.attn",
132
+ "visual.blocks.31.mlp.down_proj"
133
+ ],
134
+ "llm_int8_threshold": 6.0,
135
+ "load_in_4bit": true,
136
+ "load_in_8bit": false,
137
+ "quant_method": "bitsandbytes"
138
+ },
139
+ "rms_norm_eps": 1e-06,
140
+ "rope_scaling": {
141
+ "mrope_section": [
142
+ 16,
143
+ 24,
144
+ 24
145
+ ],
146
+ "rope_type": "default",
147
+ "type": "default"
148
+ },
149
+ "rope_theta": 1000000.0,
150
+ "sliding_window": 32768,
151
+ "tie_word_embeddings": true,
152
+ "torch_dtype": "float16",
153
+ "unsloth_fixed": true,
154
+ "use_cache": true,
155
+ "use_sliding_window": false,
156
+ "video_token_id": null,
157
+ "vision_end_token_id": 151653,
158
+ "vision_start_token_id": 151652,
159
+ "vision_token_id": 151654,
160
+ "vocab_size": 151936
161
+ },
162
  "torch_dtype": "float16",
163
+ "transformers_version": "4.52.4",
164
  "unsloth_fixed": true,
165
+ "unsloth_version": "2025.6.8",
166
  "use_cache": true,
167
  "use_sliding_window": false,
168
+ "video_token_id": 151656,
169
+ "vision_config": {
170
+ "depth": 32,
171
+ "fullatt_block_indexes": [
172
+ 7,
173
+ 15,
174
+ 23,
175
+ 31
176
+ ],
177
+ "hidden_act": "silu",
178
+ "hidden_size": 1280,
179
+ "in_channels": 3,
180
+ "in_chans": 3,
181
+ "initializer_range": 0.02,
182
+ "intermediate_size": 3420,
183
+ "model_type": "qwen2_5_vl",
184
+ "num_heads": 16,
185
+ "out_hidden_size": 2048,
186
+ "patch_size": 14,
187
+ "spatial_merge_size": 2,
188
+ "spatial_patch_size": 14,
189
+ "temporal_patch_size": 2,
190
+ "tokens_per_second": 2,
191
+ "torch_dtype": "float16",
192
+ "window_size": 112
193
+ },
194
  "vision_end_token_id": 151653,
195
  "vision_start_token_id": 151652,
196
  "vision_token_id": 151654,
197
  "vocab_size": 151936
198
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
generation_config.json CHANGED
@@ -9,5 +9,5 @@
9
  "pad_token_id": 151654,
10
  "repetition_penalty": 1.05,
11
  "temperature": 1e-06,
12
- "transformers_version": "4.54.0.dev0"
13
  }
 
9
  "pad_token_id": 151654,
10
  "repetition_penalty": 1.05,
11
  "temperature": 1e-06,
12
+ "transformers_version": "4.52.4"
13
  }
video_preprocessor_config.json CHANGED
@@ -1,4 +1,28 @@
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "crop_size": null,
3
  "data_format": "channels_first",
4
  "default_to_square": true,
@@ -9,25 +33,45 @@
9
  "do_pad": null,
10
  "do_rescale": true,
11
  "do_resize": true,
12
- "do_sample_frames": false,
13
- "fps": null,
14
  "image_mean": [
15
  0.48145466,
16
  0.4578275,
17
  0.40821073
18
  ],
 
19
  "image_std": [
20
  0.26862954,
21
  0.26130258,
22
  0.27577711
23
  ],
24
  "input_data_format": null,
25
- "max_frames": 768,
26
  "max_pixels": 12845056,
27
  "merge_size": 2,
28
- "min_frames": 4,
29
  "min_pixels": 3136,
30
- "num_frames": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
31
  "patch_size": 14,
32
  "processor_class": "Qwen2_5_VLProcessor",
33
  "resample": 3,
@@ -38,6 +82,5 @@
38
  },
39
  "size_divisor": null,
40
  "temporal_patch_size": 2,
41
- "video_metadata": null,
42
  "video_processor_type": "Qwen2VLVideoProcessor"
43
  }
 
1
  {
2
+ "_valid_kwargs_names": [
3
+ "do_convert_rgb",
4
+ "do_resize",
5
+ "size",
6
+ "size_divisor",
7
+ "default_to_square",
8
+ "resample",
9
+ "do_rescale",
10
+ "rescale_factor",
11
+ "do_normalize",
12
+ "image_mean",
13
+ "image_std",
14
+ "do_pad",
15
+ "do_center_crop",
16
+ "crop_size",
17
+ "data_format",
18
+ "input_data_format",
19
+ "device",
20
+ "min_pixels",
21
+ "max_pixels",
22
+ "patch_size",
23
+ "temporal_patch_size",
24
+ "merge_size"
25
+ ],
26
  "crop_size": null,
27
  "data_format": "channels_first",
28
  "default_to_square": true,
 
33
  "do_pad": null,
34
  "do_rescale": true,
35
  "do_resize": true,
 
 
36
  "image_mean": [
37
  0.48145466,
38
  0.4578275,
39
  0.40821073
40
  ],
41
+ "image_processor_type": "Qwen2VLImageProcessor",
42
  "image_std": [
43
  0.26862954,
44
  0.26130258,
45
  0.27577711
46
  ],
47
  "input_data_format": null,
 
48
  "max_pixels": 12845056,
49
  "merge_size": 2,
 
50
  "min_pixels": 3136,
51
+ "model_valid_processing_keys": [
52
+ "do_convert_rgb",
53
+ "do_resize",
54
+ "size",
55
+ "size_divisor",
56
+ "default_to_square",
57
+ "resample",
58
+ "do_rescale",
59
+ "rescale_factor",
60
+ "do_normalize",
61
+ "image_mean",
62
+ "image_std",
63
+ "do_pad",
64
+ "do_center_crop",
65
+ "crop_size",
66
+ "data_format",
67
+ "input_data_format",
68
+ "device",
69
+ "min_pixels",
70
+ "max_pixels",
71
+ "patch_size",
72
+ "temporal_patch_size",
73
+ "merge_size"
74
+ ],
75
  "patch_size": 14,
76
  "processor_class": "Qwen2_5_VLProcessor",
77
  "resample": 3,
 
82
  },
83
  "size_divisor": null,
84
  "temporal_patch_size": 2,
 
85
  "video_processor_type": "Qwen2VLVideoProcessor"
86
  }