jAmmm6 committed on
Commit
ba5ebf3
·
verified ·
1 Parent(s): c1b4daf

Upload model trained with Unsloth

Browse files

Upload model trained with Unsloth 2x faster

Files changed (4) hide show
  1. README.md +2 -0
  2. config.json +248 -0
  3. generation_config.json +13 -0
  4. model.safetensors +3 -0
README.md CHANGED
@@ -1,3 +1,5 @@
1
  ---
2
  license: mit
 
 
3
  ---
 
1
  ---
2
  license: mit
3
+ tags:
4
+ - unsloth
5
  ---
config.json ADDED
@@ -0,0 +1,248 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "Qwen2_5_VLForConditionalGeneration"
4
+ ],
5
+ "attention_dropout": 0.0,
6
+ "eos_token_id": 151645,
7
+ "hidden_act": "silu",
8
+ "hidden_size": 2048,
9
+ "image_token_id": 151655,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 11008,
12
+ "max_position_embeddings": 128000,
13
+ "max_window_layers": 70,
14
+ "model_type": "qwen2_5_vl",
15
+ "num_attention_heads": 16,
16
+ "num_hidden_layers": 36,
17
+ "num_key_value_heads": 2,
18
+ "pad_token_id": 151654,
19
+ "quantization_config": {
20
+ "bnb_4bit_compute_dtype": "float16",
21
+ "bnb_4bit_quant_type": "nf4",
22
+ "bnb_4bit_use_double_quant": true,
23
+ "llm_int8_enable_fp32_cpu_offload": false,
24
+ "llm_int8_has_fp16_weight": false,
25
+ "llm_int8_skip_modules": null,
26
+ "llm_int8_threshold": 6.0,
27
+ "load_in_4bit": true,
28
+ "load_in_8bit": false,
29
+ "quant_method": "bitsandbytes"
30
+ },
31
+ "rms_norm_eps": 1e-06,
32
+ "rope_scaling": {
33
+ "mrope_section": [
34
+ 16,
35
+ 24,
36
+ 24
37
+ ],
38
+ "rope_type": "default",
39
+ "type": "default"
40
+ },
41
+ "rope_theta": 1000000.0,
42
+ "sliding_window": 32768,
43
+ "text_config": {
44
+ "architectures": [
45
+ "Qwen2_5_VLForConditionalGeneration"
46
+ ],
47
+ "attention_dropout": 0.0,
48
+ "eos_token_id": 151645,
49
+ "hidden_act": "silu",
50
+ "hidden_size": 2048,
51
+ "image_token_id": null,
52
+ "initializer_range": 0.02,
53
+ "intermediate_size": 11008,
54
+ "layer_types": [
55
+ "full_attention",
56
+ "full_attention",
57
+ "full_attention",
58
+ "full_attention",
59
+ "full_attention",
60
+ "full_attention",
61
+ "full_attention",
62
+ "full_attention",
63
+ "full_attention",
64
+ "full_attention",
65
+ "full_attention",
66
+ "full_attention",
67
+ "full_attention",
68
+ "full_attention",
69
+ "full_attention",
70
+ "full_attention",
71
+ "full_attention",
72
+ "full_attention",
73
+ "full_attention",
74
+ "full_attention",
75
+ "full_attention",
76
+ "full_attention",
77
+ "full_attention",
78
+ "full_attention",
79
+ "full_attention",
80
+ "full_attention",
81
+ "full_attention",
82
+ "full_attention",
83
+ "full_attention",
84
+ "full_attention",
85
+ "full_attention",
86
+ "full_attention",
87
+ "full_attention",
88
+ "full_attention",
89
+ "full_attention",
90
+ "full_attention"
91
+ ],
92
+ "max_position_embeddings": 128000,
93
+ "max_window_layers": 70,
94
+ "model_type": "qwen2_5_vl_text",
95
+ "num_attention_heads": 16,
96
+ "num_hidden_layers": 36,
97
+ "num_key_value_heads": 2,
98
+ "pad_token_id": 151654,
99
+ "quantization_config": {
100
+ "_load_in_4bit": true,
101
+ "_load_in_8bit": false,
102
+ "bnb_4bit_compute_dtype": "bfloat16",
103
+ "bnb_4bit_quant_storage": "uint8",
104
+ "bnb_4bit_quant_type": "nf4",
105
+ "bnb_4bit_use_double_quant": true,
106
+ "llm_int8_enable_fp32_cpu_offload": false,
107
+ "llm_int8_has_fp16_weight": false,
108
+ "llm_int8_skip_modules": [
109
+ "lm_head",
110
+ "multi_modal_projector",
111
+ "merger",
112
+ "modality_projection",
113
+ "model.layers.5.mlp",
114
+ "visual.blocks.25.attn",
115
+ "visual.merger.mlp",
116
+ "visual.blocks.24.attn",
117
+ "visual.blocks.29.attn",
118
+ "visual.blocks.30.attn",
119
+ "visual.blocks.26.attn",
120
+ "visual.blocks.22.attn",
121
+ "visual.blocks.31.attn",
122
+ "visual.blocks.27.attn",
123
+ "model.layers.30.mlp",
124
+ "visual.blocks.30.mlp",
125
+ "visual.blocks.28.attn",
126
+ "visual.blocks.29.mlp",
127
+ "visual.blocks.25.mlp",
128
+ "visual.blocks.21.attn",
129
+ "visual.blocks.18.attn",
130
+ "visual.blocks.20.attn",
131
+ "visual.blocks.26.mlp",
132
+ "visual.blocks.16.attn",
133
+ "visual.blocks.31.mlp",
134
+ "visual.blocks.28.mlp",
135
+ "visual.blocks.27.mlp",
136
+ "visual.blocks.24.mlp",
137
+ "visual.blocks.19.attn",
138
+ "visual.blocks.23.mlp",
139
+ "visual.blocks.19.mlp",
140
+ "visual.blocks.17.attn",
141
+ "visual.blocks.20.mlp",
142
+ "visual.blocks.23.attn",
143
+ "visual.blocks.13.attn",
144
+ "visual.blocks.22.mlp",
145
+ "visual.blocks.9.mlp",
146
+ "visual.blocks.10.mlp",
147
+ "visual.blocks.16.mlp",
148
+ "visual.blocks.12.attn",
149
+ "visual.blocks.18.mlp",
150
+ "visual.blocks.21.mlp",
151
+ "visual.blocks.6.mlp",
152
+ "model.layers.1.mlp",
153
+ "visual.blocks.14.attn",
154
+ "visual.blocks.11.mlp",
155
+ "visual.blocks.11.attn",
156
+ "visual.blocks.9.attn",
157
+ "model.layers.2.mlp",
158
+ "visual.blocks.12.mlp",
159
+ "visual.blocks.10.attn",
160
+ "visual.blocks.6.attn",
161
+ "visual.blocks.13.mlp",
162
+ "visual.blocks.8.mlp",
163
+ "visual.blocks.14.mlp",
164
+ "visual.blocks.7.mlp",
165
+ "visual.blocks.5.attn",
166
+ "visual.blocks.8.attn",
167
+ "visual.blocks.15.mlp",
168
+ "visual.blocks.5.mlp",
169
+ "visual.blocks.3.mlp",
170
+ "visual.blocks.2.mlp",
171
+ "visual.blocks.4.mlp",
172
+ "visual.blocks.2.attn",
173
+ "visual.blocks.7.attn",
174
+ "visual.blocks.1.attn",
175
+ "visual.blocks.17.mlp",
176
+ "visual.blocks.15.attn",
177
+ "visual.blocks.4.attn",
178
+ "visual.blocks.1.mlp",
179
+ "visual.blocks.0.attn",
180
+ "visual.blocks.0.mlp",
181
+ "visual.blocks.3.attn",
182
+ "visual.blocks.31.mlp.down_proj"
183
+ ],
184
+ "llm_int8_threshold": 6.0,
185
+ "load_in_4bit": true,
186
+ "load_in_8bit": false,
187
+ "quant_method": "bitsandbytes"
188
+ },
189
+ "rms_norm_eps": 1e-06,
190
+ "rope_scaling": {
191
+ "mrope_section": [
192
+ 16,
193
+ 24,
194
+ 24
195
+ ],
196
+ "rope_type": "default",
197
+ "type": "default"
198
+ },
199
+ "rope_theta": 1000000.0,
200
+ "sliding_window": null,
201
+ "tie_word_embeddings": true,
202
+ "torch_dtype": "float16",
203
+ "unsloth_fixed": true,
204
+ "use_cache": true,
205
+ "use_sliding_window": false,
206
+ "video_token_id": null,
207
+ "vision_end_token_id": 151653,
208
+ "vision_start_token_id": 151652,
209
+ "vision_token_id": 151654,
210
+ "vocab_size": 151936
211
+ },
212
+ "torch_dtype": "float16",
213
+ "transformers_version": "4.54.0.dev0",
214
+ "unsloth_fixed": true,
215
+ "unsloth_version": "2025.6.8",
216
+ "use_cache": true,
217
+ "use_sliding_window": false,
218
+ "video_token_id": 151656,
219
+ "vision_config": {
220
+ "depth": 32,
221
+ "fullatt_block_indexes": [
222
+ 7,
223
+ 15,
224
+ 23,
225
+ 31
226
+ ],
227
+ "hidden_act": "silu",
228
+ "hidden_size": 1280,
229
+ "in_channels": 3,
230
+ "in_chans": 3,
231
+ "initializer_range": 0.02,
232
+ "intermediate_size": 3420,
233
+ "model_type": "qwen2_5_vl",
234
+ "num_heads": 16,
235
+ "out_hidden_size": 2048,
236
+ "patch_size": 14,
237
+ "spatial_merge_size": 2,
238
+ "spatial_patch_size": 14,
239
+ "temporal_patch_size": 2,
240
+ "tokens_per_second": 2,
241
+ "torch_dtype": "float16",
242
+ "window_size": 112
243
+ },
244
+ "vision_end_token_id": 151653,
245
+ "vision_start_token_id": 151652,
246
+ "vision_token_id": 151654,
247
+ "vocab_size": 151936
248
+ }
generation_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 151643,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 151645,
6
+ 151643
7
+ ],
8
+ "max_length": 128000,
9
+ "pad_token_id": 151654,
10
+ "repetition_penalty": 1.05,
11
+ "temperature": 1e-06,
12
+ "transformers_version": "4.54.0.dev0"
13
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c69fdaa8161dd276012d9172be12f837d620323d9fd3df1aa8cea3d8d846d30
3
+ size 3793519787