currentfear committed on
Commit
35db2f3
·
verified ·
1 Parent(s): 0c6ed33

(Trained with Unsloth)

Browse files
config.json CHANGED
@@ -2,7 +2,7 @@
2
  "architectures": [
3
  "Qwen3VLForConditionalGeneration"
4
  ],
5
- "dtype": "bfloat16",
6
  "eos_token_id": 151645,
7
  "image_token_id": 151655,
8
  "model_type": "qwen3_vl",
@@ -13,7 +13,132 @@
13
  "bnb_4bit_use_double_quant": true,
14
  "llm_int8_enable_fp32_cpu_offload": false,
15
  "llm_int8_has_fp16_weight": false,
16
- "llm_int8_skip_modules": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
17
  "llm_int8_threshold": 6.0,
18
  "load_in_4bit": true,
19
  "load_in_8bit": false,
@@ -23,7 +148,7 @@
23
  "attention_bias": false,
24
  "attention_dropout": 0.0,
25
  "bos_token_id": 151643,
26
- "dtype": "bfloat16",
27
  "eos_token_id": 151645,
28
  "head_dim": 128,
29
  "hidden_act": "silu",
@@ -61,7 +186,7 @@
61
  24
62
  ],
63
  "depth": 27,
64
- "dtype": "bfloat16",
65
  "hidden_act": "gelu_pytorch_tanh",
66
  "hidden_size": 1152,
67
  "in_channels": 3,
 
2
  "architectures": [
3
  "Qwen3VLForConditionalGeneration"
4
  ],
5
+ "torch_dtype": "bfloat16",
6
  "eos_token_id": 151645,
7
  "image_token_id": 151655,
8
  "model_type": "qwen3_vl",
 
13
  "bnb_4bit_use_double_quant": true,
14
  "llm_int8_enable_fp32_cpu_offload": false,
15
  "llm_int8_has_fp16_weight": false,
16
+ "llm_int8_skip_modules": [
17
+ "model.visual.blocks.0.attn.qkv",
18
+ "model.visual.blocks.0.attn.proj",
19
+ "model.visual.blocks.0.mlp.linear_fc1",
20
+ "model.visual.blocks.0.mlp.linear_fc2",
21
+ "model.visual.blocks.1.attn.qkv",
22
+ "model.visual.blocks.1.attn.proj",
23
+ "model.visual.blocks.1.mlp.linear_fc1",
24
+ "model.visual.blocks.1.mlp.linear_fc2",
25
+ "model.visual.blocks.2.attn.qkv",
26
+ "model.visual.blocks.2.attn.proj",
27
+ "model.visual.blocks.2.mlp.linear_fc1",
28
+ "model.visual.blocks.2.mlp.linear_fc2",
29
+ "model.visual.blocks.3.attn.qkv",
30
+ "model.visual.blocks.3.attn.proj",
31
+ "model.visual.blocks.3.mlp.linear_fc1",
32
+ "model.visual.blocks.3.mlp.linear_fc2",
33
+ "model.visual.blocks.4.attn.qkv",
34
+ "model.visual.blocks.4.attn.proj",
35
+ "model.visual.blocks.4.mlp.linear_fc1",
36
+ "model.visual.blocks.4.mlp.linear_fc2",
37
+ "model.visual.blocks.5.attn.qkv",
38
+ "model.visual.blocks.5.attn.proj",
39
+ "model.visual.blocks.5.mlp.linear_fc1",
40
+ "model.visual.blocks.5.mlp.linear_fc2",
41
+ "model.visual.blocks.6.attn.qkv",
42
+ "model.visual.blocks.6.attn.proj",
43
+ "model.visual.blocks.6.mlp.linear_fc1",
44
+ "model.visual.blocks.6.mlp.linear_fc2",
45
+ "model.visual.blocks.7.attn.qkv",
46
+ "model.visual.blocks.7.attn.proj",
47
+ "model.visual.blocks.7.mlp.linear_fc1",
48
+ "model.visual.blocks.7.mlp.linear_fc2",
49
+ "model.visual.blocks.8.attn.qkv",
50
+ "model.visual.blocks.8.attn.proj",
51
+ "model.visual.blocks.8.mlp.linear_fc1",
52
+ "model.visual.blocks.8.mlp.linear_fc2",
53
+ "model.visual.blocks.9.attn.qkv",
54
+ "model.visual.blocks.9.attn.proj",
55
+ "model.visual.blocks.9.mlp.linear_fc1",
56
+ "model.visual.blocks.9.mlp.linear_fc2",
57
+ "model.visual.blocks.10.attn.qkv",
58
+ "model.visual.blocks.10.attn.proj",
59
+ "model.visual.blocks.10.mlp.linear_fc1",
60
+ "model.visual.blocks.10.mlp.linear_fc2",
61
+ "model.visual.blocks.11.attn.qkv",
62
+ "model.visual.blocks.11.attn.proj",
63
+ "model.visual.blocks.11.mlp.linear_fc1",
64
+ "model.visual.blocks.11.mlp.linear_fc2",
65
+ "model.visual.blocks.12.attn.qkv",
66
+ "model.visual.blocks.12.attn.proj",
67
+ "model.visual.blocks.12.mlp.linear_fc1",
68
+ "model.visual.blocks.12.mlp.linear_fc2",
69
+ "model.visual.blocks.13.attn.qkv",
70
+ "model.visual.blocks.13.attn.proj",
71
+ "model.visual.blocks.13.mlp.linear_fc1",
72
+ "model.visual.blocks.13.mlp.linear_fc2",
73
+ "model.visual.blocks.14.attn.qkv",
74
+ "model.visual.blocks.14.attn.proj",
75
+ "model.visual.blocks.14.mlp.linear_fc1",
76
+ "model.visual.blocks.14.mlp.linear_fc2",
77
+ "model.visual.blocks.15.attn.qkv",
78
+ "model.visual.blocks.15.attn.proj",
79
+ "model.visual.blocks.15.mlp.linear_fc1",
80
+ "model.visual.blocks.15.mlp.linear_fc2",
81
+ "model.visual.blocks.16.attn.qkv",
82
+ "model.visual.blocks.16.attn.proj",
83
+ "model.visual.blocks.16.mlp.linear_fc1",
84
+ "model.visual.blocks.16.mlp.linear_fc2",
85
+ "model.visual.blocks.17.attn.qkv",
86
+ "model.visual.blocks.17.attn.proj",
87
+ "model.visual.blocks.17.mlp.linear_fc1",
88
+ "model.visual.blocks.17.mlp.linear_fc2",
89
+ "model.visual.blocks.18.attn.qkv",
90
+ "model.visual.blocks.18.attn.proj",
91
+ "model.visual.blocks.18.mlp.linear_fc1",
92
+ "model.visual.blocks.18.mlp.linear_fc2",
93
+ "model.visual.blocks.19.attn.qkv",
94
+ "model.visual.blocks.19.attn.proj",
95
+ "model.visual.blocks.19.mlp.linear_fc1",
96
+ "model.visual.blocks.19.mlp.linear_fc2",
97
+ "model.visual.blocks.20.attn.qkv",
98
+ "model.visual.blocks.20.attn.proj",
99
+ "model.visual.blocks.20.mlp.linear_fc1",
100
+ "model.visual.blocks.20.mlp.linear_fc2",
101
+ "model.visual.blocks.21.attn.qkv",
102
+ "model.visual.blocks.21.attn.proj",
103
+ "model.visual.blocks.21.mlp.linear_fc1",
104
+ "model.visual.blocks.21.mlp.linear_fc2",
105
+ "model.visual.blocks.22.attn.qkv",
106
+ "model.visual.blocks.22.attn.proj",
107
+ "model.visual.blocks.22.mlp.linear_fc1",
108
+ "model.visual.blocks.22.mlp.linear_fc2",
109
+ "model.visual.blocks.23.attn.qkv",
110
+ "model.visual.blocks.23.attn.proj",
111
+ "model.visual.blocks.23.mlp.linear_fc1",
112
+ "model.visual.blocks.23.mlp.linear_fc2",
113
+ "model.visual.blocks.24.attn.qkv",
114
+ "model.visual.blocks.24.attn.proj",
115
+ "model.visual.blocks.24.mlp.linear_fc1",
116
+ "model.visual.blocks.24.mlp.linear_fc2",
117
+ "model.visual.blocks.25.attn.qkv",
118
+ "model.visual.blocks.25.attn.proj",
119
+ "model.visual.blocks.25.mlp.linear_fc1",
120
+ "model.visual.blocks.25.mlp.linear_fc2",
121
+ "model.visual.blocks.26.attn.qkv",
122
+ "model.visual.blocks.26.attn.proj",
123
+ "model.visual.blocks.26.mlp.linear_fc1",
124
+ "model.visual.blocks.26.mlp.linear_fc2",
125
+ "model.visual.merger.linear_fc1",
126
+ "model.visual.merger.linear_fc2",
127
+ "model.visual.deepstack_merger_list.0.linear_fc1",
128
+ "model.visual.deepstack_merger_list.0.linear_fc2",
129
+ "model.visual.deepstack_merger_list.1.linear_fc1",
130
+ "model.visual.deepstack_merger_list.1.linear_fc2",
131
+ "model.visual.deepstack_merger_list.2.linear_fc1",
132
+ "model.visual.deepstack_merger_list.2.linear_fc2",
133
+ "model.language_model.layers.6.mlp.gate_proj",
134
+ "model.language_model.layers.6.mlp.up_proj",
135
+ "model.language_model.layers.6.mlp.down_proj",
136
+ "model.language_model.layers.16.self_attn.o_proj",
137
+ "model.language_model.layers.35.mlp.gate_proj",
138
+ "model.language_model.layers.35.mlp.up_proj",
139
+ "model.language_model.layers.35.mlp.down_proj",
140
+ "lm_head"
141
+ ],
142
  "llm_int8_threshold": 6.0,
143
  "load_in_4bit": true,
144
  "load_in_8bit": false,
 
148
  "attention_bias": false,
149
  "attention_dropout": 0.0,
150
  "bos_token_id": 151643,
151
+ "torch_dtype": "bfloat16",
152
  "eos_token_id": 151645,
153
  "head_dim": 128,
154
  "hidden_act": "silu",
 
186
  24
187
  ],
188
  "depth": 27,
189
+ "torch_dtype": "bfloat16",
190
  "hidden_act": "gelu_pytorch_tanh",
191
  "hidden_size": 1152,
192
  "in_channels": 3,
model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:060f3701338a0efe45203d3187c78258ccab8a7dfa4947c21898da23b6bcc356
3
- size 4984017721
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bab077f98dbf0a8708b71bd07767df4692b77f37fcfdedd42771dae6e756dca5
3
+ size 4984017562
model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c089587578a3b6c17bbe41bea2407d1d1cc2dd50178853edc073c201e712c5e6
3
- size 2715459986
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2071e084feac3cdd8ed4f93e13c75eb29494e257bfb89e405ae77ec9b78b972
3
+ size 2715459912
model.safetensors.index.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
  "metadata": {
3
  "total_parameters": 8767123696,
4
- "total_size": 7699220483
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00002-of-00002.safetensors",
 
1
  {
2
  "metadata": {
3
  "total_parameters": 8767123696,
4
+ "total_size": 7699220250
5
  },
6
  "weight_map": {
7
  "lm_head.weight": "model-00002-of-00002.safetensors",
preprocessor_config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "data_format": "channels_first",
4
+ "default_to_square": true,
5
+ "device": null,
6
+ "disable_grouping": null,
7
+ "do_center_crop": null,
8
+ "do_convert_rgb": true,
9
+ "do_normalize": true,
10
+ "do_pad": null,
11
+ "do_rescale": true,
12
+ "do_resize": true,
13
+ "image_mean": [
14
+ 0.5,
15
+ 0.5,
16
+ 0.5
17
+ ],
18
+ "image_processor_type": "Qwen2VLImageProcessorFast",
19
+ "image_std": [
20
+ 0.5,
21
+ 0.5,
22
+ 0.5
23
+ ],
24
+ "input_data_format": null,
25
+ "max_pixels": null,
26
+ "merge_size": 2,
27
+ "min_pixels": null,
28
+ "pad_size": null,
29
+ "patch_size": 16,
30
+ "processor_class": "_Unsloth_Patched_Qwen3VLProcessor",
31
+ "resample": 3,
32
+ "rescale_factor": 0.00392156862745098,
33
+ "return_tensors": null,
34
+ "size": {
35
+ "longest_edge": 16777216,
36
+ "shortest_edge": 65536
37
+ },
38
+ "temporal_patch_size": 2
39
+ }
tokenizer_config.json CHANGED
@@ -233,9 +233,10 @@
233
  "extra_special_tokens": {},
234
  "model_max_length": 262144,
235
  "pad_token": "<|vision_pad|>",
236
- "padding_side": "left",
237
  "processor_class": "_Unsloth_Patched_Qwen3VLProcessor",
238
  "split_special_tokens": false,
239
  "tokenizer_class": "Qwen2Tokenizer",
240
- "unk_token": null
241
- }
 
 
233
  "extra_special_tokens": {},
234
  "model_max_length": 262144,
235
  "pad_token": "<|vision_pad|>",
236
+ "padding_side": "right",
237
  "processor_class": "_Unsloth_Patched_Qwen3VLProcessor",
238
  "split_special_tokens": false,
239
  "tokenizer_class": "Qwen2Tokenizer",
240
+ "unk_token": null,
241
+ "chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {%- if messages[0].content is string %}\n {{- messages[0].content }}\n {%- else %}\n {%- for content in messages[0].content %}\n {%- if 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].content is string %}\n {{- messages[0].content }}\n {%- else %}\n {%- for content in messages[0].content %}\n {%- if 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set image_count = namespace(value=0) %}\n{%- set video_count = namespace(value=0) %}\n{%- for message in messages %}\n {%- if message.role == \"user\" %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content in message.content %}\n {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}\n {%- set image_count.value = image_count.value + 1 %}\n {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}\n <|vision_start|><|image_pad|><|vision_end|>\n {%- elif content.type == 'video' or 'video' in content %}\n {%- set video_count.value = video_count.value + 1 %}\n {%- if add_vision_id %}Video {{ 
video_count.value }}: {% endif -%}\n <|vision_start|><|video_pad|><|vision_end|>\n {%- elif 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content_item in message.content %}\n {%- if 'text' in content_item %}\n {{- content_item.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and message.content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content in message.content %}\n {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}\n {%- set image_count.value = image_count.value + 1 %}\n {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}\n <|vision_start|><|image_pad|><|vision_end|>\n {%- elif content.type == 'video' or 'video' in content %}\n {%- set video_count.value = video_count.value + 1 %}\n {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}\n <|vision_start|><|video_pad|><|vision_end|>\n {%- elif 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- 
'\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n"
242
+ }
video_preprocessor_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "crop_size": null,
3
+ "data_format": "channels_first",
4
+ "default_to_square": true,
5
+ "device": null,
6
+ "do_center_crop": null,
7
+ "do_convert_rgb": true,
8
+ "do_normalize": true,
9
+ "do_rescale": true,
10
+ "do_resize": true,
11
+ "do_sample_frames": true,
12
+ "fps": 2,
13
+ "image_mean": [
14
+ 0.5,
15
+ 0.5,
16
+ 0.5
17
+ ],
18
+ "image_std": [
19
+ 0.5,
20
+ 0.5,
21
+ 0.5
22
+ ],
23
+ "input_data_format": null,
24
+ "max_frames": 768,
25
+ "merge_size": 2,
26
+ "min_frames": 4,
27
+ "num_frames": null,
28
+ "pad_size": null,
29
+ "patch_size": 16,
30
+ "processor_class": "_Unsloth_Patched_Qwen3VLProcessor",
31
+ "resample": 3,
32
+ "rescale_factor": 0.00392156862745098,
33
+ "return_metadata": false,
34
+ "size": {
35
+ "longest_edge": 25165824,
36
+ "shortest_edge": 4096
37
+ },
38
+ "temporal_patch_size": 2,
39
+ "video_metadata": null,
40
+ "video_processor_type": "Qwen3VLVideoProcessor"
41
+ }