Upload folder using huggingface_hub
Browse files
- README.md +6 -2
- chat_template.json +4 -0
- config.json +1 -4
- generation_config.json +13 -12
README.md
CHANGED
|
@@ -7,6 +7,10 @@ license: apache-2.0
|
|
| 7 |
pipeline_tag: image-text-to-text
|
| 8 |
library_name: transformers
|
| 9 |
---
|
|
|
|
|
|
|
|
|
|
|
|
|
| 10 |
<div>
|
| 11 |
<p style="margin-top: 0;margin-bottom: 0;">
|
| 12 |
<em><a href="https://docs.unsloth.ai/basics/unsloth-dynamic-v2.0-gguf">Unsloth Dynamic 2.0</a> achieves superior accuracy & outperforms other leading quants.</em>
|
|
@@ -24,8 +28,8 @@ library_name: transformers
|
|
| 24 |
</div>
|
| 25 |
</div>
|
| 26 |
|
| 27 |
-
<a href="https://
|
| 28 |
-
<img alt="
|
| 29 |
</a>
|
| 30 |
|
| 31 |
|
|
|
|
| 7 |
pipeline_tag: image-text-to-text
|
| 8 |
library_name: transformers
|
| 9 |
---
|
| 10 |
+
> [!NOTE]
|
| 11 |
+
> Includes Unsloth **chat template fixes**! <br> For `llama.cpp`, use `--jinja`
|
| 12 |
+
>
|
| 13 |
+
|
| 14 |
<div>
|
| 15 |
<p style="margin-top: 0;margin-bottom: 0;">
|
| 16 |
<em><a href="https://docs.unsloth.ai/basics/unsloth-dynamic-v2.0-gguf">Unsloth Dynamic 2.0</a> achieves superior accuracy & outperforms other leading quants.</em>
|
|
|
|
| 28 |
</div>
|
| 29 |
</div>
|
| 30 |
|
| 31 |
+
<a href="https://huggingface.co/spaces/akhaliq/Qwen3-VL-2B-Instruct" target="_blank" style="margin: 2px;">
|
| 32 |
+
<img alt="Demo" src="https://img.shields.io/badge/Demo-536af5" style="display: inline-block; vertical-align: middle;"/>
|
| 33 |
</a>
|
| 34 |
|
| 35 |
|
chat_template.json
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"chat_template": "{%- if tools %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].role == 'system' %}\n {%- if messages[0].content is string %}\n {{- messages[0].content }}\n {%- else %}\n {%- for content in messages[0].content %}\n {%- if 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '\\n\\n' }}\n {%- endif %}\n {{- \"# Tools\\n\\nYou may call one or more functions to assist with the user query.\\n\\nYou are provided with function signatures within <tools></tools> XML tags:\\n<tools>\" }}\n {%- for tool in tools %}\n {{- \"\\n\" }}\n {{- tool | tojson }}\n {%- endfor %}\n {{- \"\\n</tools>\\n\\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\\n<tool_call>\\n{\\\"name\\\": <function-name>, \\\"arguments\\\": <args-json-object>}\\n</tool_call><|im_end|>\\n\" }}\n{%- else %}\n {%- if messages[0].role == 'system' %}\n {{- '<|im_start|>system\\n' }}\n {%- if messages[0].content is string %}\n {{- messages[0].content }}\n {%- else %}\n {%- for content in messages[0].content %}\n {%- if 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n{%- endif %}\n{%- set image_count = namespace(value=0) %}\n{%- set video_count = namespace(value=0) %}\n{%- for message in messages %}\n {%- if message.role == \"user\" %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content in message.content %}\n {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}\n {%- set image_count.value = image_count.value + 1 %}\n {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}\n <|vision_start|><|image_pad|><|vision_end|>\n {%- elif content.type == 'video' or 'video' in content %}\n {%- set video_count.value = video_count.value + 1 %}\n {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}\n <|vision_start|><|video_pad|><|vision_end|>\n {%- elif 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"assistant\" %}\n {{- '<|im_start|>' + message.role + '\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content_item in message.content %}\n {%- if 'text' in content_item %}\n {{- content_item.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {%- if message.tool_calls %}\n {%- for tool_call in message.tool_calls %}\n {%- if (loop.first and message.content) or (not loop.first) %}\n {{- '\\n' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '<tool_call>\\n{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}\\n</tool_call>' }}\n {%- endfor %}\n {%- endif %}\n {{- '<|im_end|>\\n' }}\n {%- elif message.role == \"tool\" %}\n {%- if loop.first or (messages[loop.index0 - 1].role != \"tool\") %}\n {{- '<|im_start|>user' }}\n {%- endif %}\n {{- '\\n<tool_response>\\n' }}\n {%- if message.content is string %}\n {{- message.content }}\n {%- else %}\n {%- for content in message.content %}\n {%- if content.type == 'image' or 'image' in content or 'image_url' in content %}\n {%- set image_count.value = image_count.value + 1 %}\n {%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}\n <|vision_start|><|image_pad|><|vision_end|>\n {%- elif content.type == 'video' or 'video' in content %}\n {%- set video_count.value = video_count.value + 1 %}\n {%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}\n <|vision_start|><|video_pad|><|vision_end|>\n {%- elif 'text' in content %}\n {{- content.text }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '\\n</tool_response>' }}\n {%- if loop.last or (messages[loop.index0 + 1].role != \"tool\") %}\n {{- '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|im_start|>assistant\\n' }}\n{%- endif %}\n"
|
| 3 |
+
}
|
| 4 |
+
|
config.json
CHANGED
|
@@ -2,8 +2,6 @@
|
|
| 2 |
"architectures": [
|
| 3 |
"Qwen3VLForConditionalGeneration"
|
| 4 |
],
|
| 5 |
-
"torch_dtype": "bfloat16",
|
| 6 |
-
"eos_token_id": 151645,
|
| 7 |
"image_token_id": 151655,
|
| 8 |
"model_type": "qwen3_vl",
|
| 9 |
"pad_token_id": 151654,
|
|
@@ -49,7 +47,6 @@
|
|
| 49 |
17
|
| 50 |
],
|
| 51 |
"depth": 24,
|
| 52 |
-
"torch_dtype": "bfloat16",
|
| 53 |
"hidden_act": "gelu_pytorch_tanh",
|
| 54 |
"hidden_size": 1024,
|
| 55 |
"in_channels": 3,
|
|
@@ -65,4 +62,4 @@
|
|
| 65 |
},
|
| 66 |
"vision_end_token_id": 151653,
|
| 67 |
"vision_start_token_id": 151652
|
| 68 |
-
}
|
|
|
|
| 2 |
"architectures": [
|
| 3 |
"Qwen3VLForConditionalGeneration"
|
| 4 |
],
|
|
|
|
|
|
|
| 5 |
"image_token_id": 151655,
|
| 6 |
"model_type": "qwen3_vl",
|
| 7 |
"pad_token_id": 151654,
|
|
|
|
| 47 |
17
|
| 48 |
],
|
| 49 |
"depth": 24,
|
|
|
|
| 50 |
"hidden_act": "gelu_pytorch_tanh",
|
| 51 |
"hidden_size": 1024,
|
| 52 |
"in_channels": 3,
|
|
|
|
| 62 |
},
|
| 63 |
"vision_end_token_id": 151653,
|
| 64 |
"vision_start_token_id": 151652
|
| 65 |
+
}
|
generation_config.json
CHANGED
|
@@ -1,13 +1,14 @@
|
|
| 1 |
{
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
|
|
|
|
|
| 1 |
{
|
| 2 |
+
"bos_token_id": 151643,
|
| 3 |
+
"pad_token_id": 151643,
|
| 4 |
+
"do_sample": true,
|
| 5 |
+
"eos_token_id": [
|
| 6 |
+
151645,
|
| 7 |
+
151643
|
| 8 |
+
],
|
| 9 |
+
"top_p": 0.8,
|
| 10 |
+
"top_k": 20,
|
| 11 |
+
"temperature": 0.7,
|
| 12 |
+
"repetition_penalty": 1.0,
|
| 13 |
+
"transformers_version": "4.56.0"
|
| 14 |
+
}
|