update project
Browse files — This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +34 -0
- .gitignore +1 -0
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision.axmodel → Qwen3-VL-2B-Instruct_vision.axmodel +0 -0
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_1280x736.axmodel → Qwen3-VL-2B-Instruct_vision_1280x736.axmodel +0 -0
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_640x640.axmodel → Qwen3-VL-2B-Instruct_vision_640x640.axmodel +0 -0
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_u8.axmodel → Qwen3-VL-2B-Instruct_vision_u8.axmodel +0 -0
- README.md +227 -154
- config.json +27 -0
- gradio_demo.py +0 -262
- axera_logo.png → image.png +2 -2
- images/demo.jpg +0 -3
- images/demo1.jpg +0 -3
- images/recoAll_attractions_1.jpg +0 -3
- images/recoAll_attractions_2.jpg +0 -3
- images/recoAll_attractions_3.jpg +0 -3
- images/recoAll_attractions_4.jpg +0 -3
- images/ssd_car.jpg +0 -3
- images/ssd_horse.jpg +0 -3
- main_ax650 +0 -3
- main_ax650_api +0 -3
- main_axcl_aarch64 +0 -3
- main_axcl_api_aarch64 +0 -3
- main_axcl_api_x86 +0 -3
- main_axcl_x86 +0 -3
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/model.embed_tokens.weight.bfloat16.bin → model.embed_tokens.weight.bfloat16.bin +0 -0
- openai_cli.py +0 -66
- post_config.json +6 -6
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l0_together.axmodel → qwen3_vl_text_p128_l0_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l10_together.axmodel → qwen3_vl_text_p128_l10_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l11_together.axmodel → qwen3_vl_text_p128_l11_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l12_together.axmodel → qwen3_vl_text_p128_l12_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l13_together.axmodel → qwen3_vl_text_p128_l13_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l14_together.axmodel → qwen3_vl_text_p128_l14_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l15_together.axmodel → qwen3_vl_text_p128_l15_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l16_together.axmodel → qwen3_vl_text_p128_l16_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l17_together.axmodel → qwen3_vl_text_p128_l17_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l18_together.axmodel → qwen3_vl_text_p128_l18_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l19_together.axmodel → qwen3_vl_text_p128_l19_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l1_together.axmodel → qwen3_vl_text_p128_l1_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l20_together.axmodel → qwen3_vl_text_p128_l20_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l21_together.axmodel → qwen3_vl_text_p128_l21_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l22_together.axmodel → qwen3_vl_text_p128_l22_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l23_together.axmodel → qwen3_vl_text_p128_l23_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l24_together.axmodel → qwen3_vl_text_p128_l24_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l25_together.axmodel → qwen3_vl_text_p128_l25_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l26_together.axmodel → qwen3_vl_text_p128_l26_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l27_together.axmodel → qwen3_vl_text_p128_l27_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l2_together.axmodel → qwen3_vl_text_p128_l2_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l3_together.axmodel → qwen3_vl_text_p128_l3_together.axmodel +2 -2
- Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l4_together.axmodel → qwen3_vl_text_p128_l4_together.axmodel +2 -2
.gitattributes
CHANGED
|
@@ -88,3 +88,37 @@ main_ax650_api filter=lfs diff=lfs merge=lfs -text
|
|
| 88 |
main_axcl_api_x86 filter=lfs diff=lfs merge=lfs -text
|
| 89 |
axera_logo.png filter=lfs diff=lfs merge=lfs -text
|
| 90 |
main_axcl_api_aarch64 filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 88 |
main_axcl_api_x86 filter=lfs diff=lfs merge=lfs -text
|
| 89 |
axera_logo.png filter=lfs diff=lfs merge=lfs -text
|
| 90 |
main_axcl_api_aarch64 filter=lfs diff=lfs merge=lfs -text
|
| 91 |
+
Qwen3-VL-2B-Instruct_vision.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 92 |
+
Qwen3-VL-2B-Instruct_vision_1280x736.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 93 |
+
Qwen3-VL-2B-Instruct_vision_640x640.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 94 |
+
Qwen3-VL-2B-Instruct_vision_u8.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 95 |
+
image.png filter=lfs diff=lfs merge=lfs -text
|
| 96 |
+
qwen3_vl_text_p128_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 97 |
+
qwen3_vl_text_p128_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 98 |
+
qwen3_vl_text_p128_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 99 |
+
qwen3_vl_text_p128_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 100 |
+
qwen3_vl_text_p128_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 101 |
+
qwen3_vl_text_p128_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 102 |
+
qwen3_vl_text_p128_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 103 |
+
qwen3_vl_text_p128_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 104 |
+
qwen3_vl_text_p128_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 105 |
+
qwen3_vl_text_p128_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 106 |
+
qwen3_vl_text_p128_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 107 |
+
qwen3_vl_text_p128_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 108 |
+
qwen3_vl_text_p128_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 109 |
+
qwen3_vl_text_p128_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 110 |
+
qwen3_vl_text_p128_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 111 |
+
qwen3_vl_text_p128_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 112 |
+
qwen3_vl_text_p128_l24_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 113 |
+
qwen3_vl_text_p128_l25_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 114 |
+
qwen3_vl_text_p128_l26_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 115 |
+
qwen3_vl_text_p128_l27_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 116 |
+
qwen3_vl_text_p128_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 117 |
+
qwen3_vl_text_p128_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 118 |
+
qwen3_vl_text_p128_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 119 |
+
qwen3_vl_text_p128_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 120 |
+
qwen3_vl_text_p128_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 121 |
+
qwen3_vl_text_p128_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 122 |
+
qwen3_vl_text_p128_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 123 |
+
qwen3_vl_text_p128_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 124 |
+
qwen3_vl_text_post.axmodel filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
vision_cache
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision.axmodel → Qwen3-VL-2B-Instruct_vision.axmodel
RENAMED
|
File without changes
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_1280x736.axmodel → Qwen3-VL-2B-Instruct_vision_1280x736.axmodel
RENAMED
|
File without changes
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_640x640.axmodel → Qwen3-VL-2B-Instruct_vision_640x640.axmodel
RENAMED
|
File without changes
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_u8.axmodel → Qwen3-VL-2B-Instruct_vision_u8.axmodel
RENAMED
|
File without changes
|
README.md
CHANGED
|
@@ -17,9 +17,9 @@ tags:
|
|
| 17 |
- GPTQ
|
| 18 |
---
|
| 19 |
|
| 20 |
-
# Qwen3-VL
|
| 21 |
|
| 22 |
-
This version of Qwen3-VL-2B-Instruct has been converted to run on the Axera NPU using **w4a16** quantization.
|
| 23 |
|
| 24 |
Compatible with Pulsar2 version: 5.0
|
| 25 |
|
|
@@ -66,184 +66,257 @@ The DDR capacity refers to the CMM memory that needs to be consumed. Ensure that
|
|
| 66 |
|
| 67 |
## How to use
|
| 68 |
|
| 69 |
-
|
|
|
|
| 70 |
|
| 71 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 72 |
|
| 73 |
-
|
| 74 |
|
| 75 |
-
|
|
|
|
|
|
|
| 76 |
|
|
|
|
| 77 |
|
| 78 |
-
|
|
|
|
|
|
|
| 79 |
|
| 80 |
-
```
|
| 81 |
-
|
|
|
|
| 82 |
```
|
| 83 |
|
| 84 |
-
|
|
|
|
| 85 |
|
| 86 |
-
|
| 87 |
-
|
| 88 |
-
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
| 96 |
-
|
| 97 |
-
|
| 98 |
-
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
|
| 108 |
-
|
| 109 |
-
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
[I][ Init][ 356]: grp: 9, prefill_max_token_num : 1024
|
| 113 |
-
[I][ Init][ 356]: grp: 10, prefill_max_token_num : 1152
|
| 114 |
-
[I][ Init][ 360]: prefill_max_token_num : 1152
|
| 115 |
-
[I][ Init][ 372]: LLM init ok
|
| 116 |
-
[I][ Init][ 374]: Left CMM:854 MB
|
| 117 |
-
Type "q" to exit, Ctrl+c to stop current running
|
| 118 |
-
prompt >> 描述这张图片
|
| 119 |
-
image >> images/recoAll_attractions_1.jpg
|
| 120 |
-
[I][ EncodeImage][ 440]: pixel_values size 1
|
| 121 |
-
[I][ EncodeImage][ 441]: grid_h 24 grid_w 24
|
| 122 |
-
[I][ EncodeImage][ 489]: image encode time : 237.778000 ms, size : 1
|
| 123 |
-
[I][ Encode][ 532]: input_ids size:168
|
| 124 |
-
[I][ Encode][ 540]: offset 15
|
| 125 |
-
[I][ Encode][ 569]: img_embed.size:1, 294912
|
| 126 |
-
[I][ Encode][ 583]: out_embed size:344064
|
| 127 |
-
[I][ Encode][ 584]: input_ids size 168
|
| 128 |
-
[I][ Encode][ 586]: position_ids size:168
|
| 129 |
-
[I][ Run][ 607]: input token num : 168, prefill_split_num : 2
|
| 130 |
-
[I][ Run][ 641]: input_num_token:128
|
| 131 |
-
[I][ Run][ 641]: input_num_token:40
|
| 132 |
-
[I][ Run][ 865]: ttft: 313.60 ms
|
| 133 |
-
这是一张在埃及沙漠中拍摄的风景照片。画面中,三座巨大的金字塔在晴朗的天空下矗立,它们是古埃及文明的象征。这些金字塔由巨大的石块堆叠而成,表面因岁月侵蚀而显得斑驳。在金字塔的前方,有几个人影在沙地上行走,这为整个场景提供了比例感和尺度感。整个场景充满了历史的厚重感和神秘的氛围。
|
| 134 |
-
|
| 135 |
-
[N][ Run][ 992]: hit eos,avg 14.14 token/s
|
| 136 |
```
|
| 137 |
|
| 138 |
-
##
|
| 139 |
|
| 140 |
-
|
| 141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 142 |
```
|
| 143 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 144 |
```
|
| 145 |
|
| 146 |
-
|
| 147 |
|
| 148 |
-
|
|
|
|
| 149 |
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
[I][ Init][ 156]: LLM init start
|
| 153 |
-
[I][ Init][ 158]: Total CMM:7884 MB
|
| 154 |
-
[I][ Init][ 34]: connect http://127.0.0.1:8080 ok
|
| 155 |
-
bos_id: -1, eos_id: 151645
|
| 156 |
-
img_start_token: 151652
|
| 157 |
-
img_context_token: 151656
|
| 158 |
-
3% | ██ | 1 / 31 [0.01s<0.34s, 90.91 count/s] tokenizer init ok[I][ Init][ 26]: LLaMaEmbedSelector use mmap
|
| 159 |
-
6% | ███ | 2 / 31 [0.01s<0.23s, 133.33 count/s] embed_selector init ok[I][ Init][ 201]: attr.axmodel_num:28
|
| 160 |
-
103% | ██████████████████████████████████ | 32 / 31 [32.37s<31.36s, 0.99 count/s] init vpm axmodel ok,remain_cmm(4385 MB)[I][ Init][ 266]: IMAGE_CONTEXT_TOKEN: 151656, IMAGE_START_TOKEN: 151652
|
| 161 |
-
[I][ Init][ 309]: image encoder output float32
|
| 162 |
-
|
| 163 |
-
[I][ Init][ 339]: max_token_len : 2047
|
| 164 |
-
[I][ Init][ 344]: kv_cache_size : 1024, kv_cache_num: 2047
|
| 165 |
-
[I][ Init][ 352]: prefill_token_num : 128
|
| 166 |
-
[I][ Init][ 356]: grp: 1, prefill_max_token_num : 1
|
| 167 |
-
[I][ Init][ 356]: grp: 2, prefill_max_token_num : 128
|
| 168 |
-
[I][ Init][ 356]: grp: 3, prefill_max_token_num : 256
|
| 169 |
-
[I][ Init][ 356]: grp: 4, prefill_max_token_num : 384
|
| 170 |
-
[I][ Init][ 356]: grp: 5, prefill_max_token_num : 512
|
| 171 |
-
[I][ Init][ 356]: grp: 6, prefill_max_token_num : 640
|
| 172 |
-
[I][ Init][ 356]: grp: 7, prefill_max_token_num : 768
|
| 173 |
-
[I][ Init][ 356]: grp: 8, prefill_max_token_num : 896
|
| 174 |
-
[I][ Init][ 356]: grp: 9, prefill_max_token_num : 1024
|
| 175 |
-
[I][ Init][ 356]: grp: 10, prefill_max_token_num : 1152
|
| 176 |
-
[I][ Init][ 360]: prefill_max_token_num : 1152
|
| 177 |
-
[I][ Init][ 372]: LLM init ok
|
| 178 |
-
[I][ Init][ 374]: Left CMM:4385 MB
|
| 179 |
-
Type "q" to exit, Ctrl+c to stop current running
|
| 180 |
-
prompt >> 描述这个视频
|
| 181 |
-
video >> video
|
| 182 |
-
video/frame_0000.jpg
|
| 183 |
-
video/frame_0008.jpg
|
| 184 |
-
video/frame_0016.jpg
|
| 185 |
-
video/frame_0024.jpg
|
| 186 |
-
video/frame_0032.jpg
|
| 187 |
-
video/frame_0040.jpg
|
| 188 |
-
video/frame_0048.jpg
|
| 189 |
-
video/frame_0056.jpg
|
| 190 |
-
[I][ EncodeImage][ 440]: pixel_values size 4
|
| 191 |
-
[I][ EncodeImage][ 441]: grid_h 24 grid_w 24
|
| 192 |
-
[I][ EncodeImage][ 489]: image encode time : 751.481018 ms, size : 4
|
| 193 |
-
[I][ Encode][ 532]: input_ids size:600
|
| 194 |
-
[I][ Encode][ 540]: offset 15
|
| 195 |
-
[I][ Encode][ 569]: img_embed.size:4, 294912
|
| 196 |
-
[I][ Encode][ 574]: offset:159
|
| 197 |
-
[I][ Encode][ 574]: offset:303
|
| 198 |
-
[I][ Encode][ 574]: offset:447
|
| 199 |
-
[I][ Encode][ 583]: out_embed size:1228800
|
| 200 |
-
[I][ Encode][ 584]: input_ids size 600
|
| 201 |
-
[I][ Encode][ 586]: position_ids size:600
|
| 202 |
-
[I][ Run][ 607]: input token num : 600, prefill_split_num : 5
|
| 203 |
-
[I][ Run][ 641]: input_num_token:128
|
| 204 |
-
[I][ Run][ 641]: input_num_token:128
|
| 205 |
-
[I][ Run][ 641]: input_num_token:128
|
| 206 |
-
[I][ Run][ 641]: input_num_token:128
|
| 207 |
-
[I][ Run][ 641]: input_num_token:88
|
| 208 |
-
[I][ Run][ 865]: ttft: 843.36 ms
|
| 209 |
-
这是一段关于两只山地旱獭(也称“山地土拨鼠”)在山地环境中互动的视频。
|
| 210 |
-
|
| 211 |
-
在画面中,两只山地旱獭正站在布满碎石的山坡上,背景是连绵起伏的山脉和蓝天。它们的毛色以灰、棕、黑相间,脸部和耳朵周围有明显的黑白条纹,显得非常可爱。
|
| 212 |
-
|
| 213 |
-
这两只旱獭正在进行一场激烈的“拳击”或“格斗”游戏。它们的前爪高高举起,像在互相击打,但它们的姿势和动作表明它们可能是在进行一场激烈的“拳击”或“格斗”游戏。它们的嘴巴和前爪在空中挥舞,似乎在互相攻击或展示力量。
|
| 214 |
-
|
| 215 |
-
整个场景充满了动感和活力,展现了这些小动物在自然环境中充满活力和趣味的一面。
|
| 216 |
-
|
| 217 |
-
[N][ Run][ 992]: hit eos,avg 14.16 token/s
|
| 218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
```
|
| 220 |
|
| 221 |
-
### Gradio demo
|
| 222 |
|
| 223 |
-
###
|
| 224 |
|
| 225 |
-
```
|
| 226 |
-
|
| 227 |
-
```
|
| 228 |
|
| 229 |
-
|
| 230 |
-
|
| 231 |
|
| 232 |
-
|
| 233 |
-
|
| 234 |
-
|
| 235 |
-
|
| 236 |
-
./run_axcl_aarch64_api.sh
|
| 237 |
-
# for ax650
|
| 238 |
-
./run_ax650_api.sh
|
| 239 |
-
```
|
| 240 |
|
| 241 |
-
|
| 242 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 243 |
|
| 244 |
-
|
| 245 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
```
|
| 247 |
|
| 248 |
-

|
| 249 |
-
|
|
|
|
| 17 |
- GPTQ
|
| 18 |
---
|
| 19 |
|
| 20 |
+
# Qwen3-VL-2B-Instruct-GPTQ-Int4
|
| 21 |
|
| 22 |
+
This version of Qwen3-VL-2B-Instruct-GPTQ-Int4 has been converted to run on the Axera NPU using **w4a16** quantization.
|
| 23 |
|
| 24 |
Compatible with Pulsar2 version: 5.0
|
| 25 |
|
|
|
|
| 66 |
|
| 67 |
## How to use
|
| 68 |
|
| 69 |
+
## 安装 axllm
|
| 70 |
+
方式一:克隆仓库后执行安装脚本:
|
| 71 |
|
| 72 |
+
```shell
|
| 73 |
+
git clone -b axllm https://github.com/AXERA-TECH/ax-llm.git
|
| 74 |
+
cd ax-llm
|
| 75 |
+
./install.sh
|
| 76 |
+
```
|
| 77 |
|
| 78 |
+
方式二:一行命令安装(默认分支 `axllm`):
|
| 79 |
|
| 80 |
+
```shell
|
| 81 |
+
curl -fsSL https://raw.githubusercontent.com/AXERA-TECH/ax-llm/axllm/install.sh | bash
|
| 82 |
+
```
|
| 83 |
|
| 84 |
+
方式三:下载 GitHub Actions CI 导出的可执行程序(适合没有编译环境的用户):
|
| 85 |
|
| 86 |
+
如果没有编译环境,请到:
|
| 87 |
+
`https://github.com/AXERA-TECH/ax-llm/actions?query=branch%3Aaxllm`
|
| 88 |
+
下载 **最新 CI 导出的可执行程序**(`axllm`),然后:
|
| 89 |
|
| 90 |
+
```shell
|
| 91 |
+
chmod +x axllm
|
| 92 |
+
sudo mv axllm /usr/bin/axllm
|
| 93 |
```
|
| 94 |
|
| 95 |
+
## 模型下载(Hugging Face)
|
| 96 |
+
先创建模型目录并进入,然后下载到该目录:
|
| 97 |
|
| 98 |
+
```shell
|
| 99 |
+
mkdir -p AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4
|
| 100 |
+
cd AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4
|
| 101 |
+
hf download AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4 --local-dir .
|
| 102 |
+
|
| 103 |
+
# structure of the downloaded files (return to the starting directory first: cd ../..)
|
| 104 |
+
tree -L 3
|
| 105 |
+
`-- AXERA-TECH
|
| 106 |
+
`-- Qwen3-VL-2B-Instruct-GPTQ-Int4
|
| 107 |
+
|-- Qwen3-VL-2B-Instruct_vision.axmodel
|
| 108 |
+
|-- Qwen3-VL-2B-Instruct_vision_1280x736.axmodel
|
| 109 |
+
|-- Qwen3-VL-2B-Instruct_vision_640x640.axmodel
|
| 110 |
+
|-- Qwen3-VL-2B-Instruct_vision_u8.axmodel
|
| 111 |
+
|-- README.md
|
| 112 |
+
|-- config.json
|
| 113 |
+
|-- image.png
|
| 114 |
+
|-- model.embed_tokens.weight.bfloat16.bin
|
| 115 |
+
|-- post_config.json
|
| 116 |
+
|-- qwen3_tokenizer.txt
|
| 117 |
+
|-- qwen3_vl_text_p128_l0_together.axmodel
|
| 118 |
+
...
|
| 119 |
+
|-- qwen3_vl_text_p128_l9_together.axmodel
|
| 120 |
+
|-- qwen3_vl_text_post.axmodel
|
| 121 |
+
`-- vision_cache
|
| 122 |
+
|
| 123 |
+
3 directories, 39 files
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
```
|
| 125 |
|
| 126 |
+
## Inference with AX650 Host, such as M4N-Dock(爱芯派Pro) or AX650N DEMO Board
|
| 127 |
|
| 128 |
+
### 运行(CLI)
|
| 129 |
|
| 130 |
+
```shell
|
| 131 |
+
root@ax650:~# axllm run AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4/
|
| 132 |
+
[I][ Init][ 138]: LLM init start
|
| 133 |
+
tokenizer_type = 1
|
| 134 |
+
96% | ███████████████████████████████ | 30 / 31 [11.50s<11.88s, 2.61 count/s] init post axmodel ok,remain_cmm(9563 MB)
|
| 135 |
+
[I][ Init][ 199]: max_token_len : 2047
|
| 136 |
+
[I][ Init][ 202]: kv_cache_size : 1024, kv_cache_num: 2047
|
| 137 |
+
[I][ Init][ 205]: prefill_token_num : 128
|
| 138 |
+
[I][ Init][ 209]: grp: 1, prefill_max_kv_cache_num : 1
|
| 139 |
+
[I][ Init][ 209]: grp: 2, prefill_max_kv_cache_num : 128
|
| 140 |
+
[I][ Init][ 209]: grp: 3, prefill_max_kv_cache_num : 256
|
| 141 |
+
[I][ Init][ 209]: grp: 4, prefill_max_kv_cache_num : 384
|
| 142 |
+
[I][ Init][ 209]: grp: 5, prefill_max_kv_cache_num : 512
|
| 143 |
+
[I][ Init][ 209]: grp: 6, prefill_max_kv_cache_num : 640
|
| 144 |
+
[I][ Init][ 209]: grp: 7, prefill_max_kv_cache_num : 768
|
| 145 |
+
[I][ Init][ 209]: grp: 8, prefill_max_kv_cache_num : 896
|
| 146 |
+
[I][ Init][ 209]: grp: 9, prefill_max_kv_cache_num : 1024
|
| 147 |
+
[I][ Init][ 209]: grp: 10, prefill_max_kv_cache_num : 1152
|
| 148 |
+
[I][ Init][ 214]: prefill_max_token_num : 1152
|
| 149 |
+
[I][ Init][ 27]: LLaMaEmbedSelector use mmap
|
| 150 |
+
100% | ████████████████████████████████ | 31 / 31 [11.50s<11.50s, 2.70 count/s] embed_selector init ok
|
| 151 |
+
[W][ Init][ 457]: Qwen-VL vision size override: cfg=448x448 bytes=1204224, model_input_bytes=884736 -> 384x384 (square).
|
| 152 |
+
[I][ Init][ 641]: Qwen-VL token ids: vision_start=151652 image_pad=151655 video_pad=151656
|
| 153 |
+
[I][ Init][ 666]: VisionModule init ok: type=Qwen3VL, tokens_per_block=144, embed_size=2048, out_dtype=fp32
|
| 154 |
+
[I][ Init][ 672]: VisionModule deepstack enabled: layers=3
|
| 155 |
+
[I][ load_config][ 282]: load config:
|
| 156 |
+
{
|
| 157 |
+
"enable_repetition_penalty": false,
|
| 158 |
+
"enable_temperature": false,
|
| 159 |
+
"enable_top_k_sampling": false,
|
| 160 |
+
"enable_top_p_sampling": false,
|
| 161 |
+
"penalty_window": 20,
|
| 162 |
+
"repetition_penalty": 1.2,
|
| 163 |
+
"temperature": 0.9,
|
| 164 |
+
"top_k": 10,
|
| 165 |
+
"top_p": 0.8
|
| 166 |
+
}
|
| 167 |
+
|
| 168 |
+
[I][ Init][ 272]: LLM init ok
|
| 169 |
+
Type "q" to exit
|
| 170 |
+
Ctrl+c to stop current running
|
| 171 |
+
"reset" to reset kvcache
|
| 172 |
+
"dd" to remove last conversation.
|
| 173 |
+
"pp" to print history.
|
| 174 |
+
VLM enabled: after each prompt, input image path (empty = text-only). Use "video:<frames_dir>" for video.
|
| 175 |
+
----------------------------------------
|
| 176 |
+
prompt >> who are you
|
| 177 |
+
image >>
|
| 178 |
+
[I][ SetKVCache][ 406]: prefill_grpid:2 kv_cache_num:128 precompute_len:0 input_num_token:22
|
| 179 |
+
[I][ SetKVCache][ 408]: current prefill_max_token_num:1152
|
| 180 |
+
[I][ SetKVCache][ 409]: first run
|
| 181 |
+
[I][ Run][ 457]: input token num : 22, prefill_split_num : 1
|
| 182 |
+
[I][ Run][ 497]: prefill chunk p=0 history_len=0 grpid=1 kv_cache_num=0 input_tokens=22
|
| 183 |
+
[I][ Run][ 519]: prefill indices shape: p=0 idx_elems=384 idx_rows=3 pos_rows=0
|
| 184 |
+
[I][ Run][ 627]: ttft: 174.42 ms
|
| 185 |
+
I am Qwen, a large-scale language model developed by the Tongyi Lab of Alibaba Group. I can answer questions, write stories, create essays, and more. I am designed to be helpful, harmless, and honest. I hope to assist you in any way I can!
|
| 186 |
+
|
| 187 |
+
[N][ Run][ 709]: hit eos,avg 10.48 token/s
|
| 188 |
+
|
| 189 |
+
[I][ GetKVCache][ 380]: precompute_len:79, remaining:1073
|
| 190 |
+
prompt >> describe the image
|
| 191 |
+
image >> ./AXERA-TECH/Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/image.png
|
| 192 |
+
[I][ EncodeForContent][ 971]: Qwen-VL pixel_values[0] bytes=884736 min=0 max=241 (w=384 h=384 tp=2 ps=16 sm=2)
|
| 193 |
+
[I][ EncodeForContent][ 994]: vision cache store: ./AXERA-TECH/Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/image.png
|
| 194 |
+
[I][ SetKVCache][ 406]: prefill_grpid:3 kv_cache_num:256 precompute_len:79 input_num_token:159
|
| 195 |
+
[I][ SetKVCache][ 408]: current prefill_max_token_num:1024
|
| 196 |
+
[I][ Run][ 457]: input token num : 159, prefill_split_num : 2
|
| 197 |
+
[I][ Run][ 497]: prefill chunk p=0 history_len=79 grpid=2 kv_cache_num=128 input_tokens=128
|
| 198 |
+
[I][ Run][ 519]: prefill indices shape: p=0 idx_elems=384 idx_rows=3 pos_rows=3
|
| 199 |
+
[I][ Run][ 497]: prefill chunk p=1 history_len=207 grpid=3 kv_cache_num=256 input_tokens=31
|
| 200 |
+
[I][ Run][ 519]: prefill indices shape: p=1 idx_elems=384 idx_rows=3 pos_rows=3
|
| 201 |
+
[I][ Run][ 627]: ttft: 379.97 ms
|
| 202 |
+
This image depicts three astronauts in white space suits standing in a dense, leafy forest. The scene is set in a dark, shadowy environment, with the astronauts appearing to be in a natural, possibly alien, environment. The image has a monochromatic, almost grayscale color scheme, giving it a mysterious and somber atmosphere. The astronauts are positioned in the center of the frame, with one standing upright and the other two slightly bent, as if they are exploring or searching for something in the dense foliage. The overall mood of the image is mysterious and contemplative.
|
| 203 |
+
|
| 204 |
+
[N][ Run][ 709]: hit eos,avg 10.33 token/s
|
| 205 |
+
|
| 206 |
+
[I][ GetKVCache][ 380]: precompute_len:239, remaining:913
|
| 207 |
+
prompt >> how many people in the image?
|
| 208 |
+
image >>
|
| 209 |
+
[I][ EncodeForContent][ 926]: vision cache hit (mem): ./AXERA-TECH/Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/image.png
|
| 210 |
+
[I][ SetKVCache][ 406]: prefill_grpid:4 kv_cache_num:384 precompute_len:239 input_num_token:74
|
| 211 |
+
[I][ SetKVCache][ 408]: current prefill_max_token_num:896
|
| 212 |
+
[I][ Run][ 457]: input token num : 74, prefill_split_num : 1
|
| 213 |
+
[I][ Run][ 497]: prefill chunk p=0 history_len=239 grpid=3 kv_cache_num=256 input_tokens=74
|
| 214 |
+
[I][ Run][ 519]: prefill indices shape: p=0 idx_elems=384 idx_rows=3 pos_rows=3
|
| 215 |
+
[I][ Run][ 627]: ttft: 193.78 ms
|
| 216 |
+
This image depicts three astronauts in white space suits standing in a dense, leafy forest. The scene is set in a dark, shadowy environment, with the astronauts appearing to be in a natural, possibly alien, environment. The image has a monochromatic, almost grayscale color scheme, giving it a mysterious and somber atmosphere. The astronauts are positioned in the center of the frame, with one standing upright and the other two slightly bent, as if they are exploring or searching for something in the dense foliage. The overall mood of the image is mysterious and contemplative.
|
| 217 |
+
|
| 218 |
+
[N][ Run][ 709]: hit eos,avg 10.48 token/s
|
| 219 |
+
|
| 220 |
+
[I][ GetKVCache][ 380]: precompute_len:410, remaining:742
|
| 221 |
+
prompt >> q
|
| 222 |
```
|
| 223 |
+
|
| 224 |
+
### 启动服务(OpenAI 兼容)
|
| 225 |
+
|
| 226 |
+
```shell
|
| 227 |
+
root@ax650:~# axllm serve AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4
|
| 228 |
+
[I][ Init][ 138]: LLM init start
|
| 229 |
+
tokenizer_type = 1
|
| 230 |
+
96% | ███████████████████████████████ | 30 / 31 [4.63s<4.79s, 6.47 count/s] init post axmodel ok,remain_cmm(9563 MB)
|
| 231 |
+
[I][ Init][ 199]: max_token_len : 2047
|
| 232 |
+
[I][ Init][ 202]: kv_cache_size : 1024, kv_cache_num: 2047
|
| 233 |
+
[I][ Init][ 205]: prefill_token_num : 128
|
| 234 |
+
[I][ Init][ 209]: grp: 1, prefill_max_kv_cache_num : 1
|
| 235 |
+
[I][ Init][ 209]: grp: 2, prefill_max_kv_cache_num : 128
|
| 236 |
+
[I][ Init][ 209]: grp: 3, prefill_max_kv_cache_num : 256
|
| 237 |
+
[I][ Init][ 209]: grp: 4, prefill_max_kv_cache_num : 384
|
| 238 |
+
[I][ Init][ 209]: grp: 5, prefill_max_kv_cache_num : 512
|
| 239 |
+
[I][ Init][ 209]: grp: 6, prefill_max_kv_cache_num : 640
|
| 240 |
+
[I][ Init][ 209]: grp: 7, prefill_max_kv_cache_num : 768
|
| 241 |
+
[I][ Init][ 209]: grp: 8, prefill_max_kv_cache_num : 896
|
| 242 |
+
[I][ Init][ 209]: grp: 9, prefill_max_kv_cache_num : 1024
|
| 243 |
+
[I][ Init][ 209]: grp: 10, prefill_max_kv_cache_num : 1152
|
| 244 |
+
[I][ Init][ 214]: prefill_max_token_num : 1152
|
| 245 |
+
[I][ Init][ 27]: LLaMaEmbedSelector use mmap
|
| 246 |
+
100% | ████████████████████████████████ | 31 / 31 [4.64s<4.64s, 6.69 count/s] embed_selector init ok
|
| 247 |
+
[W][ Init][ 457]: Qwen-VL vision size override: cfg=448x448 bytes=1204224, model_input_bytes=884736 -> 384x384 (square).
|
| 248 |
+
[I][ Init][ 641]: Qwen-VL token ids: vision_start=151652 image_pad=151655 video_pad=151656
|
| 249 |
+
[I][ Init][ 666]: VisionModule init ok: type=Qwen3VL, tokens_per_block=144, embed_size=2048, out_dtype=fp32
|
| 250 |
+
[I][ Init][ 672]: VisionModule deepstack enabled: layers=3
|
| 251 |
+
[I][ load_config][ 282]: load config:
|
| 252 |
+
{
|
| 253 |
+
"enable_repetition_penalty": false,
|
| 254 |
+
"enable_temperature": false,
|
| 255 |
+
"enable_top_k_sampling": false,
|
| 256 |
+
"enable_top_p_sampling": false,
|
| 257 |
+
"penalty_window": 20,
|
| 258 |
+
"repetition_penalty": 1.2,
|
| 259 |
+
"temperature": 0.9,
|
| 260 |
+
"top_k": 10,
|
| 261 |
+
"top_p": 0.8
|
| 262 |
+
}
|
| 263 |
+
|
| 264 |
+
[I][ Init][ 272]: LLM init ok
|
| 265 |
+
Starting server on port 8000 with model 'AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4'...
|
| 266 |
+
OpenAI API Server starting on http://0.0.0.0:8000
|
| 267 |
+
Max concurrency: 1
|
| 268 |
+
Models: AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4
|
| 269 |
```
|
| 270 |
|
| 271 |
+
### OpenAI 调用示例
|
| 272 |
|
| 273 |
+
```python
|
| 274 |
+
from openai import OpenAI
|
| 275 |
|
| 276 |
+
API_URL = "http://127.0.0.1:8000/v1"
|
| 277 |
+
MODEL = "AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 278 |
|
| 279 |
+
messages = [
|
| 280 |
+
{"role": "system", "content": [{"type": "text", "text": "you are a helpful assistant."}]},
|
| 281 |
+
{"role": "user", "content": "hello"},
|
| 282 |
+
]
|
| 283 |
+
|
| 284 |
+
client = OpenAI(api_key="not-needed", base_url=API_URL)
|
| 285 |
+
completion = client.chat.completions.create(
|
| 286 |
+
model=MODEL,
|
| 287 |
+
messages=messages,
|
| 288 |
+
)
|
| 289 |
+
|
| 290 |
+
print(completion.choices[0].message.content)
|
| 291 |
```
|
| 292 |
|
|
|
|
| 293 |
|
| 294 |
+
### OpenAI 流式调用示例
|
| 295 |
|
| 296 |
+
```python
|
| 297 |
+
from openai import OpenAI
|
|
|
|
| 298 |
|
| 299 |
+
API_URL = "http://127.0.0.1:8000/v1"
|
| 300 |
+
MODEL = "AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4"
|
| 301 |
|
| 302 |
+
messages = [
|
| 303 |
+
{"role": "system", "content": [{"type": "text", "text": "you are a helpful assistant."}]},
|
| 304 |
+
{"role": "user", "content": "hello"},
|
| 305 |
+
]
|
|
|
|
|
|
|
|
|
|
|
|
|
| 306 |
|
| 307 |
+
client = OpenAI(api_key="not-needed", base_url=API_URL)
|
| 308 |
+
stream = client.chat.completions.create(
|
| 309 |
+
model=MODEL,
|
| 310 |
+
messages=messages,
|
| 311 |
+
stream=True,
|
| 312 |
+
)
|
| 313 |
|
| 314 |
+
print("assistant:")
|
| 315 |
+
for ev in stream:
|
| 316 |
+
delta = getattr(ev.choices[0], "delta", None)
|
| 317 |
+
if delta and getattr(delta, "content", None):
|
| 318 |
+
print(delta.content, end="", flush=True)
|
| 319 |
+
print("\n")
|
| 320 |
+
|
| 321 |
```
|
| 322 |
|
|
|
|
|
|
config.json
CHANGED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"system_prompt": "you are a helpful assistant.",
|
| 3 |
+
"model_name": "AXERA-TECH/Qwen3-VL-2B-Instruct",
|
| 4 |
+
"url_tokenizer_model": "qwen3_tokenizer.txt",
|
| 5 |
+
"tokenizer_type": "Qwen3VL",
|
| 6 |
+
"post_config_path": "post_config.json",
|
| 7 |
+
"template_filename_axmodel": "qwen3_vl_text_p128_l%d_together.axmodel",
|
| 8 |
+
"axmodel_num": 28,
|
| 9 |
+
"filename_post_axmodel": "qwen3_vl_text_post.axmodel",
|
| 10 |
+
"filename_tokens_embed": "model.embed_tokens.weight.bfloat16.bin",
|
| 11 |
+
"tokens_embed_num": 151936,
|
| 12 |
+
"tokens_embed_size": 2048,
|
| 13 |
+
"use_mmap_load_embed": true,
|
| 14 |
+
"vlm_type": "Qwen3VL",
|
| 15 |
+
"filename_image_encoder_axmodel": "Qwen3-VL-2B-Instruct_vision.axmodel",
|
| 16 |
+
"vision_patch_size": 16,
|
| 17 |
+
"vision_temporal_patch_size": 2,
|
| 18 |
+
"vision_spatial_merge_size": 2,
|
| 19 |
+
"vision_fps": 1,
|
| 20 |
+
"vision_tokens_per_second": 1,
|
| 21 |
+
"vision_cache_dir": "vision_cache",
|
| 22 |
+
"use_mmap_load_layer": true,
|
| 23 |
+
"devices": [
|
| 24 |
+
0,
|
| 25 |
+
1
|
| 26 |
+
]
|
| 27 |
+
}
|
gradio_demo.py
DELETED
|
@@ -1,262 +0,0 @@
|
|
| 1 |
-
# gradio_chat_single_turn.py
|
| 2 |
-
import re
|
| 3 |
-
import subprocess
|
| 4 |
-
import gradio as gr
|
| 5 |
-
import base64, cv2, os, tempfile
|
| 6 |
-
from openai import OpenAI
|
| 7 |
-
import requests
|
| 8 |
-
|
| 9 |
-
def get_all_local_ips():
|
| 10 |
-
result = subprocess.run(['ip', 'a'], capture_output=True, text=True)
|
| 11 |
-
output = result.stdout
|
| 12 |
-
|
| 13 |
-
# 匹配所有IPv4
|
| 14 |
-
ips = re.findall(r'inet (\d+\.\d+\.\d+\.\d+)', output)
|
| 15 |
-
|
| 16 |
-
# 过滤掉回环地址
|
| 17 |
-
real_ips = [ip for ip in ips if not ip.startswith('127.')]
|
| 18 |
-
|
| 19 |
-
return real_ips
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
# ---------- Helpers ----------
|
| 24 |
-
def img_to_data_url_from_cvframe(frame):
|
| 25 |
-
import base64, cv2
|
| 26 |
-
ok, buf = cv2.imencode(".jpg", frame, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
|
| 27 |
-
b64 = base64.b64encode(buf).decode("ascii")
|
| 28 |
-
return f"data:image/jpeg;base64,{b64}"
|
| 29 |
-
|
| 30 |
-
def img_to_data_url_from_path(img_path: str) -> str:
|
| 31 |
-
import cv2, base64
|
| 32 |
-
img = cv2.imread(img_path)
|
| 33 |
-
return img_to_data_url_from_cvframe(img)
|
| 34 |
-
|
| 35 |
-
def video_to_data_urls(video_path: str, frame_stride: int = 30, max_frames: int = 8):
|
| 36 |
-
import cv2, base64
|
| 37 |
-
cap = cv2.VideoCapture(video_path)
|
| 38 |
-
total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
|
| 39 |
-
|
| 40 |
-
if total / frame_stride > max_frames:
|
| 41 |
-
frame_stride = int(total/max_frames)
|
| 42 |
-
|
| 43 |
-
urls = []
|
| 44 |
-
idx = 0
|
| 45 |
-
first_preview = None
|
| 46 |
-
while len(urls) < max_frames and idx < total:
|
| 47 |
-
cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
|
| 48 |
-
ret, frame = cap.read()
|
| 49 |
-
if not ret:
|
| 50 |
-
break
|
| 51 |
-
ok, buf = cv2.imencode(".jpg", frame, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
|
| 52 |
-
if not ok:
|
| 53 |
-
break
|
| 54 |
-
b64 = base64.b64encode(buf).decode("ascii")
|
| 55 |
-
data_url = f"data:image/jpeg;base64,{b64}"
|
| 56 |
-
urls.append(data_url)
|
| 57 |
-
if first_preview is None:
|
| 58 |
-
first_preview = data_url
|
| 59 |
-
idx += frame_stride
|
| 60 |
-
cap.release()
|
| 61 |
-
return urls, first_preview
|
| 62 |
-
|
| 63 |
-
def save_preview_image_from_data_url(data_url: str) -> str:
|
| 64 |
-
# 仅用于在 Chatbot 里显示缩略图
|
| 65 |
-
comma = data_url.find(",")
|
| 66 |
-
if comma == -1:
|
| 67 |
-
return ""
|
| 68 |
-
b64 = data_url[comma+1:]
|
| 69 |
-
raw = base64.b64decode(b64)
|
| 70 |
-
fd, tmp_path = tempfile.mkstemp(suffix=".jpg", prefix="preview_")
|
| 71 |
-
os.close(fd)
|
| 72 |
-
with open(tmp_path, "wb") as f:
|
| 73 |
-
f.write(raw)
|
| 74 |
-
return tmp_path
|
| 75 |
-
|
| 76 |
-
def build_messages(prompt: str, image_path: str | None, video_path: str | None,
|
| 77 |
-
prefer_video: bool, frame_stride: int, max_frames: int):
|
| 78 |
-
content = []
|
| 79 |
-
if prompt and prompt.strip():
|
| 80 |
-
content.append({"type": "text", "text": prompt.strip()})
|
| 81 |
-
|
| 82 |
-
if video_path and os.path.exists(video_path) and prefer_video:
|
| 83 |
-
urls, first_preview = video_to_data_urls(video_path, frame_stride=frame_stride, max_frames=max_frames)
|
| 84 |
-
content.append({"type": "image_url", "is_video":True, "image_url": urls})
|
| 85 |
-
media_desc = f"(视频抽帧:{len(urls)} 帧,步长 {frame_stride})"
|
| 86 |
-
return {"role": "user", "content": content}, first_preview, media_desc
|
| 87 |
-
|
| 88 |
-
if image_path and os.path.exists(image_path):
|
| 89 |
-
u = img_to_data_url_from_path(image_path)
|
| 90 |
-
content.append({"type": "image_url", "image_url": u})
|
| 91 |
-
media_desc = "(已附带图片)"
|
| 92 |
-
return {"role": "user", "content": content}, u, media_desc
|
| 93 |
-
|
| 94 |
-
if video_path and os.path.exists(video_path):
|
| 95 |
-
urls, first_preview = video_to_data_urls(video_path, frame_stride=frame_stride, max_frames=max_frames)
|
| 96 |
-
content.append({"type": "image_url", "is_video":True, "image_url": urls})
|
| 97 |
-
media_desc = f"(视频抽帧:{len(urls)} 帧,步长 {frame_stride})"
|
| 98 |
-
return {"role": "user", "content": content}, first_preview, media_desc
|
| 99 |
-
|
| 100 |
-
return {"role": "user", "content": content if content else [{"type": "text", "text": prompt or ""}]}, None, ""
|
| 101 |
-
|
| 102 |
-
# ---------- Gradio callback (single-turn, stream) ----------
|
| 103 |
-
def run_single_turn(prompt, image_file, video_file, prefer_video, frame_stride, max_frames,
|
| 104 |
-
base_url, model, api_key, chatbot_state):
|
| 105 |
-
"""
|
| 106 |
-
单轮:每次发送都会重置聊天历史,只显示本轮的 user/assistant 两个气泡。
|
| 107 |
-
"""
|
| 108 |
-
try:
|
| 109 |
-
# 清空历史(单轮),构造用户气泡
|
| 110 |
-
chatbot_state = []
|
| 111 |
-
|
| 112 |
-
# 准备文件路径
|
| 113 |
-
image_path = image_file if isinstance(image_file, str) else (image_file.name if image_file else None)
|
| 114 |
-
video_path = video_file if isinstance(video_file, str) else (video_file.name if video_file else None)
|
| 115 |
-
|
| 116 |
-
# 构造 messages 和预览
|
| 117 |
-
messages, preview_data_url, media_desc = build_messages(
|
| 118 |
-
prompt=prompt or "",
|
| 119 |
-
image_path=image_path,
|
| 120 |
-
video_path=video_path,
|
| 121 |
-
prefer_video=bool(prefer_video),
|
| 122 |
-
frame_stride=int(frame_stride),
|
| 123 |
-
max_frames=int(max_frames),
|
| 124 |
-
)
|
| 125 |
-
|
| 126 |
-
# 组装用户气泡(Markdown):文本 + 预览图/视频说明
|
| 127 |
-
user_md = (prompt or "").strip()
|
| 128 |
-
if media_desc:
|
| 129 |
-
user_md = (user_md + "\n\n" if user_md else "") + f"> {media_desc}"
|
| 130 |
-
if preview_data_url:
|
| 131 |
-
# user_md = (user_md + "\n\n" if user_md else "") + f""
|
| 132 |
-
user_md = (user_md + "\n\n" if user_md else "") + f""
|
| 133 |
-
|
| 134 |
-
chatbot_state.append((user_md or "(空提示)", "")) # assistant 先空字符串,等待流式填充
|
| 135 |
-
yield chatbot_state # 先把用户气泡渲染出来
|
| 136 |
-
|
| 137 |
-
# 调后端(流式)
|
| 138 |
-
client = OpenAI(api_key=api_key or "not-needed", base_url=base_url.strip())
|
| 139 |
-
stream = client.chat.completions.create(
|
| 140 |
-
model=model.strip(),
|
| 141 |
-
messages=messages,
|
| 142 |
-
stream=True,
|
| 143 |
-
)
|
| 144 |
-
|
| 145 |
-
bot_chunks = []
|
| 146 |
-
# 先补一个空 assistant 气泡
|
| 147 |
-
if len(chatbot_state) == 1:
|
| 148 |
-
chatbot_state[0] = (chatbot_state[0][0], "")
|
| 149 |
-
yield chatbot_state
|
| 150 |
-
|
| 151 |
-
# 逐 chunk 更新 assistant 气泡(Markdown)
|
| 152 |
-
for ev in stream:
|
| 153 |
-
delta = getattr(ev.choices[0], "delta", None)
|
| 154 |
-
if delta and getattr(delta, "content", None):
|
| 155 |
-
bot_chunks.append(delta.content)
|
| 156 |
-
chatbot_state[-1] = (chatbot_state[-1][0], "".join(bot_chunks))
|
| 157 |
-
yield chatbot_state
|
| 158 |
-
|
| 159 |
-
# 结束再确保收尾
|
| 160 |
-
chatbot_state[-1] = (chatbot_state[-1][0], "".join(bot_chunks) if bot_chunks else "(empty response)")
|
| 161 |
-
yield chatbot_state
|
| 162 |
-
|
| 163 |
-
except Exception as e:
|
| 164 |
-
chatbot_state.append((
|
| 165 |
-
chatbot_state[-1][0] if chatbot_state else "(request)",
|
| 166 |
-
f"**Error:** {e}"
|
| 167 |
-
))
|
| 168 |
-
yield chatbot_state
|
| 169 |
-
|
| 170 |
-
# ---------- Gradio UI ----------
|
| 171 |
-
with gr.Blocks(css="""
|
| 172 |
-
#chat,
|
| 173 |
-
#chat * {
|
| 174 |
-
font-size: 18px !important;
|
| 175 |
-
line-height: 1.6 !important;
|
| 176 |
-
}
|
| 177 |
-
|
| 178 |
-
#chat .message,
|
| 179 |
-
#chat [data-testid="bot"],
|
| 180 |
-
#chat [data-testid="user"] {
|
| 181 |
-
font-size: 18px !important;
|
| 182 |
-
}
|
| 183 |
-
""",title="AXERA Qwen3 VL") as demo:
|
| 184 |
-
axera_logo = img_to_data_url_from_path("./axera_logo.png")
|
| 185 |
-
gr.Markdown(
|
| 186 |
-
f"""
|
| 187 |
-
<div style="display: flex; align-items: center; gap: 10px;">
|
| 188 |
-
<img src="{axera_logo}" alt="axera_logo" style="height: 60px;">
|
| 189 |
-
</div>
|
| 190 |
-
"""
|
| 191 |
-
)
|
| 192 |
-
|
| 193 |
-
chatbot = gr.Chatbot(
|
| 194 |
-
label="对话",
|
| 195 |
-
bubble_full_width=False,
|
| 196 |
-
height=500,
|
| 197 |
-
avatar_images=(None, None), # 可替换头像
|
| 198 |
-
latex_delimiters=[{"left": "$$", "right": "$$", "display": True},
|
| 199 |
-
{"left": "$", "right": "$", "display": False}],
|
| 200 |
-
show_copy_button=True,
|
| 201 |
-
render_markdown=True,
|
| 202 |
-
elem_id="chat"
|
| 203 |
-
)
|
| 204 |
-
|
| 205 |
-
with gr.Row():
|
| 206 |
-
with gr.Column(scale=2):
|
| 207 |
-
prompt = gr.Textbox(label="Prompt", placeholder="输入你的提示语", lines=2)
|
| 208 |
-
with gr.Row():
|
| 209 |
-
send_btn = gr.Button("发送 ▶️", variant="primary")
|
| 210 |
-
clear_btn = gr.Button("清空")
|
| 211 |
-
stop_btn = gr.Button("停止 ■", variant="stop")
|
| 212 |
-
with gr.Row():
|
| 213 |
-
image = gr.Image(type="filepath", label="上传图片(可选)")
|
| 214 |
-
video = gr.Video(label="上传视频(可选)")
|
| 215 |
-
|
| 216 |
-
with gr.Column(scale=1):
|
| 217 |
-
base_url = gr.Textbox(value="http://localhost:8000/v1", label="Base URL")
|
| 218 |
-
model = gr.Textbox(value="AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4", label="Model")
|
| 219 |
-
api_key = gr.Textbox(value="not-needed", label="API Key", type="password")
|
| 220 |
-
with gr.Row():
|
| 221 |
-
prefer_video = gr.Checkbox(True, label="如果有视频,优先使用视频抽帧")
|
| 222 |
-
frame_stride = gr.Slider(1, 90, value=30, step=1, label="视频抽帧间隔")
|
| 223 |
-
max_frames = gr.Slider(1, 8, value=8, step=1, label="最多抽帧数")
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
# 单轮对话需要一个 state 来承载当前这轮的气泡
|
| 227 |
-
state = gr.State([])
|
| 228 |
-
|
| 229 |
-
send_btn.click(
|
| 230 |
-
fn=run_single_turn,
|
| 231 |
-
inputs=[prompt, image, video, prefer_video, frame_stride, max_frames, base_url, model, api_key, state],
|
| 232 |
-
outputs=chatbot,
|
| 233 |
-
show_progress=True,
|
| 234 |
-
queue=True,
|
| 235 |
-
)
|
| 236 |
-
|
| 237 |
-
def stop_stream(base_url):
|
| 238 |
-
url = f"{base_url.strip()}/stop"
|
| 239 |
-
response = requests.get(url)
|
| 240 |
-
if response.status_code == 200:
|
| 241 |
-
print("Stream stopped successfully")
|
| 242 |
-
else:
|
| 243 |
-
print(f"Failed to stop stream: {response.status_code} - {response.text}")
|
| 244 |
-
|
| 245 |
-
stop_btn.click(
|
| 246 |
-
fn=stop_stream,
|
| 247 |
-
inputs=[base_url],
|
| 248 |
-
outputs=chatbot,
|
| 249 |
-
show_progress=True,
|
| 250 |
-
queue=True,
|
| 251 |
-
)
|
| 252 |
-
|
| 253 |
-
def clear_all():
|
| 254 |
-
return [], "", None, None, True, 30, 8
|
| 255 |
-
clear_btn.click(clear_all, None, [chatbot, prompt, image, video, prefer_video, frame_stride, max_frames])
|
| 256 |
-
|
| 257 |
-
if __name__ == "__main__":
|
| 258 |
-
ips = get_all_local_ips()
|
| 259 |
-
for ip in ips:
|
| 260 |
-
print(f"* Running on local URL: http://{ip}:7860")
|
| 261 |
-
ip = "0.0.0.0"
|
| 262 |
-
demo.launch(server_name=ip, server_port=7860)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
axera_logo.png → image.png
RENAMED
|
File without changes
|
images/demo.jpg
DELETED
Git LFS Details
|
images/demo1.jpg
DELETED
Git LFS Details
|
images/recoAll_attractions_1.jpg
DELETED
Git LFS Details
|
images/recoAll_attractions_2.jpg
DELETED
Git LFS Details
|
images/recoAll_attractions_3.jpg
DELETED
Git LFS Details
|
images/recoAll_attractions_4.jpg
DELETED
Git LFS Details
|
images/ssd_car.jpg
DELETED
Git LFS Details
|
images/ssd_horse.jpg
DELETED
Git LFS Details
|
main_ax650
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:bd12cddc400cd3ffb78af4a4512211af28c33f98993b9c7447aab8d8f29d7893
|
| 3 |
-
size 6821432
|
|
|
|
|
|
|
|
|
|
|
|
main_ax650_api
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:390236f0fef17d46c1bdf0b26f831335fe0e5ede1c10814c1462fdd360b1b984
|
| 3 |
-
size 6935688
|
|
|
|
|
|
|
|
|
|
|
|
main_axcl_aarch64
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:a0ded679af8f4fb115b04977d4bc4ecc63783f98d3b239cd3a73de19a6cd19ed
|
| 3 |
-
size 1952752
|
|
|
|
|
|
|
|
|
|
|
|
main_axcl_api_aarch64
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:c90d9dfae62b17ef4681f103c62b483e96a862e900a364673e57bc91d078c63d
|
| 3 |
-
size 2105232
|
|
|
|
|
|
|
|
|
|
|
|
main_axcl_api_x86
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:67be73d1a6a4c17ee6b73222d3c5988fa10d2dbcf71515f6dad090a561dcc252
|
| 3 |
-
size 2202296
|
|
|
|
|
|
|
|
|
|
|
|
main_axcl_x86
DELETED
|
@@ -1,3 +0,0 @@
|
|
| 1 |
-
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:1113a46767e5cc6c0a53172c5973848a40c65f379a428b3efc64a9fb6f6fb212
|
| 3 |
-
size 2062240
|
|
|
|
|
|
|
|
|
|
|
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/model.embed_tokens.weight.bfloat16.bin → model.embed_tokens.weight.bfloat16.bin
RENAMED
|
File without changes
|
openai_cli.py
DELETED
|
@@ -1,66 +0,0 @@
|
|
| 1 |
-
import base64
|
| 2 |
-
import glob
|
| 3 |
-
from openai import OpenAI
|
| 4 |
-
import cv2
|
| 5 |
-
|
| 6 |
-
BASE_URL = "http://localhost:8000/v1"
|
| 7 |
-
|
| 8 |
-
def img_to_data_url(img_path: str):
|
| 9 |
-
img = cv2.imread(img_path)
|
| 10 |
-
if img is None:
|
| 11 |
-
raise FileNotFoundError(f"Cannot read image: {img_path}")
|
| 12 |
-
ok, buf = cv2.imencode(".jpg", img)
|
| 13 |
-
if not ok:
|
| 14 |
-
raise RuntimeError("cv2.imencode failed")
|
| 15 |
-
b64 = base64.b64encode(buf).decode("ascii")
|
| 16 |
-
return f"data:image/jpeg;base64,{b64}"
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
def test(openai_messages):
|
| 20 |
-
client = OpenAI(api_key="not-needed", base_url=BASE_URL)
|
| 21 |
-
|
| 22 |
-
stream = client.chat.completions.create(
|
| 23 |
-
model="AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4",
|
| 24 |
-
messages=openai_messages,
|
| 25 |
-
stream=True,
|
| 26 |
-
)
|
| 27 |
-
out_chunks = []
|
| 28 |
-
for ev in stream:
|
| 29 |
-
delta = ev.choices[0].delta
|
| 30 |
-
if delta and delta.content:
|
| 31 |
-
out_chunks.append(delta.content)
|
| 32 |
-
print(delta.content, end="", flush=True)
|
| 33 |
-
print()
|
| 34 |
-
assistant_text = "".join(out_chunks).strip()
|
| 35 |
-
|
| 36 |
-
def test_image():
|
| 37 |
-
image_data = img_to_data_url("../demo_cv308/frame_0075.jpg")
|
| 38 |
-
|
| 39 |
-
openai_messages = {
|
| 40 |
-
"role": "user",
|
| 41 |
-
"content": [
|
| 42 |
-
{"type": "text", "text": "描述一下这张图片"},
|
| 43 |
-
{"type": "image_url", "image_url": image_data},
|
| 44 |
-
],
|
| 45 |
-
}
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
test(openai_messages)
|
| 49 |
-
|
| 50 |
-
def test_video():
|
| 51 |
-
image_list = glob.glob("../demo_cv308/*.jpg")
|
| 52 |
-
image_list.sort()
|
| 53 |
-
|
| 54 |
-
image_data_list = [img_to_data_url(img) for img in image_list]
|
| 55 |
-
|
| 56 |
-
openai_messages = {
|
| 57 |
-
"role": "user",
|
| 58 |
-
"content": [
|
| 59 |
-
{"type": "text", "text": "描述一下这个视频"},
|
| 60 |
-
{"type": "image_url", "is_video":True, "image_url": image_data_list},
|
| 61 |
-
],
|
| 62 |
-
}
|
| 63 |
-
|
| 64 |
-
test(openai_messages)
|
| 65 |
-
|
| 66 |
-
test_video()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
post_config.json
CHANGED
|
@@ -1,14 +1,14 @@
|
|
| 1 |
{
|
| 2 |
-
"enable_temperature" :
|
| 3 |
-
"temperature" : 0.
|
| 4 |
|
| 5 |
"enable_repetition_penalty" : false,
|
| 6 |
-
"repetition_penalty" : 1,
|
| 7 |
-
"penalty_window" :
|
| 8 |
|
| 9 |
"enable_top_p_sampling" : false,
|
| 10 |
"top_p" : 0.8,
|
| 11 |
|
| 12 |
-
"enable_top_k_sampling" :
|
| 13 |
-
"top_k" :
|
| 14 |
}
|
|
|
|
| 1 |
{
|
| 2 |
+
"enable_temperature" : false,
|
| 3 |
+
"temperature" : 0.9,
|
| 4 |
|
| 5 |
"enable_repetition_penalty" : false,
|
| 6 |
+
"repetition_penalty" : 1.2,
|
| 7 |
+
"penalty_window" : 20,
|
| 8 |
|
| 9 |
"enable_top_p_sampling" : false,
|
| 10 |
"top_p" : 0.8,
|
| 11 |
|
| 12 |
+
"enable_top_k_sampling" : false,
|
| 13 |
+
"top_k" : 10
|
| 14 |
}
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l0_together.axmodel → qwen3_vl_text_p128_l0_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b42363b13c67753e20e80a90d51dacc6f04280a3da992b601ab09547f308c3f5
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l10_together.axmodel → qwen3_vl_text_p128_l10_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f0133beae7b5d7b109557df15c6f5d6a738eded2ae1feef1f94c983699f74623
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l11_together.axmodel → qwen3_vl_text_p128_l11_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2faf54778f2e20ea363e189f0a0a9f9acaa1337cf30109092ffb48418c485072
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l12_together.axmodel → qwen3_vl_text_p128_l12_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:073dab1c7c2c91f2a0fd23cc3b1553df6c6f35f1841c1358265f56be5659c53f
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l13_together.axmodel → qwen3_vl_text_p128_l13_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7b76578926bb45565be1bdc55f36fc074528e2835185195c69a3099f088e78c1
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l14_together.axmodel → qwen3_vl_text_p128_l14_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a12b5b73debb37c8c2ad8a2ea16b39bfc7b3000d0323be8e8c9cb89fb214ba54
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l15_together.axmodel → qwen3_vl_text_p128_l15_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20c7ff4026d78f6e6dd7f3075283aeba63d5ea81aca1eedfa0f7210fe20f9e9b
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l16_together.axmodel → qwen3_vl_text_p128_l16_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:93519605f2bbb6c2e9558cacfc4655e6c2d93ae33c50f1f7e0d4d41b448a1649
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l17_together.axmodel → qwen3_vl_text_p128_l17_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4dbe9d98f015d52fa8e3083d666ce4438a7ed3160e9f1eac3fde94bc7fcfa30e
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l18_together.axmodel → qwen3_vl_text_p128_l18_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:05c421f9457cb19cf3e4bc7c8b503f5f6e50358a664d1865a78640dd1fad8835
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l19_together.axmodel → qwen3_vl_text_p128_l19_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e1d98fa8ac199f64471f7535ab64de18ad99bf4cdfdc08e79d83635cda7cebb
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l1_together.axmodel → qwen3_vl_text_p128_l1_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:07823cd8b49a8b33bc006ae750782e317580494575bc317babd7abff55441eba
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l20_together.axmodel → qwen3_vl_text_p128_l20_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:62512c13b3509d6d77595616fe64804927d15e3738dd768ac6e88581c6235146
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l21_together.axmodel → qwen3_vl_text_p128_l21_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:fb01c185abe5eadf91f07829db2cd05121b8b05624bf9bdb145a770e2e55620a
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l22_together.axmodel → qwen3_vl_text_p128_l22_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84a8454d97e489645b33144f88cdd56ffbad8362f1300f72d6f1b7f3cf3d75b2
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l23_together.axmodel → qwen3_vl_text_p128_l23_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9ce9e3219f96c8f7615fdb799141d1045620ea69fb77489248200c6bb3b2a2c6
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l24_together.axmodel → qwen3_vl_text_p128_l24_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0e47988815b6b3b330584a934bb2cd889da3ed5a57ae0e75ed34dba31d676471
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l25_together.axmodel → qwen3_vl_text_p128_l25_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d0d03c2cb43adb58329ce66d45f8abe579d55c6dc0c551f67ef2e6821520bd1a
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l26_together.axmodel → qwen3_vl_text_p128_l26_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cccaa3a03ba1a2902834ef825f06ca2b1a5f0326a5e0544b98fba4454f9c8b1b
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l27_together.axmodel → qwen3_vl_text_p128_l27_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a12ad8c0cf98016faf93cd5d4fdf99d0c563d2a922b1f1d78baae56387ccf010
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l2_together.axmodel → qwen3_vl_text_p128_l2_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:84b296fb9b14d24aa740c2fbfa54b2bd3ae7973a161ac9cb24c45b2372143892
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l3_together.axmodel → qwen3_vl_text_p128_l3_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5033f1fe491462b8f44f5e117c261955b63535e3272b0cd784a59c97d073319
|
| 3 |
+
size 46539431
|
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l4_together.axmodel → qwen3_vl_text_p128_l4_together.axmodel
RENAMED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5b67c490399d843fa68821a0f38180c300f32333113c0a1c8cd5729bcefd8a56
|
| 3 |
+
size 46539431
|