wli1995 commited on
Commit
0c25383
·
verified ·
1 Parent(s): aad163c

update project

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +34 -0
  2. .gitignore +1 -0
  3. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision.axmodel → Qwen3-VL-2B-Instruct_vision.axmodel +0 -0
  4. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_1280x736.axmodel → Qwen3-VL-2B-Instruct_vision_1280x736.axmodel +0 -0
  5. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_640x640.axmodel → Qwen3-VL-2B-Instruct_vision_640x640.axmodel +0 -0
  6. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_u8.axmodel → Qwen3-VL-2B-Instruct_vision_u8.axmodel +0 -0
  7. README.md +227 -154
  8. config.json +27 -0
  9. gradio_demo.py +0 -262
  10. axera_logo.png → image.png +2 -2
  11. images/demo.jpg +0 -3
  12. images/demo1.jpg +0 -3
  13. images/recoAll_attractions_1.jpg +0 -3
  14. images/recoAll_attractions_2.jpg +0 -3
  15. images/recoAll_attractions_3.jpg +0 -3
  16. images/recoAll_attractions_4.jpg +0 -3
  17. images/ssd_car.jpg +0 -3
  18. images/ssd_horse.jpg +0 -3
  19. main_ax650 +0 -3
  20. main_ax650_api +0 -3
  21. main_axcl_aarch64 +0 -3
  22. main_axcl_api_aarch64 +0 -3
  23. main_axcl_api_x86 +0 -3
  24. main_axcl_x86 +0 -3
  25. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/model.embed_tokens.weight.bfloat16.bin → model.embed_tokens.weight.bfloat16.bin +0 -0
  26. openai_cli.py +0 -66
  27. post_config.json +6 -6
  28. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l0_together.axmodel → qwen3_vl_text_p128_l0_together.axmodel +2 -2
  29. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l10_together.axmodel → qwen3_vl_text_p128_l10_together.axmodel +2 -2
  30. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l11_together.axmodel → qwen3_vl_text_p128_l11_together.axmodel +2 -2
  31. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l12_together.axmodel → qwen3_vl_text_p128_l12_together.axmodel +2 -2
  32. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l13_together.axmodel → qwen3_vl_text_p128_l13_together.axmodel +2 -2
  33. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l14_together.axmodel → qwen3_vl_text_p128_l14_together.axmodel +2 -2
  34. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l15_together.axmodel → qwen3_vl_text_p128_l15_together.axmodel +2 -2
  35. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l16_together.axmodel → qwen3_vl_text_p128_l16_together.axmodel +2 -2
  36. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l17_together.axmodel → qwen3_vl_text_p128_l17_together.axmodel +2 -2
  37. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l18_together.axmodel → qwen3_vl_text_p128_l18_together.axmodel +2 -2
  38. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l19_together.axmodel → qwen3_vl_text_p128_l19_together.axmodel +2 -2
  39. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l1_together.axmodel → qwen3_vl_text_p128_l1_together.axmodel +2 -2
  40. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l20_together.axmodel → qwen3_vl_text_p128_l20_together.axmodel +2 -2
  41. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l21_together.axmodel → qwen3_vl_text_p128_l21_together.axmodel +2 -2
  42. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l22_together.axmodel → qwen3_vl_text_p128_l22_together.axmodel +2 -2
  43. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l23_together.axmodel → qwen3_vl_text_p128_l23_together.axmodel +2 -2
  44. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l24_together.axmodel → qwen3_vl_text_p128_l24_together.axmodel +2 -2
  45. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l25_together.axmodel → qwen3_vl_text_p128_l25_together.axmodel +2 -2
  46. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l26_together.axmodel → qwen3_vl_text_p128_l26_together.axmodel +2 -2
  47. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l27_together.axmodel → qwen3_vl_text_p128_l27_together.axmodel +2 -2
  48. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l2_together.axmodel → qwen3_vl_text_p128_l2_together.axmodel +2 -2
  49. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l3_together.axmodel → qwen3_vl_text_p128_l3_together.axmodel +2 -2
  50. Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l4_together.axmodel → qwen3_vl_text_p128_l4_together.axmodel +2 -2
.gitattributes CHANGED
@@ -88,3 +88,37 @@ main_ax650_api filter=lfs diff=lfs merge=lfs -text
88
  main_axcl_api_x86 filter=lfs diff=lfs merge=lfs -text
89
  axera_logo.png filter=lfs diff=lfs merge=lfs -text
90
  main_axcl_api_aarch64 filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
88
  main_axcl_api_x86 filter=lfs diff=lfs merge=lfs -text
89
  axera_logo.png filter=lfs diff=lfs merge=lfs -text
90
  main_axcl_api_aarch64 filter=lfs diff=lfs merge=lfs -text
91
+ Qwen3-VL-2B-Instruct_vision.axmodel filter=lfs diff=lfs merge=lfs -text
92
+ Qwen3-VL-2B-Instruct_vision_1280x736.axmodel filter=lfs diff=lfs merge=lfs -text
93
+ Qwen3-VL-2B-Instruct_vision_640x640.axmodel filter=lfs diff=lfs merge=lfs -text
94
+ Qwen3-VL-2B-Instruct_vision_u8.axmodel filter=lfs diff=lfs merge=lfs -text
95
+ image.png filter=lfs diff=lfs merge=lfs -text
96
+ qwen3_vl_text_p128_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
97
+ qwen3_vl_text_p128_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text
98
+ qwen3_vl_text_p128_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text
99
+ qwen3_vl_text_p128_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text
100
+ qwen3_vl_text_p128_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text
101
+ qwen3_vl_text_p128_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text
102
+ qwen3_vl_text_p128_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text
103
+ qwen3_vl_text_p128_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text
104
+ qwen3_vl_text_p128_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text
105
+ qwen3_vl_text_p128_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text
106
+ qwen3_vl_text_p128_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text
107
+ qwen3_vl_text_p128_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
108
+ qwen3_vl_text_p128_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text
109
+ qwen3_vl_text_p128_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text
110
+ qwen3_vl_text_p128_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text
111
+ qwen3_vl_text_p128_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text
112
+ qwen3_vl_text_p128_l24_together.axmodel filter=lfs diff=lfs merge=lfs -text
113
+ qwen3_vl_text_p128_l25_together.axmodel filter=lfs diff=lfs merge=lfs -text
114
+ qwen3_vl_text_p128_l26_together.axmodel filter=lfs diff=lfs merge=lfs -text
115
+ qwen3_vl_text_p128_l27_together.axmodel filter=lfs diff=lfs merge=lfs -text
116
+ qwen3_vl_text_p128_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
117
+ qwen3_vl_text_p128_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
118
+ qwen3_vl_text_p128_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text
119
+ qwen3_vl_text_p128_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text
120
+ qwen3_vl_text_p128_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text
121
+ qwen3_vl_text_p128_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text
122
+ qwen3_vl_text_p128_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text
123
+ qwen3_vl_text_p128_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text
124
+ qwen3_vl_text_post.axmodel filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ vision_cache
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision.axmodel → Qwen3-VL-2B-Instruct_vision.axmodel RENAMED
File without changes
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_1280x736.axmodel → Qwen3-VL-2B-Instruct_vision_1280x736.axmodel RENAMED
File without changes
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_640x640.axmodel → Qwen3-VL-2B-Instruct_vision_640x640.axmodel RENAMED
File without changes
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-2B-Instruct_vision_u8.axmodel → Qwen3-VL-2B-Instruct_vision_u8.axmodel RENAMED
File without changes
README.md CHANGED
@@ -17,9 +17,9 @@ tags:
17
  - GPTQ
18
  ---
19
 
20
- # Qwen3-VL
21
 
22
- This version of Qwen3-VL-2B-Instruct has been converted to run on the Axera NPU using **w4a16** quantization.
23
 
24
  Compatible with Pulsar2 version: 5.0
25
 
@@ -66,184 +66,257 @@ The DDR capacity refers to the CMM memory that needs to be consumed. Ensure that
66
 
67
  ## How to use
68
 
69
- Download all files from this repository to the device
 
70
 
71
- **If you using AX650 Board**
 
 
 
 
72
 
73
- ### Demo Run
74
 
75
- #### Image understand demo
 
 
76
 
 
77
 
78
- - input text
 
 
79
 
80
- ```
81
- 描述这张图片
 
82
  ```
83
 
84
- - input image
 
85
 
86
- ![](./images/recoAll_attractions_1.jpg)
87
-
88
- ```
89
- root@ax650 ~/Qwen3-VL-2B-Instruct-GPTQ-Int4 # bash run_image_ax650.sh
90
- [I][ Init][ 156]: LLM init start
91
- [I][ Init][ 158]: Total CMM:4353 MB
92
- [I][ Init][ 34]: connect http://127.0.0.1:8080 ok
93
- bos_id: -1, eos_id: 151645
94
- img_start_token: 151652
95
- img_context_token: 151655
96
- 3% | ██ | 1 / 31 [0.01s<0.46s, 66.67 count/s] tokenizer init ok[I][ Init][ 26]: LLaMaEmbedSelector use mmap
97
- 6% | ███ | 2 / 31 [0.02s<0.34s, 90.91 count/s] embed_selector init ok[I][ Init][ 201]: attr.axmodel_num:28
98
- 103% | ██████████████████████████████████ | 32 / 31 [34.03s<32.96s, 0.94 count/s] init vpm axmodel ok,remain_cmm(854 MB)[I][ Init][ 266]: IMAGE_CONTEXT_TOKEN: 151655, IMAGE_START_TOKEN: 151652
99
- [I][ Init][ 309]: image encoder output float32
100
-
101
- [I][ Init][ 339]: max_token_len : 2047
102
- [I][ Init][ 344]: kv_cache_size : 1024, kv_cache_num: 2047
103
- [I][ Init][ 352]: prefill_token_num : 128
104
- [I][ Init][ 356]: grp: 1, prefill_max_token_num : 1
105
- [I][ Init][ 356]: grp: 2, prefill_max_token_num : 128
106
- [I][ Init][ 356]: grp: 3, prefill_max_token_num : 256
107
- [I][ Init][ 356]: grp: 4, prefill_max_token_num : 384
108
- [I][ Init][ 356]: grp: 5, prefill_max_token_num : 512
109
- [I][ Init][ 356]: grp: 6, prefill_max_token_num : 640
110
- [I][ Init][ 356]: grp: 7, prefill_max_token_num : 768
111
- [I][ Init][ 356]: grp: 8, prefill_max_token_num : 896
112
- [I][ Init][ 356]: grp: 9, prefill_max_token_num : 1024
113
- [I][ Init][ 356]: grp: 10, prefill_max_token_num : 1152
114
- [I][ Init][ 360]: prefill_max_token_num : 1152
115
- [I][ Init][ 372]: LLM init ok
116
- [I][ Init][ 374]: Left CMM:854 MB
117
- Type "q" to exit, Ctrl+c to stop current running
118
- prompt >> 描述这张图片
119
- image >> images/recoAll_attractions_1.jpg
120
- [I][ EncodeImage][ 440]: pixel_values size 1
121
- [I][ EncodeImage][ 441]: grid_h 24 grid_w 24
122
- [I][ EncodeImage][ 489]: image encode time : 237.778000 ms, size : 1
123
- [I][ Encode][ 532]: input_ids size:168
124
- [I][ Encode][ 540]: offset 15
125
- [I][ Encode][ 569]: img_embed.size:1, 294912
126
- [I][ Encode][ 583]: out_embed size:344064
127
- [I][ Encode][ 584]: input_ids size 168
128
- [I][ Encode][ 586]: position_ids size:168
129
- [I][ Run][ 607]: input token num : 168, prefill_split_num : 2
130
- [I][ Run][ 641]: input_num_token:128
131
- [I][ Run][ 641]: input_num_token:40
132
- [I][ Run][ 865]: ttft: 313.60 ms
133
- 这是一张在埃及沙漠中拍摄的风景照片。画面中,三座巨大的金字塔在晴朗的天空下矗立,它们是古埃及文明的象征。这些金字塔由巨大的石块堆叠而成,表面因岁月侵蚀而显得斑驳。在金字塔的前方,有几个人影在沙地上行走,这为整个场景提供了比例感和尺度感。整个场景充满了历史的厚重感和神秘的氛围。
134
-
135
- [N][ Run][ 992]: hit eos,avg 14.14 token/s
136
  ```
137
 
138
- #### Video understand demo
139
 
140
- - input text
141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
  ```
143
- 描述这个视频
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
  ```
145
 
146
- - input video
147
 
148
- ./video
 
149
 
150
- ```
151
- root@ax650 ~/Qwen3-VL-2B-Instruct-GPTQ-Int4 # bash run_video_ax650.sh
152
- [I][ Init][ 156]: LLM init start
153
- [I][ Init][ 158]: Total CMM:7884 MB
154
- [I][ Init][ 34]: connect http://127.0.0.1:8080 ok
155
- bos_id: -1, eos_id: 151645
156
- img_start_token: 151652
157
- img_context_token: 151656
158
- 3% | ██ | 1 / 31 [0.01s<0.34s, 90.91 count/s] tokenizer init ok[I][ Init][ 26]: LLaMaEmbedSelector use mmap
159
- 6% | ███ | 2 / 31 [0.01s<0.23s, 133.33 count/s] embed_selector init ok[I][ Init][ 201]: attr.axmodel_num:28
160
- 103% | ██████████████████████████████████ | 32 / 31 [32.37s<31.36s, 0.99 count/s] init vpm axmodel ok,remain_cmm(4385 MB)[I][ Init][ 266]: IMAGE_CONTEXT_TOKEN: 151656, IMAGE_START_TOKEN: 151652
161
- [I][ Init][ 309]: image encoder output float32
162
-
163
- [I][ Init][ 339]: max_token_len : 2047
164
- [I][ Init][ 344]: kv_cache_size : 1024, kv_cache_num: 2047
165
- [I][ Init][ 352]: prefill_token_num : 128
166
- [I][ Init][ 356]: grp: 1, prefill_max_token_num : 1
167
- [I][ Init][ 356]: grp: 2, prefill_max_token_num : 128
168
- [I][ Init][ 356]: grp: 3, prefill_max_token_num : 256
169
- [I][ Init][ 356]: grp: 4, prefill_max_token_num : 384
170
- [I][ Init][ 356]: grp: 5, prefill_max_token_num : 512
171
- [I][ Init][ 356]: grp: 6, prefill_max_token_num : 640
172
- [I][ Init][ 356]: grp: 7, prefill_max_token_num : 768
173
- [I][ Init][ 356]: grp: 8, prefill_max_token_num : 896
174
- [I][ Init][ 356]: grp: 9, prefill_max_token_num : 1024
175
- [I][ Init][ 356]: grp: 10, prefill_max_token_num : 1152
176
- [I][ Init][ 360]: prefill_max_token_num : 1152
177
- [I][ Init][ 372]: LLM init ok
178
- [I][ Init][ 374]: Left CMM:4385 MB
179
- Type "q" to exit, Ctrl+c to stop current running
180
- prompt >> 描述这个视频
181
- video >> video
182
- video/frame_0000.jpg
183
- video/frame_0008.jpg
184
- video/frame_0016.jpg
185
- video/frame_0024.jpg
186
- video/frame_0032.jpg
187
- video/frame_0040.jpg
188
- video/frame_0048.jpg
189
- video/frame_0056.jpg
190
- [I][ EncodeImage][ 440]: pixel_values size 4
191
- [I][ EncodeImage][ 441]: grid_h 24 grid_w 24
192
- [I][ EncodeImage][ 489]: image encode time : 751.481018 ms, size : 4
193
- [I][ Encode][ 532]: input_ids size:600
194
- [I][ Encode][ 540]: offset 15
195
- [I][ Encode][ 569]: img_embed.size:4, 294912
196
- [I][ Encode][ 574]: offset:159
197
- [I][ Encode][ 574]: offset:303
198
- [I][ Encode][ 574]: offset:447
199
- [I][ Encode][ 583]: out_embed size:1228800
200
- [I][ Encode][ 584]: input_ids size 600
201
- [I][ Encode][ 586]: position_ids size:600
202
- [I][ Run][ 607]: input token num : 600, prefill_split_num : 5
203
- [I][ Run][ 641]: input_num_token:128
204
- [I][ Run][ 641]: input_num_token:128
205
- [I][ Run][ 641]: input_num_token:128
206
- [I][ Run][ 641]: input_num_token:128
207
- [I][ Run][ 641]: input_num_token:88
208
- [I][ Run][ 865]: ttft: 843.36 ms
209
- 这是一段关于两只山地旱獭(也称“山地土拨鼠”)在山地环境中互动的视频。
210
-
211
- 在画面中,两只山地旱獭正站在布满碎石的山坡上,背景是连绵起伏的山脉和蓝天。它们的毛色以灰、棕、黑相间,脸部和耳朵周围有明显的黑白条纹,显得非常可爱。
212
-
213
- 这两只旱獭正在进行一场激烈的“拳击”或“格斗”游戏。它们的前爪高高举起,像在互相击打,但它们的姿势和动作表明它们可能是在进行一场激烈的“拳击”或“格斗”游戏。它们的嘴巴和前爪在空中挥舞,似乎在互相攻击或展示力量。
214
-
215
- 整个场景充满了动感和活力,展现了这些小动物在自然环境中充满活力和趣味的一面。
216
-
217
- [N][ Run][ 992]: hit eos,avg 14.16 token/s
218
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  ```
220
 
221
- ### Gradio demo
222
 
223
- #### install py depend
224
 
225
- ```shell
226
- pip install -r requirements.txt
227
- ```
228
 
229
- #### start openai style api server
230
- if the tokenizer server is not run in the same machine,please modify the tokenizer server ip in shell file.
231
 
232
- ```shell
233
- # for axcl x86
234
- ./run_axcl_x86_api.sh
235
- # for axcl aarch64
236
- ./run_axcl_aarch64_api.sh
237
- # for ax650
238
- ./run_ax650_api.sh
239
- ```
240
 
241
- #### start gradio demo
242
- if the api server is not run in the same machine,please modify the api url in gradio web ui.
 
 
 
 
243
 
244
- ```shell
245
- python gradio_demo.py
 
 
 
 
 
246
  ```
247
 
248
- ![image](https://cdn-uploads.huggingface.co/production/uploads/64b7837c17570fdff9b906b9/Og9fPNi0chg768gicse7M.png)
249
-
 
17
  - GPTQ
18
  ---
19
 
20
+ # Qwen3-VL-2B-Instruct-GPTQ-Int4
21
 
22
+ This version of Qwen3-VL-2B-Instruct-GPTQ-Int4 has been converted to run on the Axera NPU using **w4a16** quantization.
23
 
24
  Compatible with Pulsar2 version: 5.0
25
 
 
66
 
67
  ## How to use
68
 
69
+ ## 安装 axllm
70
+ 方式一:克隆仓库后执行安装脚本:
71
 
72
+ ```shell
73
+ git clone -b axllm https://github.com/AXERA-TECH/ax-llm.git
74
+ cd ax-llm
75
+ ./install.sh
76
+ ```
77
 
78
+ 方式二:一行命令安装(默认分支 `axllm`):
79
 
80
+ ```shell
81
+ curl -fsSL https://raw.githubusercontent.com/AXERA-TECH/ax-llm/axllm/install.sh | bash
82
+ ```
83
 
84
+ 方式三:下载 GitHub Actions CI 导出的可执行程序(适合没有编译环境的用户):
85
 
86
+ 如果没有编译环境,请到:
87
+ `https://github.com/AXERA-TECH/ax-llm/actions?query=branch%3Aaxllm`
88
+ 下载 **最新 CI 导出的可执行程序**(`axllm`),然后:
89
 
90
+ ```shell
91
+ chmod +x axllm
92
+ sudo mv axllm /usr/bin/axllm
93
  ```
94
 
95
+ ## 模型下载(Hugging Face)
96
+ 先创建模型目录并进入,然后下载到该目录:
97
 
98
+ ```shell
99
+ mkdir -p AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4
100
+ cd AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4
101
+ hf download AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4 --local-dir .
102
+
103
+ # structure of the downloaded files
104
+ tree -L 3
105
+ `-- AXERA-TECH
106
+ `-- Qwen3-VL-2B-Instruct-GPTQ-Int4
107
+ |-- Qwen3-VL-2B-Instruct_vision.axmodel
108
+ |-- Qwen3-VL-2B-Instruct_vision_1280x736.axmodel
109
+ |-- Qwen3-VL-2B-Instruct_vision_640x640.axmodel
110
+ |-- Qwen3-VL-2B-Instruct_vision_u8.axmodel
111
+ |-- README.md
112
+ |-- config.json
113
+ |-- image.png
114
+ |-- model.embed_tokens.weight.bfloat16.bin
115
+ |-- post_config.json
116
+ |-- qwen3_tokenizer.txt
117
+ |-- qwen3_vl_text_p128_l0_together.axmodel
118
+ ...
119
+ |-- qwen3_vl_text_p128_l9_together.axmodel
120
+ |-- qwen3_vl_text_post.axmodel
121
+ `-- vision_cache
122
+
123
+ 3 directories, 39 files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  ```
125
 
126
+ ## Inference with AX650 Host, such as M4N-Dock(爱芯派Pro) or AX650N DEMO Board
127
 
128
+ ### 运行(CLI)
129
 
130
+ ```shell
131
+ root@ax650:~# axllm run AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4/
132
+ [I][ Init][ 138]: LLM init start
133
+ tokenizer_type = 1
134
+ 96% | ███████████████████████████████ | 30 / 31 [11.50s<11.88s, 2.61 count/s] init post axmodel ok,remain_cmm(9563 MB)
135
+ [I][ Init][ 199]: max_token_len : 2047
136
+ [I][ Init][ 202]: kv_cache_size : 1024, kv_cache_num: 2047
137
+ [I][ Init][ 205]: prefill_token_num : 128
138
+ [I][ Init][ 209]: grp: 1, prefill_max_kv_cache_num : 1
139
+ [I][ Init][ 209]: grp: 2, prefill_max_kv_cache_num : 128
140
+ [I][ Init][ 209]: grp: 3, prefill_max_kv_cache_num : 256
141
+ [I][ Init][ 209]: grp: 4, prefill_max_kv_cache_num : 384
142
+ [I][ Init][ 209]: grp: 5, prefill_max_kv_cache_num : 512
143
+ [I][ Init][ 209]: grp: 6, prefill_max_kv_cache_num : 640
144
+ [I][ Init][ 209]: grp: 7, prefill_max_kv_cache_num : 768
145
+ [I][ Init][ 209]: grp: 8, prefill_max_kv_cache_num : 896
146
+ [I][ Init][ 209]: grp: 9, prefill_max_kv_cache_num : 1024
147
+ [I][ Init][ 209]: grp: 10, prefill_max_kv_cache_num : 1152
148
+ [I][ Init][ 214]: prefill_max_token_num : 1152
149
+ [I][ Init][ 27]: LLaMaEmbedSelector use mmap
150
+ 100% | ████████████████████████████████ | 31 / 31 [11.50s<11.50s, 2.70 count/s] embed_selector init ok
151
+ [W][ Init][ 457]: Qwen-VL vision size override: cfg=448x448 bytes=1204224, model_input_bytes=884736 -> 384x384 (square).
152
+ [I][ Init][ 641]: Qwen-VL token ids: vision_start=151652 image_pad=151655 video_pad=151656
153
+ [I][ Init][ 666]: VisionModule init ok: type=Qwen3VL, tokens_per_block=144, embed_size=2048, out_dtype=fp32
154
+ [I][ Init][ 672]: VisionModule deepstack enabled: layers=3
155
+ [I][ load_config][ 282]: load config:
156
+ {
157
+ "enable_repetition_penalty": false,
158
+ "enable_temperature": false,
159
+ "enable_top_k_sampling": false,
160
+ "enable_top_p_sampling": false,
161
+ "penalty_window": 20,
162
+ "repetition_penalty": 1.2,
163
+ "temperature": 0.9,
164
+ "top_k": 10,
165
+ "top_p": 0.8
166
+ }
167
+
168
+ [I][ Init][ 272]: LLM init ok
169
+ Type "q" to exit
170
+ Ctrl+c to stop current running
171
+ "reset" to reset kvcache
172
+ "dd" to remove last conversation.
173
+ "pp" to print history.
174
+ VLM enabled: after each prompt, input image path (empty = text-only). Use "video:<frames_dir>" for video.
175
+ ----------------------------------------
176
+ prompt >> who are you
177
+ image >>
178
+ [I][ SetKVCache][ 406]: prefill_grpid:2 kv_cache_num:128 precompute_len:0 input_num_token:22
179
+ [I][ SetKVCache][ 408]: current prefill_max_token_num:1152
180
+ [I][ SetKVCache][ 409]: first run
181
+ [I][ Run][ 457]: input token num : 22, prefill_split_num : 1
182
+ [I][ Run][ 497]: prefill chunk p=0 history_len=0 grpid=1 kv_cache_num=0 input_tokens=22
183
+ [I][ Run][ 519]: prefill indices shape: p=0 idx_elems=384 idx_rows=3 pos_rows=0
184
+ [I][ Run][ 627]: ttft: 174.42 ms
185
+ I am Qwen, a large-scale language model developed by the Tongyi Lab of Alibaba Group. I can answer questions, write stories, create essays, and more. I am designed to be helpful, harmless, and honest. I hope to assist you in any way I can!
186
+
187
+ [N][ Run][ 709]: hit eos,avg 10.48 token/s
188
+
189
+ [I][ GetKVCache][ 380]: precompute_len:79, remaining:1073
190
+ prompt >> describe the image
191
+ image >> ./AXERA-TECH/Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/image.png
192
+ [I][ EncodeForContent][ 971]: Qwen-VL pixel_values[0] bytes=884736 min=0 max=241 (w=384 h=384 tp=2 ps=16 sm=2)
193
+ [I][ EncodeForContent][ 994]: vision cache store: ./AXERA-TECH/Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/image.png
194
+ [I][ SetKVCache][ 406]: prefill_grpid:3 kv_cache_num:256 precompute_len:79 input_num_token:159
195
+ [I][ SetKVCache][ 408]: current prefill_max_token_num:1024
196
+ [I][ Run][ 457]: input token num : 159, prefill_split_num : 2
197
+ [I][ Run][ 497]: prefill chunk p=0 history_len=79 grpid=2 kv_cache_num=128 input_tokens=128
198
+ [I][ Run][ 519]: prefill indices shape: p=0 idx_elems=384 idx_rows=3 pos_rows=3
199
+ [I][ Run][ 497]: prefill chunk p=1 history_len=207 grpid=3 kv_cache_num=256 input_tokens=31
200
+ [I][ Run][ 519]: prefill indices shape: p=1 idx_elems=384 idx_rows=3 pos_rows=3
201
+ [I][ Run][ 627]: ttft: 379.97 ms
202
+ This image depicts three astronauts in white space suits standing in a dense, leafy forest. The scene is set in a dark, shadowy environment, with the astronauts appearing to be in a natural, possibly alien, environment. The image has a monochromatic, almost grayscale color scheme, giving it a mysterious and somber atmosphere. The astronauts are positioned in the center of the frame, with one standing upright and the other two slightly bent, as if they are exploring or searching for something in the dense foliage. The overall mood of the image is mysterious and contemplative.
203
+
204
+ [N][ Run][ 709]: hit eos,avg 10.33 token/s
205
+
206
+ [I][ GetKVCache][ 380]: precompute_len:239, remaining:913
207
+ prompt >> how many people in the image?
208
+ image >>
209
+ [I][ EncodeForContent][ 926]: vision cache hit (mem): ./AXERA-TECH/Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/image.png
210
+ [I][ SetKVCache][ 406]: prefill_grpid:4 kv_cache_num:384 precompute_len:239 input_num_token:74
211
+ [I][ SetKVCache][ 408]: current prefill_max_token_num:896
212
+ [I][ Run][ 457]: input token num : 74, prefill_split_num : 1
213
+ [I][ Run][ 497]: prefill chunk p=0 history_len=239 grpid=3 kv_cache_num=256 input_tokens=74
214
+ [I][ Run][ 519]: prefill indices shape: p=0 idx_elems=384 idx_rows=3 pos_rows=3
215
+ [I][ Run][ 627]: ttft: 193.78 ms
216
+ This image depicts three astronauts in white space suits standing in a dense, leafy forest. The scene is set in a dark, shadowy environment, with the astronauts appearing to be in a natural, possibly alien, environment. The image has a monochromatic, almost grayscale color scheme, giving it a mysterious and somber atmosphere. The astronauts are positioned in the center of the frame, with one standing upright and the other two slightly bent, as if they are exploring or searching for something in the dense foliage. The overall mood of the image is mysterious and contemplative.
217
+
218
+ [N][ Run][ 709]: hit eos,avg 10.48 token/s
219
+
220
+ [I][ GetKVCache][ 380]: precompute_len:410, remaining:742
221
+ prompt >> q
222
  ```
223
+
224
+ ### 启动服务(OpenAI 兼容)
225
+
226
+ ```shell
227
+ root@ax650:~# axllm serve AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4
228
+ [I][ Init][ 138]: LLM init start
229
+ tokenizer_type = 1
230
+ 96% | ███████████████████████████████ | 30 / 31 [4.63s<4.79s, 6.47 count/s] init post axmodel ok,remain_cmm(9563 MB)
231
+ [I][ Init][ 199]: max_token_len : 2047
232
+ [I][ Init][ 202]: kv_cache_size : 1024, kv_cache_num: 2047
233
+ [I][ Init][ 205]: prefill_token_num : 128
234
+ [I][ Init][ 209]: grp: 1, prefill_max_kv_cache_num : 1
235
+ [I][ Init][ 209]: grp: 2, prefill_max_kv_cache_num : 128
236
+ [I][ Init][ 209]: grp: 3, prefill_max_kv_cache_num : 256
237
+ [I][ Init][ 209]: grp: 4, prefill_max_kv_cache_num : 384
238
+ [I][ Init][ 209]: grp: 5, prefill_max_kv_cache_num : 512
239
+ [I][ Init][ 209]: grp: 6, prefill_max_kv_cache_num : 640
240
+ [I][ Init][ 209]: grp: 7, prefill_max_kv_cache_num : 768
241
+ [I][ Init][ 209]: grp: 8, prefill_max_kv_cache_num : 896
242
+ [I][ Init][ 209]: grp: 9, prefill_max_kv_cache_num : 1024
243
+ [I][ Init][ 209]: grp: 10, prefill_max_kv_cache_num : 1152
244
+ [I][ Init][ 214]: prefill_max_token_num : 1152
245
+ [I][ Init][ 27]: LLaMaEmbedSelector use mmap
246
+ 100% | ████████████████████████████████ | 31 / 31 [4.64s<4.64s, 6.69 count/s] embed_selector init ok
247
+ [W][ Init][ 457]: Qwen-VL vision size override: cfg=448x448 bytes=1204224, model_input_bytes=884736 -> 384x384 (square).
248
+ [I][ Init][ 641]: Qwen-VL token ids: vision_start=151652 image_pad=151655 video_pad=151656
249
+ [I][ Init][ 666]: VisionModule init ok: type=Qwen3VL, tokens_per_block=144, embed_size=2048, out_dtype=fp32
250
+ [I][ Init][ 672]: VisionModule deepstack enabled: layers=3
251
+ [I][ load_config][ 282]: load config:
252
+ {
253
+ "enable_repetition_penalty": false,
254
+ "enable_temperature": false,
255
+ "enable_top_k_sampling": false,
256
+ "enable_top_p_sampling": false,
257
+ "penalty_window": 20,
258
+ "repetition_penalty": 1.2,
259
+ "temperature": 0.9,
260
+ "top_k": 10,
261
+ "top_p": 0.8
262
+ }
263
+
264
+ [I][ Init][ 272]: LLM init ok
265
+ Starting server on port 8000 with model 'AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4'...
266
+ OpenAI API Server starting on http://0.0.0.0:8000
267
+ Max concurrency: 1
268
+ Models: AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4
269
  ```
270
 
271
+ ### OpenAI 调用示例
272
 
273
+ ```python
274
+ from openai import OpenAI
275
 
276
+ API_URL = "http://127.0.0.1:8000/v1"
277
+ MODEL = "AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
278
 
279
+ messages = [
280
+ {"role": "system", "content": [{"type": "text", "text": "you are a helpful assistant."}]},
281
+ {"role": "user", "content": "hello"},
282
+ ]
283
+
284
+ client = OpenAI(api_key="not-needed", base_url=API_URL)
285
+ completion = client.chat.completions.create(
286
+ model=MODEL,
287
+ messages=messages,
288
+ )
289
+
290
+ print(completion.choices[0].message.content)
291
  ```
292
 
 
293
 
294
+ ### OpenAI 流式调用示例
295
 
296
+ ```python
297
+ from openai import OpenAI
 
298
 
299
+ API_URL = "http://127.0.0.1:8000/v1"
300
+ MODEL = "AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4"
301
 
302
+ messages = [
303
+ {"role": "system", "content": [{"type": "text", "text": "you are a helpful assistant."}]},
304
+ {"role": "user", "content": "hello"},
305
+ ]
 
 
 
 
306
 
307
+ client = OpenAI(api_key="not-needed", base_url=API_URL)
308
+ stream = client.chat.completions.create(
309
+ model=MODEL,
310
+ messages=messages,
311
+ stream=True,
312
+ )
313
 
314
+ print("assistant:")
315
+ for ev in stream:
316
+ delta = getattr(ev.choices[0], "delta", None)
317
+ if delta and getattr(delta, "content", None):
318
+ print(delta.content, end="", flush=True)
319
+ print("\n")
321
  ```
322
 
 
 
config.json CHANGED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "system_prompt": "you are a helpful assistant.",
3
+ "model_name": "AXERA-TECH/Qwen3-VL-2B-Instruct",
4
+ "url_tokenizer_model": "qwen3_tokenizer.txt",
5
+ "tokenizer_type": "Qwen3VL",
6
+ "post_config_path": "post_config.json",
7
+ "template_filename_axmodel": "qwen3_vl_text_p128_l%d_together.axmodel",
8
+ "axmodel_num": 28,
9
+ "filename_post_axmodel": "qwen3_vl_text_post.axmodel",
10
+ "filename_tokens_embed": "model.embed_tokens.weight.bfloat16.bin",
11
+ "tokens_embed_num": 151936,
12
+ "tokens_embed_size": 2048,
13
+ "use_mmap_load_embed": true,
14
+ "vlm_type": "Qwen3VL",
15
+ "filename_image_encoder_axmodel": "Qwen3-VL-2B-Instruct_vision.axmodel",
16
+ "vision_patch_size": 16,
17
+ "vision_temporal_patch_size": 2,
18
+ "vision_spatial_merge_size": 2,
19
+ "vision_fps": 1,
20
+ "vision_tokens_per_second": 1,
21
+ "vision_cache_dir": "vision_cache",
22
+ "use_mmap_load_layer": true,
23
+ "devices": [
24
+ 0,
25
+ 1
26
+ ]
27
+ }
gradio_demo.py DELETED
@@ -1,262 +0,0 @@
1
- # gradio_chat_single_turn.py
2
- import re
3
- import subprocess
4
- import gradio as gr
5
- import base64, cv2, os, tempfile
6
- from openai import OpenAI
7
- import requests
8
-
9
- def get_all_local_ips():
10
- result = subprocess.run(['ip', 'a'], capture_output=True, text=True)
11
- output = result.stdout
12
-
13
- # 匹配所有IPv4
14
- ips = re.findall(r'inet (\d+\.\d+\.\d+\.\d+)', output)
15
-
16
- # 过滤掉回环地址
17
- real_ips = [ip for ip in ips if not ip.startswith('127.')]
18
-
19
- return real_ips
20
-
21
-
22
-
23
- # ---------- Helpers ----------
24
- def img_to_data_url_from_cvframe(frame):
25
- import base64, cv2
26
- ok, buf = cv2.imencode(".jpg", frame, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
27
- b64 = base64.b64encode(buf).decode("ascii")
28
- return f"data:image/jpeg;base64,{b64}"
29
-
30
- def img_to_data_url_from_path(img_path: str) -> str:
31
- import cv2, base64
32
- img = cv2.imread(img_path)
33
- return img_to_data_url_from_cvframe(img)
34
-
35
- def video_to_data_urls(video_path: str, frame_stride: int = 30, max_frames: int = 8):
36
- import cv2, base64
37
- cap = cv2.VideoCapture(video_path)
38
- total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
39
-
40
- if total / frame_stride > max_frames:
41
- frame_stride = int(total/max_frames)
42
-
43
- urls = []
44
- idx = 0
45
- first_preview = None
46
- while len(urls) < max_frames and idx < total:
47
- cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
48
- ret, frame = cap.read()
49
- if not ret:
50
- break
51
- ok, buf = cv2.imencode(".jpg", frame, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
52
- if not ok:
53
- break
54
- b64 = base64.b64encode(buf).decode("ascii")
55
- data_url = f"data:image/jpeg;base64,{b64}"
56
- urls.append(data_url)
57
- if first_preview is None:
58
- first_preview = data_url
59
- idx += frame_stride
60
- cap.release()
61
- return urls, first_preview
62
-
63
- def save_preview_image_from_data_url(data_url: str) -> str:
64
- # 仅用于在 Chatbot 里显示缩略图
65
- comma = data_url.find(",")
66
- if comma == -1:
67
- return ""
68
- b64 = data_url[comma+1:]
69
- raw = base64.b64decode(b64)
70
- fd, tmp_path = tempfile.mkstemp(suffix=".jpg", prefix="preview_")
71
- os.close(fd)
72
- with open(tmp_path, "wb") as f:
73
- f.write(raw)
74
- return tmp_path
75
-
76
- def build_messages(prompt: str, image_path: str | None, video_path: str | None,
77
- prefer_video: bool, frame_stride: int, max_frames: int):
78
- content = []
79
- if prompt and prompt.strip():
80
- content.append({"type": "text", "text": prompt.strip()})
81
-
82
- if video_path and os.path.exists(video_path) and prefer_video:
83
- urls, first_preview = video_to_data_urls(video_path, frame_stride=frame_stride, max_frames=max_frames)
84
- content.append({"type": "image_url", "is_video":True, "image_url": urls})
85
- media_desc = f"(视频抽帧:{len(urls)} 帧,步长 {frame_stride})"
86
- return {"role": "user", "content": content}, first_preview, media_desc
87
-
88
- if image_path and os.path.exists(image_path):
89
- u = img_to_data_url_from_path(image_path)
90
- content.append({"type": "image_url", "image_url": u})
91
- media_desc = "(已附带图片)"
92
- return {"role": "user", "content": content}, u, media_desc
93
-
94
- if video_path and os.path.exists(video_path):
95
- urls, first_preview = video_to_data_urls(video_path, frame_stride=frame_stride, max_frames=max_frames)
96
- content.append({"type": "image_url", "is_video":True, "image_url": urls})
97
- media_desc = f"(视频抽帧:{len(urls)} 帧,步长 {frame_stride})"
98
- return {"role": "user", "content": content}, first_preview, media_desc
99
-
100
- return {"role": "user", "content": content if content else [{"type": "text", "text": prompt or ""}]}, None, ""
101
-
102
- # ---------- Gradio callback (single-turn, stream) ----------
103
- def run_single_turn(prompt, image_file, video_file, prefer_video, frame_stride, max_frames,
104
- base_url, model, api_key, chatbot_state):
105
- """
106
- 单轮:每次发送都会重置聊天历史,只显示本轮的 user/assistant 两个气泡。
107
- """
108
- try:
109
- # 清空历史(单轮),构造用户气泡
110
- chatbot_state = []
111
-
112
- # 准备文件路径
113
- image_path = image_file if isinstance(image_file, str) else (image_file.name if image_file else None)
114
- video_path = video_file if isinstance(video_file, str) else (video_file.name if video_file else None)
115
-
116
- # 构造 messages 和预览
117
- messages, preview_data_url, media_desc = build_messages(
118
- prompt=prompt or "",
119
- image_path=image_path,
120
- video_path=video_path,
121
- prefer_video=bool(prefer_video),
122
- frame_stride=int(frame_stride),
123
- max_frames=int(max_frames),
124
- )
125
-
126
- # 组装用户气泡(Markdown):文本 + 预览图/视频说明
127
- user_md = (prompt or "").strip()
128
- if media_desc:
129
- user_md = (user_md + "\n\n" if user_md else "") + f"> {media_desc}"
130
- if preview_data_url:
131
- # user_md = (user_md + "\n\n" if user_md else "") + f"![preview]({preview_path})"
132
- user_md = (user_md + "\n\n" if user_md else "") + f"![preview]({preview_data_url})"
133
-
134
- chatbot_state.append((user_md or "(空提示)", "")) # assistant 先空字符串,等待流式填充
135
- yield chatbot_state # 先把用户气泡渲染出来
136
-
137
- # 调后端(流式)
138
- client = OpenAI(api_key=api_key or "not-needed", base_url=base_url.strip())
139
- stream = client.chat.completions.create(
140
- model=model.strip(),
141
- messages=messages,
142
- stream=True,
143
- )
144
-
145
- bot_chunks = []
146
- # 先补一个空 assistant 气泡
147
- if len(chatbot_state) == 1:
148
- chatbot_state[0] = (chatbot_state[0][0], "")
149
- yield chatbot_state
150
-
151
- # 逐 chunk 更新 assistant 气泡(Markdown)
152
- for ev in stream:
153
- delta = getattr(ev.choices[0], "delta", None)
154
- if delta and getattr(delta, "content", None):
155
- bot_chunks.append(delta.content)
156
- chatbot_state[-1] = (chatbot_state[-1][0], "".join(bot_chunks))
157
- yield chatbot_state
158
-
159
- # 结束再确保收尾
160
- chatbot_state[-1] = (chatbot_state[-1][0], "".join(bot_chunks) if bot_chunks else "(empty response)")
161
- yield chatbot_state
162
-
163
- except Exception as e:
164
- chatbot_state.append((
165
- chatbot_state[-1][0] if chatbot_state else "(request)",
166
- f"**Error:** {e}"
167
- ))
168
- yield chatbot_state
169
-
170
- # ---------- Gradio UI ----------
171
- with gr.Blocks(css="""
172
- #chat,
173
- #chat * {
174
- font-size: 18px !important;
175
- line-height: 1.6 !important;
176
- }
177
-
178
- #chat .message,
179
- #chat [data-testid="bot"],
180
- #chat [data-testid="user"] {
181
- font-size: 18px !important;
182
- }
183
- """,title="AXERA Qwen3 VL") as demo:
184
- axera_logo = img_to_data_url_from_path("./axera_logo.png")
185
- gr.Markdown(
186
- f"""
187
- <div style="display: flex; align-items: center; gap: 10px;">
188
- <img src="{axera_logo}" alt="axera_logo" style="height: 60px;">
189
- </div>
190
- """
191
- )
192
-
193
- chatbot = gr.Chatbot(
194
- label="对话",
195
- bubble_full_width=False,
196
- height=500,
197
- avatar_images=(None, None), # 可替换头像
198
- latex_delimiters=[{"left": "$$", "right": "$$", "display": True},
199
- {"left": "$", "right": "$", "display": False}],
200
- show_copy_button=True,
201
- render_markdown=True,
202
- elem_id="chat"
203
- )
204
-
205
- with gr.Row():
206
- with gr.Column(scale=2):
207
- prompt = gr.Textbox(label="Prompt", placeholder="输入你的提示语", lines=2)
208
- with gr.Row():
209
- send_btn = gr.Button("发送 ▶️", variant="primary")
210
- clear_btn = gr.Button("清空")
211
- stop_btn = gr.Button("停止 ■", variant="stop")
212
- with gr.Row():
213
- image = gr.Image(type="filepath", label="上传图片(可选)")
214
- video = gr.Video(label="上传视频(可选)")
215
-
216
- with gr.Column(scale=1):
217
- base_url = gr.Textbox(value="http://localhost:8000/v1", label="Base URL")
218
- model = gr.Textbox(value="AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4", label="Model")
219
- api_key = gr.Textbox(value="not-needed", label="API Key", type="password")
220
- with gr.Row():
221
- prefer_video = gr.Checkbox(True, label="如果有视频,优先使用视频抽帧")
222
- frame_stride = gr.Slider(1, 90, value=30, step=1, label="视频抽帧间隔")
223
- max_frames = gr.Slider(1, 8, value=8, step=1, label="最多抽帧数")
224
-
225
-
226
- # 单轮对话需要一个 state 来承载当前这轮的气泡
227
- state = gr.State([])
228
-
229
- send_btn.click(
230
- fn=run_single_turn,
231
- inputs=[prompt, image, video, prefer_video, frame_stride, max_frames, base_url, model, api_key, state],
232
- outputs=chatbot,
233
- show_progress=True,
234
- queue=True,
235
- )
236
-
237
- def stop_stream(base_url):
238
- url = f"{base_url.strip()}/stop"
239
- response = requests.get(url)
240
- if response.status_code == 200:
241
- print("Stream stopped successfully")
242
- else:
243
- print(f"Failed to stop stream: {response.status_code} - {response.text}")
244
-
245
- stop_btn.click(
246
- fn=stop_stream,
247
- inputs=[base_url],
248
- outputs=chatbot,
249
- show_progress=True,
250
- queue=True,
251
- )
252
-
253
- def clear_all():
254
- return [], "", None, None, True, 30, 8
255
- clear_btn.click(clear_all, None, [chatbot, prompt, image, video, prefer_video, frame_stride, max_frames])
256
-
257
- if __name__ == "__main__":
258
- ips = get_all_local_ips()
259
- for ip in ips:
260
- print(f"* Running on local URL: http://{ip}:7860")
261
- ip = "0.0.0.0"
262
- demo.launch(server_name=ip, server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
axera_logo.png → image.png RENAMED
File without changes
images/demo.jpg DELETED

Git LFS Details

  • SHA256: 344d5f0e43bfd6a4a6ed655dc9c3dc76a2a6ecb1de3afeee998e61722378e8b3
  • Pointer size: 130 Bytes
  • Size of remote file: 64.5 kB
images/demo1.jpg DELETED

Git LFS Details

  • SHA256: 8d6156cbaa86cf0f4a9ebbfc8c2bf307dcf6fc64e32176d69333f6f7c7b294c5
  • Pointer size: 131 Bytes
  • Size of remote file: 101 kB
images/recoAll_attractions_1.jpg DELETED

Git LFS Details

  • SHA256: 0072430513e76580c4134b78e452a1fb729112fe5725d1f8481e697c7b5cd4a1
  • Pointer size: 130 Bytes
  • Size of remote file: 73.3 kB
images/recoAll_attractions_2.jpg DELETED

Git LFS Details

  • SHA256: 28acba6c284d06039ca57c9ce182094baafa8a48b49068dc44482a6d643689cc
  • Pointer size: 131 Bytes
  • Size of remote file: 105 kB
images/recoAll_attractions_3.jpg DELETED

Git LFS Details

  • SHA256: f308b3ebb1855b74768d9a24a256616f683ba59e52c238326501f4008a3ab9a4
  • Pointer size: 130 Bytes
  • Size of remote file: 59.4 kB
images/recoAll_attractions_4.jpg DELETED

Git LFS Details

  • SHA256: e47e13a69c16073e221b143a43263fcbe956bcfeb8ad47fddbdbc03a2b6f7261
  • Pointer size: 131 Bytes
  • Size of remote file: 115 kB
images/ssd_car.jpg DELETED

Git LFS Details

  • SHA256: 92d459a39a9eef03956257cf9fec84114d9e5df8fb9c0662fb257488cdd4f365
  • Pointer size: 130 Bytes
  • Size of remote file: 50.5 kB
images/ssd_horse.jpg DELETED

Git LFS Details

  • SHA256: ed22f6b4c8c33e50e391e089ede14e8fa9402c623b09dbcf010e804770698fbb
  • Pointer size: 131 Bytes
  • Size of remote file: 123 kB
main_ax650 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd12cddc400cd3ffb78af4a4512211af28c33f98993b9c7447aab8d8f29d7893
3
- size 6821432
 
 
 
 
main_ax650_api DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:390236f0fef17d46c1bdf0b26f831335fe0e5ede1c10814c1462fdd360b1b984
3
- size 6935688
 
 
 
 
main_axcl_aarch64 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0ded679af8f4fb115b04977d4bc4ecc63783f98d3b239cd3a73de19a6cd19ed
3
- size 1952752
 
 
 
 
main_axcl_api_aarch64 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c90d9dfae62b17ef4681f103c62b483e96a862e900a364673e57bc91d078c63d
3
- size 2105232
 
 
 
 
main_axcl_api_x86 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:67be73d1a6a4c17ee6b73222d3c5988fa10d2dbcf71515f6dad090a561dcc252
3
- size 2202296
 
 
 
 
main_axcl_x86 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1113a46767e5cc6c0a53172c5973848a40c65f379a428b3efc64a9fb6f6fb212
3
- size 2062240
 
 
 
 
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/model.embed_tokens.weight.bfloat16.bin → model.embed_tokens.weight.bfloat16.bin RENAMED
File without changes
openai_cli.py DELETED
@@ -1,66 +0,0 @@
1
- import base64
2
- import glob
3
- from openai import OpenAI
4
- import cv2
5
-
6
- BASE_URL = "http://localhost:8000/v1"
7
-
8
- def img_to_data_url(img_path: str):
9
- img = cv2.imread(img_path)
10
- if img is None:
11
- raise FileNotFoundError(f"Cannot read image: {img_path}")
12
- ok, buf = cv2.imencode(".jpg", img)
13
- if not ok:
14
- raise RuntimeError("cv2.imencode failed")
15
- b64 = base64.b64encode(buf).decode("ascii")
16
- return f"data:image/jpeg;base64,{b64}"
17
-
18
-
19
- def test(openai_messages):
20
- client = OpenAI(api_key="not-needed", base_url=BASE_URL)
21
-
22
- stream = client.chat.completions.create(
23
- model="AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4",
24
- messages=openai_messages,
25
- stream=True,
26
- )
27
- out_chunks = []
28
- for ev in stream:
29
- delta = ev.choices[0].delta
30
- if delta and delta.content:
31
- out_chunks.append(delta.content)
32
- print(delta.content, end="", flush=True)
33
- print()
34
- assistant_text = "".join(out_chunks).strip()
35
-
36
- def test_image():
37
- image_data = img_to_data_url("../demo_cv308/frame_0075.jpg")
38
-
39
- openai_messages = {
40
- "role": "user",
41
- "content": [
42
- {"type": "text", "text": "描述一下这张图片"},
43
- {"type": "image_url", "image_url": image_data},
44
- ],
45
- }
46
-
47
-
48
- test(openai_messages)
49
-
50
- def test_video():
51
- image_list = glob.glob("../demo_cv308/*.jpg")
52
- image_list.sort()
53
-
54
- image_data_list = [img_to_data_url(img) for img in image_list]
55
-
56
- openai_messages = {
57
- "role": "user",
58
- "content": [
59
- {"type": "text", "text": "描述一下这个视频"},
60
- {"type": "image_url", "is_video":True, "image_url": image_data_list},
61
- ],
62
- }
63
-
64
- test(openai_messages)
65
-
66
- test_video()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
post_config.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "enable_temperature" : true,
3
- "temperature" : 0.7,
4
 
5
  "enable_repetition_penalty" : false,
6
- "repetition_penalty" : 1,
7
- "penalty_window" : 30,
8
 
9
  "enable_top_p_sampling" : false,
10
  "top_p" : 0.8,
11
 
12
- "enable_top_k_sampling" : true,
13
- "top_k" : 20
14
  }
 
1
  {
2
+ "enable_temperature" : false,
3
+ "temperature" : 0.9,
4
 
5
  "enable_repetition_penalty" : false,
6
+ "repetition_penalty" : 1.2,
7
+ "penalty_window" : 20,
8
 
9
  "enable_top_p_sampling" : false,
10
  "top_p" : 0.8,
11
 
12
+ "enable_top_k_sampling" : false,
13
+ "top_k" : 10
14
  }
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l0_together.axmodel → qwen3_vl_text_p128_l0_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fa23909b5fc8dc47cbaa428c250f40afa8276555e17ed670a4208447292b2dc
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b42363b13c67753e20e80a90d51dacc6f04280a3da992b601ab09547f308c3f5
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l10_together.axmodel → qwen3_vl_text_p128_l10_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f89d3c6531aa6b15d01cd5938c88aa4147be70e54f3ba329bee7994174d409a3
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0133beae7b5d7b109557df15c6f5d6a738eded2ae1feef1f94c983699f74623
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l11_together.axmodel → qwen3_vl_text_p128_l11_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f76c70f7f51bacceb07f6bfcbdd7f49b9e214d0617b2cac8cb918c5b72108076
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2faf54778f2e20ea363e189f0a0a9f9acaa1337cf30109092ffb48418c485072
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l12_together.axmodel → qwen3_vl_text_p128_l12_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abc49889a03f8ea03ef779e4c4bb476abbe7b293adc5a9ea332d1d84dd92a7c8
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:073dab1c7c2c91f2a0fd23cc3b1553df6c6f35f1841c1358265f56be5659c53f
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l13_together.axmodel → qwen3_vl_text_p128_l13_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a9091cce989a3f58d4a92fca91a5fbfecd0eec594e69f5083693ed1da50372d7
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7b76578926bb45565be1bdc55f36fc074528e2835185195c69a3099f088e78c1
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l14_together.axmodel → qwen3_vl_text_p128_l14_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f338dfaef7d7212564dc0ec2a56875b525082645a82e6fdff1749559dd3a80f9
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a12b5b73debb37c8c2ad8a2ea16b39bfc7b3000d0323be8e8c9cb89fb214ba54
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l15_together.axmodel → qwen3_vl_text_p128_l15_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:39ccdb8e387ba9eac4863fe441bdb5cc331468eccade98ba2ad9e6c80cfecb6d
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:20c7ff4026d78f6e6dd7f3075283aeba63d5ea81aca1eedfa0f7210fe20f9e9b
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l16_together.axmodel → qwen3_vl_text_p128_l16_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:524503e1d3730d0430844b53b510862ae46564385b33d32934f5d82eb9ee06a6
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93519605f2bbb6c2e9558cacfc4655e6c2d93ae33c50f1f7e0d4d41b448a1649
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l17_together.axmodel → qwen3_vl_text_p128_l17_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a8fc419e080ac76f2a5a68c998f6f2441686da16e07d3a5ab2bfd82095056cf0
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4dbe9d98f015d52fa8e3083d666ce4438a7ed3160e9f1eac3fde94bc7fcfa30e
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l18_together.axmodel → qwen3_vl_text_p128_l18_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c69713b772b88ed151af4cd1cc3f45048443240bf6c8d0bc18616162be7b6f1a
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05c421f9457cb19cf3e4bc7c8b503f5f6e50358a664d1865a78640dd1fad8835
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l19_together.axmodel → qwen3_vl_text_p128_l19_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cfe2cddae863d9851d2815618c81b2b06d6de37c3efff6db9b2f0d48fd8bf4d2
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e1d98fa8ac199f64471f7535ab64de18ad99bf4cdfdc08e79d83635cda7cebb
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l1_together.axmodel → qwen3_vl_text_p128_l1_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5bb4888f253aabbd88d01b2ec0a3c59f33bad391c81e435d0a2f0b8dea0a36ba
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07823cd8b49a8b33bc006ae750782e317580494575bc317babd7abff55441eba
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l20_together.axmodel → qwen3_vl_text_p128_l20_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5dc0c4467d632e50a9009c080810a5ee52ac55cf8885ccaaca07b6981d6d9f0e
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62512c13b3509d6d77595616fe64804927d15e3738dd768ac6e88581c6235146
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l21_together.axmodel → qwen3_vl_text_p128_l21_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86b3c6d14cb14ff16c385bf9ae4e6edb3745d2bf354d05b181b4dd251400ad94
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fb01c185abe5eadf91f07829db2cd05121b8b05624bf9bdb145a770e2e55620a
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l22_together.axmodel → qwen3_vl_text_p128_l22_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:600c5e941149b60fb9284b23484b1dc793cb2d4cb42be6dfbeeb5ceaf5b3fe9d
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84a8454d97e489645b33144f88cdd56ffbad8362f1300f72d6f1b7f3cf3d75b2
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l23_together.axmodel → qwen3_vl_text_p128_l23_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:81cb89308687a02a5fdc205dff6b7c5b4b40c94e6b5d751094db734cd75da7cd
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9ce9e3219f96c8f7615fdb799141d1045620ea69fb77489248200c6bb3b2a2c6
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l24_together.axmodel → qwen3_vl_text_p128_l24_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d052a26ee314b7e04b554d124391b44dccfd0765ee28071f5cf388b2e90ee269
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e47988815b6b3b330584a934bb2cd889da3ed5a57ae0e75ed34dba31d676471
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l25_together.axmodel → qwen3_vl_text_p128_l25_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c082d514711a688511fa02b0805993bebc3f2fc803698de59493dbf3ccabfb02
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0d03c2cb43adb58329ce66d45f8abe579d55c6dc0c551f67ef2e6821520bd1a
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l26_together.axmodel → qwen3_vl_text_p128_l26_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2954728201ab57e5465ba452800abae12c5edf326d566f3ccece09319657e431
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cccaa3a03ba1a2902834ef825f06ca2b1a5f0326a5e0544b98fba4454f9c8b1b
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l27_together.axmodel → qwen3_vl_text_p128_l27_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:79d7494e99fafef52a0e828fee9d9ae937d4c0647c1e925f85def4a01b2a7d2f
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a12ad8c0cf98016faf93cd5d4fdf99d0c563d2a922b1f1d78baae56387ccf010
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l2_together.axmodel → qwen3_vl_text_p128_l2_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c74ef565209dfec1f2da192c3ad7f40070a2dd9ca855c80762baa6ed9a1f6cb6
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84b296fb9b14d24aa740c2fbfa54b2bd3ae7973a161ac9cb24c45b2372143892
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l3_together.axmodel → qwen3_vl_text_p128_l3_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dec86ba41ea44e6dd865de61bfe7af41771530714f1146b72191494b5f21263a
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5033f1fe491462b8f44f5e117c261955b63535e3272b0cd784a59c97d073319
3
+ size 46539431
Qwen3-VL-2B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l4_together.axmodel → qwen3_vl_text_p128_l4_together.axmodel RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3c7b6cc7ac5da4ae3f6e6999d7a3e5bc7ef51dfc03a768fd688096e4676b463
3
- size 40098826
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b67c490399d843fa68821a0f38180c300f32333113c0a1c8cd5729bcefd8a56
3
+ size 46539431