wli1995 commited on
Commit
49e8c2a
·
verified ·
1 Parent(s): 43b324d

update project structure

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +40 -0
  2. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-4B-Instruct_vision.axmodel → Qwen3-VL-4B-Instruct_vision.axmodel +0 -0
  3. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-4B-Instruct_vision_u8.axmodel → Qwen3-VL-4B-Instruct_vision_u8.axmodel +0 -0
  4. README.md +251 -173
  5. axera_logo.png +0 -3
  6. config.json +26 -0
  7. gradio_demo.py +0 -262
  8. main_ax650 +0 -3
  9. main_ax650_api +0 -3
  10. main_axcl_aarch64 +0 -3
  11. main_axcl_api_aarch64 +0 -3
  12. main_axcl_api_x86 +0 -3
  13. main_axcl_x86 +0 -3
  14. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/model.embed_tokens.weight.bfloat16.bin → model.embed_tokens.weight.bfloat16.bin +0 -0
  15. openai_cli.py +0 -66
  16. post_config.json +6 -6
  17. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l0_together.axmodel → qwen3_vl_text_p128_l0_together.axmodel +0 -0
  18. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l10_together.axmodel → qwen3_vl_text_p128_l10_together.axmodel +0 -0
  19. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l11_together.axmodel → qwen3_vl_text_p128_l11_together.axmodel +0 -0
  20. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l12_together.axmodel → qwen3_vl_text_p128_l12_together.axmodel +0 -0
  21. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l13_together.axmodel → qwen3_vl_text_p128_l13_together.axmodel +0 -0
  22. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l14_together.axmodel → qwen3_vl_text_p128_l14_together.axmodel +0 -0
  23. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l15_together.axmodel → qwen3_vl_text_p128_l15_together.axmodel +0 -0
  24. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l16_together.axmodel → qwen3_vl_text_p128_l16_together.axmodel +0 -0
  25. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l17_together.axmodel → qwen3_vl_text_p128_l17_together.axmodel +0 -0
  26. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l18_together.axmodel → qwen3_vl_text_p128_l18_together.axmodel +0 -0
  27. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l19_together.axmodel → qwen3_vl_text_p128_l19_together.axmodel +0 -0
  28. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l1_together.axmodel → qwen3_vl_text_p128_l1_together.axmodel +0 -0
  29. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l20_together.axmodel → qwen3_vl_text_p128_l20_together.axmodel +0 -0
  30. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l21_together.axmodel → qwen3_vl_text_p128_l21_together.axmodel +0 -0
  31. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l22_together.axmodel → qwen3_vl_text_p128_l22_together.axmodel +0 -0
  32. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l23_together.axmodel → qwen3_vl_text_p128_l23_together.axmodel +0 -0
  33. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l24_together.axmodel → qwen3_vl_text_p128_l24_together.axmodel +0 -0
  34. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l25_together.axmodel → qwen3_vl_text_p128_l25_together.axmodel +0 -0
  35. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l26_together.axmodel → qwen3_vl_text_p128_l26_together.axmodel +0 -0
  36. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l27_together.axmodel → qwen3_vl_text_p128_l27_together.axmodel +0 -0
  37. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l28_together.axmodel → qwen3_vl_text_p128_l28_together.axmodel +0 -0
  38. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l29_together.axmodel → qwen3_vl_text_p128_l29_together.axmodel +0 -0
  39. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l2_together.axmodel → qwen3_vl_text_p128_l2_together.axmodel +0 -0
  40. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l30_together.axmodel → qwen3_vl_text_p128_l30_together.axmodel +0 -0
  41. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l31_together.axmodel → qwen3_vl_text_p128_l31_together.axmodel +0 -0
  42. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l32_together.axmodel → qwen3_vl_text_p128_l32_together.axmodel +0 -0
  43. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l33_together.axmodel → qwen3_vl_text_p128_l33_together.axmodel +0 -0
  44. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l34_together.axmodel → qwen3_vl_text_p128_l34_together.axmodel +0 -0
  45. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l35_together.axmodel → qwen3_vl_text_p128_l35_together.axmodel +0 -0
  46. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l3_together.axmodel → qwen3_vl_text_p128_l3_together.axmodel +0 -0
  47. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l4_together.axmodel → qwen3_vl_text_p128_l4_together.axmodel +0 -0
  48. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l5_together.axmodel → qwen3_vl_text_p128_l5_together.axmodel +0 -0
  49. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l6_together.axmodel → qwen3_vl_text_p128_l6_together.axmodel +0 -0
  50. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l7_together.axmodel → qwen3_vl_text_p128_l7_together.axmodel +0 -0
.gitattributes CHANGED
@@ -61,3 +61,43 @@ main_axcl_api_aarch64 filter=lfs diff=lfs merge=lfs -text
61
  main_axcl_api_x86 filter=lfs diff=lfs merge=lfs -text
62
  main_ax650_api filter=lfs diff=lfs merge=lfs -text
63
  axera_logo.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  main_axcl_api_x86 filter=lfs diff=lfs merge=lfs -text
62
  main_ax650_api filter=lfs diff=lfs merge=lfs -text
63
  axera_logo.png filter=lfs diff=lfs merge=lfs -text
64
+ Qwen3-VL-4B-Instruct_vision.axmodel filter=lfs diff=lfs merge=lfs -text
65
+ Qwen3-VL-4B-Instruct_vision_u8.axmodel filter=lfs diff=lfs merge=lfs -text
66
+ model.embed_tokens.weight.bfloat16.bin filter=lfs diff=lfs merge=lfs -text
67
+ qwen3_vl_text_p128_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
68
+ qwen3_vl_text_p128_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text
69
+ qwen3_vl_text_p128_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text
70
+ qwen3_vl_text_p128_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text
71
+ qwen3_vl_text_p128_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text
72
+ qwen3_vl_text_p128_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text
73
+ qwen3_vl_text_p128_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text
74
+ qwen3_vl_text_p128_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text
75
+ qwen3_vl_text_p128_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text
76
+ qwen3_vl_text_p128_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text
77
+ qwen3_vl_text_p128_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text
78
+ qwen3_vl_text_p128_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
79
+ qwen3_vl_text_p128_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text
80
+ qwen3_vl_text_p128_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text
81
+ qwen3_vl_text_p128_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text
82
+ qwen3_vl_text_p128_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text
83
+ qwen3_vl_text_p128_l24_together.axmodel filter=lfs diff=lfs merge=lfs -text
84
+ qwen3_vl_text_p128_l25_together.axmodel filter=lfs diff=lfs merge=lfs -text
85
+ qwen3_vl_text_p128_l26_together.axmodel filter=lfs diff=lfs merge=lfs -text
86
+ qwen3_vl_text_p128_l27_together.axmodel filter=lfs diff=lfs merge=lfs -text
87
+ qwen3_vl_text_p128_l28_together.axmodel filter=lfs diff=lfs merge=lfs -text
88
+ qwen3_vl_text_p128_l29_together.axmodel filter=lfs diff=lfs merge=lfs -text
89
+ qwen3_vl_text_p128_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
90
+ qwen3_vl_text_p128_l30_together.axmodel filter=lfs diff=lfs merge=lfs -text
91
+ qwen3_vl_text_p128_l31_together.axmodel filter=lfs diff=lfs merge=lfs -text
92
+ qwen3_vl_text_p128_l32_together.axmodel filter=lfs diff=lfs merge=lfs -text
93
+ qwen3_vl_text_p128_l33_together.axmodel filter=lfs diff=lfs merge=lfs -text
94
+ qwen3_vl_text_p128_l34_together.axmodel filter=lfs diff=lfs merge=lfs -text
95
+ qwen3_vl_text_p128_l35_together.axmodel filter=lfs diff=lfs merge=lfs -text
96
+ qwen3_vl_text_p128_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
97
+ qwen3_vl_text_p128_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text
98
+ qwen3_vl_text_p128_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text
99
+ qwen3_vl_text_p128_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text
100
+ qwen3_vl_text_p128_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text
101
+ qwen3_vl_text_p128_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text
102
+ qwen3_vl_text_p128_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text
103
+ qwen3_vl_text_post.axmodel filter=lfs diff=lfs merge=lfs -text
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-4B-Instruct_vision.axmodel → Qwen3-VL-4B-Instruct_vision.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-4B-Instruct_vision_u8.axmodel → Qwen3-VL-4B-Instruct_vision_u8.axmodel RENAMED
File without changes
README.md CHANGED
@@ -66,202 +66,280 @@ The DDR capacity refers to the CMM memory that needs to be consumed. Ensure that
66
 
67
  ## How to use
68
 
69
- Download all files from this repository to the device
 
70
 
71
- **If you using AX650 Board**
72
-
73
- ### Demo Run
74
-
75
- #### Image understand demo
76
 
77
- - input text
78
 
79
- ```
80
- 描述这张图片
81
  ```
82
 
83
- - input image
84
 
85
- ![](./images/recoAll_attractions_1.jpg)
 
 
86
 
 
 
 
87
  ```
88
- root@ax650 ~/Qwen3-VL-4B-Instruct-GPTQ-Int4 # bash run_image_ax650.sh
89
- [I][ Init][ 156]: LLM init start
90
- [I][ Init][ 158]: Total CMM:7884 MB
91
- [I][ Init][ 34]: connect http://127.0.0.1:8080 ok
92
- bos_id: -1, eos_id: 151645
93
- img_start_token: 151652
94
- img_context_token: 151655
95
- 2% | █ | 1 / 39 [0.01s<0.58s, 66.67 count/s] tokenizer init ok[I][ Init][ 26]: LLaMaEmbedSelector use mmap
96
- 5% | ██ | 2 / 39 [0.02s<0.37s, 105.26 count/s] embed_selector init ok[I][ Init][ 201]: attr.axmodel_num:36
97
- 102% | █████████████████████████████████ | 40 / 39 [11.33s<11.05s, 3.53 count/s] init vpm axmodel ok,remain_cmm(2199 MB)[I][ Init][ 266]: IMAGE_CONTEXT_TOKEN: 151655, IMAGE_START_TOKEN: 151652
98
- [I][ Init][ 309]: image encoder output float32
99
-
100
- [I][ Init][ 339]: max_token_len : 2047
101
- [I][ Init][ 344]: kv_cache_size : 1024, kv_cache_num: 2047
102
- [I][ Init][ 352]: prefill_token_num : 128
103
- [I][ Init][ 356]: grp: 1, prefill_max_token_num : 1
104
- [I][ Init][ 356]: grp: 2, prefill_max_token_num : 128
105
- [I][ Init][ 356]: grp: 3, prefill_max_token_num : 256
106
- [I][ Init][ 356]: grp: 4, prefill_max_token_num : 384
107
- [I][ Init][ 356]: grp: 5, prefill_max_token_num : 512
108
- [I][ Init][ 356]: grp: 6, prefill_max_token_num : 640
109
- [I][ Init][ 356]: grp: 7, prefill_max_token_num : 768
110
- [I][ Init][ 356]: grp: 8, prefill_max_token_num : 896
111
- [I][ Init][ 356]: grp: 9, prefill_max_token_num : 1024
112
- [I][ Init][ 356]: grp: 10, prefill_max_token_num : 1152
113
- [I][ Init][ 360]: prefill_max_token_num : 1152
114
- [I][ Init][ 372]: LLM init ok
115
- [I][ Init][ 374]: Left CMM:2199 MB
116
- Type "q" to exit, Ctrl+c to stop current running
117
- prompt >> 描述这张图片
118
- image >> images/recoAll_attractions_1.jpg
119
- [I][ EncodeImage][ 440]: pixel_values size 1
120
- [I][ EncodeImage][ 441]: grid_h 24 grid_w 24
121
- [I][ EncodeImage][ 489]: image encode time : 222.440994 ms, size : 1
122
- [I][ Encode][ 532]: input_ids size:168
123
- [I][ Encode][ 540]: offset 15
124
- [I][ Encode][ 569]: img_embed.size:1, 368640
125
- [I][ Encode][ 583]: out_embed size:430080
126
- [I][ Encode][ 584]: input_ids size 168
127
- [I][ Encode][ 586]: position_ids size:168
128
- [I][ Run][ 607]: input token num : 168, prefill_split_num : 2
129
- [I][ Run][ 641]: input_num_token:128
130
- [I][ Run][ 641]: input_num_token:40
131
- [I][ Run][ 865]: ttft: 676.16 ms
132
- 这张图片展示了埃及吉萨的金字塔群,背景是晴朗的蓝天,前景是广阔的沙漠。
133
-
134
- 画面中主要可见三座金字塔:
135
- - 最大的一座是著名的**胡夫金字塔**,它位于画面中央偏左,是三座金字塔中最高、最显眼的。
136
- - 在其右侧,是稍小一些的**卡纳克金字塔**(或称“卡纳克金字塔”)。
137
- - 在画面最左侧,可以看到一座更小的金字塔,可能是**门卡乌金字塔**或**哈夫拉金字塔**。
138
-
139
- 这三座金字塔都是古埃及法老的陵墓,是古代世界七大奇迹中唯一现存的。它们的结构和规模令人惊叹,体现了古埃及人在建筑、数学和天文学方面的卓越成就。
140
-
141
- 整个场景在阳光下显得庄严而神秘,是埃及最具代表性的历史遗迹之一。
142
-
143
- [N][ Run][ 992]: hit eos,avg 7.12 token/s
144
  ```
145
 
146
- #### Video understand demo
147
 
148
- - input text
149
 
150
- ```
151
- 描述这个视频
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  ```
153
 
154
- - input video
155
-
156
- ./video
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  ```
159
- root@ax650 ~/Qwen3-VL-4B-Instruct-GPTQ-Int4 # bash run_video_ax650.sh
160
- [I][ Init][ 156]: LLM init start
161
- [I][ Init][ 158]: Total CMM:7884 MB
162
- [I][ Init][ 34]: connect http://127.0.0.1:8080 ok
163
- bos_id: -1, eos_id: 151645
164
- img_start_token: 151652
165
- img_context_token: 151656
166
- 2% | █ | 1 / 39 [0.02s<0.62s, 62.50 count/s] tokenizer init ok[I][ Init][ 26]: LLaMaEmbedSelector use mmap
167
- 5% | ██ | 2 / 39 [0.02s<0.39s, 100.00 count/s] embed_selector init ok[I][ Init][ 201]: attr.axmodel_num:36
168
- 102% | █████████████████████████████████ | 40 / 39 [44.70s<43.58s, 0.89 count/s] init vpm axmodel ok,remain_cmm(2199 MB)[I][ Init][ 266]: IMAGE_CONTEXT_TOKEN: 151656, IMAGE_START_TOKEN: 151652
169
- [I][ Init][ 309]: image encoder output float32
170
-
171
- [I][ Init][ 339]: max_token_len : 2047
172
- [I][ Init][ 344]: kv_cache_size : 1024, kv_cache_num: 2047
173
- [I][ Init][ 352]: prefill_token_num : 128
174
- [I][ Init][ 356]: grp: 1, prefill_max_token_num : 1
175
- [I][ Init][ 356]: grp: 2, prefill_max_token_num : 128
176
- [I][ Init][ 356]: grp: 3, prefill_max_token_num : 256
177
- [I][ Init][ 356]: grp: 4, prefill_max_token_num : 384
178
- [I][ Init][ 356]: grp: 5, prefill_max_token_num : 512
179
- [I][ Init][ 356]: grp: 6, prefill_max_token_num : 640
180
- [I][ Init][ 356]: grp: 7, prefill_max_token_num : 768
181
- [I][ Init][ 356]: grp: 8, prefill_max_token_num : 896
182
- [I][ Init][ 356]: grp: 9, prefill_max_token_num : 1024
183
- [I][ Init][ 356]: grp: 10, prefill_max_token_num : 1152
184
- [I][ Init][ 360]: prefill_max_token_num : 1152
185
- [I][ Init][ 372]: LLM init ok
186
- [I][ Init][ 374]: Left CMM:2199 MB
187
- Type "q" to exit, Ctrl+c to stop current running
188
- prompt >> 描述这个视频
189
- video >> video
190
- video/frame_0000.jpg
191
- video/frame_0008.jpg
192
- video/frame_0016.jpg
193
- video/frame_0024.jpg
194
- video/frame_0032.jpg
195
- video/frame_0040.jpg
196
- video/frame_0048.jpg
197
- video/frame_0056.jpg
198
- [I][ EncodeImage][ 440]: pixel_values size 4
199
- [I][ EncodeImage][ 441]: grid_h 24 grid_w 24
200
- [I][ EncodeImage][ 489]: image encode time : 773.406006 ms, size : 4
201
- [I][ Encode][ 532]: input_ids size:600
202
- [I][ Encode][ 540]: offset 15
203
- [I][ Encode][ 569]: img_embed.size:4, 368640
204
- [I][ Encode][ 574]: offset:159
205
- [I][ Encode][ 574]: offset:303
206
- [I][ Encode][ 574]: offset:447
207
- [I][ Encode][ 583]: out_embed size:1536000
208
- [I][ Encode][ 584]: input_ids size 600
209
- [I][ Encode][ 586]: position_ids size:600
210
- [I][ Run][ 607]: input token num : 600, prefill_split_num : 5
211
- [I][ Run][ 641]: input_num_token:128
212
- [I][ Run][ 641]: input_num_token:128
213
- [I][ Run][ 641]: input_num_token:128
214
- [I][ Run][ 641]: input_num_token:128
215
- [I][ Run][ 641]: input_num_token:88
216
-
217
- [I][ Run][ 865]: ttft: 1886.83 ms
218
- 这个视频展示了一群**土拨鼠**(或称“旱獭”)在山间草地上嬉戏打斗的场景。
219
-
220
- **画面细节:**
221
-
222
- - **主体动物**:画面中有多只土拨鼠,它们毛色以灰、棕、白相间,腹部和四肢颜色较浅,背部较深。它们体型圆润,耳朵短小,表情生动。
223
- - **动作**:这些土拨鼠似乎在进行一场“打斗”或“嬉戏”。它们互相扑腾、跳跃、用前爪拍打、甚至互相“拥抱”或“推搡”。动作非常活跃,充满动感,有些画面甚至有轻微的运动模糊,增强了动态感。
224
- - **背景**:背景是连绵起伏的山峦,山坡上覆盖着绿色植被,远处可见裸露的岩石和山体,天空湛蓝,阳光明媚,说明是白天晴朗的天气。
225
- - **前景**:它们站在一片布满小石子和草的地面,看起来像是山间小径或开阔地。
226
- - **构图**:画面采用近景特写,聚焦于土拨鼠的互动,背景虚化,突出了主体的动态和表情。整体构图充满活力和趣味性。
227
-
228
- **风格与氛围:**
229
-
230
- 这张图片/视频具有**拟人化和趣味性**,土拨鼠的动作被夸张化,仿佛在“打斗”或“跳舞”,非常可爱。
231
- - 画面色彩明亮,阳光充足,给人一种**自然、活泼、欢乐**的感觉。
232
-
233
- **总结:**
234
-
235
- 这是一段充满趣味和活力的野生动物短片,展现了土拨鼠在自然环境中的社交行为,它们的“打斗”其实可能是玩耍、争夺领地或建立社交关系的自然行为。整体画面生动、可爱,极具观赏性。
236
 
237
- ---
238
 
239
- **注意**:虽然土拨鼠(旱獭)在野外确实会互相打斗,但这种“打斗”通常是**玩耍或社交行为**,并非真正的攻击。视频中的“打斗”更像是它们的社交互动,非常可爱。
 
240
 
241
- [N][ Run][ 992]: hit eos,avg 7.10 token/s
 
242
 
243
- prompt >> q
244
- ```
 
 
245
 
246
- ### Gradio demo
 
 
 
 
247
 
248
- #### start openai style api server
249
- if the tokenizer server is not run in the same machine,please modify the tokenizer server ip in shell file.
250
- ```shell
251
- pip3 install -r requirements.txt
252
- # for axcl x86
253
- ./run_axcl_x86_api.sh
254
- # for axcl aarch64
255
- ./run_axcl_aarch64_api.sh
256
- # for ax650
257
- ./run_ax650_api.sh
258
  ```
259
 
260
- #### start gradio demo
261
- if the api server is not run in the same machine,please modify the api url in gradio web ui.
262
- ```shell
263
- python gradio_demo.py
264
- ```
265
 
266
- ![image](https://cdn-uploads.huggingface.co/production/uploads/64b7837c17570fdff9b906b9/Og9fPNi0chg768gicse7M.png)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
 
 
 
 
 
 
 
 
 
66
 
67
  ## How to use
68
 
69
+ ## 安装 axllm
70
+ 方式一:克隆仓库后执行安装脚本:
71
 
72
+ ```shell
73
+ git clone -b axllm https://github.com/AXERA-TECH/ax-llm.git
74
+ cd ax-llm
75
+ ./install.sh
76
+ ```
77
 
78
+ 方式二:一行命令安装(默认分支 `axllm`):
79
 
80
+ ```shell
81
+ curl -fsSL https://raw.githubusercontent.com/AXERA-TECH/ax-llm/axllm/install.sh | bash
82
  ```
83
 
84
+ 方式三:下载 GitHub Actions CI 导出的可执行程序(适合没有编译环境的用户):
85
 
86
+ 如果没有编译环境,请到:
87
+ `https://github.com/AXERA-TECH/ax-llm/actions?query=branch%3Aaxllm`
88
+ 下载 **最新 CI 导出的可执行程序**(`axllm`),然后:
89
 
90
+ ```shell
91
+ chmod +x axllm
92
+ sudo mv axllm /usr/bin/axllm
93
  ```
94
+
95
+ ## 模型下载(Hugging Face)
96
+ 先创建模型目录并进入,然后下载到该目录:
97
+
98
+ ```shell
99
+ mkdir -p AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4
100
+ cd AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4
101
+ hf download AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4 --local-dir .
102
+
103
+ # structure of the downloaded files
104
+ tree -L 3
105
+ .
106
+ └── AXERA-TECH
107
+ └── Qwen3-VL-4B-Instruct-GPTQ-Int4
108
+ ├── Qwen3-VL-4B-Instruct_vision.axmodel
109
+ ├── Qwen3-VL-4B-Instruct_vision_u8.axmodel
110
+ ├── README.md
111
+ ├── config.json
112
+ ├── images
113
+ ├── model.embed_tokens.weight.bfloat16.bin
114
+ ├── post_config.json
115
+ ├── qwen3_tokenizer.txt
116
+ ├── qwen3_vl_text_p128_l0_together.axmodel
117
+ ...
118
+ ├── qwen3_vl_text_p128_l9_together.axmodel
119
+ ├── qwen3_vl_text_post.axmodel
120
+ ├── requirements.txt
121
+ └── video
122
+
123
+ 4 directories, 45 files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  ```
125
 
126
+ ## Inference with AX650 Host, such as M4N-Dock(爱芯派Pro) or AX650N DEMO Board
127
 
128
+ ### 运行(CLI)
129
 
130
+ ```shell
131
+ (base) root@ax650:~# axllm run AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4/
132
+ 20:13:34.015 INF Init:218 | LLM init start
133
+ tokenizer_type = 1
134
+ 97% | ############################### | 38 / 39 [11.25s<11.54s, 3.38 count/s] init post axmodel ok,remain_cmm(6133 MB)
135
+ 20:13:45.263 INF Init:368 | max_token_len : 2047
136
+ 20:13:45.263 INF Init:371 | kv_cache_size : 1024, kv_cache_num: 2047
137
+ 20:13:45.263 INF Init:374 | prefill_token_num : 128
138
+ 20:13:45.263 INF Init:379 | grp: 1, prefill_max_kv_cache_num : 1
139
+ 20:13:45.263 INF Init:379 | grp: 2, prefill_max_kv_cache_num : 128
140
+ 20:13:45.263 INF Init:379 | grp: 3, prefill_max_kv_cache_num : 256
141
+ 20:13:45.263 INF Init:379 | grp: 4, prefill_max_kv_cache_num : 384
142
+ 20:13:45.263 INF Init:379 | grp: 5, prefill_max_kv_cache_num : 512
143
+ 20:13:45.263 INF Init:379 | grp: 6, prefill_max_kv_cache_num : 640
144
+ 20:13:45.263 INF Init:379 | grp: 7, prefill_max_kv_cache_num : 768
145
+ 20:13:45.263 INF Init:379 | grp: 8, prefill_max_kv_cache_num : 896
146
+ 20:13:45.263 INF Init:379 | grp: 9, prefill_max_kv_cache_num : 1024
147
+ 20:13:45.263 INF Init:379 | grp: 10, prefill_max_kv_cache_num : 1152
148
+ 20:13:45.263 INF Init:384 | prefill_max_token_num : 1152
149
+ 20:13:45.263 INF Init:27 | LLaMaEmbedSelector use mmap
150
+ 100% | ################################ | 39 / 39 [11.25s<11.25s, 3.47 count/s] embed_selector init ok
151
+ 20:13:47.224 WRN Init:511 | Qwen-VL vision size override: cfg=448x448 bytes=1204224, model_input_bytes=884736 -> 384x384 (square).
152
+ 20:13:47.224 INF Init:695 | Qwen-VL token ids: vision_start=151652 image_pad=151655 video_pad=151656
153
+ 20:13:47.224 INF Init:728 | VisionModule init ok: type=Qwen3VL, tokens_per_block=144, embed_size=2560, out_dtype=fp32
154
+ 20:13:47.224 INF Init:734 | VisionModule deepstack enabled: layers=3
155
+ 20:13:47.224 INF load_config:282 | load config:
156
+ 20:13:47.224 INF load_config:282 | {
157
+ 20:13:47.224 INF load_config:282 | "enable_repetition_penalty": false,
158
+ 20:13:47.224 INF load_config:282 | "enable_temperature": false,
159
+ 20:13:47.224 INF load_config:282 | "enable_top_k_sampling": false,
160
+ 20:13:47.224 INF load_config:282 | "enable_top_p_sampling": false,
161
+ 20:13:47.224 INF load_config:282 | "penalty_window": 20,
162
+ 20:13:47.224 INF load_config:282 | "repetition_penalty": 1.2,
163
+ 20:13:47.224 INF load_config:282 | "temperature": 0.9,
164
+ 20:13:47.224 INF load_config:282 | "top_k": 10,
165
+ 20:13:47.224 INF load_config:282 | "top_p": 0.8
166
+ 20:13:47.224 INF load_config:282 | }
167
+ 20:13:47.224 INF Init:448 | LLM init ok
168
+ Commands:
169
+ /q, /exit 退出
170
+ /reset 重置 kvcache
171
+ /dd 删除一轮对话
172
+ /pp 打印历史对话
173
+ Ctrl+C: 停止当前生成
174
+ VLM enabled: after each prompt, input image path (empty = text-only). Use "video:<frames_dir>" for video.
175
+ ----------------------------------------
176
+ prompt >> describe the image
177
+ image >> ./AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4/images/ssd_car.jpg
178
+ 20:14:13.430 INF EncodeForContent:1121 | Qwen-VL pixel_values[0] bytes=884736 min=0 max=255 (w=384 h=384 tp=2 ps=16 sm=2)
179
+ 20:14:13.594 INF EncodeForContent:1144 | vision cache store: ./AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4/images/ssd_car.jpg
180
+ 20:14:13.616 INF SetKVCache:749 | prefill_grpid:3 kv_cache_num:256 precompute_len:0 input_num_token:168
181
+ 20:14:13.616 INF SetKVCache:757 | current prefill_max_token_num:1152
182
+ 20:14:13.616 INF SetKVCache:760 | first run
183
+ 20:14:13.618 INF Run:818 | input token num : 168, prefill_split_num : 2
184
+ 20:14:13.618 INF Run:858 | prefill chunk p=0 history_len=0 grpid=1 kv_cache_num=0 input_tokens=128
185
+ 20:14:13.618 INF Run:881 | prefill indices shape: p=0 idx_elems=384 idx_rows=3 pos_rows=3
186
+ 20:14:13.940 INF Run:858 | prefill chunk p=1 history_len=128 grpid=2 kv_cache_num=128 input_tokens=40
187
+ 20:14:13.940 INF Run:881 | prefill indices shape: p=1 idx_elems=384 idx_rows=3 pos_rows=3
188
+ 20:14:14.295 INF Run:1023 | ttft: 677.29 ms
189
+ This is a vibrant street photograph taken in a city, likely London, featuring a classic red double-decker bus as the central subject.
190
+
191
+ **Key elements in the image:**
192
+
193
+ - **The Bus:** A bright red, vintage-style double-decker bus, which is a hallmark of London's public transport. The bus is parked or stopped on the street. A prominent advertisement is visible on its side: “WHEN YOU SAY ‘YES’” above the website “WIXMONEY.COM”. The bus has a classic design with large windows and ornate architectural details on its upper deck.
194
+
195
+ - **The Setting:** The background consists of tall, ornate, multi-story buildings with traditional European architecture, featuring large windows, stone facades, and decorative balconies. This strongly suggests a central or affluent district in a major European city.
196
+
197
+ - **The Person:** In the foreground, a person (likely a woman) is standing on the sidewalk, looking up at the bus. She is wearing a dark coat and a light-colored hat or head covering, and she is holding a small, light-colored handbag. Her posture and gaze suggest she is observing the bus or the scene.
198
+
199
+ - **The Atmosphere:** The photo has a bright, clear, and cheerful quality, with natural daylight illuminating the scene. The colors are vivid, especially the red of the bus, which stands out against the more muted tones of the buildings and the person’s clothing.
200
+
201
+ - **The Composition:** The image is framed to capture the bus and the surrounding architecture, with the person adding a human element and a sense of scale. The perspective is slightly elevated, looking down at the bus and the street.
202
+
203
+ Overall, the image captures a moment of urban life, blending the iconic imagery of a city bus with the everyday activity of a pedestrian, all set against a backdrop of classic architecture.
204
+
205
+ 20:15:12.812 NTC Run:1145 | hit eos,avg 6.37 token/s
206
+ 20:15:12.813 INF GetKVCache:721 | precompute_len:409, remaining:743
207
+ prompt >> how many people in the image?
208
+ image >>
209
+ 20:15:33.058 INF EncodeForContent:1057 | vision cache hit (mem): ./AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4/images/ssd_car.jpg
210
+ 20:15:33.067 INF SetKVCache:749 | prefill_grpid:5 kv_cache_num:512 precompute_len:409 input_num_token:17
211
+ 20:15:33.067 INF SetKVCache:757 | current prefill_max_token_num:640
212
+ 20:15:33.068 INF Run:818 | input token num : 17, prefill_split_num : 1
213
+ 20:15:33.068 INF Run:858 | prefill chunk p=0 history_len=409 grpid=5 kv_cache_num=512 input_tokens=17
214
+ 20:15:33.068 INF Run:881 | prefill indices shape: p=0 idx_elems=384 idx_rows=3 pos_rows=3
215
+ 20:15:33.502 INF Run:1023 | ttft: 433.86 ms
216
+ Based on the image provided, there is **one person** clearly visible in the foreground — the woman standing on the sidewalk, looking up at the bus. She is the only person explicitly depicted in the photograph.
217
+
218
+ There may be other people on the bus or in the background, but they are not visible or identifiable in the image. Therefore, the answer is:
219
+
220
+ > **One person.**
221
+
222
+ 20:15:45.526 NTC Run:1145 | hit eos,avg 6.49 token/s
223
+ 20:15:45.526 INF GetKVCache:721 | precompute_len:503, remaining:649
224
+ prompt >> /q
225
  ```
226
 
227
+ ### 启动服务(OpenAI 兼容)
 
 
228
 
229
+ ```shell
230
+ (base) root@ax650:~# axllm serve AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4/
231
+ 20:18:10.375 INF Init:218 | LLM init start
232
+ tokenizer_type = 1
233
+ 97% | ############################### | 38 / 39 [6.45s<6.62s, 5.89 count/s] init post axmodel ok,remain_cmm(6133 MB)
234
+ 20:18:16.826 INF Init:368 | max_token_len : 2047
235
+ 20:18:16.826 INF Init:371 | kv_cache_size : 1024, kv_cache_num: 2047
236
+ 20:18:16.826 INF Init:374 | prefill_token_num : 128
237
+ 20:18:16.826 INF Init:379 | grp: 1, prefill_max_kv_cache_num : 1
238
+ 20:18:16.826 INF Init:379 | grp: 2, prefill_max_kv_cache_num : 128
239
+ 20:18:16.826 INF Init:379 | grp: 3, prefill_max_kv_cache_num : 256
240
+ 20:18:16.826 INF Init:379 | grp: 4, prefill_max_kv_cache_num : 384
241
+ 20:18:16.826 INF Init:379 | grp: 5, prefill_max_kv_cache_num : 512
242
+ 20:18:16.826 INF Init:379 | grp: 6, prefill_max_kv_cache_num : 640
243
+ 20:18:16.826 INF Init:379 | grp: 7, prefill_max_kv_cache_num : 768
244
+ 20:18:16.826 INF Init:379 | grp: 8, prefill_max_kv_cache_num : 896
245
+ 20:18:16.826 INF Init:379 | grp: 9, prefill_max_kv_cache_num : 1024
246
+ 20:18:16.826 INF Init:379 | grp: 10, prefill_max_kv_cache_num : 1152
247
+ 20:18:16.826 INF Init:384 | prefill_max_token_num : 1152
248
+ 20:18:16.826 INF Init:27 | LLaMaEmbedSelector use mmap
249
+ 100% | ################################ | 39 / 39 [6.45s<6.45s, 6.05 count/s] embed_selector init ok
250
+ 20:18:17.190 WRN Init:511 | Qwen-VL vision size override: cfg=448x448 bytes=1204224, model_input_bytes=884736 -> 384x384 (square).
251
+ 20:18:17.191 INF Init:695 | Qwen-VL token ids: vision_start=151652 image_pad=151655 video_pad=151656
252
+ 20:18:17.191 INF Init:728 | VisionModule init ok: type=Qwen3VL, tokens_per_block=144, embed_size=2560, out_dtype=fp32
253
+ 20:18:17.191 INF Init:734 | VisionModule deepstack enabled: layers=3
254
+ 20:18:17.191 INF load_config:282 | load config:
255
+ 20:18:17.191 INF load_config:282 | {
256
+ 20:18:17.191 INF load_config:282 | "enable_repetition_penalty": false,
257
+ 20:18:17.191 INF load_config:282 | "enable_temperature": false,
258
+ 20:18:17.191 INF load_config:282 | "enable_top_k_sampling": false,
259
+ 20:18:17.191 INF load_config:282 | "enable_top_p_sampling": false,
260
+ 20:18:17.191 INF load_config:282 | "penalty_window": 20,
261
+ 20:18:17.191 INF load_config:282 | "repetition_penalty": 1.2,
262
+ 20:18:17.191 INF load_config:282 | "temperature": 0.9,
263
+ 20:18:17.191 INF load_config:282 | "top_k": 10,
264
+ 20:18:17.191 INF load_config:282 | "top_p": 0.8
265
+ 20:18:17.191 INF load_config:282 | }
266
+ 20:18:17.191 INF Init:448 | LLM init ok
267
+ Starting server on port 8000 with model 'AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4'...
268
+ API URLs:
269
+ GET http://127.0.0.1:8000/health
270
+ GET http://127.0.0.1:8000/v1/models
271
+ POST http://127.0.0.1:8000/v1/chat/completions
272
+ GET http://10.126.35.203:8000/health
273
+ GET http://10.126.35.203:8000/v1/models
274
+ POST http://10.126.35.203:8000/v1/chat/completions
275
+ GET http://172.18.0.1:8000/health
276
+ GET http://172.18.0.1:8000/v1/models
277
+ POST http://172.18.0.1:8000/v1/chat/completions
278
+ GET http://172.17.0.1:8000/health
279
+ GET http://172.17.0.1:8000/v1/models
280
+ POST http://172.17.0.1:8000/v1/chat/completions
281
+ Aliases:
282
+ GET http://127.0.0.1:8000/models
283
+ POST http://127.0.0.1:8000/chat/completions
284
+ GET http://10.126.35.203:8000/models
285
+ POST http://10.126.35.203:8000/chat/completions
286
+ GET http://172.18.0.1:8000/models
287
+ POST http://172.18.0.1:8000/chat/completions
288
+ GET http://172.17.0.1:8000/models
289
+ POST http://172.17.0.1:8000/chat/completions
290
+ OpenAI API Server starting on http://0.0.0.0:8000
291
+ Max concurrency: 1
292
+ Models: AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4
293
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
+ ### OpenAI 调用示例
296
 
297
+ ```python
298
+ from openai import OpenAI
299
 
300
+ API_URL = "http://127.0.0.1:8000/v1"
301
+ MODEL = "AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4"
302
 
303
+ messages = [
304
+ {"role": "system", "content": [{"type": "text", "text": "you are a helpful assistant."}]},
305
+ {"role": "user", "content": "hello"},
306
+ ]
307
 
308
+ client = OpenAI(api_key="not-needed", base_url=API_URL)
309
+ completion = client.chat.completions.create(
310
+ model=MODEL,
311
+ messages=messages,
312
+ )
313
 
314
+ print(completion.choices[0].message.content)
 
 
 
 
 
 
 
 
 
315
  ```
316
 
 
 
 
 
 
317
 
318
+ ### OpenAI 流式调用示例
319
+
320
+ ```python
321
+ from openai import OpenAI
322
+
323
+ API_URL = "http://127.0.0.1:8000/v1"
324
+ MODEL = "AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4"
325
+
326
+ messages = [
327
+ {"role": "system", "content": [{"type": "text", "text": "you are a helpful assistant."}]},
328
+ {"role": "user", "content": "hello"},
329
+ ]
330
+
331
+ client = OpenAI(api_key="not-needed", base_url=API_URL)
332
+ stream = client.chat.completions.create(
333
+ model=MODEL,
334
+ messages=messages,
335
+ stream=True,
336
+ )
337
 
338
+ print("assistant:")
339
+ for ev in stream:
340
+ delta = getattr(ev.choices[0], "delta", None)
341
+ if delta and getattr(delta, "content", None):
342
+ print(delta.content, end="", flush=True)
343
+ print("\n")
345
+ ```
axera_logo.png DELETED

Git LFS Details

  • SHA256: 6f3729509adf9e0c8baffcda3d7c1228f7d6bcd74374fc592c2995a3c1a3dfc1
  • Pointer size: 131 Bytes
  • Size of remote file: 157 kB
config.json CHANGED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "system_prompt": "you are a helpful assistant.",
3
+ "model_name": "AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4",
4
+ "url_tokenizer_model": "qwen3_tokenizer.txt",
5
+ "tokenizer_type": "Qwen3VL",
6
+ "post_config_path": "post_config.json",
7
+ "template_filename_axmodel": "qwen3_vl_text_p128_l%d_together.axmodel",
8
+ "axmodel_num": 36,
9
+ "filename_post_axmodel": "qwen3_vl_text_post.axmodel",
10
+ "filename_tokens_embed": "model.embed_tokens.weight.bfloat16.bin",
11
+ "tokens_embed_num": 151936,
12
+ "tokens_embed_size": 2560,
13
+ "use_mmap_load_embed": true,
14
+ "vlm_type": "Qwen3VL",
15
+ "filename_image_encoder_axmodel": "Qwen3-VL-4B-Instruct_vision.axmodel",
16
+ "vision_patch_size": 16,
17
+ "vision_temporal_patch_size": 2,
18
+ "vision_spatial_merge_size": 2,
19
+ "vision_fps": 1,
20
+ "vision_tokens_per_second": 1,
21
+ "vision_cache_dir": "vision_cache",
22
+ "use_mmap_load_layer": true,
23
+ "devices": [
24
+ 0
25
+ ]
26
+ }
gradio_demo.py DELETED
@@ -1,262 +0,0 @@
1
- # gradio_chat_single_turn.py
2
- import re
3
- import subprocess
4
- import gradio as gr
5
- import base64, cv2, os, tempfile
6
- from openai import OpenAI
7
- import requests
8
-
9
- def get_all_local_ips():
10
- result = subprocess.run(['ip', 'a'], capture_output=True, text=True)
11
- output = result.stdout
12
-
13
- # 匹配所有IPv4
14
- ips = re.findall(r'inet (\d+\.\d+\.\d+\.\d+)', output)
15
-
16
- # 过滤掉回环地址
17
- real_ips = [ip for ip in ips if not ip.startswith('127.')]
18
-
19
- return real_ips
20
-
21
-
22
-
23
- # ---------- Helpers ----------
24
- def img_to_data_url_from_cvframe(frame):
25
- import base64, cv2
26
- ok, buf = cv2.imencode(".jpg", frame, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
27
- b64 = base64.b64encode(buf).decode("ascii")
28
- return f"data:image/jpeg;base64,{b64}"
29
-
30
- def img_to_data_url_from_path(img_path: str) -> str:
31
- import cv2, base64
32
- img = cv2.imread(img_path)
33
- return img_to_data_url_from_cvframe(img)
34
-
35
- def video_to_data_urls(video_path: str, frame_stride: int = 30, max_frames: int = 8):
36
- import cv2, base64
37
- cap = cv2.VideoCapture(video_path)
38
- total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
39
-
40
- if total / frame_stride > max_frames:
41
- frame_stride = int(total/max_frames)
42
-
43
- urls = []
44
- idx = 0
45
- first_preview = None
46
- while len(urls) < max_frames and idx < total:
47
- cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
48
- ret, frame = cap.read()
49
- if not ret:
50
- break
51
- ok, buf = cv2.imencode(".jpg", frame, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
52
- if not ok:
53
- break
54
- b64 = base64.b64encode(buf).decode("ascii")
55
- data_url = f"data:image/jpeg;base64,{b64}"
56
- urls.append(data_url)
57
- if first_preview is None:
58
- first_preview = data_url
59
- idx += frame_stride
60
- cap.release()
61
- return urls, first_preview
62
-
63
- def save_preview_image_from_data_url(data_url: str) -> str:
64
- # 仅用于在 Chatbot 里显示缩略图
65
- comma = data_url.find(",")
66
- if comma == -1:
67
- return ""
68
- b64 = data_url[comma+1:]
69
- raw = base64.b64decode(b64)
70
- fd, tmp_path = tempfile.mkstemp(suffix=".jpg", prefix="preview_")
71
- os.close(fd)
72
- with open(tmp_path, "wb") as f:
73
- f.write(raw)
74
- return tmp_path
75
-
76
- def build_messages(prompt: str, image_path: str | None, video_path: str | None,
77
- prefer_video: bool, frame_stride: int, max_frames: int):
78
- content = []
79
- if prompt and prompt.strip():
80
- content.append({"type": "text", "text": prompt.strip()})
81
-
82
- if video_path and os.path.exists(video_path) and prefer_video:
83
- urls, first_preview = video_to_data_urls(video_path, frame_stride=frame_stride, max_frames=max_frames)
84
- content.append({"type": "image_url", "is_video":True, "image_url": urls})
85
- media_desc = f"(视频抽帧:{len(urls)} 帧,步长 {frame_stride})"
86
- return {"role": "user", "content": content}, first_preview, media_desc
87
-
88
- if image_path and os.path.exists(image_path):
89
- u = img_to_data_url_from_path(image_path)
90
- content.append({"type": "image_url", "image_url": u})
91
- media_desc = "(已附带图片)"
92
- return {"role": "user", "content": content}, u, media_desc
93
-
94
- if video_path and os.path.exists(video_path):
95
- urls, first_preview = video_to_data_urls(video_path, frame_stride=frame_stride, max_frames=max_frames)
96
- content.append({"type": "image_url", "is_video":True, "image_url": urls})
97
- media_desc = f"(视频抽帧:{len(urls)} 帧,步长 {frame_stride})"
98
- return {"role": "user", "content": content}, first_preview, media_desc
99
-
100
- return {"role": "user", "content": content if content else [{"type": "text", "text": prompt or ""}]}, None, ""
101
-
102
- # ---------- Gradio callback (single-turn, stream) ----------
103
- def run_single_turn(prompt, image_file, video_file, prefer_video, frame_stride, max_frames,
104
- base_url, model, api_key, chatbot_state):
105
- """
106
- 单轮:每次发送都会重置聊天历史,只显示本轮的 user/assistant 两个气泡。
107
- """
108
- try:
109
- # 清空历史(单轮),构造用户气泡
110
- chatbot_state = []
111
-
112
- # 准备文件路径
113
- image_path = image_file if isinstance(image_file, str) else (image_file.name if image_file else None)
114
- video_path = video_file if isinstance(video_file, str) else (video_file.name if video_file else None)
115
-
116
- # 构造 messages 和预览
117
- messages, preview_data_url, media_desc = build_messages(
118
- prompt=prompt or "",
119
- image_path=image_path,
120
- video_path=video_path,
121
- prefer_video=bool(prefer_video),
122
- frame_stride=int(frame_stride),
123
- max_frames=int(max_frames),
124
- )
125
-
126
- # 组装用户气泡(Markdown):文本 + 预览图/视频说明
127
- user_md = (prompt or "").strip()
128
- if media_desc:
129
- user_md = (user_md + "\n\n" if user_md else "") + f"> {media_desc}"
130
- if preview_data_url:
131
- # user_md = (user_md + "\n\n" if user_md else "") + f"![preview]({preview_path})"
132
- user_md = (user_md + "\n\n" if user_md else "") + f"![preview]({preview_data_url})"
133
-
134
- chatbot_state.append((user_md or "(空提示)", "")) # assistant 先空字符串,等待流式填充
135
- yield chatbot_state # 先把用户气泡渲染出来
136
-
137
- # 调后端(流式)
138
- client = OpenAI(api_key=api_key or "not-needed", base_url=base_url.strip())
139
- stream = client.chat.completions.create(
140
- model=model.strip(),
141
- messages=messages,
142
- stream=True,
143
- )
144
-
145
- bot_chunks = []
146
- # 先补一个空 assistant 气泡
147
- if len(chatbot_state) == 1:
148
- chatbot_state[0] = (chatbot_state[0][0], "")
149
- yield chatbot_state
150
-
151
- # 逐 chunk 更新 assistant 气泡(Markdown)
152
- for ev in stream:
153
- delta = getattr(ev.choices[0], "delta", None)
154
- if delta and getattr(delta, "content", None):
155
- bot_chunks.append(delta.content)
156
- chatbot_state[-1] = (chatbot_state[-1][0], "".join(bot_chunks))
157
- yield chatbot_state
158
-
159
- # 结束再确保收尾
160
- chatbot_state[-1] = (chatbot_state[-1][0], "".join(bot_chunks) if bot_chunks else "(empty response)")
161
- yield chatbot_state
162
-
163
- except Exception as e:
164
- chatbot_state.append((
165
- chatbot_state[-1][0] if chatbot_state else "(request)",
166
- f"**Error:** {e}"
167
- ))
168
- yield chatbot_state
169
-
170
- # ---------- Gradio UI ----------
171
- with gr.Blocks(css="""
172
- #chat,
173
- #chat * {
174
- font-size: 18px !important;
175
- line-height: 1.6 !important;
176
- }
177
-
178
- #chat .message,
179
- #chat [data-testid="bot"],
180
- #chat [data-testid="user"] {
181
- font-size: 18px !important;
182
- }
183
- """,title="AXERA Qwen3 VL") as demo:
184
- axera_logo = img_to_data_url_from_path("./axera_logo.png")
185
- gr.Markdown(
186
- f"""
187
- <div style="display: flex; align-items: center; gap: 10px;">
188
- <img src="{axera_logo}" alt="axera_logo" style="height: 60px;">
189
- </div>
190
- """
191
- )
192
-
193
- chatbot = gr.Chatbot(
194
- label="对话",
195
- bubble_full_width=False,
196
- height=500,
197
- avatar_images=(None, None), # 可替换头像
198
- latex_delimiters=[{"left": "$$", "right": "$$", "display": True},
199
- {"left": "$", "right": "$", "display": False}],
200
- show_copy_button=True,
201
- render_markdown=True,
202
- elem_id="chat"
203
- )
204
-
205
- with gr.Row():
206
- with gr.Column(scale=2):
207
- prompt = gr.Textbox(label="Prompt", placeholder="输入你的提示语", lines=2)
208
- with gr.Row():
209
- send_btn = gr.Button("发送 ▶️", variant="primary")
210
- clear_btn = gr.Button("清空")
211
- stop_btn = gr.Button("停止 ■", variant="stop")
212
- with gr.Row():
213
- image = gr.Image(type="filepath", label="上传图片(可选)")
214
- video = gr.Video(label="上传视频(可选)")
215
-
216
- with gr.Column(scale=1):
217
- base_url = gr.Textbox(value="http://localhost:8000/v1", label="Base URL")
218
- model = gr.Textbox(value="AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4", label="Model")
219
- api_key = gr.Textbox(value="not-needed", label="API Key", type="password")
220
- with gr.Row():
221
- prefer_video = gr.Checkbox(True, label="如果有视频,优先使用视频抽帧")
222
- frame_stride = gr.Slider(1, 90, value=30, step=1, label="视频抽帧间隔")
223
- max_frames = gr.Slider(1, 8, value=8, step=1, label="最多抽帧数")
224
-
225
-
226
- # 单轮对话需要一个 state 来承载当前这轮的气泡
227
- state = gr.State([])
228
-
229
- send_btn.click(
230
- fn=run_single_turn,
231
- inputs=[prompt, image, video, prefer_video, frame_stride, max_frames, base_url, model, api_key, state],
232
- outputs=chatbot,
233
- show_progress=True,
234
- queue=True,
235
- )
236
-
237
- def stop_stream(base_url):
238
- url = f"{base_url.strip()}/stop"
239
- response = requests.get(url)
240
- if response.status_code == 200:
241
- print("Stream stopped successfully")
242
- else:
243
- print(f"Failed to stop stream: {response.status_code} - {response.text}")
244
-
245
- stop_btn.click(
246
- fn=stop_stream,
247
- inputs=[base_url],
248
- outputs=chatbot,
249
- show_progress=True,
250
- queue=True,
251
- )
252
-
253
- def clear_all():
254
- return [], "", None, None, True, 30, 8
255
- clear_btn.click(clear_all, None, [chatbot, prompt, image, video, prefer_video, frame_stride, max_frames])
256
-
257
- if __name__ == "__main__":
258
- ips = get_all_local_ips()
259
- for ip in ips:
260
- print(f"* Running on local URL: http://{ip}:7860")
261
- ip = "0.0.0.0"
262
- demo.launch(server_name=ip, server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main_ax650 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd12cddc400cd3ffb78af4a4512211af28c33f98993b9c7447aab8d8f29d7893
3
- size 6821432
 
 
 
 
main_ax650_api DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:390236f0fef17d46c1bdf0b26f831335fe0e5ede1c10814c1462fdd360b1b984
3
- size 6935688
 
 
 
 
main_axcl_aarch64 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0ded679af8f4fb115b04977d4bc4ecc63783f98d3b239cd3a73de19a6cd19ed
3
- size 1952752
 
 
 
 
main_axcl_api_aarch64 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c90d9dfae62b17ef4681f103c62b483e96a862e900a364673e57bc91d078c63d
3
- size 2105232
 
 
 
 
main_axcl_api_x86 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:67be73d1a6a4c17ee6b73222d3c5988fa10d2dbcf71515f6dad090a561dcc252
3
- size 2202296
 
 
 
 
main_axcl_x86 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1113a46767e5cc6c0a53172c5973848a40c65f379a428b3efc64a9fb6f6fb212
3
- size 2062240
 
 
 
 
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/model.embed_tokens.weight.bfloat16.bin → model.embed_tokens.weight.bfloat16.bin RENAMED
File without changes
openai_cli.py DELETED
@@ -1,66 +0,0 @@
1
- import base64
2
- import glob
3
- from openai import OpenAI
4
- import cv2
5
-
6
- BASE_URL = "http://localhost:8000/v1"
7
-
8
- def img_to_data_url(img_path: str):
9
- img = cv2.imread(img_path)
10
- if img is None:
11
- raise FileNotFoundError(f"Cannot read image: {img_path}")
12
- ok, buf = cv2.imencode(".jpg", img)
13
- if not ok:
14
- raise RuntimeError("cv2.imencode failed")
15
- b64 = base64.b64encode(buf).decode("ascii")
16
- return f"data:image/jpeg;base64,{b64}"
17
-
18
-
19
- def test(openai_messages):
20
- client = OpenAI(api_key="not-needed", base_url=BASE_URL)
21
-
22
- stream = client.chat.completions.create(
23
- model="AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4",
24
- messages=openai_messages,
25
- stream=True,
26
- )
27
- out_chunks = []
28
- for ev in stream:
29
- delta = ev.choices[0].delta
30
- if delta and delta.content:
31
- out_chunks.append(delta.content)
32
- print(delta.content, end="", flush=True)
33
- print()
34
- assistant_text = "".join(out_chunks).strip()
35
-
36
- def test_image():
37
- image_data = img_to_data_url("../demo_cv308/frame_0075.jpg")
38
-
39
- openai_messages = {
40
- "role": "user",
41
- "content": [
42
- {"type": "text", "text": "描述一下这张图片"},
43
- {"type": "image_url", "image_url": image_data},
44
- ],
45
- }
46
-
47
-
48
- test(openai_messages)
49
-
50
- def test_video():
51
- image_list = glob.glob("../demo_cv308/*.jpg")
52
- image_list.sort()
53
-
54
- image_data_list = [img_to_data_url(img) for img in image_list]
55
-
56
- openai_messages = {
57
- "role": "user",
58
- "content": [
59
- {"type": "text", "text": "描述一下这个视频"},
60
- {"type": "image_url", "is_video":True, "image_url": image_data_list},
61
- ],
62
- }
63
-
64
- test(openai_messages)
65
-
66
- test_video()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
post_config.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "enable_temperature" : true,
3
- "temperature" : 0.7,
4
 
5
  "enable_repetition_penalty" : false,
6
- "repetition_penalty" : 1,
7
- "penalty_window" : 30,
8
 
9
  "enable_top_p_sampling" : false,
10
  "top_p" : 0.8,
11
 
12
- "enable_top_k_sampling" : true,
13
- "top_k" : 20
14
  }
 
1
  {
2
+ "enable_temperature" : false,
3
+ "temperature" : 0.9,
4
 
5
  "enable_repetition_penalty" : false,
6
+ "repetition_penalty" : 1.2,
7
+ "penalty_window" : 20,
8
 
9
  "enable_top_p_sampling" : false,
10
  "top_p" : 0.8,
11
 
12
+ "enable_top_k_sampling" : false,
13
+ "top_k" : 10
14
  }
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l0_together.axmodel → qwen3_vl_text_p128_l0_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l10_together.axmodel → qwen3_vl_text_p128_l10_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l11_together.axmodel → qwen3_vl_text_p128_l11_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l12_together.axmodel → qwen3_vl_text_p128_l12_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l13_together.axmodel → qwen3_vl_text_p128_l13_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l14_together.axmodel → qwen3_vl_text_p128_l14_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l15_together.axmodel → qwen3_vl_text_p128_l15_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l16_together.axmodel → qwen3_vl_text_p128_l16_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l17_together.axmodel → qwen3_vl_text_p128_l17_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l18_together.axmodel → qwen3_vl_text_p128_l18_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l19_together.axmodel → qwen3_vl_text_p128_l19_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l1_together.axmodel → qwen3_vl_text_p128_l1_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l20_together.axmodel → qwen3_vl_text_p128_l20_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l21_together.axmodel → qwen3_vl_text_p128_l21_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l22_together.axmodel → qwen3_vl_text_p128_l22_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l23_together.axmodel → qwen3_vl_text_p128_l23_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l24_together.axmodel → qwen3_vl_text_p128_l24_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l25_together.axmodel → qwen3_vl_text_p128_l25_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l26_together.axmodel → qwen3_vl_text_p128_l26_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l27_together.axmodel → qwen3_vl_text_p128_l27_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l28_together.axmodel → qwen3_vl_text_p128_l28_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l29_together.axmodel → qwen3_vl_text_p128_l29_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l2_together.axmodel → qwen3_vl_text_p128_l2_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l30_together.axmodel → qwen3_vl_text_p128_l30_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l31_together.axmodel → qwen3_vl_text_p128_l31_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l32_together.axmodel → qwen3_vl_text_p128_l32_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l33_together.axmodel → qwen3_vl_text_p128_l33_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l34_together.axmodel → qwen3_vl_text_p128_l34_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l35_together.axmodel → qwen3_vl_text_p128_l35_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l3_together.axmodel → qwen3_vl_text_p128_l3_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l4_together.axmodel → qwen3_vl_text_p128_l4_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l5_together.axmodel → qwen3_vl_text_p128_l5_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l6_together.axmodel → qwen3_vl_text_p128_l6_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l7_together.axmodel → qwen3_vl_text_p128_l7_together.axmodel RENAMED
File without changes