wli1995 commited on
Commit
49e8c2a
·
verified ·
1 Parent(s): 43b324d

update project structure

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +40 -0
  2. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-4B-Instruct_vision.axmodel → Qwen3-VL-4B-Instruct_vision.axmodel +0 -0
  3. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-4B-Instruct_vision_u8.axmodel → Qwen3-VL-4B-Instruct_vision_u8.axmodel +0 -0
  4. README.md +251 -173
  5. axera_logo.png +0 -3
  6. config.json +26 -0
  7. gradio_demo.py +0 -262
  8. main_ax650 +0 -3
  9. main_ax650_api +0 -3
  10. main_axcl_aarch64 +0 -3
  11. main_axcl_api_aarch64 +0 -3
  12. main_axcl_api_x86 +0 -3
  13. main_axcl_x86 +0 -3
  14. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/model.embed_tokens.weight.bfloat16.bin → model.embed_tokens.weight.bfloat16.bin +0 -0
  15. openai_cli.py +0 -66
  16. post_config.json +6 -6
  17. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l0_together.axmodel → qwen3_vl_text_p128_l0_together.axmodel +0 -0
  18. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l10_together.axmodel → qwen3_vl_text_p128_l10_together.axmodel +0 -0
  19. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l11_together.axmodel → qwen3_vl_text_p128_l11_together.axmodel +0 -0
  20. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l12_together.axmodel → qwen3_vl_text_p128_l12_together.axmodel +0 -0
  21. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l13_together.axmodel → qwen3_vl_text_p128_l13_together.axmodel +0 -0
  22. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l14_together.axmodel → qwen3_vl_text_p128_l14_together.axmodel +0 -0
  23. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l15_together.axmodel → qwen3_vl_text_p128_l15_together.axmodel +0 -0
  24. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l16_together.axmodel → qwen3_vl_text_p128_l16_together.axmodel +0 -0
  25. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l17_together.axmodel → qwen3_vl_text_p128_l17_together.axmodel +0 -0
  26. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l18_together.axmodel → qwen3_vl_text_p128_l18_together.axmodel +0 -0
  27. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l19_together.axmodel → qwen3_vl_text_p128_l19_together.axmodel +0 -0
  28. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l1_together.axmodel → qwen3_vl_text_p128_l1_together.axmodel +0 -0
  29. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l20_together.axmodel → qwen3_vl_text_p128_l20_together.axmodel +0 -0
  30. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l21_together.axmodel → qwen3_vl_text_p128_l21_together.axmodel +0 -0
  31. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l22_together.axmodel → qwen3_vl_text_p128_l22_together.axmodel +0 -0
  32. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l23_together.axmodel → qwen3_vl_text_p128_l23_together.axmodel +0 -0
  33. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l24_together.axmodel → qwen3_vl_text_p128_l24_together.axmodel +0 -0
  34. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l25_together.axmodel → qwen3_vl_text_p128_l25_together.axmodel +0 -0
  35. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l26_together.axmodel → qwen3_vl_text_p128_l26_together.axmodel +0 -0
  36. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l27_together.axmodel → qwen3_vl_text_p128_l27_together.axmodel +0 -0
  37. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l28_together.axmodel → qwen3_vl_text_p128_l28_together.axmodel +0 -0
  38. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l29_together.axmodel → qwen3_vl_text_p128_l29_together.axmodel +0 -0
  39. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l2_together.axmodel → qwen3_vl_text_p128_l2_together.axmodel +0 -0
  40. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l30_together.axmodel → qwen3_vl_text_p128_l30_together.axmodel +0 -0
  41. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l31_together.axmodel → qwen3_vl_text_p128_l31_together.axmodel +0 -0
  42. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l32_together.axmodel → qwen3_vl_text_p128_l32_together.axmodel +0 -0
  43. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l33_together.axmodel → qwen3_vl_text_p128_l33_together.axmodel +0 -0
  44. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l34_together.axmodel → qwen3_vl_text_p128_l34_together.axmodel +0 -0
  45. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l35_together.axmodel → qwen3_vl_text_p128_l35_together.axmodel +0 -0
  46. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l3_together.axmodel → qwen3_vl_text_p128_l3_together.axmodel +0 -0
  47. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l4_together.axmodel → qwen3_vl_text_p128_l4_together.axmodel +0 -0
  48. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l5_together.axmodel → qwen3_vl_text_p128_l5_together.axmodel +0 -0
  49. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l6_together.axmodel → qwen3_vl_text_p128_l6_together.axmodel +0 -0
  50. Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l7_together.axmodel → qwen3_vl_text_p128_l7_together.axmodel +0 -0
.gitattributes CHANGED
@@ -61,3 +61,43 @@ main_axcl_api_aarch64 filter=lfs diff=lfs merge=lfs -text
61
  main_axcl_api_x86 filter=lfs diff=lfs merge=lfs -text
62
  main_ax650_api filter=lfs diff=lfs merge=lfs -text
63
  axera_logo.png filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  main_axcl_api_x86 filter=lfs diff=lfs merge=lfs -text
62
  main_ax650_api filter=lfs diff=lfs merge=lfs -text
63
  axera_logo.png filter=lfs diff=lfs merge=lfs -text
64
+ Qwen3-VL-4B-Instruct_vision.axmodel filter=lfs diff=lfs merge=lfs -text
65
+ Qwen3-VL-4B-Instruct_vision_u8.axmodel filter=lfs diff=lfs merge=lfs -text
66
+ model.embed_tokens.weight.bfloat16.bin filter=lfs diff=lfs merge=lfs -text
67
+ qwen3_vl_text_p128_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text
68
+ qwen3_vl_text_p128_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text
69
+ qwen3_vl_text_p128_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text
70
+ qwen3_vl_text_p128_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text
71
+ qwen3_vl_text_p128_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text
72
+ qwen3_vl_text_p128_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text
73
+ qwen3_vl_text_p128_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text
74
+ qwen3_vl_text_p128_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text
75
+ qwen3_vl_text_p128_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text
76
+ qwen3_vl_text_p128_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text
77
+ qwen3_vl_text_p128_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text
78
+ qwen3_vl_text_p128_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text
79
+ qwen3_vl_text_p128_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text
80
+ qwen3_vl_text_p128_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text
81
+ qwen3_vl_text_p128_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text
82
+ qwen3_vl_text_p128_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text
83
+ qwen3_vl_text_p128_l24_together.axmodel filter=lfs diff=lfs merge=lfs -text
84
+ qwen3_vl_text_p128_l25_together.axmodel filter=lfs diff=lfs merge=lfs -text
85
+ qwen3_vl_text_p128_l26_together.axmodel filter=lfs diff=lfs merge=lfs -text
86
+ qwen3_vl_text_p128_l27_together.axmodel filter=lfs diff=lfs merge=lfs -text
87
+ qwen3_vl_text_p128_l28_together.axmodel filter=lfs diff=lfs merge=lfs -text
88
+ qwen3_vl_text_p128_l29_together.axmodel filter=lfs diff=lfs merge=lfs -text
89
+ qwen3_vl_text_p128_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text
90
+ qwen3_vl_text_p128_l30_together.axmodel filter=lfs diff=lfs merge=lfs -text
91
+ qwen3_vl_text_p128_l31_together.axmodel filter=lfs diff=lfs merge=lfs -text
92
+ qwen3_vl_text_p128_l32_together.axmodel filter=lfs diff=lfs merge=lfs -text
93
+ qwen3_vl_text_p128_l33_together.axmodel filter=lfs diff=lfs merge=lfs -text
94
+ qwen3_vl_text_p128_l34_together.axmodel filter=lfs diff=lfs merge=lfs -text
95
+ qwen3_vl_text_p128_l35_together.axmodel filter=lfs diff=lfs merge=lfs -text
96
+ qwen3_vl_text_p128_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text
97
+ qwen3_vl_text_p128_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text
98
+ qwen3_vl_text_p128_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text
99
+ qwen3_vl_text_p128_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text
100
+ qwen3_vl_text_p128_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text
101
+ qwen3_vl_text_p128_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text
102
+ qwen3_vl_text_p128_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text
103
+ qwen3_vl_text_post.axmodel filter=lfs diff=lfs merge=lfs -text
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-4B-Instruct_vision.axmodel → Qwen3-VL-4B-Instruct_vision.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-4B-Instruct_vision_u8.axmodel → Qwen3-VL-4B-Instruct_vision_u8.axmodel RENAMED
File without changes
README.md CHANGED
@@ -66,202 +66,280 @@ The DDR capacity refers to the CMM memory that needs to be consumed. Ensure that
66
 
67
  ## How to use
68
 
69
- Download all files from this repository to the device
 
70
 
71
- **If you using AX650 Board**
72
-
73
- ### Demo Run
74
-
75
- #### Image understand demo
76
 
77
- - input text
78
 
79
- ```
80
- 描述这张图片
81
  ```
82
 
83
- - input image
84
 
85
- ![](./images/recoAll_attractions_1.jpg)
 
 
86
 
 
 
 
87
  ```
88
- root@ax650 ~/Qwen3-VL-4B-Instruct-GPTQ-Int4 # bash run_image_ax650.sh
89
- [I][ Init][ 156]: LLM init start
90
- [I][ Init][ 158]: Total CMM:7884 MB
91
- [I][ Init][ 34]: connect http://127.0.0.1:8080 ok
92
- bos_id: -1, eos_id: 151645
93
- img_start_token: 151652
94
- img_context_token: 151655
95
- 2% | █ | 1 / 39 [0.01s<0.58s, 66.67 count/s] tokenizer init ok[I][ Init][ 26]: LLaMaEmbedSelector use mmap
96
- 5% | ██ | 2 / 39 [0.02s<0.37s, 105.26 count/s] embed_selector init ok[I][ Init][ 201]: attr.axmodel_num:36
97
- 102% | █████████████████████████████████ | 40 / 39 [11.33s<11.05s, 3.53 count/s] init vpm axmodel ok,remain_cmm(2199 MB)[I][ Init][ 266]: IMAGE_CONTEXT_TOKEN: 151655, IMAGE_START_TOKEN: 151652
98
- [I][ Init][ 309]: image encoder output float32
99
-
100
- [I][ Init][ 339]: max_token_len : 2047
101
- [I][ Init][ 344]: kv_cache_size : 1024, kv_cache_num: 2047
102
- [I][ Init][ 352]: prefill_token_num : 128
103
- [I][ Init][ 356]: grp: 1, prefill_max_token_num : 1
104
- [I][ Init][ 356]: grp: 2, prefill_max_token_num : 128
105
- [I][ Init][ 356]: grp: 3, prefill_max_token_num : 256
106
- [I][ Init][ 356]: grp: 4, prefill_max_token_num : 384
107
- [I][ Init][ 356]: grp: 5, prefill_max_token_num : 512
108
- [I][ Init][ 356]: grp: 6, prefill_max_token_num : 640
109
- [I][ Init][ 356]: grp: 7, prefill_max_token_num : 768
110
- [I][ Init][ 356]: grp: 8, prefill_max_token_num : 896
111
- [I][ Init][ 356]: grp: 9, prefill_max_token_num : 1024
112
- [I][ Init][ 356]: grp: 10, prefill_max_token_num : 1152
113
- [I][ Init][ 360]: prefill_max_token_num : 1152
114
- [I][ Init][ 372]: LLM init ok
115
- [I][ Init][ 374]: Left CMM:2199 MB
116
- Type "q" to exit, Ctrl+c to stop current running
117
- prompt >> 描述这张图片
118
- image >> images/recoAll_attractions_1.jpg
119
- [I][ EncodeImage][ 440]: pixel_values size 1
120
- [I][ EncodeImage][ 441]: grid_h 24 grid_w 24
121
- [I][ EncodeImage][ 489]: image encode time : 222.440994 ms, size : 1
122
- [I][ Encode][ 532]: input_ids size:168
123
- [I][ Encode][ 540]: offset 15
124
- [I][ Encode][ 569]: img_embed.size:1, 368640
125
- [I][ Encode][ 583]: out_embed size:430080
126
- [I][ Encode][ 584]: input_ids size 168
127
- [I][ Encode][ 586]: position_ids size:168
128
- [I][ Run][ 607]: input token num : 168, prefill_split_num : 2
129
- [I][ Run][ 641]: input_num_token:128
130
- [I][ Run][ 641]: input_num_token:40
131
- [I][ Run][ 865]: ttft: 676.16 ms
132
- 这张图片展示了埃及吉萨的金字塔群,背景是晴朗的蓝天,前景是广阔的沙漠。
133
-
134
- 画面中主要可见三座金字塔:
135
- - 最大的一座是著名的**胡夫金字塔**,它位于画面中央偏左,是三座金字塔中最高、最显眼的。
136
- - 在其右侧,是稍小一些的**卡纳克金字塔**(或称“卡纳克金字塔”)。
137
- - 在画面最左侧,可以看到一座更小的金字塔,可能是**门卡乌金字塔**或**哈夫拉金字塔**。
138
-
139
- 这三座金字塔都是古埃及法老的陵墓,是古代世界七大奇迹中唯一现存的。它们的结构和规模令人惊叹,体现了古埃及人在建筑、数学和天文学方面的卓越成就。
140
-
141
- 整个场景在阳光下显得庄严而神秘,是埃及最具代表性的历史遗迹之一。
142
-
143
- [N][ Run][ 992]: hit eos,avg 7.12 token/s
144
  ```
145
 
146
- #### Video understand demo
147
 
148
- - input text
149
 
150
- ```
151
- 描述这个视频
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
152
  ```
153
 
154
- - input video
155
-
156
- ./video
157
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
  ```
159
- root@ax650 ~/Qwen3-VL-4B-Instruct-GPTQ-Int4 # bash run_video_ax650.sh
160
- [I][ Init][ 156]: LLM init start
161
- [I][ Init][ 158]: Total CMM:7884 MB
162
- [I][ Init][ 34]: connect http://127.0.0.1:8080 ok
163
- bos_id: -1, eos_id: 151645
164
- img_start_token: 151652
165
- img_context_token: 151656
166
- 2% | █ | 1 / 39 [0.02s<0.62s, 62.50 count/s] tokenizer init ok[I][ Init][ 26]: LLaMaEmbedSelector use mmap
167
- 5% | ██ | 2 / 39 [0.02s<0.39s, 100.00 count/s] embed_selector init ok[I][ Init][ 201]: attr.axmodel_num:36
168
- 102% | █████████████████████████████████ | 40 / 39 [44.70s<43.58s, 0.89 count/s] init vpm axmodel ok,remain_cmm(2199 MB)[I][ Init][ 266]: IMAGE_CONTEXT_TOKEN: 151656, IMAGE_START_TOKEN: 151652
169
- [I][ Init][ 309]: image encoder output float32
170
-
171
- [I][ Init][ 339]: max_token_len : 2047
172
- [I][ Init][ 344]: kv_cache_size : 1024, kv_cache_num: 2047
173
- [I][ Init][ 352]: prefill_token_num : 128
174
- [I][ Init][ 356]: grp: 1, prefill_max_token_num : 1
175
- [I][ Init][ 356]: grp: 2, prefill_max_token_num : 128
176
- [I][ Init][ 356]: grp: 3, prefill_max_token_num : 256
177
- [I][ Init][ 356]: grp: 4, prefill_max_token_num : 384
178
- [I][ Init][ 356]: grp: 5, prefill_max_token_num : 512
179
- [I][ Init][ 356]: grp: 6, prefill_max_token_num : 640
180
- [I][ Init][ 356]: grp: 7, prefill_max_token_num : 768
181
- [I][ Init][ 356]: grp: 8, prefill_max_token_num : 896
182
- [I][ Init][ 356]: grp: 9, prefill_max_token_num : 1024
183
- [I][ Init][ 356]: grp: 10, prefill_max_token_num : 1152
184
- [I][ Init][ 360]: prefill_max_token_num : 1152
185
- [I][ Init][ 372]: LLM init ok
186
- [I][ Init][ 374]: Left CMM:2199 MB
187
- Type "q" to exit, Ctrl+c to stop current running
188
- prompt >> 描述这个视频
189
- video >> video
190
- video/frame_0000.jpg
191
- video/frame_0008.jpg
192
- video/frame_0016.jpg
193
- video/frame_0024.jpg
194
- video/frame_0032.jpg
195
- video/frame_0040.jpg
196
- video/frame_0048.jpg
197
- video/frame_0056.jpg
198
- [I][ EncodeImage][ 440]: pixel_values size 4
199
- [I][ EncodeImage][ 441]: grid_h 24 grid_w 24
200
- [I][ EncodeImage][ 489]: image encode time : 773.406006 ms, size : 4
201
- [I][ Encode][ 532]: input_ids size:600
202
- [I][ Encode][ 540]: offset 15
203
- [I][ Encode][ 569]: img_embed.size:4, 368640
204
- [I][ Encode][ 574]: offset:159
205
- [I][ Encode][ 574]: offset:303
206
- [I][ Encode][ 574]: offset:447
207
- [I][ Encode][ 583]: out_embed size:1536000
208
- [I][ Encode][ 584]: input_ids size 600
209
- [I][ Encode][ 586]: position_ids size:600
210
- [I][ Run][ 607]: input token num : 600, prefill_split_num : 5
211
- [I][ Run][ 641]: input_num_token:128
212
- [I][ Run][ 641]: input_num_token:128
213
- [I][ Run][ 641]: input_num_token:128
214
- [I][ Run][ 641]: input_num_token:128
215
- [I][ Run][ 641]: input_num_token:88
216
-
217
- [I][ Run][ 865]: ttft: 1886.83 ms
218
- 这个视频展示了一群**土拨鼠**(或称“旱獭”)在山间草地上嬉戏打斗的场景。
219
-
220
- **画面细节:**
221
-
222
- - **主体动物**:画面中有多只土拨鼠,它们毛色以灰、棕、白相间,腹部和四肢颜色较浅,背部较深。它们体型圆润,耳朵短小,表情生动。
223
- - **动作**:这些土拨鼠似乎在进行一场“打斗”或“嬉戏”。它们互相扑腾、跳跃、用前爪拍打、甚至互相“拥抱”或“推搡”。动作非常活跃,充满动感,有些画面甚至有轻微的运动模糊,增强了动态感。
224
- - **背景**:背景是连绵起伏的山峦,山坡上覆盖着绿色植被,远处可见裸露的岩石和山体,天空湛蓝,阳光明媚,说明是白天晴朗的天气。
225
- - **前景**:它们站在一片布满小石子和草的地面,看起来像是山间小径或开阔地。
226
- - **构图**:画面采用近景特写,聚焦于土拨鼠的互动,背景虚化,突出了主体的动态和表情。整体构图充满活力和趣味性。
227
-
228
- **风格与氛围:**
229
-
230
- 这张图片/视频具有**拟人化和趣味性**,土拨鼠的动作被夸张化,仿佛在“打斗”或“跳舞”,非常可爱。
231
- - 画面色彩明亮,阳光充足,给人一种**自然、活泼、欢乐**的感觉。
232
-
233
- **总结:**
234
-
235
- 这是一段充满趣味和活力的野生动物短片,展现了土拨鼠在自然环境中的社交行为,它们的“打斗”其实可能是玩耍、争夺领地或建立社交关系的自然行为。整体画面生动、可爱,极具观赏性。
236
 
237
- ---
238
 
239
- **注意**:虽然土拨鼠(旱獭)在野外确实会互相打斗,但这种“打斗”通常是**玩耍或社交行为**,并非真正的攻击。视频中的“打斗”更像是它们的社交互动,非常可爱。
 
240
 
241
- [N][ Run][ 992]: hit eos,avg 7.10 token/s
 
242
 
243
- prompt >> q
244
- ```
 
 
245
 
246
- ### Gradio demo
 
 
 
 
247
 
248
- #### start openai style api server
249
- if the tokenizer server is not run in the same machine,please modify the tokenizer server ip in shell file.
250
- ```shell
251
- pip3 install -r requirements.txt
252
- # for axcl x86
253
- ./run_axcl_x86_api.sh
254
- # for axcl aarch64
255
- ./run_axcl_aarch64_api.sh
256
- # for ax650
257
- ./run_ax650_api.sh
258
  ```
259
 
260
- #### start gradio demo
261
- if the api server is not run in the same machine,please modify the api url in gradio web ui.
262
- ```shell
263
- python gradio_demo.py
264
- ```
265
 
266
- ![image](https://cdn-uploads.huggingface.co/production/uploads/64b7837c17570fdff9b906b9/Og9fPNi0chg768gicse7M.png)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
267
 
 
 
 
 
 
 
 
 
 
66
 
67
  ## How to use
68
 
69
+ ## 安装 axllm
70
+ 方式一:克隆仓库后执行安装脚本:
71
 
72
+ ```shell
73
+ git clone -b axllm https://github.com/AXERA-TECH/ax-llm.git
74
+ cd ax-llm
75
+ ./install.sh
76
+ ```
77
 
78
+ 方式二:一行命令安装(默认分支 `axllm`):
79
 
80
+ ```shell
81
+ curl -fsSL https://raw.githubusercontent.com/AXERA-TECH/ax-llm/axllm/install.sh | bash
82
  ```
83
 
84
+ 方式三:下载 GitHub Actions CI 导出的可执行程序(适合没有编译环境的用户):
85
 
86
+ 如果没有编译环境,请到:
87
+ `https://github.com/AXERA-TECH/ax-llm/actions?query=branch%3Aaxllm`
88
+ 下载 **最新 CI 导出的可执行程序**(`axllm`),然后:
89
 
90
+ ```shell
91
+ chmod +x axllm
92
+ sudo mv axllm /usr/bin/axllm
93
  ```
94
+
95
+ ## 模型下载(Hugging Face)
96
+ 先创建模型目录并进入,然后下载到该目录:
97
+
98
+ ```shell
99
+ mkdir -p AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4
100
+ cd AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4
101
+ hf download AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4 --local-dir .
102
+
103
+ # structure of the downloaded files
104
+ tree -L 3
105
+ .
106
+ └── AXERA-TECH
107
+ └── Qwen3-VL-4B-Instruct-GPTQ-Int4
108
+ ├── Qwen3-VL-4B-Instruct_vision.axmodel
109
+ ├── Qwen3-VL-4B-Instruct_vision_u8.axmodel
110
+ ├── README.md
111
+ ├── config.json
112
+ ├── images
113
+ ├── model.embed_tokens.weight.bfloat16.bin
114
+ ├── post_config.json
115
+ ├── qwen3_tokenizer.txt
116
+ ├── qwen3_vl_text_p128_l0_together.axmodel
117
+ ...
118
+ ├── qwen3_vl_text_p128_l9_together.axmodel
119
+ ├── qwen3_vl_text_post.axmodel
120
+ ├── requirements.txt
121
+ └── video
122
+
123
+ 4 directories, 45 files
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  ```
125
 
126
+ ## Inference with AX650 Host, such as M4N-Dock(爱芯派Pro) or AX650N DEMO Board
127
 
128
+ ### 运行(CLI)
129
 
130
+ ```shell
131
+ (base) root@ax650:~# axllm run AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4/
132
+ 20:13:34.015 INF Init:218 | LLM init start
133
+ tokenizer_type = 1
134
+ 97% | ############################### | 38 / 39 [11.25s<11.54s, 3.38 count/s] init post axmodel ok,remain_cmm(6133 MB)
135
+ 20:13:45.263 INF Init:368 | max_token_len : 2047
136
+ 20:13:45.263 INF Init:371 | kv_cache_size : 1024, kv_cache_num: 2047
137
+ 20:13:45.263 INF Init:374 | prefill_token_num : 128
138
+ 20:13:45.263 INF Init:379 | grp: 1, prefill_max_kv_cache_num : 1
139
+ 20:13:45.263 INF Init:379 | grp: 2, prefill_max_kv_cache_num : 128
140
+ 20:13:45.263 INF Init:379 | grp: 3, prefill_max_kv_cache_num : 256
141
+ 20:13:45.263 INF Init:379 | grp: 4, prefill_max_kv_cache_num : 384
142
+ 20:13:45.263 INF Init:379 | grp: 5, prefill_max_kv_cache_num : 512
143
+ 20:13:45.263 INF Init:379 | grp: 6, prefill_max_kv_cache_num : 640
144
+ 20:13:45.263 INF Init:379 | grp: 7, prefill_max_kv_cache_num : 768
145
+ 20:13:45.263 INF Init:379 | grp: 8, prefill_max_kv_cache_num : 896
146
+ 20:13:45.263 INF Init:379 | grp: 9, prefill_max_kv_cache_num : 1024
147
+ 20:13:45.263 INF Init:379 | grp: 10, prefill_max_kv_cache_num : 1152
148
+ 20:13:45.263 INF Init:384 | prefill_max_token_num : 1152
149
+ 20:13:45.263 INF Init:27 | LLaMaEmbedSelector use mmap
150
+ 100% | ################################ | 39 / 39 [11.25s<11.25s, 3.47 count/s] embed_selector init ok
151
+ 20:13:47.224 WRN Init:511 | Qwen-VL vision size override: cfg=448x448 bytes=1204224, model_input_bytes=884736 -> 384x384 (square).
152
+ 20:13:47.224 INF Init:695 | Qwen-VL token ids: vision_start=151652 image_pad=151655 video_pad=151656
153
+ 20:13:47.224 INF Init:728 | VisionModule init ok: type=Qwen3VL, tokens_per_block=144, embed_size=2560, out_dtype=fp32
154
+ 20:13:47.224 INF Init:734 | VisionModule deepstack enabled: layers=3
155
+ 20:13:47.224 INF load_config:282 | load config:
156
+ 20:13:47.224 INF load_config:282 | {
157
+ 20:13:47.224 INF load_config:282 | "enable_repetition_penalty": false,
158
+ 20:13:47.224 INF load_config:282 | "enable_temperature": false,
159
+ 20:13:47.224 INF load_config:282 | "enable_top_k_sampling": false,
160
+ 20:13:47.224 INF load_config:282 | "enable_top_p_sampling": false,
161
+ 20:13:47.224 INF load_config:282 | "penalty_window": 20,
162
+ 20:13:47.224 INF load_config:282 | "repetition_penalty": 1.2,
163
+ 20:13:47.224 INF load_config:282 | "temperature": 0.9,
164
+ 20:13:47.224 INF load_config:282 | "top_k": 10,
165
+ 20:13:47.224 INF load_config:282 | "top_p": 0.8
166
+ 20:13:47.224 INF load_config:282 | }
167
+ 20:13:47.224 INF Init:448 | LLM init ok
168
+ Commands:
169
+ /q, /exit 退出
170
+ /reset 重置 kvcache
171
+ /dd 删除一轮对话
172
+ /pp 打印历史对话
173
+ Ctrl+C: 停止当前生成
174
+ VLM enabled: after each prompt, input image path (empty = text-only). Use "video:<frames_dir>" for video.
175
+ ----------------------------------------
176
+ prompt >> describe the image
177
+ image >> ./AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4/images/ssd_car.jpg
178
+ 20:14:13.430 INF EncodeForContent:1121 | Qwen-VL pixel_values[0] bytes=884736 min=0 max=255 (w=384 h=384 tp=2 ps=16 sm=2)
179
+ 20:14:13.594 INF EncodeForContent:1144 | vision cache store: ./AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4/images/ssd_car.jpg
180
+ 20:14:13.616 INF SetKVCache:749 | prefill_grpid:3 kv_cache_num:256 precompute_len:0 input_num_token:168
181
+ 20:14:13.616 INF SetKVCache:757 | current prefill_max_token_num:1152
182
+ 20:14:13.616 INF SetKVCache:760 | first run
183
+ 20:14:13.618 INF Run:818 | input token num : 168, prefill_split_num : 2
184
+ 20:14:13.618 INF Run:858 | prefill chunk p=0 history_len=0 grpid=1 kv_cache_num=0 input_tokens=128
185
+ 20:14:13.618 INF Run:881 | prefill indices shape: p=0 idx_elems=384 idx_rows=3 pos_rows=3
186
+ 20:14:13.940 INF Run:858 | prefill chunk p=1 history_len=128 grpid=2 kv_cache_num=128 input_tokens=40
187
+ 20:14:13.940 INF Run:881 | prefill indices shape: p=1 idx_elems=384 idx_rows=3 pos_rows=3
188
+ 20:14:14.295 INF Run:1023 | ttft: 677.29 ms
189
+ This is a vibrant street photograph taken in a city, likely London, featuring a classic red double-decker bus as the central subject.
190
+
191
+ **Key elements in the image:**
192
+
193
+ - **The Bus:** A bright red, vintage-style double-decker bus, which is a hallmark of London's public transport. The bus is parked or stopped on the street. A prominent advertisement is visible on its side: “WHEN YOU SAY ‘YES’” above the website “WIXMONEY.COM”. The bus has a classic design with large windows and ornate architectural details on its upper deck.
194
+
195
+ - **The Setting:** The background consists of tall, ornate, multi-story buildings with traditional European architecture, featuring large windows, stone facades, and decorative balconies. This strongly suggests a central or affluent district in a major European city.
196
+
197
+ - **The Person:** In the foreground, a person (likely a woman) is standing on the sidewalk, looking up at the bus. She is wearing a dark coat and a light-colored hat or head covering, and she is holding a small, light-colored handbag. Her posture and gaze suggest she is observing the bus or the scene.
198
+
199
+ - **The Atmosphere:** The photo has a bright, clear, and cheerful quality, with natural daylight illuminating the scene. The colors are vivid, especially the red of the bus, which stands out against the more muted tones of the buildings and the person’s clothing.
200
+
201
+ - **The Composition:** The image is framed to capture the bus and the surrounding architecture, with the person adding a human element and a sense of scale. The perspective is slightly elevated, looking down at the bus and the street.
202
+
203
+ Overall, the image captures a moment of urban life, blending the iconic imagery of a city bus with the everyday activity of a pedestrian, all set against a backdrop of classic architecture.
204
+
205
+ 20:15:12.812 NTC Run:1145 | hit eos,avg 6.37 token/s
206
+ 20:15:12.813 INF GetKVCache:721 | precompute_len:409, remaining:743
207
+ prompt >> how many people in the image?
208
+ image >>
209
+ 20:15:33.058 INF EncodeForContent:1057 | vision cache hit (mem): ./AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4/images/ssd_car.jpg
210
+ 20:15:33.067 INF SetKVCache:749 | prefill_grpid:5 kv_cache_num:512 precompute_len:409 input_num_token:17
211
+ 20:15:33.067 INF SetKVCache:757 | current prefill_max_token_num:640
212
+ 20:15:33.068 INF Run:818 | input token num : 17, prefill_split_num : 1
213
+ 20:15:33.068 INF Run:858 | prefill chunk p=0 history_len=409 grpid=5 kv_cache_num=512 input_tokens=17
214
+ 20:15:33.068 INF Run:881 | prefill indices shape: p=0 idx_elems=384 idx_rows=3 pos_rows=3
215
+ 20:15:33.502 INF Run:1023 | ttft: 433.86 ms
216
+ Based on the image provided, there is **one person** clearly visible in the foreground — the woman standing on the sidewalk, looking up at the bus. She is the only person explicitly depicted in the photograph.
217
+
218
+ There may be other people on the bus or in the background, but they are not visible or identifiable in the image. Therefore, the answer is:
219
+
220
+ > **One person.**
221
+
222
+ 20:15:45.526 NTC Run:1145 | hit eos,avg 6.49 token/s
223
+ 20:15:45.526 INF GetKVCache:721 | precompute_len:503, remaining:649
224
+ prompt >> /q
225
  ```
226
 
227
+ ### 启动服务(OpenAI 兼容)
 
 
228
 
229
+ ```shell
230
+ (base) root@ax650:~# axllm serve AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4/
231
+ 20:18:10.375 INF Init:218 | LLM init start
232
+ tokenizer_type = 1
233
+ 97% | ############################### | 38 / 39 [6.45s<6.62s, 5.89 count/s] init post axmodel ok,remain_cmm(6133 MB)
234
+ 20:18:16.826 INF Init:368 | max_token_len : 2047
235
+ 20:18:16.826 INF Init:371 | kv_cache_size : 1024, kv_cache_num: 2047
236
+ 20:18:16.826 INF Init:374 | prefill_token_num : 128
237
+ 20:18:16.826 INF Init:379 | grp: 1, prefill_max_kv_cache_num : 1
238
+ 20:18:16.826 INF Init:379 | grp: 2, prefill_max_kv_cache_num : 128
239
+ 20:18:16.826 INF Init:379 | grp: 3, prefill_max_kv_cache_num : 256
240
+ 20:18:16.826 INF Init:379 | grp: 4, prefill_max_kv_cache_num : 384
241
+ 20:18:16.826 INF Init:379 | grp: 5, prefill_max_kv_cache_num : 512
242
+ 20:18:16.826 INF Init:379 | grp: 6, prefill_max_kv_cache_num : 640
243
+ 20:18:16.826 INF Init:379 | grp: 7, prefill_max_kv_cache_num : 768
244
+ 20:18:16.826 INF Init:379 | grp: 8, prefill_max_kv_cache_num : 896
245
+ 20:18:16.826 INF Init:379 | grp: 9, prefill_max_kv_cache_num : 1024
246
+ 20:18:16.826 INF Init:379 | grp: 10, prefill_max_kv_cache_num : 1152
247
+ 20:18:16.826 INF Init:384 | prefill_max_token_num : 1152
248
+ 20:18:16.826 INF Init:27 | LLaMaEmbedSelector use mmap
249
+ 100% | ################################ | 39 / 39 [6.45s<6.45s, 6.05 count/s] embed_selector init ok
250
+ 20:18:17.190 WRN Init:511 | Qwen-VL vision size override: cfg=448x448 bytes=1204224, model_input_bytes=884736 -> 384x384 (square).
251
+ 20:18:17.191 INF Init:695 | Qwen-VL token ids: vision_start=151652 image_pad=151655 video_pad=151656
252
+ 20:18:17.191 INF Init:728 | VisionModule init ok: type=Qwen3VL, tokens_per_block=144, embed_size=2560, out_dtype=fp32
253
+ 20:18:17.191 INF Init:734 | VisionModule deepstack enabled: layers=3
254
+ 20:18:17.191 INF load_config:282 | load config:
255
+ 20:18:17.191 INF load_config:282 | {
256
+ 20:18:17.191 INF load_config:282 | "enable_repetition_penalty": false,
257
+ 20:18:17.191 INF load_config:282 | "enable_temperature": false,
258
+ 20:18:17.191 INF load_config:282 | "enable_top_k_sampling": false,
259
+ 20:18:17.191 INF load_config:282 | "enable_top_p_sampling": false,
260
+ 20:18:17.191 INF load_config:282 | "penalty_window": 20,
261
+ 20:18:17.191 INF load_config:282 | "repetition_penalty": 1.2,
262
+ 20:18:17.191 INF load_config:282 | "temperature": 0.9,
263
+ 20:18:17.191 INF load_config:282 | "top_k": 10,
264
+ 20:18:17.191 INF load_config:282 | "top_p": 0.8
265
+ 20:18:17.191 INF load_config:282 | }
266
+ 20:18:17.191 INF Init:448 | LLM init ok
267
+ Starting server on port 8000 with model 'AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4'...
268
+ API URLs:
269
+ GET http://127.0.0.1:8000/health
270
+ GET http://127.0.0.1:8000/v1/models
271
+ POST http://127.0.0.1:8000/v1/chat/completions
272
+ GET http://10.126.35.203:8000/health
273
+ GET http://10.126.35.203:8000/v1/models
274
+ POST http://10.126.35.203:8000/v1/chat/completions
275
+ GET http://172.18.0.1:8000/health
276
+ GET http://172.18.0.1:8000/v1/models
277
+ POST http://172.18.0.1:8000/v1/chat/completions
278
+ GET http://172.17.0.1:8000/health
279
+ GET http://172.17.0.1:8000/v1/models
280
+ POST http://172.17.0.1:8000/v1/chat/completions
281
+ Aliases:
282
+ GET http://127.0.0.1:8000/models
283
+ POST http://127.0.0.1:8000/chat/completions
284
+ GET http://10.126.35.203:8000/models
285
+ POST http://10.126.35.203:8000/chat/completions
286
+ GET http://172.18.0.1:8000/models
287
+ POST http://172.18.0.1:8000/chat/completions
288
+ GET http://172.17.0.1:8000/models
289
+ POST http://172.17.0.1:8000/chat/completions
290
+ OpenAI API Server starting on http://0.0.0.0:8000
291
+ Max concurrency: 1
292
+ Models: AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4
293
  ```
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
 
295
+ ### OpenAI 调用示例
296
 
297
+ ```python
298
+ from openai import OpenAI
299
 
300
+ API_URL = "http://127.0.0.1:8000/v1"
301
+ MODEL = "AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4"
302
 
303
+ messages = [
304
+ {"role": "system", "content": [{"type": "text", "text": "you are a helpful assistant."}]},
305
+ {"role": "user", "content": "hello"},
306
+ ]
307
 
308
+ client = OpenAI(api_key="not-needed", base_url=API_URL)
309
+ completion = client.chat.completions.create(
310
+ model=MODEL,
311
+ messages=messages,
312
+ )
313
 
314
+ print(completion.choices[0].message.content)
 
 
 
 
 
 
 
 
 
315
  ```
316
 
 
 
 
 
 
317
 
318
+ ### OpenAI 流式调用示例
319
+
320
+ ```python
321
+ from openai import OpenAI
322
+
323
+ API_URL = "http://127.0.0.1:8000/v1"
324
+ MODEL = "AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4"
325
+
326
+ messages = [
327
+ {"role": "system", "content": [{"type": "text", "text": "you are a helpful assistant."}]},
328
+ {"role": "user", "content": "hello"},
329
+ ]
330
+
331
+ client = OpenAI(api_key="not-needed", base_url=API_URL)
332
+ stream = client.chat.completions.create(
333
+ model=MODEL,
334
+ messages=messages,
335
+ stream=True,
336
+ )
337
 
338
+ print("assistant:")
339
+ for ev in stream:
340
+ delta = getattr(ev.choices[0], "delta", None)
341
+ if delta and getattr(delta, "content", None):
342
+ print(delta.content, end="", flush=True)
343
+ print("\n")
345
+ ```
axera_logo.png DELETED

Git LFS Details

  • SHA256: 6f3729509adf9e0c8baffcda3d7c1228f7d6bcd74374fc592c2995a3c1a3dfc1
  • Pointer size: 131 Bytes
  • Size of remote file: 157 kB
config.json CHANGED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "system_prompt": "you are a helpful assistant.",
3
+ "model_name": "AXERA-TECH/Qwen3-VL-4B-Instruct-GPTQ-Int4",
4
+ "url_tokenizer_model": "qwen3_tokenizer.txt",
5
+ "tokenizer_type": "Qwen3VL",
6
+ "post_config_path": "post_config.json",
7
+ "template_filename_axmodel": "qwen3_vl_text_p128_l%d_together.axmodel",
8
+ "axmodel_num": 36,
9
+ "filename_post_axmodel": "qwen3_vl_text_post.axmodel",
10
+ "filename_tokens_embed": "model.embed_tokens.weight.bfloat16.bin",
11
+ "tokens_embed_num": 151936,
12
+ "tokens_embed_size": 2560,
13
+ "use_mmap_load_embed": true,
14
+ "vlm_type": "Qwen3VL",
15
+ "filename_image_encoder_axmodel": "Qwen3-VL-4B-Instruct_vision.axmodel",
16
+ "vision_patch_size": 16,
17
+ "vision_temporal_patch_size": 2,
18
+ "vision_spatial_merge_size": 2,
19
+ "vision_fps": 1,
20
+ "vision_tokens_per_second": 1,
21
+ "vision_cache_dir": "vision_cache",
22
+ "use_mmap_load_layer": true,
23
+ "devices": [
24
+ 0
25
+ ]
26
+ }
gradio_demo.py DELETED
@@ -1,262 +0,0 @@
1
- # gradio_chat_single_turn.py
2
- import re
3
- import subprocess
4
- import gradio as gr
5
- import base64, cv2, os, tempfile
6
- from openai import OpenAI
7
- import requests
8
-
9
- def get_all_local_ips():
10
- result = subprocess.run(['ip', 'a'], capture_output=True, text=True)
11
- output = result.stdout
12
-
13
- # 匹配所有IPv4
14
- ips = re.findall(r'inet (\d+\.\d+\.\d+\.\d+)', output)
15
-
16
- # 过滤掉回环地址
17
- real_ips = [ip for ip in ips if not ip.startswith('127.')]
18
-
19
- return real_ips
20
-
21
-
22
-
23
- # ---------- Helpers ----------
24
- def img_to_data_url_from_cvframe(frame):
25
- import base64, cv2
26
- ok, buf = cv2.imencode(".jpg", frame, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
27
- b64 = base64.b64encode(buf).decode("ascii")
28
- return f"data:image/jpeg;base64,{b64}"
29
-
30
- def img_to_data_url_from_path(img_path: str) -> str:
31
- import cv2, base64
32
- img = cv2.imread(img_path)
33
- return img_to_data_url_from_cvframe(img)
34
-
35
- def video_to_data_urls(video_path: str, frame_stride: int = 30, max_frames: int = 8):
36
- import cv2, base64
37
- cap = cv2.VideoCapture(video_path)
38
- total = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
39
-
40
- if total / frame_stride > max_frames:
41
- frame_stride = int(total/max_frames)
42
-
43
- urls = []
44
- idx = 0
45
- first_preview = None
46
- while len(urls) < max_frames and idx < total:
47
- cap.set(cv2.CAP_PROP_POS_FRAMES, idx)
48
- ret, frame = cap.read()
49
- if not ret:
50
- break
51
- ok, buf = cv2.imencode(".jpg", frame, [int(cv2.IMWRITE_JPEG_QUALITY), 85])
52
- if not ok:
53
- break
54
- b64 = base64.b64encode(buf).decode("ascii")
55
- data_url = f"data:image/jpeg;base64,{b64}"
56
- urls.append(data_url)
57
- if first_preview is None:
58
- first_preview = data_url
59
- idx += frame_stride
60
- cap.release()
61
- return urls, first_preview
62
-
63
- def save_preview_image_from_data_url(data_url: str) -> str:
64
- # 仅用于在 Chatbot 里显示缩略图
65
- comma = data_url.find(",")
66
- if comma == -1:
67
- return ""
68
- b64 = data_url[comma+1:]
69
- raw = base64.b64decode(b64)
70
- fd, tmp_path = tempfile.mkstemp(suffix=".jpg", prefix="preview_")
71
- os.close(fd)
72
- with open(tmp_path, "wb") as f:
73
- f.write(raw)
74
- return tmp_path
75
-
76
- def build_messages(prompt: str, image_path: str | None, video_path: str | None,
77
- prefer_video: bool, frame_stride: int, max_frames: int):
78
- content = []
79
- if prompt and prompt.strip():
80
- content.append({"type": "text", "text": prompt.strip()})
81
-
82
- if video_path and os.path.exists(video_path) and prefer_video:
83
- urls, first_preview = video_to_data_urls(video_path, frame_stride=frame_stride, max_frames=max_frames)
84
- content.append({"type": "image_url", "is_video":True, "image_url": urls})
85
- media_desc = f"(视频抽帧:{len(urls)} 帧,步长 {frame_stride})"
86
- return {"role": "user", "content": content}, first_preview, media_desc
87
-
88
- if image_path and os.path.exists(image_path):
89
- u = img_to_data_url_from_path(image_path)
90
- content.append({"type": "image_url", "image_url": u})
91
- media_desc = "(已附带图片)"
92
- return {"role": "user", "content": content}, u, media_desc
93
-
94
- if video_path and os.path.exists(video_path):
95
- urls, first_preview = video_to_data_urls(video_path, frame_stride=frame_stride, max_frames=max_frames)
96
- content.append({"type": "image_url", "is_video":True, "image_url": urls})
97
- media_desc = f"(视频抽帧:{len(urls)} 帧,步长 {frame_stride})"
98
- return {"role": "user", "content": content}, first_preview, media_desc
99
-
100
- return {"role": "user", "content": content if content else [{"type": "text", "text": prompt or ""}]}, None, ""
101
-
102
- # ---------- Gradio callback (single-turn, stream) ----------
103
- def run_single_turn(prompt, image_file, video_file, prefer_video, frame_stride, max_frames,
104
- base_url, model, api_key, chatbot_state):
105
- """
106
- 单轮:每次发送都会重置聊天历史,只显示本轮的 user/assistant 两个气泡。
107
- """
108
- try:
109
- # 清空历史(单轮),构造用户气泡
110
- chatbot_state = []
111
-
112
- # 准备文件路径
113
- image_path = image_file if isinstance(image_file, str) else (image_file.name if image_file else None)
114
- video_path = video_file if isinstance(video_file, str) else (video_file.name if video_file else None)
115
-
116
- # 构造 messages 和预览
117
- messages, preview_data_url, media_desc = build_messages(
118
- prompt=prompt or "",
119
- image_path=image_path,
120
- video_path=video_path,
121
- prefer_video=bool(prefer_video),
122
- frame_stride=int(frame_stride),
123
- max_frames=int(max_frames),
124
- )
125
-
126
- # 组装用户气泡(Markdown):文本 + 预览图/视频说明
127
- user_md = (prompt or "").strip()
128
- if media_desc:
129
- user_md = (user_md + "\n\n" if user_md else "") + f"> {media_desc}"
130
- if preview_data_url:
131
- # user_md = (user_md + "\n\n" if user_md else "") + f"![preview]({preview_path})"
132
- user_md = (user_md + "\n\n" if user_md else "") + f"![preview]({preview_data_url})"
133
-
134
- chatbot_state.append((user_md or "(空提示)", "")) # assistant 先空字符串,等待流式填充
135
- yield chatbot_state # 先把用户气泡渲染出来
136
-
137
- # 调后端(流式)
138
- client = OpenAI(api_key=api_key or "not-needed", base_url=base_url.strip())
139
- stream = client.chat.completions.create(
140
- model=model.strip(),
141
- messages=messages,
142
- stream=True,
143
- )
144
-
145
- bot_chunks = []
146
- # 先补一个空 assistant 气泡
147
- if len(chatbot_state) == 1:
148
- chatbot_state[0] = (chatbot_state[0][0], "")
149
- yield chatbot_state
150
-
151
- # 逐 chunk 更新 assistant 气泡(Markdown)
152
- for ev in stream:
153
- delta = getattr(ev.choices[0], "delta", None)
154
- if delta and getattr(delta, "content", None):
155
- bot_chunks.append(delta.content)
156
- chatbot_state[-1] = (chatbot_state[-1][0], "".join(bot_chunks))
157
- yield chatbot_state
158
-
159
- # 结束再确保收尾
160
- chatbot_state[-1] = (chatbot_state[-1][0], "".join(bot_chunks) if bot_chunks else "(empty response)")
161
- yield chatbot_state
162
-
163
- except Exception as e:
164
- chatbot_state.append((
165
- chatbot_state[-1][0] if chatbot_state else "(request)",
166
- f"**Error:** {e}"
167
- ))
168
- yield chatbot_state
169
-
170
- # ---------- Gradio UI ----------
171
- with gr.Blocks(css="""
172
- #chat,
173
- #chat * {
174
- font-size: 18px !important;
175
- line-height: 1.6 !important;
176
- }
177
-
178
- #chat .message,
179
- #chat [data-testid="bot"],
180
- #chat [data-testid="user"] {
181
- font-size: 18px !important;
182
- }
183
- """,title="AXERA Qwen3 VL") as demo:
184
- axera_logo = img_to_data_url_from_path("./axera_logo.png")
185
- gr.Markdown(
186
- f"""
187
- <div style="display: flex; align-items: center; gap: 10px;">
188
- <img src="{axera_logo}" alt="axera_logo" style="height: 60px;">
189
- </div>
190
- """
191
- )
192
-
193
- chatbot = gr.Chatbot(
194
- label="对话",
195
- bubble_full_width=False,
196
- height=500,
197
- avatar_images=(None, None), # 可替换头像
198
- latex_delimiters=[{"left": "$$", "right": "$$", "display": True},
199
- {"left": "$", "right": "$", "display": False}],
200
- show_copy_button=True,
201
- render_markdown=True,
202
- elem_id="chat"
203
- )
204
-
205
- with gr.Row():
206
- with gr.Column(scale=2):
207
- prompt = gr.Textbox(label="Prompt", placeholder="输入你的提示语", lines=2)
208
- with gr.Row():
209
- send_btn = gr.Button("发送 ▶️", variant="primary")
210
- clear_btn = gr.Button("清空")
211
- stop_btn = gr.Button("停止 ■", variant="stop")
212
- with gr.Row():
213
- image = gr.Image(type="filepath", label="上传图片(可选)")
214
- video = gr.Video(label="上传视频(可选)")
215
-
216
- with gr.Column(scale=1):
217
- base_url = gr.Textbox(value="http://localhost:8000/v1", label="Base URL")
218
- model = gr.Textbox(value="AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4", label="Model")
219
- api_key = gr.Textbox(value="not-needed", label="API Key", type="password")
220
- with gr.Row():
221
- prefer_video = gr.Checkbox(True, label="如果有视频,优先使用视频抽帧")
222
- frame_stride = gr.Slider(1, 90, value=30, step=1, label="视频抽帧间隔")
223
- max_frames = gr.Slider(1, 8, value=8, step=1, label="最多抽帧数")
224
-
225
-
226
- # 单轮对话需要一个 state 来承载当前这轮的气泡
227
- state = gr.State([])
228
-
229
- send_btn.click(
230
- fn=run_single_turn,
231
- inputs=[prompt, image, video, prefer_video, frame_stride, max_frames, base_url, model, api_key, state],
232
- outputs=chatbot,
233
- show_progress=True,
234
- queue=True,
235
- )
236
-
237
- def stop_stream(base_url):
238
- url = f"{base_url.strip()}/stop"
239
- response = requests.get(url)
240
- if response.status_code == 200:
241
- print("Stream stopped successfully")
242
- else:
243
- print(f"Failed to stop stream: {response.status_code} - {response.text}")
244
-
245
- stop_btn.click(
246
- fn=stop_stream,
247
- inputs=[base_url],
248
- outputs=chatbot,
249
- show_progress=True,
250
- queue=True,
251
- )
252
-
253
- def clear_all():
254
- return [], "", None, None, True, 30, 8
255
- clear_btn.click(clear_all, None, [chatbot, prompt, image, video, prefer_video, frame_stride, max_frames])
256
-
257
- if __name__ == "__main__":
258
- ips = get_all_local_ips()
259
- for ip in ips:
260
- print(f"* Running on local URL: http://{ip}:7860")
261
- ip = "0.0.0.0"
262
- demo.launch(server_name=ip, server_port=7860)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
main_ax650 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd12cddc400cd3ffb78af4a4512211af28c33f98993b9c7447aab8d8f29d7893
3
- size 6821432
 
 
 
 
main_ax650_api DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:390236f0fef17d46c1bdf0b26f831335fe0e5ede1c10814c1462fdd360b1b984
3
- size 6935688
 
 
 
 
main_axcl_aarch64 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0ded679af8f4fb115b04977d4bc4ecc63783f98d3b239cd3a73de19a6cd19ed
3
- size 1952752
 
 
 
 
main_axcl_api_aarch64 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c90d9dfae62b17ef4681f103c62b483e96a862e900a364673e57bc91d078c63d
3
- size 2105232
 
 
 
 
main_axcl_api_x86 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:67be73d1a6a4c17ee6b73222d3c5988fa10d2dbcf71515f6dad090a561dcc252
3
- size 2202296
 
 
 
 
main_axcl_x86 DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:1113a46767e5cc6c0a53172c5973848a40c65f379a428b3efc64a9fb6f6fb212
3
- size 2062240
 
 
 
 
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/model.embed_tokens.weight.bfloat16.bin → model.embed_tokens.weight.bfloat16.bin RENAMED
File without changes
openai_cli.py DELETED
@@ -1,66 +0,0 @@
1
- import base64
2
- import glob
3
- from openai import OpenAI
4
- import cv2
5
-
6
- BASE_URL = "http://localhost:8000/v1"
7
-
8
- def img_to_data_url(img_path: str):
9
- img = cv2.imread(img_path)
10
- if img is None:
11
- raise FileNotFoundError(f"Cannot read image: {img_path}")
12
- ok, buf = cv2.imencode(".jpg", img)
13
- if not ok:
14
- raise RuntimeError("cv2.imencode failed")
15
- b64 = base64.b64encode(buf).decode("ascii")
16
- return f"data:image/jpeg;base64,{b64}"
17
-
18
-
19
- def test(openai_messages):
20
- client = OpenAI(api_key="not-needed", base_url=BASE_URL)
21
-
22
- stream = client.chat.completions.create(
23
- model="AXERA-TECH/Qwen3-VL-2B-Instruct-GPTQ-Int4",
24
- messages=openai_messages,
25
- stream=True,
26
- )
27
- out_chunks = []
28
- for ev in stream:
29
- delta = ev.choices[0].delta
30
- if delta and delta.content:
31
- out_chunks.append(delta.content)
32
- print(delta.content, end="", flush=True)
33
- print()
34
- assistant_text = "".join(out_chunks).strip()
35
-
36
- def test_image():
37
- image_data = img_to_data_url("../demo_cv308/frame_0075.jpg")
38
-
39
- openai_messages = {
40
- "role": "user",
41
- "content": [
42
- {"type": "text", "text": "描述一下这张图片"},
43
- {"type": "image_url", "image_url": image_data},
44
- ],
45
- }
46
-
47
-
48
- test(openai_messages)
49
-
50
- def test_video():
51
- image_list = glob.glob("../demo_cv308/*.jpg")
52
- image_list.sort()
53
-
54
- image_data_list = [img_to_data_url(img) for img in image_list]
55
-
56
- openai_messages = {
57
- "role": "user",
58
- "content": [
59
- {"type": "text", "text": "描述一下这个视频"},
60
- {"type": "image_url", "is_video":True, "image_url": image_data_list},
61
- ],
62
- }
63
-
64
- test(openai_messages)
65
-
66
- test_video()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
post_config.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "enable_temperature" : true,
3
- "temperature" : 0.7,
4
 
5
  "enable_repetition_penalty" : false,
6
- "repetition_penalty" : 1,
7
- "penalty_window" : 30,
8
 
9
  "enable_top_p_sampling" : false,
10
  "top_p" : 0.8,
11
 
12
- "enable_top_k_sampling" : true,
13
- "top_k" : 20
14
  }
 
1
  {
2
+ "enable_temperature" : false,
3
+ "temperature" : 0.9,
4
 
5
  "enable_repetition_penalty" : false,
6
+ "repetition_penalty" : 1.2,
7
+ "penalty_window" : 20,
8
 
9
  "enable_top_p_sampling" : false,
10
  "top_p" : 0.8,
11
 
12
+ "enable_top_k_sampling" : false,
13
+ "top_k" : 10
14
  }
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l0_together.axmodel → qwen3_vl_text_p128_l0_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l10_together.axmodel → qwen3_vl_text_p128_l10_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l11_together.axmodel → qwen3_vl_text_p128_l11_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l12_together.axmodel → qwen3_vl_text_p128_l12_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l13_together.axmodel → qwen3_vl_text_p128_l13_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l14_together.axmodel → qwen3_vl_text_p128_l14_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l15_together.axmodel → qwen3_vl_text_p128_l15_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l16_together.axmodel → qwen3_vl_text_p128_l16_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l17_together.axmodel → qwen3_vl_text_p128_l17_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l18_together.axmodel → qwen3_vl_text_p128_l18_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l19_together.axmodel → qwen3_vl_text_p128_l19_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l1_together.axmodel → qwen3_vl_text_p128_l1_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l20_together.axmodel → qwen3_vl_text_p128_l20_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l21_together.axmodel → qwen3_vl_text_p128_l21_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l22_together.axmodel → qwen3_vl_text_p128_l22_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l23_together.axmodel → qwen3_vl_text_p128_l23_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l24_together.axmodel → qwen3_vl_text_p128_l24_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l25_together.axmodel → qwen3_vl_text_p128_l25_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l26_together.axmodel → qwen3_vl_text_p128_l26_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l27_together.axmodel → qwen3_vl_text_p128_l27_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l28_together.axmodel → qwen3_vl_text_p128_l28_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l29_together.axmodel → qwen3_vl_text_p128_l29_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l2_together.axmodel → qwen3_vl_text_p128_l2_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l30_together.axmodel → qwen3_vl_text_p128_l30_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l31_together.axmodel → qwen3_vl_text_p128_l31_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l32_together.axmodel → qwen3_vl_text_p128_l32_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l33_together.axmodel → qwen3_vl_text_p128_l33_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l34_together.axmodel → qwen3_vl_text_p128_l34_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l35_together.axmodel → qwen3_vl_text_p128_l35_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l3_together.axmodel → qwen3_vl_text_p128_l3_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l4_together.axmodel → qwen3_vl_text_p128_l4_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l5_together.axmodel → qwen3_vl_text_p128_l5_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l6_together.axmodel → qwen3_vl_text_p128_l6_together.axmodel RENAMED
File without changes
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l7_together.axmodel → qwen3_vl_text_p128_l7_together.axmodel RENAMED
File without changes