lihongjie committed on
Commit
f09eeda
·
1 Parent(s): 028cce7

添加 U8+U16 混合量化的 Image Encoder

Browse files
.gitattributes CHANGED
@@ -55,3 +55,5 @@ Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l21_together.axmod
55
  Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l28_together.axmodel filter=lfs diff=lfs merge=lfs -text
56
  main_ax650 filter=lfs diff=lfs merge=lfs -text
57
  main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
 
 
 
55
  Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/qwen3_vl_text_p128_l28_together.axmodel filter=lfs diff=lfs merge=lfs -text
56
  main_ax650 filter=lfs diff=lfs merge=lfs -text
57
  main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
58
+ main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
59
+ Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-4B-Instruct_vision_u8.axmodel filter=lfs diff=lfs merge=lfs -text
Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/Qwen3-VL-4B-Instruct_vision_u8.axmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0318af7d86d0df92ceee2344eb1239fd1c74b3fedf84a7cd2cdcbf91341d9121
3
+ size 441899962
README.md CHANGED
@@ -52,6 +52,16 @@ For those who are interested in model conversion, you can try to export axmodel
52
  |--|--|--|--|--|--|--|--|
53
  |AX650| 384*384 | 8 | 773 ms | 1887 ms | 7.1 tokens/sec| 5.6GiB | 5.6GiB |
54
 
 
 
 
 
 
 
 
 
 
 
55
  The DDR capacity refers to the CMM memory that needs to be consumed. Ensure that the CMM memory allocation on the development board is greater than this value.
56
 
57
  ## How to use
 
52
  |--|--|--|--|--|--|--|--|
53
  |AX650| 384*384 | 8 | 773 ms | 1887 ms | 7.1 tokens/sec| 5.6GiB | 5.6GiB |
54
 
55
+ **Image Process (Image Encoder U8+U16 Quantization)**
56
+ |Chips| input size | image num | image encoder | ttft(320 tokens) | w4a16 | CMM | Flash |
57
+ |--|--|--|--|--|--|--|--|
58
+ |AX650| 384*384 | 1 | 171 ms | 678 ms | 7.0 tokens/sec| 5.6GiB | 5.6GiB |
59
+
60
+ **Video Process (Image Encoder U8+U16 Quantization)**
61
+ |Chips| input size | image num | image encoder | ttft(600 tokens) | w4a16 | CMM | Flash |
62
+ |--|--|--|--|--|--|--|--|
63
+ |AX650| 384*384 | 8 | 498 ms | 1887 ms | 7.1 tokens/sec| 5.6GiB | 5.6GiB |
64
+
65
  The DDR capacity refers to the CMM memory that needs to be consumed. Ensure that the CMM memory allocation on the development board is greater than this value.
66
 
67
  ## How to use
run_image_ax650_imgu8.sh ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AXMODEL_DIR=./Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/
2
+
3
+ ./main_ax650 \
4
+ --template_filename_axmodel "${AXMODEL_DIR}/qwen3_vl_text_p128_l%d_together.axmodel" \
5
+ --axmodel_num 36 \
6
+ --filename_image_encoder_axmodedl "${AXMODEL_DIR}/Qwen3-VL-4B-Instruct_vision_u8.axmodel" \
7
+ --bos 0 --eos 0 \
8
+ --dynamic_load_axmodel_layer 0 \
9
+ --use_mmap_load_embed 1 \
10
+ --filename_tokenizer_model "http://127.0.0.1:8080" \
11
+ --filename_post_axmodel "${AXMODEL_DIR}/qwen3_vl_text_post.axmodel" \
12
+ --use_topk 0 \
13
+ --filename_tokens_embed "${AXMODEL_DIR}/model.embed_tokens.weight.bfloat16.bin" \
14
+ --tokens_embed_num 151936 \
15
+ --tokens_embed_size 2560 \
16
+ --patch_size 16 \
17
+ --live_print 1 \
18
+ --continue 1 \
19
+ --video 0 \
20
+ --img_width 384 \
21
+ --img_height 384 \
22
+ --vision_start_token_id 151652 \
23
+ --post_config_path post_config.json
run_video_ax650_imgu8.sh ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AXMODEL_DIR=./Qwen3-VL-4B-Instruct-AX650-c128_p1152-int4/
2
+
3
+ ./main_ax650 \
4
+ --template_filename_axmodel "${AXMODEL_DIR}/qwen3_vl_text_p128_l%d_together.axmodel" \
5
+ --axmodel_num 36 \
6
+ --filename_image_encoder_axmodedl "${AXMODEL_DIR}/Qwen3-VL-4B-Instruct_vision_u8.axmodel" \
7
+ --bos 0 --eos 0 \
8
+ --dynamic_load_axmodel_layer 0 \
9
+ --use_mmap_load_embed 1 \
10
+ --filename_tokenizer_model "http://127.0.0.1:8080" \
11
+ --filename_post_axmodel "${AXMODEL_DIR}/qwen3_vl_text_post.axmodel" \
12
+ --use_topk 0 \
13
+ --filename_tokens_embed "${AXMODEL_DIR}/model.embed_tokens.weight.bfloat16.bin" \
14
+ --tokens_embed_num 151936 \
15
+ --tokens_embed_size 2560 \
16
+ --patch_size 16 \
17
+ --live_print 1 \
18
+ --continue 1 \
19
+ --video 1 \
20
+ --img_width 384 \
21
+ --img_height 384 \
22
+ --vision_start_token_id 151652 \
23
+ --post_config_path post_config.json