wli1995 committed on Jan 22

Commit

5eee450

verified ·

1 Parent(s): 5e8d06a

Upload c++ demo

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +5 -0
FastVLM_tokenizer.txt +0 -0
README.md +91 -14
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l0_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l10_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l11_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l12_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l13_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l14_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l15_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l16_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l17_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l18_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l19_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l1_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l20_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l21_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l22_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l23_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l24_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l25_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l26_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l27_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l2_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l3_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l4_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l5_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l6_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l7_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l8_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l9_together.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_post.axmodel +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/model.embed_tokens.weight.bfloat16.bin +2 -2
fastvlm_ax650_context_1k_prefill_640_int4/model.embed_tokens.weight.npy +2 -2
fastvlm_tokenizer/added_tokens.json +1 -0
fastvlm_tokenizer/config.json +37 -7
fastvlm_tokenizer/generation_config.json +1 -1
fastvlm_tokenizer/tokenizer.json +3 -0
fastvlm_tokenizer/tokenizer_config.json +9 -1
fastvlm_tokenizer/vocab.json +0 -0
infer_axmodel.py +9 -10
main_ax650 +3 -0
main_ax650_api +3 -0
main_axcl_x86 +3 -0
main_axcl_x86_api +3 -0
post_config.json +14 -0
run_ax650_1024.sh +14 -0
run_ax650_512.sh +14 -0
run_ax650_api.sh +13 -0
run_axcl_x86.sh +15 -0

.gitattributes CHANGED Viewed

@@ -66,3 +66,8 @@ fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l9_together.axmodel f
 fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
 images/image_1.jpg filter=lfs diff=lfs merge=lfs -text
 images/ssd_horse.jpg filter=lfs diff=lfs merge=lfs -text

 fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
 images/image_1.jpg filter=lfs diff=lfs merge=lfs -text
 images/ssd_horse.jpg filter=lfs diff=lfs merge=lfs -text
+fastvlm_tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
+main_ax650 filter=lfs diff=lfs merge=lfs -text
+main_ax650_api filter=lfs diff=lfs merge=lfs -text
+main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
+main_axcl_x86_api filter=lfs diff=lfs merge=lfs -text

FastVLM_tokenizer.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

README.md CHANGED Viewed

@@ -34,10 +34,11 @@ How to Convert LLM from Huggingface to axmodel[TODO]
   - [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
   - [M.2 Accelerator card](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html)
-|Chips|image encoder|ttft|w4a16|
-|--|--|--|--|
-|AX650| 216.257 ms (1024x1024)| 709.455 ms (291tokens)| 21.38 tokens/sec|
-|AX650| 44.747 ms (512x512)| 167.543 ms (99tokens)| 21.38 tokens/sec|
 ## How to use
@@ -47,16 +48,27 @@ Download all files from this repository to the device
 ```
 $ tree -L 1
 .
-├── config.json
-├── fastvlm_ax650_context_1k_prefill_640_int4
-├── fastvlm_tokenizer
-├── images
-├── infer_axmodel.py
-├── README.md
-├── requirements.txt
-└── utils
-5 directories, 4 files
 ```
 #### Install transformer
@@ -69,6 +81,71 @@ pip install -r requirements.txt
 Run the following command on the Axera board to start a chat conversation:
 ```sh
 $ python infer_axmodel.py -v ./fastvlm_ax650_context_1k_prefill_640_int4/image_encoder_512x512.axmodel -m ./fastvlm_ax650_context_1k_prefill_640_int4  -t ./fastvlm_tokenizer/ -i 512
 ```

   - [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
   - [M.2 Accelerator card](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html)
+|Chips|image encoder|ttft|w4a16|CMM(GiB)|
+|--|--|--|--|--|
+|AX650| 237.49 ms (1024x1024)| 418.43 ms (291tokens)| 19.87 tokens/sec|1.4|
+|AXCL x86| 233.93 ms (1024x1024)| 779.51 ms (286tokens)| 12.47 tokens/sec|1.4|
+|AX650| 58.33 ms (512x512)| 128.92 ms (100tokens)| 19.87 tokens/sec|1.4|
 ## How to use
 ```
 $ tree -L 1
 .
+|-- FastVLM_tokenizer.txt
+|-- README.md
+|-- config.json
+|-- fastvlm_ax650_context_1k_prefill_640_int4
+|-- fastvlm_tokenizer
+|-- images
+|-- infer_axmodel.py
+|-- main_ax650
+|-- main_ax650_api
+|-- main_axcl_x86
+|-- main_axcl_x86_api
+|-- post_config.json
+|-- requirements.txt
+|-- run_ax650_1024.sh
+|-- run_ax650_512.sh
+|-- run_ax650_api.sh
+|-- run_axcl_x86.sh
+|-- run_axcl_x86_api.sh
+`-- utils
+4 directories, 15 files
 ```
 #### Install transformer
 Run the following command on the Axera board to start a chat conversation:
+```bash
+root@ax650:~/FastVLM-1.5B-GPTQ-Int4# ./run_ax650_1024.sh
+[I][                            Init][ 134]: LLM init start
+tokenizer_type = 3
+stop_tokens size: 2
+151645
+151645
+  6% | ███                               |   2 /  31 [2.24s<34.74s, 0.89 count/s] embed_selector init ok
+100% | ████████████████████████████████ |  31 /  31 [5.96s<5.96s, 5.20 count/s] init post axmodel ok,remain_cmm(8619 MB)
+[I][                            Init][ 252]: IMAGE_CONTEXT_TOKEN: 151646
+[I][                            Init][ 284]: image encoder input nhwc@uint8
+[I][                            Init][ 308]: image encoder output float32
+[I][                            Init][ 318]: image_encoder_height : 1024, image_encoder_width: 1024
+[I][                            Init][ 320]: max_token_len : 1024
+[I][                            Init][ 323]: kv_cache_size : 256, kv_cache_num: 1024
+[I][                            Init][ 331]: prefill_token_num : 128
+[I][                            Init][ 335]: grp: 1, prefill_max_token_num : 1
+[I][                            Init][ 335]: grp: 2, prefill_max_token_num : 128
+[I][                            Init][ 335]: grp: 3, prefill_max_token_num : 256
+[I][                            Init][ 335]: grp: 4, prefill_max_token_num : 512
+[I][                            Init][ 335]: grp: 5, prefill_max_token_num : 640
+[I][                            Init][ 339]: prefill_max_token_num : 640
+[I][                     load_config][ 282]: load config:
+{
+    "enable_repetition_penalty": false,
+    "enable_temperature": true,
+    "enable_top_k_sampling": true,
+    "enable_top_p_sampling": false,
+    "penalty_window": 30,
+    "repetition_penalty": 2,
+    "temperature": 0.1,
+    "top_k": 10,
+    "top_p": 0.8
+}
+[I][                            Init][ 348]: LLM init ok
+Type "q" to exit, Ctrl+c to stop current running
+prompt >> who are you
+image >>
+[I][                          Encode][ 470]: input_ids size: 33
+[I][                             Run][ 604]: input token num : 33, prefill_split_num : 1
+[I][                             Run][ 619]: prefill grpid 2
+[I][                             Run][ 646]: input_num_token:33
+[I][                             Run][ 770]: ttft: 127.56 ms
+I am FastVLM, a language model developed by Apple Inc.
+[N][                             Run][ 879]: hit eos,avg 19.87 token/s
+prompt >> describe the image
+image >> ./images/ssd_horse.jpg
+[I][                          Encode][ 442]: image encode time : 237.49 ms, size : 393216
+[I][                          Encode][ 496]: imgs_embed.size() : 1, media token size : 256
+[I][                             Run][ 604]: input token num : 291, prefill_split_num : 3
+[I][                             Run][ 619]: prefill grpid 4
+[I][                             Run][ 646]: input_num_token:128
+[I][                             Run][ 646]: input_num_token:128
+[I][                             Run][ 646]: input_num_token:35
+[I][                             Run][ 770]: ttft: 418.43 ms
+The image depicts a scene in an outdoor setting, likely a farm or ranch, with a person riding a brown horse. The person is wearing a blue hoodie and jeans, and is seated on a saddle with a yellow blanket underneath. The horse has a white blaze on its face and is standing on a dirt ground. In front of the horse, there is a brown dog wearing a pink collar, looking up at the person on the horse. In the background, there is a silver pickup truck parked near a fence, and beyond the fence, there are trees and other people. The overall atmosphere of the image suggests a casual, outdoor activity, possibly involving horse riding and training.
+[N][                             Run][ 879]: hit eos,avg 19.85 token/s
+prompt >> q
+```
 ```sh
 $ python infer_axmodel.py -v ./fastvlm_ax650_context_1k_prefill_640_int4/image_encoder_512x512.axmodel -m ./fastvlm_ax650_context_1k_prefill_640_int4  -t ./fastvlm_tokenizer/ -i 512
 ```

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l0_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97c84a7b5a5d81511164a97745d11caa6eca22bb9564405dee34b7426772c3f0
-size 29802559

 version https://git-lfs.github.com/spec/v1
+oid sha256:ffa354f075478707195367c7574873a71a37be99e3a29c9dd5d8b82df37e7f63
+size 30032071

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l10_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5cbfc193d6d402f39d56cea0b7943cd72f0382f9fdc98d552609384d1b6eb1a9
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:a1dcbb2a8f56e526b9491c06ee608cd3a1dd0afa01a09c789866ad7438874515
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l11_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:feb8903b4d2fb7a79e782aa43ee7e07e1a878c2d7d134df1f3304d5d24e74ba0
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:61dcbd2e0accc5cbc3b86a1c96a956be07e7f8d60752b7979aa051a6fe22bc8e
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l12_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:54c3d3f8634ab7f5e88146a4ec561eecd60e7b7c5adee6501d4d74bd55b51b32
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:abafe8799e7275ff4e5d2abfecbfb7d73dc5b3059f9c8bdcff6af91e2fd2145d
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l13_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4bf7e682c6873f265ba3a8634b8f9c6bb375e91cef9425e10bc667e36a32370d
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:6158b91977b1c32212345deee1dac3c8a4816db6ae9601757712720f3aa35523
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l14_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:320713aa63910d2b23a4a79cf26dbc1a8a2c3fb8c5a3dc308d7c93f36f9b86af
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:cfd43f25b51aac7197d6a92ba83fdc11368fbfc5794aaa024a3b047d2e6e551a
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l15_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a1a3276e65abaafb41220aae26b28a9ad272284c3e23443d724d2463be5c18e9
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:87087ce133dafe41aa30bca543ad7e4f9393c5a8985825d49969a434870d8d54
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l16_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b42ea532700fc248fce250f33c91296a29c82fb04809c504eccb7f74f3cc3dfc
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:d2afc202eeead21c2f755be28106c8e82466800806c249fa36f1dd4bb6ee0233
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l17_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:032c1b803149ced100c0557b460023568f1a53f73a3c1b2f9eaf026f41b963e9
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:919a507d3d5661e202606deebd3fbad3f473dc5f3d9eead5ce8ff4d06dda7b8c
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l18_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:822e5e51c35de9feefe4cb4c7423cea663eef6ecd5a60b314999db1a2ef85f1e
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:b50d9131898b6857f12efd3cb506ca7b6f25f355c1a982a8d77b5e342a254953
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l19_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0975632690a7b295dcb8909be1d0ebc257e8cef5b1b60ad47d23a1f1f7318f24
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:bf21551105a3545de7d60a2859601f7aa5cabc17713830bf631c9e0c008674ea
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l1_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:36a0c441db431e831fb175cd039b7c0970b2a3147b56cc85d29b541d15092614
-size 29802559

 version https://git-lfs.github.com/spec/v1
+oid sha256:97748366cd1e6ac901d9e1930b8ebb126cd13855a986bd70e3b5854985053695
+size 30032071

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l20_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:24b6e0956ed865010417c5c4505b7af31f4c3996eaa96bfe678e36157e0ca80f
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:6de9b2d3a452965305d0f9d5a42a563b553d2a841cb8c6a62de93a04eb2d2ab2
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l21_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d4d35acc8425f9150a8fc3d51f76df4de7e22fb1ad161f6499d2cf165c0010dd
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:b60f6ddb148a67940f3bafd83ca6ef1bd344fb21631242ecb5150da30af90d79
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l22_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fab242ebbeab515d47084a61114e56ab42e45266cc09adfc58e947f30011cbf9
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:9c7ee3531e4e9b67356d2a801f0fce4212b18625e4b46b5e6efbb6b779e12a7f
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l23_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ff319423a917b998957ea386ffa1612e19e1e5948f792901c9fb0967c47f6376
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:891d76611b5f146310095ee5363c214f6d015d303a14fc6f77917b1f9b2b9cf0
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l24_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:cccad862ac8d7101047df049a598a3d8df36a828b2f785efeeecd351f2d8b31b
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:32edc5f719b763d0d359640712bc71187f2c0d29303124e39d1a14692b69d32c
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l25_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a35a18afcc36e060fc5025a4bda786db2be588d48f066671137bc9c421e8c435
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:078f16be6b152fb457869e09dd05ab0fd58e69204ef9b26158b743f1a9fbf284
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l26_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:83498d03558ba7aae47a2933b28089abb279c40fb37358dab7903b462e7d387c
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:111b0ecf00d2182ed1b4cda860d426f78da586af3eba481e5007e02de952ee82
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l27_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4c85585f12cac18c95b516f485176abda4c11efa4c50eb8064af2ce67135f93a
-size 29802567

 version https://git-lfs.github.com/spec/v1
+oid sha256:80c1a8cdc285f38ebb9775108ecc6496d061829138cfa5c4443c48a046957e6d
+size 30032079

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l2_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2d0766c4d014f1df113af2075e1878733eb5168488855d1ece4688892cf1bf49
-size 29802559

 version https://git-lfs.github.com/spec/v1
+oid sha256:841da42bdf30a5a9728b1bd5ed79ae34018f10b27861d295882a9339498e0a2e
+size 30032071

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l3_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d5c7f3cc0274f26a1cf9bf4cb79234ed83c2522e66c68369caba86c0ce0ec969
-size 29802559

 version https://git-lfs.github.com/spec/v1
+oid sha256:94994655aeada6cea714e52703ee687fafd503cf884b7b245928e7008dc8227c
+size 30032071

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l4_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b7795e6fd1ca8da46ba039c384b3a7361619048c8fd2dc46307155164d86705c
-size 29802559

 version https://git-lfs.github.com/spec/v1
+oid sha256:be2e0b87c56658492c960792f2664227652b2176cf8a98cda3e44f6375680316
+size 30032071

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l5_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8c66a69232635b86d6f9ac5527a461f7f90bffd2b31c5e46fb74cd7b99321dac
-size 29802559

 version https://git-lfs.github.com/spec/v1
+oid sha256:6962cf4684917281f7305b83d7cde412f0e96eace8663ec42e0c162135a3017e
+size 30032071

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l6_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e093bae053fbdc910c203248e21c4b66239d32ab273f863f41486ee72ce072c2
-size 29802559

 version https://git-lfs.github.com/spec/v1
+oid sha256:431ca95208e44af62e2efdaea7c4b215ab97bae244a58c83a7f2244bbb05990b
+size 30032071

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l7_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ba313e7f26818e4fa36027e62b9276c1d7c48a342505a78812db0d6fae748aca
-size 29802559

 version https://git-lfs.github.com/spec/v1
+oid sha256:bba03599293d8aa1853d3a0ec7c6fe00b174a0dadd03da6cb4cdd56af44503ec
+size 30032071

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l8_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7bfd2c82228f858f1dd15f46d23239197b0b62d435eb53a0eae73c21c3e1d5a8
-size 29802559

 version https://git-lfs.github.com/spec/v1
+oid sha256:ee264eaca720fa9f8ddf2f1c537c56d39ba525d246de91f4bc9aa09d61e9782b
+size 30032071

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l9_together.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:673154996aa1caf889a9932f09e56395e87e2aa32d5eb16f7056eaf5a0ce371e
-size 29802559

 version https://git-lfs.github.com/spec/v1
+oid sha256:4e7d245920e9d45da51a5d187c5f0acbe2941bd483492cfe58daaa23f8740303
+size 30032071

fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_post.axmodel CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:fba049bc6df809d7f4cd2f1baa3e01ccfd88c5388356ac7b4facc507d744ebca
-size 254826994

 version https://git-lfs.github.com/spec/v1
+oid sha256:faea2655f070f91f8b25b75df246ae4de7348696b9f1b23b361bd9f4b01f199c
+size 254344594

fastvlm_ax650_context_1k_prefill_640_int4/model.embed_tokens.weight.bfloat16.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e01aea64483c11c7de1dbfb8fac8b07860ad669718deb5cf35b8eb71a0dbe593
-size 466747392

 version https://git-lfs.github.com/spec/v1
+oid sha256:2791648834191a3080f8952421197b98ee45cdc9337cf45abc35103562347b79
+size 465859584

fastvlm_ax650_context_1k_prefill_640_int4/model.embed_tokens.weight.npy CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f682683aa248b48679ed39eb816e4066c7c4a9f53f7b94bad2aef079a63a6d3
-size 933494912

 version https://git-lfs.github.com/spec/v1
+oid sha256:1045cc5b2298c3032c2f6a3f1b2f83bc4aa69d39f32476d9bfd7b68b9d380ccd
+size 931719296

fastvlm_tokenizer/added_tokens.json CHANGED Viewed

@@ -1,4 +1,5 @@
 {
   "<|endoftext|>": 151643,
   "<|im_end|>": 151645,
   "<|im_start|>": 151644

 {
+  "<image>": 151646,
   "<|endoftext|>": 151643,
   "<|im_end|>": 151645,
   "<|im_start|>": 151644

fastvlm_tokenizer/config.json CHANGED Viewed

@@ -1,14 +1,14 @@
 {
-  "_name_or_path": "./llava-v1.5-13b",
   "architectures": [
     "LlavaQwen2ForCausalLM"
   ],
   "auto_map": {
     "AutoConfig": "llava_qwen.LlavaConfig",
     "AutoModelForCausalLM": "llava_qwen.LlavaQwen2ForCausalLM"
-  },
-  "attention_dropout": 0.0,
   "bos_token_id": 151643,
   "eos_token_id": 151645,
   "freeze_mm_mlp_adapter": false,
   "hidden_act": "silu",
@@ -17,6 +17,36 @@
   "image_grid_pinpoints": null,
   "initializer_range": 0.02,
   "intermediate_size": 8960,
   "max_position_embeddings": 32768,
   "max_window_layers": 28,
   "mm_hidden_size": 3072,
@@ -33,17 +63,17 @@
   "num_hidden_layers": 28,
   "num_key_value_heads": 2,
   "rms_norm_eps": 1e-06,
   "rope_theta": 1000000.0,
-  "sliding_window": 32768,
   "tie_word_embeddings": true,
   "tokenizer_model_max_length": 8192,
   "tokenizer_padding_side": "right",
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.39.3",
   "tune_mm_mlp_adapter": false,
   "unfreeze_mm_vision_tower": true,
   "use_cache": true,
   "use_mm_proj": true,
   "use_sliding_window": false,
-  "vocab_size": 151936
 }

 {
   "architectures": [
     "LlavaQwen2ForCausalLM"
   ],
+  "attention_dropout": 0.0,
   "auto_map": {
     "AutoConfig": "llava_qwen.LlavaConfig",
     "AutoModelForCausalLM": "llava_qwen.LlavaQwen2ForCausalLM"
+  },
   "bos_token_id": 151643,
+  "dtype": "float32",
   "eos_token_id": 151645,
   "freeze_mm_mlp_adapter": false,
   "hidden_act": "silu",
   "image_grid_pinpoints": null,
   "initializer_range": 0.02,
   "intermediate_size": 8960,
+  "layer_types": [
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention",
+    "full_attention"
+  ],
   "max_position_embeddings": 32768,
   "max_window_layers": 28,
   "mm_hidden_size": 3072,
   "num_hidden_layers": 28,
   "num_key_value_heads": 2,
   "rms_norm_eps": 1e-06,
+  "rope_scaling": null,
   "rope_theta": 1000000.0,
+  "sliding_window": null,
   "tie_word_embeddings": true,
   "tokenizer_model_max_length": 8192,
   "tokenizer_padding_side": "right",
+  "transformers_version": "4.57.0",
   "tune_mm_mlp_adapter": false,
   "unfreeze_mm_vision_tower": true,
   "use_cache": true,
   "use_mm_proj": true,
   "use_sliding_window": false,
+  "vocab_size": 151647
 }

fastvlm_tokenizer/generation_config.json CHANGED Viewed

@@ -2,5 +2,5 @@
   "do_sample": true,
   "temperature": null,
   "top_p": null,
-  "transformers_version": "4.39.3"
 }

   "do_sample": true,
   "temperature": null,
   "top_p": null,
+  "transformers_version": "4.57.0"
 }

fastvlm_tokenizer/tokenizer.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:b187c9fe72f04a62ed1f592418d79751ed77f5eab6c3abded85349cf97f152ea
+size 11413284

fastvlm_tokenizer/tokenizer_config.json CHANGED Viewed

@@ -24,6 +24,14 @@
       "rstrip": false,
       "single_word": false,
       "special": true
     }
   },
   "additional_special_tokens": [
@@ -31,10 +39,10 @@
     "<|im_end|>"
   ],
   "bos_token": null,
-  "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "errors": "replace",
   "model_max_length": 8192,
   "pad_token": "<|endoftext|>",
   "padding_side": "right",

       "rstrip": false,
       "single_word": false,
       "special": true
+    },
+    "151646": {
+      "content": "<image>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
     }
   },
   "additional_special_tokens": [
     "<|im_end|>"
   ],
   "bos_token": null,
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
   "errors": "replace",
+  "extra_special_tokens": {},
   "model_max_length": 8192,
   "pad_token": "<|endoftext|>",
   "padding_side": "right",

fastvlm_tokenizer/vocab.json CHANGED Viewed

The diff for this file is too large to render. See raw diff

infer_axmodel.py CHANGED Viewed

@@ -43,9 +43,7 @@ def vision_encoder(image_path, ax_session, args):
     return vit_output
-def llm_infer(image_features, llm_path, config, tokenizer, imer, get_input, token_length):
-    embeds = np.load(os.path.join(llm_path, "model.embed_tokens.weight.npy"))
     prompt = "<|im_start|>system\nYou are a helpful assistant, created by apple company.<|im_end|>\n"
     question = get_input
@@ -53,10 +51,10 @@ def llm_infer(image_features, llm_path, config, tokenizer, imer, get_input, toke
     if image_features is not None:
     #     # for idx in range(len(image_features)):
-        prompt += "\n<img>" + "<image>"*token_length + "</img>\n"
     prompt += "<|im_end|>\n<|im_start|>assistant\n"
-    token_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX)
     # 图像理解
     prefill_data = np.take(embeds, token_ids, axis=0)
@@ -64,7 +62,7 @@ def llm_infer(image_features, llm_path, config, tokenizer, imer, get_input, toke
     token_len = len(token_ids)
     if image_features is not None:
-        image_start_index = np.where(np.array(token_ids) == -200)[0][0] # <image> tag 151646
         image_insert_index = image_start_index + 1
         prefill_data[image_insert_index : image_insert_index + token_length] = image_features[0, :, :]
@@ -85,8 +83,8 @@ def llm_infer(image_features, llm_path, config, tokenizer, imer, get_input, toke
 if __name__ == "__main__":
     args = argparse.ArgumentParser()
-    args.add_argument("--vision_model", "-v", type=str, default="./fastvlm_ax650_context_1k_prefill_640/image_encoder_1024x1024.axmodel", help="Path to the vision axmodel.")
-    args.add_argument("--model_path", "-m", type=str, default="./fastvlm_ax650_context_1k_prefill_640", help="Path to the llm axmodel.")
     args.add_argument("--tokenizer_path", "-t", type=str, default="./fastvlm_tokenizer", help="Path to the tokenizer.")
     args.add_argument("--input_size", "-i", type=str, default="1024", help="Input size of the vision encoder model.")
     # args.add_argument("--question", type=str, default="介绍一下你自己", help="The question to ask the model.")
@@ -110,6 +108,7 @@ if __name__ == "__main__":
     imer = InferManager(config, args.model_path, max_seq_len=max_seq_len) # prefill + decode max length
     ax_session = ax.InferenceSession(args.vision_model)
     print(f"[INFO]: 输入文本进行对话，或者输入图片路径进行图片理解, 或者输入q退出对话。")
     while True:
@@ -125,7 +124,7 @@ if __name__ == "__main__":
                     continue
                 image_features = vision_encoder(get_input, ax_session, args)
                 get_input = "Describe the image in detail."
-                llm_infer(image_features, args.model_path, config, tokenizer, imer, get_input, token_length)
             else:
                 image_features = None
-                llm_infer(image_features, args.model_path, config, tokenizer, imer, get_input, token_length)

     return vit_output
+def llm_infer(image_features, llm_path, config, tokenizer, imer, get_input, token_length, embeds):
     prompt = "<|im_start|>system\nYou are a helpful assistant, created by apple company.<|im_end|>\n"
     question = get_input
     if image_features is not None:
     #     # for idx in range(len(image_features)):
+        prompt += "\n" + "<image>"*token_length + "\n"
     prompt += "<|im_end|>\n<|im_start|>assistant\n"
+    token_ids = tokenizer.encode(prompt)
     # 图像理解
     prefill_data = np.take(embeds, token_ids, axis=0)
     token_len = len(token_ids)
     if image_features is not None:
+        image_start_index = np.where(np.array(token_ids) == 151646)[0][0] # <image> tag 151646
         image_insert_index = image_start_index + 1
         prefill_data[image_insert_index : image_insert_index + token_length] = image_features[0, :, :]
 if __name__ == "__main__":
     args = argparse.ArgumentParser()
+    args.add_argument("--vision_model", "-v", type=str, default="./fastvlm_ax650_context_1k_prefill_640_int4/image_encoder_1024x1024.axmodel", help="Path to the vision axmodel.")
+    args.add_argument("--model_path", "-m", type=str, default="./fastvlm_ax650_context_1k_prefill_640_int4", help="Path to the llm axmodel.")
     args.add_argument("--tokenizer_path", "-t", type=str, default="./fastvlm_tokenizer", help="Path to the tokenizer.")
     args.add_argument("--input_size", "-i", type=str, default="1024", help="Input size of the vision encoder model.")
     # args.add_argument("--question", type=str, default="介绍一下你自己", help="The question to ask the model.")
     imer = InferManager(config, args.model_path, max_seq_len=max_seq_len) # prefill + decode max length
     ax_session = ax.InferenceSession(args.vision_model)
+    embeds = np.load(os.path.join(args.model_path, "model.embed_tokens.weight.npy"))
     print(f"[INFO]: 输入文本进行对话，或者输入图片路径进行图片理解, 或者输入q退出对话。")
     while True:
                     continue
                 image_features = vision_encoder(get_input, ax_session, args)
                 get_input = "Describe the image in detail."
+                llm_infer(image_features, args.model_path, config, tokenizer, imer, get_input, token_length, embeds)
             else:
                 image_features = None
+                llm_infer(image_features, args.model_path, config, tokenizer, imer, get_input, token_length, embeds)

main_ax650 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e3e4365188762fe10890b8c02b87e576faeb756bc5904804d5cbb0f7b664879e
+size 1215488

main_ax650_api ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:748b2f6cbf36c6ed15fb3ae75df39741332c8793c46f9caf6878a7834d3fe718
+size 1309168

main_axcl_x86 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7205b7ade7c5efaeef89fb3e458b6da11508e6d9fe992cfb92eefe0094c4a8b5
+size 7009480

main_axcl_x86_api ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c6c2dd57d2679f349a22279a4f4b92f941ec62314b58b38052228552ff0bd70
+size 7128832

post_config.json ADDED Viewed

	@@ -0,0 +1,14 @@

+{
+    "enable_temperature" : true,
+    "temperature" : 0.1,
+    "enable_repetition_penalty" : false,
+    "repetition_penalty" : 2,
+    "penalty_window" : 30,
+    "enable_top_p_sampling" : false,
+    "top_p" : 0.8,
+    "enable_top_k_sampling" : true,
+    "top_k" : 10
+}

run_ax650_1024.sh ADDED Viewed

	@@ -0,0 +1,14 @@

+#!/bin/sh
+# Launch the C++ demo binary main_ax650 with the 1024x1024 image encoder.
+#
+# Every path handed to the binary is relative: the model files live under
+# AXMODEL_DIR and the tokenizer text file sits next to this script, so run
+# the script from the directory that contains them.
+# The %d in the layer template is passed to the binary unchanged.
+#
+# NOTE review: the image encoder flag is spelled axmodedl below. The binary
+# is not visible from here, so the spelling is kept as is. TODO confirm the
+# accepted spelling against main_ax650 before renaming it.
+set -eu
+
+# No trailing slash: each path below already supplies its own separator.
+AXMODEL_DIR=./fastvlm_ax650_context_1k_prefill_640_int4
+./main_ax650 \
+--template_filename_axmodel "${AXMODEL_DIR}/llava_qwen2_p128_l%d_together.axmodel" \
+--filename_post_axmodel "${AXMODEL_DIR}/llava_qwen2_post.axmodel" \
+--filename_tokenizer_txt "FastVLM_tokenizer.txt" \
+--filename_tokens_embed "${AXMODEL_DIR}/model.embed_tokens.weight.bfloat16.bin" \
+--filename_image_encoder_axmodedl "${AXMODEL_DIR}/image_encoder_1024x1024.axmodel" \
+--axmodel_num 28 \
+--tokens_embed_num 151647 \
+--tokens_embed_size 1536 \
+--live_print 1 \
+--img_width 1024 \
+--img_height 1024

run_ax650_512.sh ADDED Viewed

	@@ -0,0 +1,14 @@

+#!/bin/sh
+# Launch the C++ demo binary main_ax650 with the 512x512 image encoder.
+#
+# Every path handed to the binary is relative: the model files live under
+# AXMODEL_DIR and the tokenizer text file sits next to this script, so run
+# the script from the directory that contains them.
+# The %d in the layer template is passed to the binary unchanged.
+#
+# NOTE review: the image encoder flag is spelled axmodedl below. The binary
+# is not visible from here, so the spelling is kept as is. TODO confirm the
+# accepted spelling against main_ax650 before renaming it.
+set -eu
+
+# No trailing slash: each path below already supplies its own separator.
+AXMODEL_DIR=./fastvlm_ax650_context_1k_prefill_640_int4
+./main_ax650 \
+--template_filename_axmodel "${AXMODEL_DIR}/llava_qwen2_p128_l%d_together.axmodel" \
+--filename_post_axmodel "${AXMODEL_DIR}/llava_qwen2_post.axmodel" \
+--filename_tokenizer_txt "FastVLM_tokenizer.txt" \
+--filename_tokens_embed "${AXMODEL_DIR}/model.embed_tokens.weight.bfloat16.bin" \
+--filename_image_encoder_axmodedl "${AXMODEL_DIR}/image_encoder_512x512.axmodel" \
+--axmodel_num 28 \
+--tokens_embed_num 151647 \
+--tokens_embed_size 1536 \
+--live_print 1 \
+--img_width 512 \
+--img_height 512

run_ax650_api.sh ADDED Viewed

	@@ -0,0 +1,13 @@

+#!/bin/sh
+# Launch the C++ demo binary main_ax650_api with the 1024x1024 image encoder.
+#
+# Every path handed to the binary is relative: the model files live under
+# AXMODEL_DIR and the tokenizer text file sits next to this script, so run
+# the script from the directory that contains them.
+# The %d in the layer template is passed to the binary unchanged.
+# Unlike the other launchers in this commit, no live_print flag is passed.
+#
+# NOTE review: the image encoder flag is spelled axmodedl below. The binary
+# is not visible from here, so the spelling is kept as is. TODO confirm the
+# accepted spelling against main_ax650_api before renaming it.
+set -eu
+
+# No trailing slash: each path below already supplies its own separator.
+AXMODEL_DIR=./fastvlm_ax650_context_1k_prefill_640_int4
+./main_ax650_api \
+--template_filename_axmodel "${AXMODEL_DIR}/llava_qwen2_p128_l%d_together.axmodel" \
+--filename_post_axmodel "${AXMODEL_DIR}/llava_qwen2_post.axmodel" \
+--filename_tokenizer_txt "FastVLM_tokenizer.txt" \
+--filename_tokens_embed "${AXMODEL_DIR}/model.embed_tokens.weight.bfloat16.bin" \
+--filename_image_encoder_axmodedl "${AXMODEL_DIR}/image_encoder_1024x1024.axmodel" \
+--axmodel_num 28 \
+--tokens_embed_num 151647 \
+--tokens_embed_size 1536 \
+--img_width 1024 \
+--img_height 1024

run_axcl_x86.sh ADDED Viewed

	@@ -0,0 +1,15 @@

+#!/bin/sh
+# Launch the C++ demo binary main_axcl_x86 with the 1024x1024 image encoder.
+#
+# Every path handed to the binary is relative: the model files live under
+# AXMODEL_DIR and the tokenizer text file sits next to this script, so run
+# the script from the directory that contains them.
+# The %d in the layer template is passed to the binary unchanged.
+# The devices flag is passed as 0. Presumably it selects the accelerator
+# card index. TODO confirm against the main_axcl_x86 usage text.
+set -eu
+
+# No trailing slash: each path below already supplies its own separator.
+AXMODEL_DIR=./fastvlm_ax650_context_1k_prefill_640_int4
+./main_axcl_x86 \
+--template_filename_axmodel "${AXMODEL_DIR}/llava_qwen2_p128_l%d_together.axmodel" \
+--filename_post_axmodel "${AXMODEL_DIR}/llava_qwen2_post.axmodel" \
+--filename_tokenizer_txt "FastVLM_tokenizer.txt" \
+--filename_tokens_embed "${AXMODEL_DIR}/model.embed_tokens.weight.bfloat16.bin" \
+--filename_image_encoder_axmodel "${AXMODEL_DIR}/image_encoder_1024x1024.axmodel" \
+--axmodel_num 28 \
+--tokens_embed_num 151647 \
+--tokens_embed_size 1536 \
+--live_print 1 \
+--img_width 1024 \
+--img_height 1024 \
+--devices 0