wli1995 committed on
Commit
5eee450
·
verified ·
1 Parent(s): 5e8d06a

Upload c++ demo

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +5 -0
  2. FastVLM_tokenizer.txt +0 -0
  3. README.md +91 -14
  4. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l0_together.axmodel +2 -2
  5. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l10_together.axmodel +2 -2
  6. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l11_together.axmodel +2 -2
  7. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l12_together.axmodel +2 -2
  8. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l13_together.axmodel +2 -2
  9. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l14_together.axmodel +2 -2
  10. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l15_together.axmodel +2 -2
  11. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l16_together.axmodel +2 -2
  12. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l17_together.axmodel +2 -2
  13. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l18_together.axmodel +2 -2
  14. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l19_together.axmodel +2 -2
  15. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l1_together.axmodel +2 -2
  16. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l20_together.axmodel +2 -2
  17. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l21_together.axmodel +2 -2
  18. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l22_together.axmodel +2 -2
  19. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l23_together.axmodel +2 -2
  20. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l24_together.axmodel +2 -2
  21. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l25_together.axmodel +2 -2
  22. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l26_together.axmodel +2 -2
  23. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l27_together.axmodel +2 -2
  24. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l2_together.axmodel +2 -2
  25. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l3_together.axmodel +2 -2
  26. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l4_together.axmodel +2 -2
  27. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l5_together.axmodel +2 -2
  28. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l6_together.axmodel +2 -2
  29. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l7_together.axmodel +2 -2
  30. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l8_together.axmodel +2 -2
  31. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l9_together.axmodel +2 -2
  32. fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_post.axmodel +2 -2
  33. fastvlm_ax650_context_1k_prefill_640_int4/model.embed_tokens.weight.bfloat16.bin +2 -2
  34. fastvlm_ax650_context_1k_prefill_640_int4/model.embed_tokens.weight.npy +2 -2
  35. fastvlm_tokenizer/added_tokens.json +1 -0
  36. fastvlm_tokenizer/config.json +37 -7
  37. fastvlm_tokenizer/generation_config.json +1 -1
  38. fastvlm_tokenizer/tokenizer.json +3 -0
  39. fastvlm_tokenizer/tokenizer_config.json +9 -1
  40. fastvlm_tokenizer/vocab.json +0 -0
  41. infer_axmodel.py +9 -10
  42. main_ax650 +3 -0
  43. main_ax650_api +3 -0
  44. main_axcl_x86 +3 -0
  45. main_axcl_x86_api +3 -0
  46. post_config.json +14 -0
  47. run_ax650_1024.sh +14 -0
  48. run_ax650_512.sh +14 -0
  49. run_ax650_api.sh +13 -0
  50. run_axcl_x86.sh +15 -0
.gitattributes CHANGED
@@ -66,3 +66,8 @@ fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l9_together.axmodel f
66
  fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
67
  images/image_1.jpg filter=lfs diff=lfs merge=lfs -text
68
  images/ssd_horse.jpg filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
66
  fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text
67
  images/image_1.jpg filter=lfs diff=lfs merge=lfs -text
68
  images/ssd_horse.jpg filter=lfs diff=lfs merge=lfs -text
69
+ fastvlm_tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
70
+ main_ax650 filter=lfs diff=lfs merge=lfs -text
71
+ main_ax650_api filter=lfs diff=lfs merge=lfs -text
72
+ main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
73
+ main_axcl_x86_api filter=lfs diff=lfs merge=lfs -text
FastVLM_tokenizer.txt ADDED
The diff for this file is too large to render. See raw diff
 
README.md CHANGED
@@ -34,10 +34,11 @@ How to Convert LLM from Huggingface to axmodel[TODO]
34
  - [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
35
  - [M.2 Accelerator card](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html)
36
 
37
- |Chips|image encoder|ttft|w4a16|
38
- |--|--|--|--|
39
- |AX650| 216.257 ms (1024x1024)| 709.455 ms (291tokens)| 21.38 tokens/sec|
40
- |AX650| 44.747 ms (512x512)| 167.543 ms (99tokens)| 21.38 tokens/sec|
 
41
 
42
 
43
  ## How to use
@@ -47,16 +48,27 @@ Download all files from this repository to the device
47
  ```
48
  $ tree -L 1
49
  .
50
- ├── config.json
51
- ├── fastvlm_ax650_context_1k_prefill_640_int4
52
- ├── fastvlm_tokenizer
53
- ├── images
54
- ├── infer_axmodel.py
55
- ├── README.md
56
- ├── requirements.txt
57
- └── utils
58
-
59
- 5 directories, 4 files
 
 
 
 
 
 
 
 
 
 
 
60
  ```
61
 
62
  #### Install transformer
@@ -69,6 +81,71 @@ pip install -r requirements.txt
69
 
70
  Run the following command on the Axera board to start a chat conversation:
71
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
72
  ```sh
73
  $ python infer_axmodel.py -v ./fastvlm_ax650_context_1k_prefill_640_int4/image_encoder_512x512.axmodel -m ./fastvlm_ax650_context_1k_prefill_640_int4 -t ./fastvlm_tokenizer/ -i 512
74
  ```
 
34
  - [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
35
  - [M.2 Accelerator card](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html)
36
 
37
+ |Chips|image encoder|ttft|w4a16|CMM(GiB)|
38
+ |--|--|--|--|--|
39
+ |AX650| 237.49 ms (1024x1024)| 418.43 ms (291tokens)| 19.87 tokens/sec|1.4|
40
+ |AXCL x86| 233.93 ms (1024x1024)| 779.51 ms (286tokens)| 12.47 tokens/sec|1.4|
41
+ |AX650| 58.33 ms (512x512)| 128.92 ms (100tokens)| 19.87 tokens/sec|1.4|
42
 
43
 
44
  ## How to use
 
48
  ```
49
  $ tree -L 1
50
  .
51
+ |-- FastVLM_tokenizer.txt
52
+ |-- README.md
53
+ |-- config.json
54
+ |-- fastvlm_ax650_context_1k_prefill_640_int4
55
+ |-- fastvlm_tokenizer
56
+ |-- images
57
+ |-- infer_axmodel.py
58
+ |-- main_ax650
59
+ |-- main_ax650_api
60
+ |-- main_axcl_x86
61
+ |-- main_axcl_x86_api
62
+ |-- post_config.json
63
+ |-- requirements.txt
64
+ |-- run_ax650_1024.sh
65
+ |-- run_ax650_512.sh
66
+ |-- run_ax650_api.sh
67
+ |-- run_axcl_x86.sh
68
+ |-- run_axcl_x86_api.sh
69
+ `-- utils
70
+
71
+ 4 directories, 15 files
72
  ```
73
 
74
  #### Install transformer
 
81
 
82
  Run the following command on the Axera board to start a chat conversation:
83
 
84
+ ```bash
85
+ root@ax650:~/FastVLM-1.5B-GPTQ-Int4# ./run_ax650_1024.sh
86
+ [I][ Init][ 134]: LLM init start
87
+ tokenizer_type = 3
88
+ stop_tokens size: 2
89
+ 151645
90
+ 151645
91
+ 6% | ███ | 2 / 31 [2.24s<34.74s, 0.89 count/s] embed_selector init ok
92
+ 100% | ████████████████████████████████ | 31 / 31 [5.96s<5.96s, 5.20 count/s] init post axmodel ok,remain_cmm(8619 MB)[I][ Init][ 252]: IMAGE_CONTEXT_TOKEN: 151646
93
+ [I][ Init][ 284]: image encoder input nhwc@uint8
94
+ [I][ Init][ 308]: image encoder output float32
95
+
96
+ [I][ Init][ 318]: image_encoder_height : 1024, image_encoder_width: 1024
97
+ [I][ Init][ 320]: max_token_len : 1024
98
+ [I][ Init][ 323]: kv_cache_size : 256, kv_cache_num: 1024
99
+ [I][ Init][ 331]: prefill_token_num : 128
100
+ [I][ Init][ 335]: grp: 1, prefill_max_token_num : 1
101
+ [I][ Init][ 335]: grp: 2, prefill_max_token_num : 128
102
+ [I][ Init][ 335]: grp: 3, prefill_max_token_num : 256
103
+ [I][ Init][ 335]: grp: 4, prefill_max_token_num : 512
104
+ [I][ Init][ 335]: grp: 5, prefill_max_token_num : 640
105
+ [I][ Init][ 339]: prefill_max_token_num : 640
106
+ [I][ load_config][ 282]: load config:
107
+ {
108
+ "enable_repetition_penalty": false,
109
+ "enable_temperature": true,
110
+ "enable_top_k_sampling": true,
111
+ "enable_top_p_sampling": false,
112
+ "penalty_window": 30,
113
+ "repetition_penalty": 2,
114
+ "temperature": 0.1,
115
+ "top_k": 10,
116
+ "top_p": 0.8
117
+ }
118
+
119
+ [I][ Init][ 348]: LLM init ok
120
+ Type "q" to exit, Ctrl+c to stop current running
121
+ prompt >> who are you
122
+ image >>
123
+ [I][ Encode][ 470]: input_ids size: 33
124
+ [I][ Run][ 604]: input token num : 33, prefill_split_num : 1
125
+ [I][ Run][ 619]: prefill grpid 2
126
+ [I][ Run][ 646]: input_num_token:33
127
+ [I][ Run][ 770]: ttft: 127.56 ms
128
+ I am FastVLM, a language model developed by Apple Inc.
129
+
130
+ [N][ Run][ 879]: hit eos,avg 19.87 token/s
131
+
132
+ prompt >> describe the image
133
+ image >> ./images/ssd_horse.jpg
134
+ [I][ Encode][ 442]: image encode time : 237.49 ms, size : 393216
135
+ [I][ Encode][ 496]: imgs_embed.size() : 1, media token size : 256
136
+ [I][ Run][ 604]: input token num : 291, prefill_split_num : 3
137
+ [I][ Run][ 619]: prefill grpid 4
138
+ [I][ Run][ 646]: input_num_token:128
139
+ [I][ Run][ 646]: input_num_token:128
140
+ [I][ Run][ 646]: input_num_token:35
141
+ [I][ Run][ 770]: ttft: 418.43 ms
142
+ The image depicts a scene in an outdoor setting, likely a farm or ranch, with a person riding a brown horse. The person is wearing a blue hoodie and jeans, and is seated on a saddle with a yellow blanket underneath. The horse has a white blaze on its face and is standing on a dirt ground. In front of the horse, there is a brown dog wearing a pink collar, looking up at the person on the horse. In the background, there is a silver pickup truck parked near a fence, and beyond the fence, there are trees and other people. The overall atmosphere of the image suggests a casual, outdoor activity, possibly involving horse riding and training.
143
+
144
+ [N][ Run][ 879]: hit eos,avg 19.85 token/s
145
+
146
+ prompt >> q
147
+ ```
148
+
149
  ```sh
150
  $ python infer_axmodel.py -v ./fastvlm_ax650_context_1k_prefill_640_int4/image_encoder_512x512.axmodel -m ./fastvlm_ax650_context_1k_prefill_640_int4 -t ./fastvlm_tokenizer/ -i 512
151
  ```
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l0_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:97c84a7b5a5d81511164a97745d11caa6eca22bb9564405dee34b7426772c3f0
3
- size 29802559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa354f075478707195367c7574873a71a37be99e3a29c9dd5d8b82df37e7f63
3
+ size 30032071
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l10_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cbfc193d6d402f39d56cea0b7943cd72f0382f9fdc98d552609384d1b6eb1a9
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1dcbb2a8f56e526b9491c06ee608cd3a1dd0afa01a09c789866ad7438874515
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l11_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:feb8903b4d2fb7a79e782aa43ee7e07e1a878c2d7d134df1f3304d5d24e74ba0
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61dcbd2e0accc5cbc3b86a1c96a956be07e7f8d60752b7979aa051a6fe22bc8e
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l12_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54c3d3f8634ab7f5e88146a4ec561eecd60e7b7c5adee6501d4d74bd55b51b32
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abafe8799e7275ff4e5d2abfecbfb7d73dc5b3059f9c8bdcff6af91e2fd2145d
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l13_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bf7e682c6873f265ba3a8634b8f9c6bb375e91cef9425e10bc667e36a32370d
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6158b91977b1c32212345deee1dac3c8a4816db6ae9601757712720f3aa35523
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l14_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:320713aa63910d2b23a4a79cf26dbc1a8a2c3fb8c5a3dc308d7c93f36f9b86af
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cfd43f25b51aac7197d6a92ba83fdc11368fbfc5794aaa024a3b047d2e6e551a
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l15_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a1a3276e65abaafb41220aae26b28a9ad272284c3e23443d724d2463be5c18e9
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87087ce133dafe41aa30bca543ad7e4f9393c5a8985825d49969a434870d8d54
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l16_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b42ea532700fc248fce250f33c91296a29c82fb04809c504eccb7f74f3cc3dfc
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d2afc202eeead21c2f755be28106c8e82466800806c249fa36f1dd4bb6ee0233
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l17_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:032c1b803149ced100c0557b460023568f1a53f73a3c1b2f9eaf026f41b963e9
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:919a507d3d5661e202606deebd3fbad3f473dc5f3d9eead5ce8ff4d06dda7b8c
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l18_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:822e5e51c35de9feefe4cb4c7423cea663eef6ecd5a60b314999db1a2ef85f1e
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b50d9131898b6857f12efd3cb506ca7b6f25f355c1a982a8d77b5e342a254953
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l19_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0975632690a7b295dcb8909be1d0ebc257e8cef5b1b60ad47d23a1f1f7318f24
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bf21551105a3545de7d60a2859601f7aa5cabc17713830bf631c9e0c008674ea
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l1_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36a0c441db431e831fb175cd039b7c0970b2a3147b56cc85d29b541d15092614
3
- size 29802559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97748366cd1e6ac901d9e1930b8ebb126cd13855a986bd70e3b5854985053695
3
+ size 30032071
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l20_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:24b6e0956ed865010417c5c4505b7af31f4c3996eaa96bfe678e36157e0ca80f
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6de9b2d3a452965305d0f9d5a42a563b553d2a841cb8c6a62de93a04eb2d2ab2
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l21_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d4d35acc8425f9150a8fc3d51f76df4de7e22fb1ad161f6499d2cf165c0010dd
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b60f6ddb148a67940f3bafd83ca6ef1bd344fb21631242ecb5150da30af90d79
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l22_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fab242ebbeab515d47084a61114e56ab42e45266cc09adfc58e947f30011cbf9
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c7ee3531e4e9b67356d2a801f0fce4212b18625e4b46b5e6efbb6b779e12a7f
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l23_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ff319423a917b998957ea386ffa1612e19e1e5948f792901c9fb0967c47f6376
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:891d76611b5f146310095ee5363c214f6d015d303a14fc6f77917b1f9b2b9cf0
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l24_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cccad862ac8d7101047df049a598a3d8df36a828b2f785efeeecd351f2d8b31b
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32edc5f719b763d0d359640712bc71187f2c0d29303124e39d1a14692b69d32c
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l25_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a35a18afcc36e060fc5025a4bda786db2be588d48f066671137bc9c421e8c435
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:078f16be6b152fb457869e09dd05ab0fd58e69204ef9b26158b743f1a9fbf284
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l26_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83498d03558ba7aae47a2933b28089abb279c40fb37358dab7903b462e7d387c
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:111b0ecf00d2182ed1b4cda860d426f78da586af3eba481e5007e02de952ee82
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l27_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4c85585f12cac18c95b516f485176abda4c11efa4c50eb8064af2ce67135f93a
3
- size 29802567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80c1a8cdc285f38ebb9775108ecc6496d061829138cfa5c4443c48a046957e6d
3
+ size 30032079
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l2_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2d0766c4d014f1df113af2075e1878733eb5168488855d1ece4688892cf1bf49
3
- size 29802559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:841da42bdf30a5a9728b1bd5ed79ae34018f10b27861d295882a9339498e0a2e
3
+ size 30032071
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l3_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d5c7f3cc0274f26a1cf9bf4cb79234ed83c2522e66c68369caba86c0ce0ec969
3
- size 29802559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94994655aeada6cea714e52703ee687fafd503cf884b7b245928e7008dc8227c
3
+ size 30032071
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l4_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b7795e6fd1ca8da46ba039c384b3a7361619048c8fd2dc46307155164d86705c
3
- size 29802559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:be2e0b87c56658492c960792f2664227652b2176cf8a98cda3e44f6375680316
3
+ size 30032071
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l5_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c66a69232635b86d6f9ac5527a461f7f90bffd2b31c5e46fb74cd7b99321dac
3
- size 29802559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6962cf4684917281f7305b83d7cde412f0e96eace8663ec42e0c162135a3017e
3
+ size 30032071
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l6_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e093bae053fbdc910c203248e21c4b66239d32ab273f863f41486ee72ce072c2
3
- size 29802559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:431ca95208e44af62e2efdaea7c4b215ab97bae244a58c83a7f2244bbb05990b
3
+ size 30032071
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l7_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba313e7f26818e4fa36027e62b9276c1d7c48a342505a78812db0d6fae748aca
3
- size 29802559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bba03599293d8aa1853d3a0ec7c6fe00b174a0dadd03da6cb4cdd56af44503ec
3
+ size 30032071
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l8_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bfd2c82228f858f1dd15f46d23239197b0b62d435eb53a0eae73c21c3e1d5a8
3
- size 29802559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee264eaca720fa9f8ddf2f1c537c56d39ba525d246de91f4bc9aa09d61e9782b
3
+ size 30032071
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_p128_l9_together.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:673154996aa1caf889a9932f09e56395e87e2aa32d5eb16f7056eaf5a0ce371e
3
- size 29802559
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4e7d245920e9d45da51a5d187c5f0acbe2941bd483492cfe58daaa23f8740303
3
+ size 30032071
fastvlm_ax650_context_1k_prefill_640_int4/llava_qwen2_post.axmodel CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fba049bc6df809d7f4cd2f1baa3e01ccfd88c5388356ac7b4facc507d744ebca
3
- size 254826994
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faea2655f070f91f8b25b75df246ae4de7348696b9f1b23b361bd9f4b01f199c
3
+ size 254344594
fastvlm_ax650_context_1k_prefill_640_int4/model.embed_tokens.weight.bfloat16.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e01aea64483c11c7de1dbfb8fac8b07860ad669718deb5cf35b8eb71a0dbe593
3
- size 466747392
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2791648834191a3080f8952421197b98ee45cdc9337cf45abc35103562347b79
3
+ size 465859584
fastvlm_ax650_context_1k_prefill_640_int4/model.embed_tokens.weight.npy CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8f682683aa248b48679ed39eb816e4066c7c4a9f53f7b94bad2aef079a63a6d3
3
- size 933494912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1045cc5b2298c3032c2f6a3f1b2f83bc4aa69d39f32476d9bfd7b68b9d380ccd
3
+ size 931719296
fastvlm_tokenizer/added_tokens.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "<|endoftext|>": 151643,
3
  "<|im_end|>": 151645,
4
  "<|im_start|>": 151644
 
1
  {
2
+ "<image>": 151646,
3
  "<|endoftext|>": 151643,
4
  "<|im_end|>": 151645,
5
  "<|im_start|>": 151644
fastvlm_tokenizer/config.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
- "_name_or_path": "./llava-v1.5-13b",
3
  "architectures": [
4
  "LlavaQwen2ForCausalLM"
5
  ],
 
6
  "auto_map": {
7
  "AutoConfig": "llava_qwen.LlavaConfig",
8
  "AutoModelForCausalLM": "llava_qwen.LlavaQwen2ForCausalLM"
9
- },
10
- "attention_dropout": 0.0,
11
  "bos_token_id": 151643,
 
12
  "eos_token_id": 151645,
13
  "freeze_mm_mlp_adapter": false,
14
  "hidden_act": "silu",
@@ -17,6 +17,36 @@
17
  "image_grid_pinpoints": null,
18
  "initializer_range": 0.02,
19
  "intermediate_size": 8960,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
  "max_position_embeddings": 32768,
21
  "max_window_layers": 28,
22
  "mm_hidden_size": 3072,
@@ -33,17 +63,17 @@
33
  "num_hidden_layers": 28,
34
  "num_key_value_heads": 2,
35
  "rms_norm_eps": 1e-06,
 
36
  "rope_theta": 1000000.0,
37
- "sliding_window": 32768,
38
  "tie_word_embeddings": true,
39
  "tokenizer_model_max_length": 8192,
40
  "tokenizer_padding_side": "right",
41
- "torch_dtype": "bfloat16",
42
- "transformers_version": "4.39.3",
43
  "tune_mm_mlp_adapter": false,
44
  "unfreeze_mm_vision_tower": true,
45
  "use_cache": true,
46
  "use_mm_proj": true,
47
  "use_sliding_window": false,
48
- "vocab_size": 151936
49
  }
 
1
  {
 
2
  "architectures": [
3
  "LlavaQwen2ForCausalLM"
4
  ],
5
+ "attention_dropout": 0.0,
6
  "auto_map": {
7
  "AutoConfig": "llava_qwen.LlavaConfig",
8
  "AutoModelForCausalLM": "llava_qwen.LlavaQwen2ForCausalLM"
9
+ },
 
10
  "bos_token_id": 151643,
11
+ "dtype": "float32",
12
  "eos_token_id": 151645,
13
  "freeze_mm_mlp_adapter": false,
14
  "hidden_act": "silu",
 
17
  "image_grid_pinpoints": null,
18
  "initializer_range": 0.02,
19
  "intermediate_size": 8960,
20
+ "layer_types": [
21
+ "full_attention",
22
+ "full_attention",
23
+ "full_attention",
24
+ "full_attention",
25
+ "full_attention",
26
+ "full_attention",
27
+ "full_attention",
28
+ "full_attention",
29
+ "full_attention",
30
+ "full_attention",
31
+ "full_attention",
32
+ "full_attention",
33
+ "full_attention",
34
+ "full_attention",
35
+ "full_attention",
36
+ "full_attention",
37
+ "full_attention",
38
+ "full_attention",
39
+ "full_attention",
40
+ "full_attention",
41
+ "full_attention",
42
+ "full_attention",
43
+ "full_attention",
44
+ "full_attention",
45
+ "full_attention",
46
+ "full_attention",
47
+ "full_attention",
48
+ "full_attention"
49
+ ],
50
  "max_position_embeddings": 32768,
51
  "max_window_layers": 28,
52
  "mm_hidden_size": 3072,
 
63
  "num_hidden_layers": 28,
64
  "num_key_value_heads": 2,
65
  "rms_norm_eps": 1e-06,
66
+ "rope_scaling": null,
67
  "rope_theta": 1000000.0,
68
+ "sliding_window": null,
69
  "tie_word_embeddings": true,
70
  "tokenizer_model_max_length": 8192,
71
  "tokenizer_padding_side": "right",
72
+ "transformers_version": "4.57.0",
 
73
  "tune_mm_mlp_adapter": false,
74
  "unfreeze_mm_vision_tower": true,
75
  "use_cache": true,
76
  "use_mm_proj": true,
77
  "use_sliding_window": false,
78
+ "vocab_size": 151647
79
  }
fastvlm_tokenizer/generation_config.json CHANGED
@@ -2,5 +2,5 @@
2
  "do_sample": true,
3
  "temperature": null,
4
  "top_p": null,
5
- "transformers_version": "4.39.3"
6
  }
 
2
  "do_sample": true,
3
  "temperature": null,
4
  "top_p": null,
5
+ "transformers_version": "4.57.0"
6
  }
fastvlm_tokenizer/tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b187c9fe72f04a62ed1f592418d79751ed77f5eab6c3abded85349cf97f152ea
3
+ size 11413284
fastvlm_tokenizer/tokenizer_config.json CHANGED
@@ -24,6 +24,14 @@
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
 
 
 
 
 
 
 
 
27
  }
28
  },
29
  "additional_special_tokens": [
@@ -31,10 +39,10 @@
31
  "<|im_end|>"
32
  ],
33
  "bos_token": null,
34
- "chat_template": "{% for message in messages %}{% if loop.first and messages[0]['role'] != 'system' %}{{ '<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n' }}{% endif %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
35
  "clean_up_tokenization_spaces": false,
36
  "eos_token": "<|im_end|>",
37
  "errors": "replace",
 
38
  "model_max_length": 8192,
39
  "pad_token": "<|endoftext|>",
40
  "padding_side": "right",
 
24
  "rstrip": false,
25
  "single_word": false,
26
  "special": true
27
+ },
28
+ "151646": {
29
+ "content": "<image>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": false
35
  }
36
  },
37
  "additional_special_tokens": [
 
39
  "<|im_end|>"
40
  ],
41
  "bos_token": null,
 
42
  "clean_up_tokenization_spaces": false,
43
  "eos_token": "<|im_end|>",
44
  "errors": "replace",
45
+ "extra_special_tokens": {},
46
  "model_max_length": 8192,
47
  "pad_token": "<|endoftext|>",
48
  "padding_side": "right",
fastvlm_tokenizer/vocab.json CHANGED
The diff for this file is too large to render. See raw diff
 
infer_axmodel.py CHANGED
@@ -43,9 +43,7 @@ def vision_encoder(image_path, ax_session, args):
43
 
44
  return vit_output
45
 
46
- def llm_infer(image_features, llm_path, config, tokenizer, imer, get_input, token_length):
47
-
48
- embeds = np.load(os.path.join(llm_path, "model.embed_tokens.weight.npy"))
49
 
50
  prompt = "<|im_start|>system\nYou are a helpful assistant, created by apple company.<|im_end|>\n"
51
  question = get_input
@@ -53,10 +51,10 @@ def llm_infer(image_features, llm_path, config, tokenizer, imer, get_input, toke
53
 
54
  if image_features is not None:
55
  # # for idx in range(len(image_features)):
56
- prompt += "\n<img>" + "<image>"*token_length + "</img>\n"
57
  prompt += "<|im_end|>\n<|im_start|>assistant\n"
58
 
59
- token_ids = tokenizer_image_token(prompt, tokenizer, IMAGE_TOKEN_INDEX)
60
 
61
  # 图像理解
62
  prefill_data = np.take(embeds, token_ids, axis=0)
@@ -64,7 +62,7 @@ def llm_infer(image_features, llm_path, config, tokenizer, imer, get_input, toke
64
  token_len = len(token_ids)
65
 
66
  if image_features is not None:
67
- image_start_index = np.where(np.array(token_ids) == -200)[0][0] # <image> tag 151646
68
  image_insert_index = image_start_index + 1
69
  prefill_data[image_insert_index : image_insert_index + token_length] = image_features[0, :, :]
70
 
@@ -85,8 +83,8 @@ def llm_infer(image_features, llm_path, config, tokenizer, imer, get_input, toke
85
  if __name__ == "__main__":
86
 
87
  args = argparse.ArgumentParser()
88
- args.add_argument("--vision_model", "-v", type=str, default="./fastvlm_ax650_context_1k_prefill_640/image_encoder_1024x1024.axmodel", help="Path to the vision axmodel.")
89
- args.add_argument("--model_path", "-m", type=str, default="./fastvlm_ax650_context_1k_prefill_640", help="Path to the llm axmodel.")
90
  args.add_argument("--tokenizer_path", "-t", type=str, default="./fastvlm_tokenizer", help="Path to the tokenizer.")
91
  args.add_argument("--input_size", "-i", type=str, default="1024", help="Input size of the vision encoder model.")
92
  # args.add_argument("--question", type=str, default="介绍一下你自己", help="The question to ask the model.")
@@ -110,6 +108,7 @@ if __name__ == "__main__":
110
 
111
  imer = InferManager(config, args.model_path, max_seq_len=max_seq_len) # prefill + decode max length
112
  ax_session = ax.InferenceSession(args.vision_model)
 
113
 
114
  print(f"[INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。")
115
  while True:
@@ -125,7 +124,7 @@ if __name__ == "__main__":
125
  continue
126
  image_features = vision_encoder(get_input, ax_session, args)
127
  get_input = "Describe the image in detail."
128
- llm_infer(image_features, args.model_path, config, tokenizer, imer, get_input, token_length)
129
  else:
130
  image_features = None
131
- llm_infer(image_features, args.model_path, config, tokenizer, imer, get_input, token_length)
 
43
 
44
  return vit_output
45
 
46
+ def llm_infer(image_features, llm_path, config, tokenizer, imer, get_input, token_length, embeds):
 
 
47
 
48
  prompt = "<|im_start|>system\nYou are a helpful assistant, created by apple company.<|im_end|>\n"
49
  question = get_input
 
51
 
52
  if image_features is not None:
53
  # # for idx in range(len(image_features)):
54
+ prompt += "\n" + "<image>"*token_length + "\n"
55
  prompt += "<|im_end|>\n<|im_start|>assistant\n"
56
 
57
+ token_ids = tokenizer.encode(prompt)
58
 
59
  # 图像理解
60
  prefill_data = np.take(embeds, token_ids, axis=0)
 
62
  token_len = len(token_ids)
63
 
64
  if image_features is not None:
65
+ image_start_index = np.where(np.array(token_ids) == 151646)[0][0] # <image> tag 151646
66
  image_insert_index = image_start_index + 1
67
  prefill_data[image_insert_index : image_insert_index + token_length] = image_features[0, :, :]
68
 
 
83
  if __name__ == "__main__":
84
 
85
  args = argparse.ArgumentParser()
86
+ args.add_argument("--vision_model", "-v", type=str, default="./fastvlm_ax650_context_1k_prefill_640_int4/image_encoder_1024x1024.axmodel", help="Path to the vision axmodel.")
87
+ args.add_argument("--model_path", "-m", type=str, default="./fastvlm_ax650_context_1k_prefill_640_int4", help="Path to the llm axmodel.")
88
  args.add_argument("--tokenizer_path", "-t", type=str, default="./fastvlm_tokenizer", help="Path to the tokenizer.")
89
  args.add_argument("--input_size", "-i", type=str, default="1024", help="Input size of the vision encoder model.")
90
  # args.add_argument("--question", type=str, default="介绍一下你自己", help="The question to ask the model.")
 
108
 
109
  imer = InferManager(config, args.model_path, max_seq_len=max_seq_len) # prefill + decode max length
110
  ax_session = ax.InferenceSession(args.vision_model)
111
+ embeds = np.load(os.path.join(args.model_path, "model.embed_tokens.weight.npy"))
112
 
113
  print(f"[INFO]: 输入文本进行对话,或者输入图片路径进行图片理解, 或者输入q退出对话。")
114
  while True:
 
124
  continue
125
  image_features = vision_encoder(get_input, ax_session, args)
126
  get_input = "Describe the image in detail."
127
+ llm_infer(image_features, args.model_path, config, tokenizer, imer, get_input, token_length, embeds)
128
  else:
129
  image_features = None
130
+ llm_infer(image_features, args.model_path, config, tokenizer, imer, get_input, token_length, embeds)
main_ax650 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e3e4365188762fe10890b8c02b87e576faeb756bc5904804d5cbb0f7b664879e
3
+ size 1215488
main_ax650_api ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:748b2f6cbf36c6ed15fb3ae75df39741332c8793c46f9caf6878a7834d3fe718
3
+ size 1309168
main_axcl_x86 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7205b7ade7c5efaeef89fb3e458b6da11508e6d9fe992cfb92eefe0094c4a8b5
3
+ size 7009480
main_axcl_x86_api ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c6c2dd57d2679f349a22279a4f4b92f941ec62314b58b38052228552ff0bd70
3
+ size 7128832
post_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "enable_temperature" : true,
3
+ "temperature" : 0.1,
4
+
5
+ "enable_repetition_penalty" : false,
6
+ "repetition_penalty" : 2,
7
+ "penalty_window" : 30,
8
+
9
+ "enable_top_p_sampling" : false,
10
+ "top_p" : 0.8,
11
+
12
+ "enable_top_k_sampling" : true,
13
+ "top_k" : 10
14
+ }
run_ax650_1024.sh ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AXMODEL_DIR=./fastvlm_ax650_context_1k_prefill_640_int4/
2
+
3
+ ./main_ax650 \
4
+ --template_filename_axmodel "${AXMODEL_DIR}/llava_qwen2_p128_l%d_together.axmodel" \
5
+ --filename_post_axmodel "${AXMODEL_DIR}/llava_qwen2_post.axmodel" \
6
+ --filename_tokenizer_txt "FastVLM_tokenizer.txt" \
7
+ --filename_tokens_embed "${AXMODEL_DIR}/model.embed_tokens.weight.bfloat16.bin" \
8
+ --filename_image_encoder_axmodedl "${AXMODEL_DIR}/image_encoder_1024x1024.axmodel" \
9
+ --axmodel_num 28 \
10
+ --tokens_embed_num 151647 \
11
+ --tokens_embed_size 1536 \
12
+ --live_print 1 \
13
+ --img_width 1024 \
14
+ --img_height 1024
run_ax650_512.sh ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AXMODEL_DIR=./fastvlm_ax650_context_1k_prefill_640_int4/
2
+
3
+ ./main_ax650 \
4
+ --template_filename_axmodel "${AXMODEL_DIR}/llava_qwen2_p128_l%d_together.axmodel" \
5
+ --filename_post_axmodel "${AXMODEL_DIR}/llava_qwen2_post.axmodel" \
6
+ --filename_tokenizer_txt "FastVLM_tokenizer.txt" \
7
+ --filename_tokens_embed "${AXMODEL_DIR}/model.embed_tokens.weight.bfloat16.bin" \
8
+ --filename_image_encoder_axmodedl "${AXMODEL_DIR}/image_encoder_512x512.axmodel" \
9
+ --axmodel_num 28 \
10
+ --tokens_embed_num 151647 \
11
+ --tokens_embed_size 1536 \
12
+ --live_print 1 \
13
+ --img_width 512 \
14
+ --img_height 512
run_ax650_api.sh ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AXMODEL_DIR=./fastvlm_ax650_context_1k_prefill_640_int4/
2
+
3
+ ./main_ax650_api \
4
+ --template_filename_axmodel "${AXMODEL_DIR}/llava_qwen2_p128_l%d_together.axmodel" \
5
+ --filename_post_axmodel "${AXMODEL_DIR}/llava_qwen2_post.axmodel" \
6
+ --filename_tokenizer_txt "FastVLM_tokenizer.txt" \
7
+ --filename_tokens_embed "${AXMODEL_DIR}/model.embed_tokens.weight.bfloat16.bin" \
8
+ --filename_image_encoder_axmodedl "${AXMODEL_DIR}/image_encoder_1024x1024.axmodel" \
9
+ --axmodel_num 28 \
10
+ --tokens_embed_num 151647 \
11
+ --tokens_embed_size 1536 \
12
+ --img_width 1024 \
13
+ --img_height 1024
run_axcl_x86.sh ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ AXMODEL_DIR=./fastvlm_ax650_context_1k_prefill_640_int4/
2
+
3
+ ./main_axcl_x86 \
4
+ --template_filename_axmodel "${AXMODEL_DIR}/llava_qwen2_p128_l%d_together.axmodel" \
5
+ --filename_post_axmodel "${AXMODEL_DIR}/llava_qwen2_post.axmodel" \
6
+ --filename_tokenizer_txt "FastVLM_tokenizer.txt" \
7
+ --filename_tokens_embed "${AXMODEL_DIR}/model.embed_tokens.weight.bfloat16.bin" \
8
+ --filename_image_encoder_axmodel "${AXMODEL_DIR}/image_encoder_1024x1024.axmodel" \
9
+ --axmodel_num 28 \
10
+ --tokens_embed_num 151647 \
11
+ --tokens_embed_size 1536 \
12
+ --live_print 1 \
13
+ --img_width 1024 \
14
+ --img_height 1024 \
15
+ --devices 0