update axmodel and demo
Browse files- README.md +1 -1
- main_ax650 +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/model.embed_tokens.weight.bfloat16.bin +1 -1
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l0_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l10_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l11_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l12_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l13_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l14_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l15_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l16_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l17_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l18_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l19_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l1_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l20_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l21_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l22_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l23_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l2_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l3_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l4_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l5_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l6_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l7_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l8_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l9_together.axmodel +2 -2
- qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_post.axmodel +2 -2
- run_qwen2.5_0.5b_gptq_int8_ctx_ax630c.sh +1 -1
- run_qwen2.5_0.5b_gptq_int8_ctx_ax650.sh +1 -1
README.md
CHANGED
|
@@ -18,7 +18,7 @@ This version of Qwen2.5-0.5B-Instruct-GPTQ-Int8 has been converted to run on the
|
|
| 18 |
|
| 19 |
This model has been optimized with the following LoRA:
|
| 20 |
|
| 21 |
-
Compatible with Pulsar2 version: 4.
|
| 22 |
|
| 23 |
## Convert tools links:
|
| 24 |
|
|
|
|
| 18 |
|
| 19 |
This model has been optimized with the following LoRA:
|
| 20 |
|
| 21 |
+
Compatible with Pulsar2 version: 4.2(Not released yet)
|
| 22 |
|
| 23 |
## Convert tools links:
|
| 24 |
|
main_ax650
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f19ddeb193769b16aa8c5d9bba887558aa0a4ed10eb50a19d9bc117f1ba527e5
|
| 3 |
+
size 985352
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/model.embed_tokens.weight.bfloat16.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 272269312
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4a00e7baebfd01831d3104348802fb85cc075365a86b6b0007515fa1b9225e7d
|
| 3 |
size 272269312
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l0_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:128e6cadf4083309b89da1a059228acb47a6f74b732f742a93d395e386ba84c1
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l10_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59a5cd4ad74fd5205102addf46b4cc26a6b985fedb73859ae902731dca9b139c
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l11_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e58e2425dfeecc5e23478503ec469ddbfa63af624da4ec9ccf2fd85acb60c0c
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l12_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4250f0f5c6cb0a728ed00da2f7b426012762312d97edca1fa836eb9acb7a9ea8
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l13_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d6e33372d3f4cb653ed75b8738776040a8110028e22904c61a78e79b629fbfa9
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l14_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6f8de0c0fbfc68096a7f93bba1bc302e0167b7ee713dc5c1e4ece744d58ba2b7
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l15_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9339020a685bd121161433cddf8e72298500308d6d89769c3551f686406c2287
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l16_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bf2e34b64231999dd9bdf8b975acf51f00eb0c74d1683d5add95baa0307cf5bc
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l17_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c3565d1d53ca5c6603a4a6c4c703a01d3b89bece2a869372068296595066fa3e
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l18_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:314226f0fefc4fbca97f554563a1154f1c1bdaa5df77fe73c0640315749861e8
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l19_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:550b4ec9b83ada1cddd5cb0985fab8dfd29835cbd7a555eecb74de4206766302
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l1_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:db9f84feff98f5a4cbebfabe2dcc8c0d4c92ee0f3a252c3dc3e89a6785602814
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l20_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4eabdc12a0ec3f8729302cf5ec3a997d2e133c68af710faa43636ee6df60ab9f
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l21_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:213aaf6c83a75737a9a3cd046d9307715302d009df5b6876fd5a6c0010db5a80
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l22_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:27c4e7f550a4ab1aca31f5bc2384baecb415be04fa67d9e666f22458684e0ac4
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l23_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1a276d53e37b5dba617b12a43f3c5566792de774ab08721655cf9f5df2d4f229
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l2_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c710265c1e0aee7f0b7351aeed07b2a360cbf254e61819cf3eb0fbef40a0ad72
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l3_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:67bf20d21e7e5bb2d9417c96ac20b1e3f3a03b65b03c1709b632eef2d835f1ba
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l4_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1231dbcdb5bb32647b6947196a36b794653e48d5f127f4e6996d7860c229911a
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l5_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:59f6feeee7f4115ac7e0dd852952fda143ffccb4a763ca265f97b88af48f1e8e
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l6_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ea331a3e5c88485f551ef14e9e7f228e73dfe63115bd5bd9f62a0f34a3060f3a
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l7_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ca2f28e4578c9b25a72362c2b4c8285b8700c8f40b28de3e915fe9a58fb6877
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l8_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:4d2ea8f4bffd3b3f030d1e7824742a57ffc0e5f01f6061b360aeb35c7ee82391
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l9_together.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d8b2fdcd0696e6cb585e7cf2484591e8af61fe2062fb023fa4be9ba67a363a3f
|
| 3 |
+
size 20063198
|
qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_post.axmodel
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9edc8f408ac8ac21d2bde360b9725d6754fab437499921acf769b83af1c6a96c
|
| 3 |
+
size 147954839
|
run_qwen2.5_0.5b_gptq_int8_ctx_ax630c.sh
CHANGED
|
@@ -2,7 +2,6 @@
|
|
| 2 |
--system_prompt "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
|
| 3 |
--template_filename_axmodel "qwen2.5-0.5b-gptq-int8-ctx-ax630c/qwen2_p128_l%d_together.axmodel" \
|
| 4 |
--axmodel_num 24 \
|
| 5 |
-
--tokenizer_type 2 \
|
| 6 |
--url_tokenizer_model "http://127.0.0.1:12345" \
|
| 7 |
--filename_post_axmodel "qwen2.5-0.5b-gptq-int8-ctx-ax630c/qwen2_post.axmodel" \
|
| 8 |
--filename_tokens_embed "qwen2.5-0.5b-gptq-int8-ctx-ax630c/model.embed_tokens.weight.bfloat16.bin" \
|
|
@@ -12,3 +11,4 @@
|
|
| 12 |
--live_print 1
|
| 13 |
|
| 14 |
# --kvcache_path /home/axera/ax-llm/build/kvcache_yuanqi \
|
|
|
|
|
|
| 2 |
--system_prompt "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
|
| 3 |
--template_filename_axmodel "qwen2.5-0.5b-gptq-int8-ctx-ax630c/qwen2_p128_l%d_together.axmodel" \
|
| 4 |
--axmodel_num 24 \
|
|
|
|
| 5 |
--url_tokenizer_model "http://127.0.0.1:12345" \
|
| 6 |
--filename_post_axmodel "qwen2.5-0.5b-gptq-int8-ctx-ax630c/qwen2_post.axmodel" \
|
| 7 |
--filename_tokens_embed "qwen2.5-0.5b-gptq-int8-ctx-ax630c/model.embed_tokens.weight.bfloat16.bin" \
|
|
|
|
| 11 |
--live_print 1
|
| 12 |
|
| 13 |
# --kvcache_path /home/axera/ax-llm/build/kvcache_yuanqi \
|
| 14 |
+
# --tokenizer_type 2 \
|
run_qwen2.5_0.5b_gptq_int8_ctx_ax650.sh
CHANGED
|
@@ -2,7 +2,6 @@
|
|
| 2 |
--system_prompt "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
|
| 3 |
--template_filename_axmodel "qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l%d_together.axmodel" \
|
| 4 |
--axmodel_num 24 \
|
| 5 |
-
--tokenizer_type 2 \
|
| 6 |
--url_tokenizer_model "http://127.0.0.1:12345" \
|
| 7 |
--filename_post_axmodel "qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_post.axmodel" \
|
| 8 |
--filename_tokens_embed "qwen2.5-0.5b-gptq-int8-ctx-ax650/model.embed_tokens.weight.bfloat16.bin" \
|
|
@@ -12,3 +11,4 @@
|
|
| 12 |
--live_print 1
|
| 13 |
|
| 14 |
# --kvcache_path /home/axera/ax-llm/build/kvcache_yuanqi \
|
|
|
|
|
|
| 2 |
--system_prompt "You are Qwen, created by Alibaba Cloud. You are a helpful assistant." \
|
| 3 |
--template_filename_axmodel "qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_p128_l%d_together.axmodel" \
|
| 4 |
--axmodel_num 24 \
|
|
|
|
| 5 |
--url_tokenizer_model "http://127.0.0.1:12345" \
|
| 6 |
--filename_post_axmodel "qwen2.5-0.5b-gptq-int8-ctx-ax650/qwen2_post.axmodel" \
|
| 7 |
--filename_tokens_embed "qwen2.5-0.5b-gptq-int8-ctx-ax650/model.embed_tokens.weight.bfloat16.bin" \
|
|
|
|
| 11 |
--live_print 1
|
| 12 |
|
| 13 |
# --kvcache_path /home/axera/ax-llm/build/kvcache_yuanqi \
|
| 14 |
+
# --tokenizer_type 2 \
|