diff --git a/.gitattributes b/.gitattributes index 612339144c9f4c73c9dea4af4687fe6b14987d20..56df39bd611dcecf2214fe5606e7f0ffc83b9e77 100644 --- a/.gitattributes +++ b/.gitattributes @@ -97,3 +97,33 @@ figures/figures_benchmark.jpg filter=lfs diff=lfs merge=lfs -text figures/benchmark.jpg filter=lfs diff=lfs merge=lfs -text main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text main_axcl_x86 filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l0_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l10_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l11_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l12_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l13_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l14_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l15_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l16_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l17_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l18_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l19_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l1_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l20_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l21_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l22_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l23_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l24_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l25_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l26_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l27_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l2_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l3_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l4_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l5_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l6_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l7_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l8_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_p128_l9_together.axmodel filter=lfs diff=lfs merge=lfs -text +deepseek-r1-1.5b-int4-ax650/qwen2_post.axmodel filter=lfs diff=lfs merge=lfs -text +main_ax650 filter=lfs diff=lfs merge=lfs -text diff --git a/README.md b/README.md index b80b3b257b648b5b6da0691ad22400e06d77e3c3..9feb6369318d315298857229dc23973899ec4352 100644 --- a/README.md +++ b/README.md @@ -18,7 +18,7 @@ This version of DeepSeek-R1-Distill-Qwen-1.5B has been converted to run on the A This model has been optimized with the following LoRA: -Compatible with Pulsar2 version: 4.1 +Compatible with Pulsar2 version: 4.2 ## Feature @@ -28,7 +28,7 @@ Compatible with Pulsar2 version: 4.1 ## Convert tools links: -For those who are interested in model conversion, you can try to export axmodel through the original repo : https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B +For those who are interested in model conversion, you can try to export axmodel through the original repo : https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B and https://huggingface.co/jakiAJK/DeepSeek-R1-Distill-Qwen-1.5B_GPTQ-int4 [Pulsar2 Link, How to Convert LLM from Huggingface to axmodel](https://pulsar2-docs.readthedocs.io/en/latest/appendix/build_llm.html) @@ -79,26 +79,32 @@ Download all files from this repository to the device ``` root@ax650:/mnt/qtang/llm-test/deepseek-r1-1.5b-ctx# tree -L 1 . +|-- README.md |-- config.json -|-- deepseek-r1-1.5b-ctx-ax650 +|-- deepseek-r1-1.5b-ax650 +|-- deepseek-r1-1.5b-int4-ax650 |-- deepseek-r1_tokenizer -|-- deepseek-r1_tokenizer_uid.py +|-- deepseek-r1_tokenizer.py +|-- figures |-- main_ax650 |-- main_axcl_aarch64 |-- main_axcl_x86 |-- post_config.json -|-- run_deepseek-r1_1.5b_ctx_ax650.sh -|-- run_deepseek-r1_1.5b_ctx_axcl_aarch64.sh -`-- run_deepseek-r1_1.5b_ctx_axcl_x86.sh - -2 directories, 9 files +|-- run_deepseek-r1_1.5B_ax650.sh +|-- run_deepseek-r1_1.5B_int4_ax650.sh +|-- run_deepseek-r1_1.5b_axcl_aarch64.sh +|-- run_deepseek-r1_1.5b_axcl_x86.sh +|-- run_deepseek-r1_1.5b_int4_axcl_aarch64.sh +`-- run_deepseek-r1_1.5b_int4_axcl_x86.sh + +4 directories, 16 files ``` #### Start the Tokenizer service ``` root@ax650:/mnt/qtang/llm-test/deepseek-r1-1.5b-ctx# python3 deepseek-r1_tokenizer_uid.py -Server running at http://0.0.0.0:12345 +Server running at http://127.0.0.1:12345 ``` #### System prompt cache @@ -108,14 +114,13 @@ Server running at http://0.0.0.0:12345 - This folder needs to be created manually before running, for example `mkdir kvcache` ``` -root@ax650:/mnt/qtang/llm-test/deepseek-r1-1.5b-ctx# cat run_deepseek-r1_1.5b_ctx_ax650.sh +root@ax650:/mnt/qtang/llm-test/deepseek-r1-1.5b-ctx# cat run_deepseek-r1_1.5b_ax650.sh ./main_ax650 \ ---template_filename_axmodel "deepseek-r1-1.5b-ctx-ax650/qwen2_p128_l%d_together.axmodel" \ +--template_filename_axmodel "deepseek-r1-1.5b-ax650/qwen2_p128_l%d_together.axmodel" \ --axmodel_num 28 \ ---tokenizer_type 2 \ ---url_tokenizer_model "http://0.0.0.0:12345" \ ---filename_post_axmodel "deepseek-r1-1.5b-ctx-ax650/qwen2_post.axmodel" \ ---filename_tokens_embed "deepseek-r1-1.5b-ctx-ax650/model.embed_tokens.weight.bfloat16.bin" \ +--url_tokenizer_model "http://127.0.0.1:12345" \ +--filename_post_axmodel "deepseek-r1-1.5b-ax650/qwen2_post.axmodel" \ +--filename_tokens_embed "deepseek-r1-1.5b-ax650/model.embed_tokens.weight.bfloat16.bin" \ --tokens_embed_num 151936 \ --tokens_embed_size 1536 \ --use_mmap_load_embed 1 \ @@ -124,18 +129,16 @@ root@ax650:/mnt/qtang/llm-test/deepseek-r1-1.5b-ctx# cat run_deepseek-r1_1.5b_ct #### Inference with AX650 Host, such as M4N-Dock(爱芯派Pro) or AX650N DEMO Board -Open another terminal and run `run_deepseek-r1_1.5b_ctx_ax650.sh` +Open another terminal and run `run_deepseek-r1_1.5b_ax650.sh` ``` -root@ax650:/mnt/qtang/llm-test/deepseek-r1-1.5b-ctx# ./run_deepseek-r1_1.5b_ctx_ax650.sh +root@ax650:/mnt/qtang/llm-test/deepseek-r1-1.5b-ctx# ./run_deepseek-r1_1.5b_ax650.sh [I][ Init][ 110]: LLM init start -[I][ Init][ 34]: connect http://0.0.0.0:12345 ok -[I][ Init][ 57]: uid: 7fedc3e5-e824-4915-935a-c0de5a341928 +[I][ Init][ 34]: connect http://127.0.0.1:12345 ok +[I][ Init][ 57]: uid: 96cbe293-ff24-4011-b3a1-b5ab95234990 bos_id: 151646, eos_id: 151643 - 3% | ██ | 1 / 31 [2.28s<70.62s, 0.44 count/s] tokenizer init ok -[I][ Init][ 26]: LLaMaEmbedSelector use mmap -100% | ████████████████████████████████ | 31 / 31 [26.47s<26.47s, 1.17 count/s] init post axmodel ok,remain_cmm(8947 MB) -[I][ Init][ 188]: max_token_len : 2047 + 3% | ██ | 1 / 31 [2.07s<64.23s, 0.48 count/s] tokenizer init ok[I][ Init][ 26]: LLaMaEmbedSelector use mmap +100% | ████████████████████████████████ | 31 / 31 [8.76s<8.76s, 3.54 count/s] init post axmodel ok,remain_cmm(8114 MB)[I][ Init][ 188]: max_token_len : 2047 [I][ Init][ 193]: kv_cache_size : 256, kv_cache_num: 2047 [I][ Init][ 201]: prefill_token_num : 128 [I][ Init][ 205]: grp: 1, prefill_max_token_num : 1 @@ -147,11 +150,7 @@ bos_id: 151646, eos_id: 151643 [I][ Init][ 205]: grp: 7, prefill_max_token_num : 768 [I][ Init][ 205]: grp: 8, prefill_max_token_num : 896 [I][ Init][ 205]: grp: 9, prefill_max_token_num : 1024 -[I][ Init][ 205]: grp: 10, prefill_max_token_num : 1152 -[I][ Init][ 205]: grp: 11, prefill_max_token_num : 1280 -[I][ Init][ 205]: grp: 12, prefill_max_token_num : 1408 -[I][ Init][ 205]: grp: 13, prefill_max_token_num : 1536 -[I][ Init][ 209]: prefill_max_token_num : 1536 +[I][ Init][ 209]: prefill_max_token_num : 1024 [I][ load_config][ 282]: load config: { "enable_repetition_penalty": false, @@ -167,31 +166,24 @@ bos_id: 151646, eos_id: 151643 [I][ Init][ 218]: LLM init ok Type "q" to exit, Ctrl+c to stop current running -[I][ GenerateKVCachePrefill][ 271]: input token num : 16, prefill_split_num : 1 prefill_grpid : 2 -[I][ GenerateKVCachePrefill][ 308]: input_num_token:16 -[I][ main][ 230]: precompute_len: 16 -[I][ main][ 231]: system_prompt: -prompt >> 1+2=? -[I][ SetKVCache][ 531]: prefill_grpid:2 kv_cache_num:128 precompute_len:16 input_num_token:8 -[I][ SetKVCache][ 534]: current prefill_max_token_num:1408 -[I][ Run][ 660]: input token num : 8, prefill_split_num : 1 -[I][ Run][ 686]: input_num_token:8 -[I][ Run][ 829]: ttft: 306.60 ms +[I][ GenerateKVCachePrefill][ 275]: input token num : 11, prefill_split_num : 1 prefill_grpid : 2 +[I][ GenerateKVCachePrefill][ 315]: input_num_token:11 +[I][ main][ 228]: precompute_len: 11 +[I][ main][ 229]: system_prompt: +prompt >> 你是谁 +[I][ SetKVCache][ 529]: prefill_grpid:2 kv_cache_num:128 precompute_len:11 input_num_token:6 +[I][ SetKVCache][ 532]: current prefill_max_token_num:896 +[I][ Run][ 658]: input token num : 6, prefill_split_num : 1 +[I][ Run][ 684]: input_num_token:6 +[I][ Run][ 807]: ttft: 256.87 ms -Okay, the user has asked "1+2=?", which is a simple addition question. -I should provide the answer, but also consider if there's more to it. - -Since the user specified "Qwen, created by Alibaba Cloud," -maybe they're testing if I understand the context or need further assistance within that framework. - -I'll give the correct sum and let them know if they need anything else. That should be helpful. +您好!我是由中国的深度求索(DeepSeek)公司开发的智能助手DeepSeek-R1。如您有任何任何问题,我会尽我所能为您提供帮助。 -1 + 2 equals **3**. +您好!我是由中国的深度求索(DeepSeek)公司开发的智能助手DeepSeek-R1。如您有任何任何问题,我会尽我所能为您提供帮助。 + +[N][ Run][ 921]: hit eos,avg 17.68 token/s -[N][ Run][ 943]: hit eos,avg 11.25 token/s +[I][ GetKVCache][ 498]: precompute_len:91, remaining:933 -[I][ GetKVCache][ 500]: precompute_len:123, remaining:1413 -prompt >> q -root@ax650:/mnt/qtang/llm-test/deepseek-r1-1.5b-ctx# ``` \ No newline at end of file diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l0_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l0_together.axmodel index 6b0c82bfa824e8eb9a5ab8b156e5416f81e9ab20..74993fd2557af08558512ff838ba912035e50789 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l0_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l0_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:522ade59d65cff80d29c70a7dfc6a5dc033d603974d74dcd19b354b09530722d -size 52834948 +oid sha256:be25a034f27581cd0f7af207a9c0598381e1f19ba0ecfdab943f56c52b1afc9a +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l10_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l10_together.axmodel index 08156d11b393e8e39bd72fbef9f2e5f4d0649bee..a9b187bb61dd33040f3c4b608b38a1f33e8f43f9 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l10_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l10_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:459fa50ffb11858a43b54ce215476589fc4faf8dfb96ca9214b8293b58ac1103 -size 52834948 +oid sha256:f1c86026e1bc725957c6e933da8760062715047b794ace7e2c1f1e7b62b16c12 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l11_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l11_together.axmodel index 6028528fef78c99af658edddb1abdbb376113862..3a7df76a86198a873c1d7bf71a6b6fc8bb64899f 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l11_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l11_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:a58561902aed842a82fe0f7b2272d213a91c275ae76eca221590d08d2ff97661 -size 52834948 +oid sha256:bce863f600e20101c5827dd217f0675ef9ecdb91c2b1daa6df79a3144ea4654d +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l12_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l12_together.axmodel index 53bb8d8ce2cb5ab3c2bfb02246e1e7aad0e15e1d..dbaaaa725e816934912b8c5b38fdb6e560ab258f 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l12_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l12_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0cf81bd3b3ac80888246bfc3bc5bb297acbebd8a507ebfb791e53f4600c30ef3 -size 52834948 +oid sha256:79b13a28b5d799c27df905e5165d0acfa66e9cae166bc5a0770f111b741efd79 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l13_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l13_together.axmodel index 80a6ef09bc22a8990c3c2b8af7a10d71f137d08d..2beae42b82a394e4918db7668b69a88b487a7c6b 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l13_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l13_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:986e45505669199a5715d64086defa9268ab7c1ee284ee515d65458fe9b62594 -size 52834948 +oid sha256:e99ab77861a2d0db96009c64d0e369c2333173044b97affdcc626c9d70fab1a8 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l14_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l14_together.axmodel index eab80b4b33583893017277428e6bc650d65503e2..170eb5abd121fa75983e8468a52f55658efdd735 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l14_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l14_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:70e051d1037248b958467d58629514e627d964b2f27ec6d6c9cbce72fd1b152a -size 52834948 +oid sha256:fa2518bf3b7f068dbe709a3cfea3907d49096ce9fae393673957e88adbbdb913 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l15_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l15_together.axmodel index b13281262763144ec7c6e5f8da64cfc209c9d50a..b40344f90eca0b32bd6e425f26f7117ab8a5dc57 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l15_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l15_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:5760efccff8171cff54e9c7b299558cd437b9e8ca2d27ad67ebf7ce7cfae46d6 -size 52834948 +oid sha256:4a71aa34f0cb5c912058a57e051d0f06ef2baad10f47fc21700c96bdeae011de +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l16_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l16_together.axmodel index 1afc0e3a8a262de938b116eb25de4bf4cca012e2..a37b38e7cf91674075c02aa759b24878d4e19f5f 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l16_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l16_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f12844f3420f944f332ca51033b912ee4de89be6c7ded26c58e44dc24d2724e9 -size 52834948 +oid sha256:b14d934b5ed86b85b403ad5b7f920e5d7d7327c5d9d9625e453dd54f20b96057 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l17_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l17_together.axmodel index 9bc61621016560d9ee15cb5106eea54f8ea53164..8096deca56c645c4d1e012c07cc5157f13fe5b62 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l17_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l17_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:53a18582cd841cd56d8039e739feafeae2051d84d1f036b5b6ac5791186c47a2 -size 52834948 +oid sha256:c76091b5259d4c46c31f083806edba162c268af509ee84a4aebfcb4c29686090 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l18_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l18_together.axmodel index e8537fa72a5426252a17e33bf7e809afd4efdae4..55433999e21d37df6502311d3c6408cef190c6ab 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l18_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l18_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:283a07ecb1670ad7198a42260d3444696ae2e16a0eb4ce9e600678c1aab63657 -size 52834948 +oid sha256:fa0b4f01e430bfb6b2b9d147b9b987028f9abeddba6afe92c330d1fad89785d7 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l19_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l19_together.axmodel index b9b9022aa7af89aa60a12b3e7dd64f7325c238a1..67cffa1ed6c4973e4458bd0fb6cd2e3bdfa13f04 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l19_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l19_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:09fa3121fbcbf0c003cc8c2731030b1e56af41defd9068824d0f2be128eea9ba -size 52834948 +oid sha256:e240dae492739f1911c7ff5ab7190454684943cce1044c3566cac711cafbd5e0 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l1_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l1_together.axmodel index 36add6b01dfba3cb1ee9498ef7939f78a032c17d..7f2d6b7f349edec7ebd935b0213750c3071bc1d8 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l1_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l1_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f2327972616360b4b7bc222f5fd17de676def7bf049c49b939f4057cb37591fd -size 52834948 +oid sha256:0a08ea7b97e094625a8be0a08d2be0d8e54960f68cadde74adbc3f8039d375d3 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l20_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l20_together.axmodel index c5fb9b41e6a863c897e531c993a3bfdbb33aad48..39a1836132f86ffc24f17d25b3fb91721d5f377d 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l20_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l20_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:96ff64967a922794b9ef77df7b23d515cf7b892805c1ce4fa4abe84be6cdd971 -size 52834948 +oid sha256:6fdb4f7da5a7514a260e4f497f5db4af0ce94d5b6ad229fb49a8b808be4bce6b +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l21_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l21_together.axmodel index 9bf918878f789a3d68a261768d41ce9097ca95d5..52cfa87ace995dfcb6f6c69f146b8b3146156b06 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l21_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l21_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ea512f881311fdeb068f02138cf4efa1784ae2d0f685c31c7290cb464a9f6ed8 -size 52834948 +oid sha256:7a52db67d8d790ceaf9c44604d3206d1207955d92b288f319c009c605fbff9e6 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l22_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l22_together.axmodel index 4a71f425d3163245ca9c333132c7251ccc59d1e9..fb19efbf343dca6cdc94d7d825ffdbdc7325b179 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l22_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l22_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:495fccb3b187ac3b99cdcd1b7b50905ac4b820b3c9e20358ae2fceaac399284c -size 52834948 +oid sha256:06b8c833345060de714dddbec563abaf98194537e0d4932a47feee727dc17939 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l23_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l23_together.axmodel index a508b033971a3651022036fcb73d1e50db3eff36..1bb352a64536c7a1c50fdc0c218fae5ba41c9a3b 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l23_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l23_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:3e29d32050388048d926a1d7e2bb32b3b3f30778db9cabcdbc1fc732e9c10bcb -size 52834948 +oid sha256:a747e0072af4c386af7b8c4f46388732572a8bab4b72cb3df9efb4dee52054db +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l24_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l24_together.axmodel index 6e3803ccd1b3cb9a74f0da64e4cf5082d9f61710..e5f9a5ce9360a1606ef0f3689e4d27fc2d70f7e9 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l24_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l24_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:f0641564f5017fdaf98bbbfff5364b88e0911c5d3a343585ef95ffbdf4d76628 -size 52834948 +oid sha256:5591329fbdb9e2e0c939b44ae09796048b621bc7910fcb41eb2e96f1aeea5848 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l25_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l25_together.axmodel index fd78645a75c4308ca2865cc190e96d34ce9b9f87..be642e5c6be88c3f6e532f352603426448f207f3 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l25_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l25_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:38bbfc9f023ba81ad8e778c179208760a091b02911a54e749ff2b24761bb0bec -size 52834948 +oid sha256:4f3f13627396c2e87f4bbfff0c449f7d51f45ece7878cad12fe185a580d9a83e +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l26_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l26_together.axmodel index 01a04f0955dbdfb589b1f2211bc0374be566652d..2d02783a121ae4b03c4cbd3e9fc7707061853b24 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l26_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l26_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:97abb5dd5d4800be31950475c9769a6025382e834bebf9eebd8ec6b86159c550 -size 52834948 +oid sha256:54119e40e581a543fd4144afff911ae4a4da0990b31c796737f6bed72dc9fa10 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l27_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l27_together.axmodel index 8cb806ef78ddf1cfd6258d25fc9b16e10746938e..9309d741c536be7b9ebf345c98dddbacd217ec2b 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l27_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l27_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:762fce5404016b3f3aff3a0d66f587f0ac06d4176c0e1ddeca68885eca0029ff -size 52834948 +oid sha256:20f2701ef879f3690ee7a871097f0388224dbec76c2af2c0331ec13d7cef5a58 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l2_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l2_together.axmodel index 105023995a80e7f99acab90af4455689a041f4ba..569fde075dbbe1724d7e29dc0f5779af3039c2c5 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l2_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l2_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:66d0843374ec2aa65842474f5146b90339a962541af9c54d60a0909f6107ef0a -size 52834948 +oid sha256:1c69bb6cc74eec834aeff2bb9cb5d1deae54f3c805b345cc16d78697d6a745b2 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l3_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l3_together.axmodel index 42f8806305d26e67050443e72dede4d1a1df4887..7141a06bd25f0496c8fc5d72a3a69973a5ad5b21 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l3_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l3_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ca361332c875422394d3e11b59c0c8069b9f66692fe6a80533021f2f0444b98f -size 52834948 +oid sha256:d66e85cb4980ba39bc4205d04d30a6030f3e07f20a2118dd3edbac5b800eceee +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l4_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l4_together.axmodel index ade1750e22f6ed58273c8a85ac19a07d35ceb6e0..5473323d6e65034665a8761670c8b6ea49dd0a30 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l4_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l4_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:00cf57979c4ccc3eb6e9e7d1b701dd44fde3e64f528a19d8924e390aafd43b97 -size 52834948 +oid sha256:fb2e65b19a8474ea35aa24bd1ab417df4890cceedb08ae5ea33d4bef2d537f9f +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l5_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l5_together.axmodel index 27b78a0d277989d270eb6c5cff93f16f42ae6cc6..6254d6412b89ba6bf5bce3f221f7ff1d839abdb5 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l5_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l5_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:0e34467ca89cdbd0dc15dfb83be967eb4bcc217604d72a82793ce1ef011e84be -size 52834948 +oid sha256:443f4dfe91116c071a37f2d92464bd953e56c08081c9f8b4f89498f591962100 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l6_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l6_together.axmodel index 8cb00792e5732e278286322c629c728d175b78ae..ef0ab031f021ebd1561626e7244024142b497a1e 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l6_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l6_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:df34fdfd121d439ecc3f4c2f1965004bac72b3bf1e467a4fc10023a6bea8d5db -size 52834948 +oid sha256:7dca045308029638415dc8b816b9b5a256d64d21384c4b71b0ad058dae450420 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l7_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l7_together.axmodel index d92b3679d1b3be162853595da8b05201c0937e09..542f3ac2db8f49d39ef4ce8a7483fcbc2b01ab65 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l7_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l7_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:2858db26c2692db1339be9a09e8c95a59e9ae66033a9c397f2ba84cd4aac864e -size 52834948 +oid sha256:da0519e123bc0f7ab4680f7120e462a08f2540768edb82f904cfc8a0bc25b580 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l8_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l8_together.axmodel index 91909f061239f199c9e1b0ad4210bd36a3fdc8a0..e58c730f28e0fd6666c23d38a204e084b018dc8d 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l8_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l8_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:da4c317a23fce0ae9c6353427e00f33db5d9866ed1ec4d389170b1d0eb88ba9f -size 52834948 +oid sha256:a3dd7537c5436cda4f3773ecf274348995166b1e1de9af26180dfec708f35c71 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_p128_l9_together.axmodel b/deepseek-r1-1.5b-ax650/qwen2_p128_l9_together.axmodel index 46f3f11d92b6a33a19e1fb972cb99e30ef8b6d59..77d4999af1015da4526721fda314087999ddd358 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_p128_l9_together.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_p128_l9_together.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ec1ed959ab0525f9bc2f40deee81b6169296a11acce9472c1830792b15b101a4 -size 52834948 +oid sha256:e9795532614c166985165869e63023c82a96f61912980f3a304a96f49b781a55 +size 62970116 diff --git a/deepseek-r1-1.5b-ax650/qwen2_post.axmodel b/deepseek-r1-1.5b-ax650/qwen2_post.axmodel index 3968cc2d8f3c87793ac2a8787bd43437c57b57a9..0473c68517c35db87c6afa5135c426ade302af57 100644 --- a/deepseek-r1-1.5b-ax650/qwen2_post.axmodel +++ b/deepseek-r1-1.5b-ax650/qwen2_post.axmodel @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:ede68b189b7892beca2a4877bf36c9a7f98ee420c93fa46a5e106fc545b78593 +oid sha256:706dcdd770eda42a82548f3d3c5cc9dd3840818d72eca312bd96ed7ac9e6714d size 254692887 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l0_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l0_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..c6addea951b3a8e6e4a12066055e27aba54ddc50 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l0_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6aaa5ceb34564e043555a0b783be09c637ba6f7474e699c089e0b66ee1d25439 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l10_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l10_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..549d5c5612046a5cefa332a85f654827cdee874f --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l10_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:85a6f545503d31dca0214bad55439f0c50399b7258c70d2e274b8d414cab8ff8 +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l11_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l11_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..e90ac9a19ca68b3b9d43a6b59b84f9996750da00 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l11_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3be0a3d96470fa7682862373f90320a673dfeb1e7505c4640d62070b6ee20dd7 +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l12_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l12_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..87809d655b6041b99193a2d0f569fe8e20ba2ed0 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l12_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:260b2593262480cbd1515f60ac45d7c3d350e80a9a3529606f49106baf662d33 +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l13_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l13_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..100981d646d0253690dc2e1065d5d7f0549c186a --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l13_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0a75ee8dcf1bb19f28592f2c11a2c8b0c5bc7cedef3124182c9778e313034a6e +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l14_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l14_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..f3e4ff925b052681aa58748cdec8db2671a6a0a4 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l14_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f3375832c9e6b5c94185353906b5de4c88e26fc966e904090aa12029560b7509 +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l15_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l15_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..98797479c06edaf1cdb4a81c4e1a446996e8eccd --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l15_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c5482b12cee22649f45e9d19a9f3436862c0175f71bcaecd45b5de95029b609c +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l16_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l16_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..da39200e8b2480af41d879dc9295f43587ed9709 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l16_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4061bbde2821e282d15ea5a23e95a9ecc6c463f2bfe2592845f8ac4d96d62ec6 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l17_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l17_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..324e4f5dbac9bef8593a6f3cd3110795e0620815 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l17_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f18b66ae4a5bafac5bde713de4cbcdf7a6a0a46cae9b7c7ac23e259cd741caa1 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l18_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l18_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..b8a9d0823239e2102241e9165d2bbbe4a3642b03 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l18_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ea7c7663e96ab4062c53c21f3b2f9abaf7fd3527b53be23759272155836ce654 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l19_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l19_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..7296b507ea2d53696705d17785caf52170014d85 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l19_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f201ffe7f99797eccc9bec0725c7ca83ff7e6c541d9d84eaac117681e4de1d77 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l1_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l1_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..3be80cb95809a130561e06895fbdc46f98edfd2a --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l1_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:16641c20d5633676bf80bae34a6c66763d8f0b23f708ff83ace07f2a04fc55df +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l20_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l20_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..204f9061c821762cb2ddf7c6f8ddd01ff41e25f7 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l20_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdb618b59cf9d8d63a5cfbdaec59c72c7be233d28941e42e2536dd9eb2ad08f9 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l21_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l21_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..b5001c0cdf1ab511c76f80c79ceb449029420501 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l21_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:318e348c4cd4be6825e11e263c34653fbdefa61b7b8a80ef1fae18872ec2b7f4 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l22_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l22_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..51a0b9d6dc561738a9a09847a8d64fe746e11687 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l22_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dfcecc0aefe64288f6e70a1257b7ce97240d3415dccd1a0e784412a64aa996bd +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l23_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l23_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..8a91a73f3115fe6e338baa1a7670901bbc683d42 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l23_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:18b79890f3392302c71d40d90a853362bf97b7ebec020836e1916c71dbc6b73e +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l24_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l24_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..82595121cc57a73616cb3ff651f5d7cde3d75d2b --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l24_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b887c92a69a95207c14ace97a2540c14d6ac3d9be598accddb121bc2d81769a +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l25_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l25_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..f5e9c857502573640eea9c18c2cd3827f6c95082 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l25_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:30d46409a6dcb7f47271f1308e1698346ab6658d6cc41ac28dee36863fe0ee2a +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l26_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l26_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..ec656662bf65456360358c13304ea03e8f8372c6 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l26_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3a8c666022bcb30590dd4d61a77ab688faa085d3bcb7ead81f7ba628161f697f +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l27_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l27_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..8ce208b5cd2d532a7e999fb8aeb0b42f5b7bae9c --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l27_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:73cabe93563b23782ffbfb5648ae491d6049f6c5ddd47828f64b3d6863ef776e +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l2_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l2_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..9b6770d9bce3fabf0c978f468c52b0790de0ba5b --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l2_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c4a77506d162a3b7da8e87f7b50ea0dbf360fab9a83c9f95a821d51e63f05d32 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l3_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l3_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..8ad27efc547ffce39ee2fea6335be69d97848162 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l3_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3734cf4d35b851595a93ad46ae5515538e5d5b7cfe40f0608af903fb76feb55 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l4_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l4_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..f99d903b1decea90482f95e5f587c2c21e5ea23d --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l4_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:27cbc4a23a164ac5a62968b35ff9c994c6aae1c3e9d2498accbee7ba3b316224 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l5_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l5_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..e2a4208477776c42693cff30afdd698c4c33d94f --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l5_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:59fe253b425e293ca4ddbc15c2a092db9fa0d2e188810192f56650b00a268624 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l6_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l6_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..997d522a5f4fc61034f54a5ff31bb7521743f911 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l6_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:051b7198ec28417f93a346dd9e5d7e3bf70a81ee8d9439458c11dcce3867ee37 +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l7_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l7_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..e008599e2441145f8d686b2701edfd581d775b6c --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l7_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:92321d05dd40a8ba7eb6879e4c33e4e978fa67da481eea528493c1086d14208a +size 37866696 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l8_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l8_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..242a107f5ef78e5a3ce8c3460156a689c56361c4 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l8_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f69e13f961eda72f387940a0e6f6f96943a847b42133fb92788609584ecbf592 +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l9_together.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l9_together.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..8a508dcbdbdbf8cfdc7c53d4b562e93d85c7e4cc --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_p128_l9_together.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:dd7988f758c872dad6ae5b2b1afac3b1d073a6002caa49546928f12931099909 +size 37866688 diff --git a/deepseek-r1-1.5b-int4-ax650/qwen2_post.axmodel b/deepseek-r1-1.5b-int4-ax650/qwen2_post.axmodel new file mode 100644 index 0000000000000000000000000000000000000000..c7ba34b8b194c2212b02637340086271022d8260 --- /dev/null +++ b/deepseek-r1-1.5b-int4-ax650/qwen2_post.axmodel @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f604634f10d92e751c6f47190e9f232168f797e2f638216424c59ce8abe7e720 +size 254692887 diff --git a/deepseek-r1_tokenizer_uid.py b/deepseek-r1_tokenizer_uid.py new file mode 100644 index 0000000000000000000000000000000000000000..00b96326ac95088ccb57bddef0731c9c72d62431 --- /dev/null +++ b/deepseek-r1_tokenizer_uid.py @@ -0,0 +1,199 @@ +from transformers import AutoTokenizer, PreTrainedTokenizerFast +from http.server import HTTPServer, BaseHTTPRequestHandler +import json +import argparse +import uuid + +# 全局字典:存储 uid 到 Tokenizer_Http 实例的映射 +tokenizers = {} + +class Tokenizer_Http(): + def __init__(self): + model_id = "deepseek-r1_tokenizer" + self.tokenizer = AutoTokenizer.from_pretrained(model_id) + self.messages = [ + {"role": "system", "content": "You are DeepSeek. You are a helpful assistant."}, + ] + self.token_ids = [] + + self.token_ids_cache = [] + + def encode(self, prompt, last_reply=None): + if last_reply is not None: + self.messages.append({"role": "assistant", "content": last_reply}) + text = self.tokenizer.apply_chat_template( + self.messages, + tokenize=False, + add_generation_prompt=True + ) + # print("生成的文本:\n============\n", text, "============\n") + self.token_ids = self.tokenizer.encode(text)[:-3] + self.messages.append({"role": "user", "content": prompt}) + + text = self.tokenizer.apply_chat_template( + self.messages, + tokenize=False, + add_generation_prompt=True + ) + print("生成的文本:\n============\n", text, "============\n") + token_ids = self.tokenizer.encode(text) + # 找出新增部分 + diff = token_ids[len(self.token_ids):] + self.token_ids = token_ids + print(self.decode(diff)) + return token_ids, diff + + def decode(self, token_ids): + self.token_ids_cache += token_ids + text = self.tokenizer.decode(self.token_ids_cache) + if "\ufffd" in text: + print("text 中包含非法字符") + return "" + else: + self.token_ids_cache.clear() + return text + + + @property + def bos_id(self): + return self.tokenizer.bos_token_id + + @property + def eos_id(self): + return self.tokenizer.eos_token_id + + @property + def bos_token(self): + return self.tokenizer.bos_token + + @property + def eos_token(self): + return self.tokenizer.eos_token + + def reset(self, system_prompt="You are DeepSeek. You are a helpful assistant."): + self.messages = [ + {"role": "system", "content": system_prompt}, + ] + text = self.tokenizer.apply_chat_template( + self.messages, + tokenize=False, + add_generation_prompt=True + ) + token_ids = self.tokenizer.encode(text)[:-3] + self.token_ids = token_ids + print(self.decode(token_ids)) + return token_ids + + +class Request(BaseHTTPRequestHandler): + timeout = 5 + server_version = 'Apache' + + def do_GET(self): + print("GET 请求路径:", self.path) + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + + # 新增接口:获取 uid + if '/get_uid' in self.path: + new_uid = str(uuid.uuid4()) + print("新 uid:", new_uid) + # 为该 uid 创建一个新的 Tokenizer_Http 实例 + tokenizers[new_uid] = Tokenizer_Http() + msg = json.dumps({'uid': new_uid}) + elif '/bos_id' in self.path: + # 获取 uid 参数(例如 ?uid=xxx) + uid = self.get_query_param("uid") + instance: Tokenizer_Http = tokenizers.get(uid) + if instance is None: + msg = json.dumps({'error': 'Invalid uid'}) + else: + bos_id = instance.bos_id + msg = json.dumps({'bos_id': bos_id if bos_id is not None else -1}) + elif '/eos_id' in self.path: + uid = self.get_query_param("uid") + instance: Tokenizer_Http = tokenizers.get(uid) + if instance is None: + msg = json.dumps({'error': 'Invalid uid'}) + else: + eos_id = instance.eos_id + msg = json.dumps({'eos_id': eos_id if eos_id is not None else -1}) + else: + msg = json.dumps({'error': 'Invalid GET endpoint'}) + + print("响应消息:", msg) + self.wfile.write(msg.encode()) + + def do_POST(self): + content_length = int(self.headers.get('content-length', 0)) + data = self.rfile.read(content_length).decode() + print("POST 请求路径:", self.path) + print("接收到的数据:", data) + req = json.loads(data) + + self.send_response(200) + self.send_header("Content-Type", "application/json") + self.end_headers() + + if '/encode' in self.path: + # 请求数据中必须包含 uid, text, 和可选的 last_reply + uid = req.get('uid') + prompt = req.get('text') + last_reply = req.get('last_reply') + instance: Tokenizer_Http = tokenizers.get(uid) + if instance is None: + msg = json.dumps({'error': 'Invalid uid'}) + else: + token_ids, diff = instance.encode(prompt, last_reply) + msg = json.dumps({'token_ids': token_ids, 'diff': diff}) + elif '/decode' in self.path: + uid = req.get('uid') + token_ids = req.get('token_ids') + instance: Tokenizer_Http = tokenizers.get(uid) + if instance is None: + msg = json.dumps({'error': 'Invalid uid'}) + else: + text = instance.decode(token_ids) + msg = json.dumps({'text': text}) + elif '/reset' in self.path: + uid = req.get("uid") + system_prompt = req.get("system_prompt") + instance: Tokenizer_Http = tokenizers.get(uid) + if instance is None: + msg = json.dumps({'error': 'Invalid uid'}) + else: + if system_prompt is not None: + print("system_prompt:", system_prompt) + token_ids = instance.reset(system_prompt) + msg = json.dumps({'token_ids': token_ids}) + else: + token_ids = instance.reset() + msg = json.dumps({'token_ids': token_ids}) + else: + msg = json.dumps({'error': 'Invalid POST endpoint'}) + + print("响应消息:", msg) + self.wfile.write(msg.encode()) + + def get_query_param(self, key): + """ + 辅助函数:从 GET 请求的 URL 中获取查询参数的值 + 例如:/bos_id?uid=xxx + """ + from urllib.parse import urlparse, parse_qs + query = urlparse(self.path).query + params = parse_qs(query) + values = params.get(key) + return values[0] if values else None + +if __name__ == "__main__": + parser = argparse.ArgumentParser() + parser.add_argument('--host', type=str, default='0.0.0.0') + parser.add_argument('--port', type=int, default=12345) + args = parser.parse_args() + + host = (args.host, args.port) + print('Server running at http://%s:%s' % host) + server = HTTPServer(host, Request) + server.serve_forever() \ No newline at end of file diff --git a/main_ax650 b/main_ax650 new file mode 100644 index 0000000000000000000000000000000000000000..e356c8a1ba8d909137ed8fadbb8954d107033154 --- /dev/null +++ b/main_ax650 @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f19ddeb193769b16aa8c5d9bba887558aa0a4ed10eb50a19d9bc117f1ba527e5 +size 985352 diff --git a/main_axcl_aarch64 b/main_axcl_aarch64 index 1790d7620d629cefb0823c254849aacdb51bbad2..50c89e82d5aa8deff6f067f48603f2a301014891 100644 --- a/main_axcl_aarch64 +++ b/main_axcl_aarch64 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:bb111fc00c54abb6142a8f44df087bf104c8150a1cefa6be55c6b174b932c4ec -size 999008 +oid sha256:1f9f1a1ca329b47f70840e8b6d104ce8248a82326aa2402bccb31144590a8fb2 +size 1725008 diff --git a/main_axcl_x86 b/main_axcl_x86 index 6711952241de8943efec1350e53f93684dcc2186..1bd83db9edd6d195da54a1a37820c20d45a8415c 100644 --- a/main_axcl_x86 +++ b/main_axcl_x86 @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:6cba0be8df523e351789fcfa745772423096f52a3b0a760f8d8c9f5b8bb2ec82 -size 1022384 +oid sha256:928d36be31c15d081a7d346464f41458e9624d8b68d5f7dfb3d3189686ce2754 +size 8421624 diff --git a/run_deepseek-r1_1.5b_ax650.sh b/run_deepseek-r1_1.5b_ax650.sh new file mode 100644 index 0000000000000000000000000000000000000000..1a690a80df828749cb15c5f931e9a481694ab227 --- /dev/null +++ b/run_deepseek-r1_1.5b_ax650.sh @@ -0,0 +1,10 @@ +./main_ax650 \ +--template_filename_axmodel "deepseek-r1-1.5b-ax650/qwen2_p128_l%d_together.axmodel" \ +--axmodel_num 28 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ +--filename_post_axmodel "deepseek-r1-1.5b-ax650/qwen2_post.axmodel" \ +--filename_tokens_embed "deepseek-r1-1.5b-ax650/model.embed_tokens.weight.bfloat16.bin" \ +--tokens_embed_num 151936 \ +--tokens_embed_size 1536 \ +--use_mmap_load_embed 1 \ +--live_print 1 \ No newline at end of file diff --git a/run_deepseek-r1_1.5b_axcl_aarch64.sh b/run_deepseek-r1_1.5b_axcl_aarch64.sh index 3213ad5317ad61df445800503628c3773dd147da..cae336bd736e0fa3c595b5e13796e4bf959f810d 100644 --- a/run_deepseek-r1_1.5b_axcl_aarch64.sh +++ b/run_deepseek-r1_1.5b_axcl_aarch64.sh @@ -1,14 +1,10 @@ ./main_axcl_aarch64 \ --template_filename_axmodel "deepseek-r1-1.5b-ax650/qwen2_p128_l%d_together.axmodel" \ --axmodel_num 28 \ ---tokenizer_type 2 \ ---filename_tokenizer_model "http://127.0.0.1:12345" \ ---bos 0 --eos 0 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ --filename_post_axmodel "deepseek-r1-1.5b-ax650/qwen2_post.axmodel" \ --filename_tokens_embed "deepseek-r1-1.5b-ax650/model.embed_tokens.weight.bfloat16.bin" \ --tokens_embed_num 151936 \ --tokens_embed_size 1536 \ --use_mmap_load_embed 0 \ ---live_print 1 \ ---continue 1 \ ---prompt "$1" +--live_print 1 diff --git a/run_deepseek-r1_1.5b_axcl_x86.sh b/run_deepseek-r1_1.5b_axcl_x86.sh index 380126e0d87b281caa4cebe6a0771cf814cd1ebf..093111ca804521dc5f42facde61bbfc36ed7fbae 100644 --- a/run_deepseek-r1_1.5b_axcl_x86.sh +++ b/run_deepseek-r1_1.5b_axcl_x86.sh @@ -1,14 +1,10 @@ ./main_axcl_x86 \ --template_filename_axmodel "deepseek-r1-1.5b-ax650/qwen2_p128_l%d_together.axmodel" \ --axmodel_num 28 \ ---tokenizer_type 2 \ ---filename_tokenizer_model "http://127.0.0.1:12345" \ ---bos 0 --eos 0 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ --filename_post_axmodel "deepseek-r1-1.5b-ax650/qwen2_post.axmodel" \ --filename_tokens_embed "deepseek-r1-1.5b-ax650/model.embed_tokens.weight.bfloat16.bin" \ --tokens_embed_num 151936 \ --tokens_embed_size 1536 \ --use_mmap_load_embed 0 \ ---live_print 1 \ ---continue 1 \ ---prompt "$1" +--live_print 1 \ No newline at end of file diff --git a/run_deepseek-r1_1.5b_int4_ax650.sh b/run_deepseek-r1_1.5b_int4_ax650.sh new file mode 100644 index 0000000000000000000000000000000000000000..0e6ebd2f69f0013b2af61bb7229f0f8fc4d46a1a --- /dev/null +++ b/run_deepseek-r1_1.5b_int4_ax650.sh @@ -0,0 +1,10 @@ +./main_ax650 \ +--template_filename_axmodel "deepseek-r1-1.5b-int4-ax650/qwen2_p128_l%d_together.axmodel" \ +--axmodel_num 28 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ +--filename_post_axmodel "deepseek-r1-1.5b-int4-ax650/qwen2_post.axmodel" \ +--filename_tokens_embed "deepseek-r1-1.5b-int4-ax650/model.embed_tokens.weight.bfloat16.bin" \ +--tokens_embed_num 151936 \ +--tokens_embed_size 1536 \ +--use_mmap_load_embed 1 \ +--live_print 1 diff --git a/run_deepseek-r1_1.5b_int4_axcl_aarch64.sh b/run_deepseek-r1_1.5b_int4_axcl_aarch64.sh new file mode 100644 index 0000000000000000000000000000000000000000..afc2c46e31878432b5195a65f4a5c667cd38cb98 --- /dev/null +++ b/run_deepseek-r1_1.5b_int4_axcl_aarch64.sh @@ -0,0 +1,10 @@ +./main_axcl_aarch64 \ +--template_filename_axmodel "deepseek-r1-1.5b-int4-ax650/qwen2_p128_l%d_together.axmodel" \ +--axmodel_num 28 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ +--filename_post_axmodel "deepseek-r1-1.5b-int4-ax650/qwen2_post.axmodel" \ +--filename_tokens_embed "deepseek-r1-1.5b-int4-ax650/model.embed_tokens.weight.bfloat16.bin" \ +--tokens_embed_num 151936 \ +--tokens_embed_size 1536 \ +--use_mmap_load_embed 1 \ +--live_print 1 diff --git a/run_deepseek-r1_1.5b_int4_axcl_x86.sh b/run_deepseek-r1_1.5b_int4_axcl_x86.sh new file mode 100644 index 0000000000000000000000000000000000000000..8b835afae59522fd2c2600c14487d2ff9294012b --- /dev/null +++ b/run_deepseek-r1_1.5b_int4_axcl_x86.sh @@ -0,0 +1,10 @@ +./main_axcl_x86 \ +--template_filename_axmodel "deepseek-r1-1.5b-int4-ax650/qwen2_p128_l%d_together.axmodel" \ +--axmodel_num 28 \ +--url_tokenizer_model "http://127.0.0.1:12345" \ +--filename_post_axmodel "deepseek-r1-1.5b-int4-ax650/qwen2_post.axmodel" \ +--filename_tokens_embed "deepseek-r1-1.5b-int4-ax650/model.embed_tokens.weight.bfloat16.bin" \ +--tokens_embed_num 151936 \ +--tokens_embed_size 1536 \ +--use_mmap_load_embed 1 \ +--live_print 1