Upload folder using huggingface_hub
Browse files — This view is limited to 50 files because it contains too many changes. See raw diff
- .gitattributes +8 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/model.embed_tokens.weight.bfloat16.bin +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l0_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l10_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l11_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l12_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l13_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l14_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l15_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l16_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l17_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l18_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l19_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l1_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l20_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l21_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l22_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l23_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l24_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l25_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l26_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l27_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l28_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l29_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l2_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l30_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l31_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l32_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l33_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l34_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l35_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l3_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l4_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l5_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l6_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l7_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l8_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l9_together.axmodel +3 -0
- Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_post.axmodel +3 -0
- README.md +245 -3
- config.json +0 -0
- main_api_ax650 +3 -0
- main_api_axcl_aarch64 +3 -0
- main_api_axcl_x86 +3 -0
- main_ax650 +3 -0
- main_axcl_aarch64 +3 -0
- main_axcl_x86 +3 -0
- post_config.json +14 -0
- qwen2.5_tokenizer/merges.txt +0 -0
- qwen2.5_tokenizer/tokenizer.json +0 -0
.gitattributes
CHANGED
|
@@ -32,4 +32,12 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 32 |
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
*.axmodel filter=lfs diff=lfs merge=lfs -text
|
| 36 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
main_ax650 filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
main_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
main_axcl_x86 filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
qwen3_tokenizer/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
main_api_ax650 filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
main_api_axcl_aarch64 filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
main_api_axcl_x86 filter=lfs diff=lfs merge=lfs -text
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/model.embed_tokens.weight.bfloat16.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:eabe5625fc0575bf517c424041e9701c0fd521889e0f547c8522d2aa20e8c0f8
|
| 3 |
+
size 777912320
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l0_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b6706bd04faa7987ac84fcedd44487c5dd0fd3e01b3c354a72d2158412cbe572
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l10_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8bc94bbed29a443bd92b07e6da311758e25fcd5024aa72d148a0535c6b8b78c8
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l11_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f823ae3bcbd505ee94403814877d46b1c9cd1cc20e4d9971d718c1f12f6436de
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l12_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:aede41d1d2855c5f5e48a1f1ac73948a0e0611c6a791321e51258625e0e298d4
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l13_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:474f529221fc4d145fa5430ce075dd79bebe38d909a0a9ea5d012eb141aac967
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l14_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e1c57875f9474d918c04fdb0dab4ab532a511494b0f4d3ed6c104a0ad07acef4
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l15_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d3bdf22e8f1c85695ffb8d62f9c365357136f1e3f03050d14a17d947774f735a
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l16_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5890dae7fc86f8ceed0649616acab5b5d407a0b71939d4c4da1c9f9c5149948f
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l17_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3cc5a36b246584335aa3bef4bf72bd0cf480c26710c5879a7b6dd0542be0ee8d
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l18_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:31b56b453671afdd2ea10444371ac5e8d992dbcae38077e3653b96fdbb143e29
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l19_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0dd27c2bccbecd4198e812062b9a323df28d8d362be9e5d28cc4398f3cad08d5
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l1_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:851f6535c5a09b1e40ae95c8f39ff654d5af0a5dab72286164e80a192e377d75
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l20_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ccafe960507254b793eafed1e5253bc924bd91b1ac7ce7b1c4ee22583729b86a
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l21_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:750e3a04abb85e7a59c4ac2736f154083e92a92b6d0a2e0ac115e3847364d722
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l22_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cab38992c778edc588994cf9398c6ade0e15460f3dc44b591b7f602af0df45fe
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l23_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f82d042f14e24f73837d252c1f2eb68666cc63e41c552b31b9651e503a1bd9a1
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l24_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:43dc45f7579e0cf2272a283ae1370a8a2442a91b13f85e94ffe1a3789e653f0f
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l25_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c663ff33b1d455f73f3a4a1ca0b2278f7fd731a980e1072f9f4e5b7be002a5e9
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l26_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c99edb9aabc14cb053e269be031746801011cc7b2c15a1f7385d4229f5b8bbfa
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l27_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b15ecab8b8471ed7fbe170dd93ddec9584c591ba6bea6e7224ec1e76abbbd106
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l28_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2495c57afe2dc507a13076947138b2b4f8876082fd9808a99ca00f01ec8b968c
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l29_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:20480961c16012330bfa504c1a4e55553cffd15607516816b0e4c2103ec1ebc5
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l2_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e5a89ea1587149fac2bb1b1adf66c82433f068bac661690d0c2633240ab11757
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l30_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e26950f6520856fa7a7b15179acde9d7b7ac7290c4f95cfd1acf71ee37aaf640
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l31_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:78acc56d21871cd026b2d584d1e61431d45de4363ae29f5576887719f782069c
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l32_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:52cedd1013577cab86a53258baaa62eeae99b2552264894f322eab41b66a7133
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l33_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6689879470e8d25a508f120f77c8c5ee657ca934c05024a8f593e94389c50b1b
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l34_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:98f7b2ac543f85c9380c97f9805b5d572646d1466b8f40b1e916a5675fe6b7eb
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l35_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:92da75168b8ea67f92df5d87f9d5e98d7ae33f3ecc27dd1d76053ef7319d368e
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l3_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a797f620572903f60bc64a6ecac74e795ce51ec9f4091fa80c3250d0c397de84
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l4_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb84b2848fe814b7a4e8113f5f47edeb7cdadb3904c9f14deaa08c5a91359372
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l5_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a41dd48f4a331e0046f3bc11d94aa35a1ca5ea2bc3e8f04ec6389011b6d4647
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l6_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3ab2e9c6cc960225aaf8a19d2d16fc920d4c048b379c542ec1a4af1b8ea644dd
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l7_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:511e855a8de9857fb0962437f9520bbaab42e559d249cd93d22f1ee826f8f5c5
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l8_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:261747bb3e06f351084c9754f443da736917ab23e22a113c9baeb3312d759136
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_p256_l9_together.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:357cafbab182da8770c5ba00acae9d76499e2bb114594cf6c116eec8d983b411
|
| 3 |
+
size 130950985
|
Qwen3-4B-Instruct-2507-GPTQ-Int8-context-4k-prefill-3584/qwen3_post.axmodel
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d7508737ba0506611ead05d48adba0aeae10fd81679b62d221362dd1d8ac520f
|
| 3 |
+
size 424152083
|
README.md
CHANGED
|
@@ -1,3 +1,245 @@
|
|
| 1 |
-
---
|
| 2 |
-
license:
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
---
|
| 2 |
+
license: apache-2.0
|
| 3 |
+
language:
|
| 4 |
+
- en
|
| 5 |
+
base_model:
|
| 6 |
+
- Qwen/Qwen3-4B
|
| 7 |
+
pipeline_tag: text-generation
|
| 8 |
+
tags:
|
| 9 |
+
- Qwen
|
| 10 |
+
- Qwen3
|
| 11 |
+
- Int8
|
| 12 |
+
---
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
# Qwen3-4B-Int8
|
| 16 |
+
|
| 17 |
+
This version of Qwen3-4B-Int8 has been converted to run on the Axera NPU using **w8a16** quantization.
|
| 18 |
+
|
| 19 |
+
This model has been optimized with the following LoRA:
|
| 20 |
+
|
| 21 |
+
Compatible with Pulsar2 version: 4.2(Not released yet)
|
| 22 |
+
|
| 23 |
+
## Convert tools links:
|
| 24 |
+
|
| 25 |
+
For those who are interested in model conversion, you can try to export axmodel through the original repo :
|
| 26 |
+
https://huggingface.co/Qwen/Qwen3-4B
|
| 27 |
+
|
| 28 |
+
[Pulsar2 Link, How to Convert LLM from Huggingface to axmodel](https://pulsar2-docs.readthedocs.io/en/latest/appendix/build_llm.html)
|
| 29 |
+
|
| 30 |
+
[AXera NPU LLM Runtime](https://github.com/AXERA-TECH/ax-llm)
|
| 31 |
+
|
| 32 |
+
|
| 33 |
+
## Support Platform
|
| 34 |
+
|
| 35 |
+
- AX650
|
| 36 |
+
- [M4N-Dock(爱芯派Pro)](https://wiki.sipeed.com/hardware/zh/maixIV/m4ndock/m4ndock.html)
|
| 37 |
+
- [M.2 Accelerator card](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html)
|
| 38 |
+
|
| 39 |
+
|Chips|w8a16|w4a16|
|
| 40 |
+
|--|--|--|
|
| 41 |
+
|AX650| 4.5 tokens/sec|TBD|
|
| 42 |
+
|
| 43 |
+
## How to use
|
| 44 |
+
|
| 45 |
+
Download all files from this repository to the device
|
| 46 |
+
|
| 47 |
+
```
|
| 48 |
+
root@ax650:/mnt/qtang/llm-test/qwen3-4b# tree -L 1
|
| 49 |
+
.
|
| 50 |
+
|-- config.json
|
| 51 |
+
|-- main_ax650
|
| 52 |
+
|-- main_axcl_aarch64
|
| 53 |
+
|-- main_axcl_x86
|
| 54 |
+
|-- post_config.json
|
| 55 |
+
|-- qwen2.5_tokenizer
|
| 56 |
+
|-- qwen3-4b-ax650
|
| 57 |
+
|-- qwen3_tokenizer
|
| 58 |
+
|-- qwen3_tokenizer_uid.py
|
| 59 |
+
|-- run_qwen3_4b_int8_ctx_ax650.sh
|
| 60 |
+
|-- run_qwen3_4b_int8_ctx_axcl_aarch64.sh
|
| 61 |
+
`-- run_qwen3_4b_int8_ctx_axcl_x86.sh
|
| 62 |
+
|
| 63 |
+
3 directories, 9 files
|
| 64 |
+
root@ax650:/mnt/qtang/llm-test/qwen3-4b#
|
| 65 |
+
|
| 66 |
+
```
|
| 67 |
+
|
| 68 |
+
#### Start the Tokenizer service
|
| 69 |
+
|
| 70 |
+
Install requirement
|
| 71 |
+
|
| 72 |
+
```
|
| 73 |
+
pip install transformers jinja2
|
| 74 |
+
```
|
| 75 |
+
|
| 76 |
+
```
|
| 77 |
+
root@ax650:/mnt/qtang/llm-test/qwen3-4b# python3 qwen3_tokenizer_uid.py
|
| 78 |
+
None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.
|
| 79 |
+
Server running at http://0.0.0.0:12345
|
| 80 |
+
```
|
| 81 |
+
|
| 82 |
+
#### Inference with AX650 Host, such as M4N-Dock(爱芯派Pro) or AX650N DEMO Board
|
| 83 |
+
|
| 84 |
+
Open another terminal and run `run_qwen3_4b_int8_ctx_ax650.sh`
|
| 85 |
+
|
| 86 |
+
```
|
| 87 |
+
root@ax650:/mnt/qtang/llm-test/qwen3-4b# ./run_qwen3_4b_int8_ctx_ax650.sh
|
| 88 |
+
[I][ Init][ 110]: LLM init start
|
| 89 |
+
[I][ Init][ 34]: connect http://127.0.0.1:12345 ok
|
| 90 |
+
[I][ Init][ 57]: uid: 6e90ff82-b9c9-42dc-8f61-081203389166
|
| 91 |
+
bos_id: -1, eos_id: 151645
|
| 92 |
+
2% | â–ˆ | 1 / 39 [3.95s<153.89s, 0.25 count/s] tokenizer init ok
|
| 93 |
+
[I][ Init][ 26]: LLaMaEmbedSelector use mmap
|
| 94 |
+
100% | ████████████████████████████████ | 39 / 39 [48.03s<48.03s, 0.81 count/s] init post axmodel ok,remain_cmm(5621 MB)
|
| 95 |
+
[I][ Init][ 188]: max_token_len : 2559
|
| 96 |
+
[I][ Init][ 193]: kv_cache_size : 1024, kv_cache_num: 2559
|
| 97 |
+
[I][ Init][ 201]: prefill_token_num : 128
|
| 98 |
+
[I][ Init][ 205]: grp: 1, prefill_max_token_num : 1
|
| 99 |
+
[I][ Init][ 205]: grp: 2, prefill_max_token_num : 256
|
| 100 |
+
[I][ Init][ 205]: grp: 3, prefill_max_token_num : 512
|
| 101 |
+
[I][ Init][ 205]: grp: 4, prefill_max_token_num : 1024
|
| 102 |
+
[I][ Init][ 205]: grp: 5, prefill_max_token_num : 1536
|
| 103 |
+
[I][ Init][ 205]: grp: 6, prefill_max_token_num : 2048
|
| 104 |
+
[I][ Init][ 209]: prefill_max_token_num : 2048
|
| 105 |
+
[I][ load_config][ 282]: load config:
|
| 106 |
+
{
|
| 107 |
+
"enable_repetition_penalty": false,
|
| 108 |
+
"enable_temperature": false,
|
| 109 |
+
"enable_top_k_sampling": true,
|
| 110 |
+
"enable_top_p_sampling": false,
|
| 111 |
+
"penalty_window": 20,
|
| 112 |
+
"repetition_penalty": 1.2,
|
| 113 |
+
"temperature": 0.9,
|
| 114 |
+
"top_k": 1,
|
| 115 |
+
"top_p": 0.8
|
| 116 |
+
}
|
| 117 |
+
|
| 118 |
+
[I][ Init][ 218]: LLM init ok
|
| 119 |
+
Type "q" to exit, Ctrl+c to stop current running
|
| 120 |
+
[I][ GenerateKVCachePrefill][ 270]: input token num : 21, prefill_split_num : 1 prefill_grpid : 2
|
| 121 |
+
[I][ GenerateKVCachePrefill][ 307]: input_num_token:21
|
| 122 |
+
[I][ main][ 230]: precompute_len: 21
|
| 123 |
+
[I][ main][ 231]: system_prompt: You are Qwen, created by Alibaba Cloud. You are a helpful assistant.
|
| 124 |
+
prompt >> 1+3=?
|
| 125 |
+
[I][ SetKVCache][ 530]: prefill_grpid:2 kv_cache_num:256 precompute_len:21 input_num_token:16
|
| 126 |
+
[I][ SetKVCache][ 533]: current prefill_max_token_num:1920
|
| 127 |
+
[I][ Run][ 659]: input token num : 16, prefill_split_num : 1
|
| 128 |
+
[I][ Run][ 685]: input_num_token:16
|
| 129 |
+
[I][ Run][ 808]: ttft: 1169.05 ms
|
| 130 |
+
<think>
|
| 131 |
+
|
| 132 |
+
</think>
|
| 133 |
+
|
| 134 |
+
1 + 3 = 4
|
| 135 |
+
|
| 136 |
+
[N][ Run][ 922]: hit eos,avg 4.22 token/s
|
| 137 |
+
|
| 138 |
+
[I][ GetKVCache][ 499]: precompute_len:48, remaining:2000
|
| 139 |
+
prompt >> who are you?
|
| 140 |
+
[I][ SetKVCache][ 530]: prefill_grpid:2 kv_cache_num:256 precompute_len:48 input_num_token:16
|
| 141 |
+
[I][ SetKVCache][ 533]: current prefill_max_token_num:1920
|
| 142 |
+
[I][ Run][ 659]: input token num : 16, prefill_split_num : 1
|
| 143 |
+
[I][ Run][ 685]: input_num_token:16
|
| 144 |
+
[I][ Run][ 808]: ttft: 1168.56 ms
|
| 145 |
+
<think>
|
| 146 |
+
|
| 147 |
+
</think>
|
| 148 |
+
|
| 149 |
+
I am Qwen, a large-scale language model developed by Alibaba Cloud. I can answer questions, create content,
|
| 150 |
+
and help with a variety of tasks. How can I assist you today?
|
| 151 |
+
|
| 152 |
+
[N][ Run][ 922]: hit eos,avg 4.22 token/s
|
| 153 |
+
|
| 154 |
+
[I][ GetKVCache][ 499]: precompute_len:106, remaining:1942
|
| 155 |
+
prompt >> q
|
| 156 |
+
root@ax650:/mnt/qtang/llm-test/qwen3-4b#
|
| 157 |
+
```
|
| 158 |
+
|
| 159 |
+
#### Inference with M.2 Accelerator card
|
| 160 |
+
|
| 161 |
+
[What is M.2 Accelerator card?](https://axcl-docs.readthedocs.io/zh-cn/latest/doc_guide_hardware.html), Show this DEMO based on Raspberry PI 5.
|
| 162 |
+
|
| 163 |
+
```
|
| 164 |
+
(base) axera@raspberrypi:~/samples/qwen3-4b $ ./run_qwen3_4b_int8_ctx_axcl_aarch64.sh
|
| 165 |
+
[I][ Init][ 136]: LLM init start
|
| 166 |
+
[I][ Init][ 34]: connect http://127.0.0.1:12345 ok
|
| 167 |
+
[I][ Init][ 57]: uid: a5b1e427-0cdf-4da6-b3a7-f5e0517da0bb
|
| 168 |
+
bos_id: -1, eos_id: 151645
|
| 169 |
+
2% | â–ˆ | 1 / 39 [0.99s<38.45s, 1.01 count/s] tokenizer init ok
|
| 170 |
+
[I][ Init][ 45]: LLaMaEmbedSelector use mmap
|
| 171 |
+
5% | ██ | 2 / 39 [0.99s<19.23s, 2.03 count/s] embed_selector init ok
|
| 172 |
+
[I][ run][ 30]: AXCLWorker start with devid 0
|
| 173 |
+
100% | ████████████████████████████████ | 39 / 39 [133.16s<133.16s, 0.29 count/s] init post axmodel ok,remain_cmm(691 MB)(1096 MB)000000000
|
| 174 |
+
[I][ Init][ 237]: max_token_len : 2559
|
| 175 |
+
[I][ Init][ 240]: kv_cache_size : 1024, kv_cache_num: 2559
|
| 176 |
+
[I][ Init][ 248]: prefill_token_num : 128
|
| 177 |
+
[I][ Init][ 252]: grp: 1, prefill_max_token_num : 1
|
| 178 |
+
[I][ Init][ 252]: grp: 2, prefill_max_token_num : 256
|
| 179 |
+
[I][ Init][ 252]: grp: 3, prefill_max_token_num : 512
|
| 180 |
+
[I][ Init][ 252]: grp: 4, prefill_max_token_num : 1024
|
| 181 |
+
[I][ Init][ 252]: grp: 5, prefill_max_token_num : 1536
|
| 182 |
+
[I][ Init][ 252]: grp: 6, prefill_max_token_num : 2048
|
| 183 |
+
[I][ Init][ 256]: prefill_max_token_num : 2048
|
| 184 |
+
________________________
|
| 185 |
+
| ID| remain cmm(MB)|
|
| 186 |
+
========================
|
| 187 |
+
| 0| 691|
|
| 188 |
+
¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯¯
|
| 189 |
+
[I][ load_config][ 282]: load config:
|
| 190 |
+
{
|
| 191 |
+
"enable_repetition_penalty": false,
|
| 192 |
+
"enable_temperature": false,
|
| 193 |
+
"enable_top_k_sampling": true,
|
| 194 |
+
"enable_top_p_sampling": false,
|
| 195 |
+
"penalty_window": 20,
|
| 196 |
+
"repetition_penalty": 1.2,
|
| 197 |
+
"temperature": 0.9,
|
| 198 |
+
"top_k": 1,
|
| 199 |
+
"top_p": 0.8
|
| 200 |
+
}
|
| 201 |
+
|
| 202 |
+
[I][ Init][ 279]: LLM init ok
|
| 203 |
+
Type "q" to exit, Ctrl+c to stop current running
|
| 204 |
+
[I][ GenerateKVCachePrefill][ 335]: input token num : 21, prefill_split_num : 1 prefill_grpid : 2
|
| 205 |
+
[I][ GenerateKVCachePrefill][ 372]: input_num_token:21
|
| 206 |
+
[I][ main][ 236]: precompute_len: 21
|
| 207 |
+
[I][ main][ 237]: system_prompt: You are Qwen, created by Alibaba Cloud. You are a helpful assistant.
|
| 208 |
+
prompt >> who are you
|
| 209 |
+
[I][ SetKVCache][ 628]: prefill_grpid:2 kv_cache_num:256 precompute_len:21 input_num_token:27
|
| 210 |
+
[I][ SetKVCache][ 631]: current prefill_max_token_num:1920
|
| 211 |
+
[I][ Run][ 869]: input token num : 27, prefill_split_num : 1
|
| 212 |
+
[I][ Run][ 901]: input_num_token:27
|
| 213 |
+
[I][ Run][1030]: ttft: 1339.01 ms
|
| 214 |
+
<think>
|
| 215 |
+
|
| 216 |
+
</think>
|
| 217 |
+
|
| 218 |
+
I am Qwen, a large-scale language model developed by Alibaba Cloud. I can answer questions,
|
| 219 |
+
create content, and help with a variety of tasks. What can I assist you with?
|
| 220 |
+
|
| 221 |
+
[N][ Run][1182]: hit eos,avg 3.65 token/s
|
| 222 |
+
|
| 223 |
+
[I][ GetKVCache][ 597]: precompute_len:90, remaining:1958
|
| 224 |
+
prompt >> q
|
| 225 |
+
[I][ run][ 80]: AXCLWorker exit with devid 0
|
| 226 |
+
(base) axera@raspberrypi:~/samples/qwen3-4b $
|
| 227 |
+
(base) axera@raspberrypi:~ $ axcl-smi
|
| 228 |
+
+------------------------------------------------------------------------------------------------+
|
| 229 |
+
| AXCL-SMI V3.4.0_20250423020139 Driver V3.4.0_20250423020139 |
|
| 230 |
+
+-----------------------------------------+--------------+---------------------------------------+
|
| 231 |
+
| Card Name Firmware | Bus-Id | Memory-Usage |
|
| 232 |
+
| Fan Temp Pwr:Usage/Cap | CPU NPU | CMM-Usage |
|
| 233 |
+
|=========================================+==============+=======================================|
|
| 234 |
+
| 0 AX650N V3.4.0 | 0000:01:00.0 | 193 MiB / 945 MiB |
|
| 235 |
+
| -- 37C -- / -- | 2% 0% | 6348 MiB / 7040 MiB |
|
| 236 |
+
+-----------------------------------------+--------------+---------------------------------------+
|
| 237 |
+
|
| 238 |
+
+------------------------------------------------------------------------------------------------+
|
| 239 |
+
| Processes: |
|
| 240 |
+
| Card PID Process Name NPU Memory Usage |
|
| 241 |
+
|================================================================================================|
|
| 242 |
+
| 0 84643 /home/axera/samples/qwen3-4b/main_axcl_aarch64 4894032 KiB |
|
| 243 |
+
+------------------------------------------------------------------------------------------------+
|
| 244 |
+
(base) axera@raspberrypi:~ $
|
| 245 |
+
```
|
config.json
ADDED
|
File without changes
|
main_api_ax650
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b917bd4b79e702c0cc9b416cc5c681144563a5e3182e8cf485ae74e20ba79a32
|
| 3 |
+
size 1134952
|
main_api_axcl_aarch64
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3558444d93ce7459db247421128aca6ba3fdbde5932eff6aea66653fa7370cdf
|
| 3 |
+
size 1816560
|
main_api_axcl_x86
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d1628c4b204088badd37a3a29a36a5d981c124bb0f74280bc2553836d64153d9
|
| 3 |
+
size 1913024
|
main_ax650
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ddeb85a2fff13f2e009e1d9ac7ea9bbddbede20eab12e48a399bf6bf3fdaf7af
|
| 3 |
+
size 1023280
|
main_axcl_aarch64
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f9f1a1ca329b47f70840e8b6d104ce8248a82326aa2402bccb31144590a8fb2
|
| 3 |
+
size 1725008
|
main_axcl_x86
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8b200b6dac4a7019abb8f13e229cca5096cd1f70a5faf0a554b50b00f0b7e41
|
| 3 |
+
size 1844336
|
post_config.json
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"enable_temperature" : false,
|
| 3 |
+
"temperature" : 0.9,
|
| 4 |
+
|
| 5 |
+
"enable_repetition_penalty" : false,
|
| 6 |
+
"repetition_penalty" : 1.2,
|
| 7 |
+
"penalty_window" : 20,
|
| 8 |
+
|
| 9 |
+
"enable_top_p_sampling" : false,
|
| 10 |
+
"top_p" : 0.8,
|
| 11 |
+
|
| 12 |
+
"enable_top_k_sampling" : true,
|
| 13 |
+
"top_k" : 1
|
| 14 |
+
}
|
qwen2.5_tokenizer/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
qwen2.5_tokenizer/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|