wli1995 committed on
Commit
41ea465
·
verified ·
1 Parent(s): 52491bb

Upload folder using huggingface_hub

Browse files
Files changed (33) hide show
  1. post_config.json +14 -0
  2. qwen2.5_7b_gptq_int4_tp/model.embed_tokens.weight.bfloat16.bin +3 -0
  3. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l0_together.tar +3 -0
  4. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l10_together.tar +3 -0
  5. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l11_together.tar +3 -0
  6. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l12_together.tar +3 -0
  7. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l13_together.tar +3 -0
  8. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l14_together.tar +3 -0
  9. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l15_together.tar +3 -0
  10. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l16_together.tar +3 -0
  11. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l17_together.tar +3 -0
  12. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l18_together.tar +3 -0
  13. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l19_together.tar +3 -0
  14. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l1_together.tar +3 -0
  15. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l20_together.tar +3 -0
  16. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l21_together.tar +3 -0
  17. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l22_together.tar +3 -0
  18. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l23_together.tar +3 -0
  19. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l24_together.tar +3 -0
  20. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l25_together.tar +3 -0
  21. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l26_together.tar +3 -0
  22. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l27_together.tar +3 -0
  23. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l2_together.tar +3 -0
  24. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l3_together.tar +3 -0
  25. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l4_together.tar +3 -0
  26. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l5_together.tar +3 -0
  27. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l6_together.tar +3 -0
  28. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l7_together.tar +3 -0
  29. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l8_together.tar +3 -0
  30. qwen2.5_7b_gptq_int4_tp/qwen2_p128_l9_together.tar +3 -0
  31. qwen2.5_7b_gptq_int4_tp/qwen2_post.tar +3 -0
  32. run_qwen2.5_7B_axcl_context_tp.sh +1 -1
  33. run_qwen2.5_7B_int4_axcl_context_tp.sh +12 -0
post_config.json ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "enable_temperature" : true,
3
+ "temperature" : 0.9,
4
+
5
+ "enable_repetition_penalty" : false,
6
+ "repetition_penalty" : 1.2,
7
+ "penalty_window" : 20,
8
+
9
+ "enable_top_p_sampling" : false,
10
+ "top_p" : 0.8,
11
+
12
+ "enable_top_k_sampling" : true,
13
+ "top_k" : 10
14
+ }
qwen2.5_7b_gptq_int4_tp/model.embed_tokens.weight.bfloat16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78ae605b310cf526d54ba22b27796f44010fe28198cb6e3388efb62e09cc2ac4
3
+ size 1089994752
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l0_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bec13da879ecc0245bd3e78bd70f4bd0b1b8e950ec21647a255ed37284fe01db
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l10_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08e8d413098d550eb82c4fd582cee765790c05aae7ba9ff993a773e5802d3aa4
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l11_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f592a3b75220a90faa002b7dacd6c538841526198c23dec2049e7495e721781
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l12_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:746b88af3ffda9d635cf473aad1ceb84f3a3ba2ac56b6dc0bf3eba3194eb13ab
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l13_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e70aa5e52c9e721932959ee4bfc5cb1592f076b857cb763aab2942824d34e086
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l14_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:381a044c020d976eef5b6bd19374c2b0af0f4894ac17c16ffaba6a404cc1e5dc
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l15_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce7e2402ee944aab173387e3b1a01b749166527304e9071f7575c935049b48cd
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l16_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c0f4fb4ba18c0f81de726304cc40d1331c67f25d414bfc670ff810b569e0daa
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l17_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a384ac62198d4d5ab2b4c9ba5c066c90e0bf404cafe2bda761ec777a5d2c620
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l18_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d9a2f6718332df825b98e63d4b3902875f5345de35c859d1f241a3cb774fd34f
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l19_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:085e1a1805fe2fbfd2558fbe196bcaa70f6ef0fcdd580d4167742bc5b610a353
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l1_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94cce39ddb7e1f6debcc964bf656e57f9574697f9f6544d52f163c279efee041
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l20_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b6381d2eea8c9b08140f3c3f4381d94d7cf25d665fa65749e1e90b47ff2031
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l21_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6949f997ea0166a3fddd2c5e8d41ebdc95a1f4c87098b4bce67a4b41c137d11
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l22_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1d9f39dff0297654946350da14a360f8004cea1a48891aaec43ec3af07a1f0b
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l23_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c35e97a1bac4648a0f3f06fd43821975618f78591fdb9173734b2481e5853f4b
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l24_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:660b302c2293bd24396037222962476350fdbe4d1f34cf012b39c18e41c673a8
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l25_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef62b4cff56884d850e8b4d9213f3b7f06bb246e26037bcc1d81ef93194783ae
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l26_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a136a52e485ea5a954964b3c19ecc67be509c31e6bb24e3ecc72efae6dbc711
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l27_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b0f5cc4e5ee363e2604fad48a118bd186fb4ae89a96163cf4bc05c5bda505ea
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l2_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b1871f7803325ae205cddec2812a32a09a20c026f0e65e8769c55ee5e73e243f
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l3_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7f2a17d5c2d3e7fcac30aca408790fc10eb5177795b43e8882ad5b35ae5a1aa
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l4_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dae05da5e53319fde901838dfe888c27ca87c73c5707e8ec37e798a88fdf681
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l5_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2fa32b26b08bd98a9d0ed41f542173f2694284016945925fbd0c095ba53f7f0
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l6_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d7f30c98d96b251b505ca4c6cdb997d5aec895b2fea2a31979b85408e3b669c
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l7_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fafc9005d882634d647259e99e0afba1712309bab5c80d8be98fe88fad1d34f
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l8_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fad6b6ab0d12b60b5f611a189f76b3515ee42c1b0a1b4d0089ec27258823629
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_p128_l9_together.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce272e9aa3e595f7745866f64bf3434bb3139807dad0af45b4d5b308fe9d5c6b
3
+ size 178534400
qwen2.5_7b_gptq_int4_tp/qwen2_post.tar ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fa3f48596030201cc816ad6ac1f8f730a3bba2b1ae4b2f211372e388368c3a0
3
+ size 593203200
run_qwen2.5_7B_axcl_context_tp.sh CHANGED
@@ -9,4 +9,4 @@
9
  --tokens_embed_size 3584 \
10
  --use_mmap_load_embed 1 \
11
  --live_print 1 \
12
- --devices 0,1,2,3
 
9
  --tokens_embed_size 3584 \
10
  --use_mmap_load_embed 1 \
11
  --live_print 1 \
12
+ --devices 4,5,6,7
run_qwen2.5_7B_int4_axcl_context_tp.sh ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ./main_tp_x86 \
2
+ --template_filename_axmodel "qwen2.5_7b_gptq_int4_tp/qwen2_p128_l%d_together.tar" \
3
+ --axmodel_num 28 \
4
+ --tokenizer_type 2 \
5
+ --url_tokenizer_model "http://127.0.0.1:12345" \
6
+ --filename_post_axmodel "qwen2.5_7b_gptq_int4_tp/qwen2_post.tar" \
7
+ --filename_tokens_embed "qwen2.5_7b_gptq_int4_tp/model.embed_tokens.weight.bfloat16.bin" \
8
+ --tokens_embed_num 152064 \
9
+ --tokens_embed_size 3584 \
10
+ --use_mmap_load_embed 1 \
11
+ --live_print 1 \
12
+ --devices 4,5,6,7