Update README.md
Browse files
README.md
CHANGED
|
@@ -84,7 +84,7 @@ This model can be deployed efficiently using the [vLLM](https://docs.vllm.ai/en/
|
|
| 84 |
from vllm import LLM, SamplingParams
|
| 85 |
from transformers import AutoTokenizer
|
| 86 |
|
| 87 |
-
model_id = "
|
| 88 |
number_gpus = 1
|
| 89 |
max_model_len = 8192
|
| 90 |
|
|
@@ -614,7 +614,7 @@ The results were obtained using the following commands:
|
|
| 614 |
```
|
| 615 |
lm_eval \
|
| 616 |
--model vllm \
|
| 617 |
-
--model_args pretrained="
|
| 618 |
--tasks mmlu_llama_3.1_instruct \
|
| 619 |
--fewshot_as_multiturn \
|
| 620 |
--apply_chat_template \
|
|
@@ -626,7 +626,7 @@ lm_eval \
|
|
| 626 |
```
|
| 627 |
lm_eval \
|
| 628 |
--model vllm \
|
| 629 |
-
--model_args pretrained="
|
| 630 |
--tasks mmlu_cot_0shot_llama_3.1_instruct \
|
| 631 |
--apply_chat_template \
|
| 632 |
--num_fewshot 0 \
|
|
@@ -637,7 +637,7 @@ lm_eval \
|
|
| 637 |
```
|
| 638 |
lm_eval \
|
| 639 |
--model vllm \
|
| 640 |
-
--model_args pretrained="
|
| 641 |
--tasks arc_challenge_llama_3.1_instruct \
|
| 642 |
--apply_chat_template \
|
| 643 |
--num_fewshot 0 \
|
|
@@ -648,7 +648,7 @@ lm_eval \
|
|
| 648 |
```
|
| 649 |
lm_eval \
|
| 650 |
--model vllm \
|
| 651 |
-
--model_args pretrained="
|
| 652 |
--tasks gsm8k_cot_llama_3.1_instruct \
|
| 653 |
--fewshot_as_multiturn \
|
| 654 |
--apply_chat_template \
|
|
@@ -660,7 +660,7 @@ lm_eval \
|
|
| 660 |
```
|
| 661 |
lm_eval \
|
| 662 |
--model vllm \
|
| 663 |
-
--model_args pretrained="
|
| 664 |
--tasks hellaswag \
|
| 665 |
--num_fewshot 10 \
|
| 666 |
--batch_size auto
|
|
@@ -670,7 +670,7 @@ lm_eval \
|
|
| 670 |
```
|
| 671 |
lm_eval \
|
| 672 |
--model vllm \
|
| 673 |
-
--model_args pretrained="
|
| 674 |
--tasks winogrande \
|
| 675 |
--num_fewshot 5 \
|
| 676 |
--batch_size auto
|
|
@@ -680,7 +680,7 @@ lm_eval \
|
|
| 680 |
```
|
| 681 |
lm_eval \
|
| 682 |
--model vllm \
|
| 683 |
-
--model_args pretrained="
|
| 684 |
--tasks truthfulqa \
|
| 685 |
--num_fewshot 0 \
|
| 686 |
--batch_size auto
|
|
@@ -690,7 +690,7 @@ lm_eval \
|
|
| 690 |
```
|
| 691 |
lm_eval \
|
| 692 |
--model vllm \
|
| 693 |
-
--model_args pretrained="
|
| 694 |
--apply_chat_template \
|
| 695 |
--fewshot_as_multiturn \
|
| 696 |
--tasks leaderboard \
|
|
@@ -701,7 +701,7 @@ lm_eval \
|
|
| 701 |
```
|
| 702 |
lm_eval \
|
| 703 |
--model vllm \
|
| 704 |
-
--model_args pretrained="
|
| 705 |
--tasks mmlu_pt_llama_3.1_instruct \
|
| 706 |
--fewshot_as_multiturn \
|
| 707 |
--apply_chat_template \
|
|
@@ -713,7 +713,7 @@ lm_eval \
|
|
| 713 |
```
|
| 714 |
lm_eval \
|
| 715 |
--model vllm \
|
| 716 |
-
--model_args pretrained="
|
| 717 |
--tasks mmlu_es_llama_3.1_instruct \
|
| 718 |
--fewshot_as_multiturn \
|
| 719 |
--apply_chat_template \
|
|
@@ -725,7 +725,7 @@ lm_eval \
|
|
| 725 |
```
|
| 726 |
lm_eval \
|
| 727 |
--model vllm \
|
| 728 |
-
--model_args pretrained="
|
| 729 |
--tasks mmlu_it_llama_3.1_instruct \
|
| 730 |
--fewshot_as_multiturn \
|
| 731 |
--apply_chat_template \
|
|
@@ -737,7 +737,7 @@ lm_eval \
|
|
| 737 |
```
|
| 738 |
lm_eval \
|
| 739 |
--model vllm \
|
| 740 |
-
--model_args pretrained="
|
| 741 |
--tasks mmlu_de_llama_3.1_instruct \
|
| 742 |
--fewshot_as_multiturn \
|
| 743 |
--apply_chat_template \
|
|
@@ -749,7 +749,7 @@ lm_eval \
|
|
| 749 |
```
|
| 750 |
lm_eval \
|
| 751 |
--model vllm \
|
| 752 |
-
--model_args pretrained="
|
| 753 |
--tasks mmlu_fr_llama_3.1_instruct \
|
| 754 |
--fewshot_as_multiturn \
|
| 755 |
--apply_chat_template \
|
|
@@ -761,7 +761,7 @@ lm_eval \
|
|
| 761 |
```
|
| 762 |
lm_eval \
|
| 763 |
--model vllm \
|
| 764 |
-
--model_args pretrained="
|
| 765 |
--tasks mmlu_hi_llama_3.1_instruct \
|
| 766 |
--fewshot_as_multiturn \
|
| 767 |
--apply_chat_template \
|
|
@@ -773,7 +773,7 @@ lm_eval \
|
|
| 773 |
```
|
| 774 |
lm_eval \
|
| 775 |
--model vllm \
|
| 776 |
-
--model_args pretrained="
|
| 777 |
--tasks mmlu_th_llama_3.1_instruct \
|
| 778 |
--fewshot_as_multiturn \
|
| 779 |
--apply_chat_template \
|
|
@@ -785,7 +785,7 @@ lm_eval \
|
|
| 785 |
##### Generation
|
| 786 |
```
|
| 787 |
python3 codegen/generate.py \
|
| 788 |
-
--model
|
| 789 |
--bs 16 \
|
| 790 |
--temperature 0.2 \
|
| 791 |
--n_samples 50 \
|
|
|
|
| 84 |
from vllm import LLM, SamplingParams
|
| 85 |
from transformers import AutoTokenizer
|
| 86 |
|
| 87 |
+
model_id = "RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16"
|
| 88 |
number_gpus = 1
|
| 89 |
max_model_len = 8192
|
| 90 |
|
|
|
|
| 614 |
```
|
| 615 |
lm_eval \
|
| 616 |
--model vllm \
|
| 617 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
|
| 618 |
--tasks mmlu_llama_3.1_instruct \
|
| 619 |
--fewshot_as_multiturn \
|
| 620 |
--apply_chat_template \
|
|
|
|
| 626 |
```
|
| 627 |
lm_eval \
|
| 628 |
--model vllm \
|
| 629 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=4064,max_gen_toks=1024,tensor_parallel_size=1 \
|
| 630 |
--tasks mmlu_cot_0shot_llama_3.1_instruct \
|
| 631 |
--apply_chat_template \
|
| 632 |
--num_fewshot 0 \
|
|
|
|
| 637 |
```
|
| 638 |
lm_eval \
|
| 639 |
--model vllm \
|
| 640 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3940,max_gen_toks=100,tensor_parallel_size=1 \
|
| 641 |
--tasks arc_challenge_llama_3.1_instruct \
|
| 642 |
--apply_chat_template \
|
| 643 |
--num_fewshot 0 \
|
|
|
|
| 648 |
```
|
| 649 |
lm_eval \
|
| 650 |
--model vllm \
|
| 651 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=4096,max_gen_toks=1024,tensor_parallel_size=1 \
|
| 652 |
--tasks gsm8k_cot_llama_3.1_instruct \
|
| 653 |
--fewshot_as_multiturn \
|
| 654 |
--apply_chat_template \
|
|
|
|
| 660 |
```
|
| 661 |
lm_eval \
|
| 662 |
--model vllm \
|
| 663 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
|
| 664 |
--tasks hellaswag \
|
| 665 |
--num_fewshot 10 \
|
| 666 |
--batch_size auto
|
|
|
|
| 670 |
```
|
| 671 |
lm_eval \
|
| 672 |
--model vllm \
|
| 673 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
|
| 674 |
--tasks winogrande \
|
| 675 |
--num_fewshot 5 \
|
| 676 |
--batch_size auto
|
|
|
|
| 680 |
```
|
| 681 |
lm_eval \
|
| 682 |
--model vllm \
|
| 683 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
|
| 684 |
--tasks truthfulqa \
|
| 685 |
--num_fewshot 0 \
|
| 686 |
--batch_size auto
|
|
|
|
| 690 |
```
|
| 691 |
lm_eval \
|
| 692 |
--model vllm \
|
| 693 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=4096,tensor_parallel_size=1,enable_chunked_prefill=True \
|
| 694 |
--apply_chat_template \
|
| 695 |
--fewshot_as_multiturn \
|
| 696 |
--tasks leaderboard \
|
|
|
|
| 701 |
```
|
| 702 |
lm_eval \
|
| 703 |
--model vllm \
|
| 704 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
|
| 705 |
--tasks mmlu_pt_llama_3.1_instruct \
|
| 706 |
--fewshot_as_multiturn \
|
| 707 |
--apply_chat_template \
|
|
|
|
| 713 |
```
|
| 714 |
lm_eval \
|
| 715 |
--model vllm \
|
| 716 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
|
| 717 |
--tasks mmlu_es_llama_3.1_instruct \
|
| 718 |
--fewshot_as_multiturn \
|
| 719 |
--apply_chat_template \
|
|
|
|
| 725 |
```
|
| 726 |
lm_eval \
|
| 727 |
--model vllm \
|
| 728 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
|
| 729 |
--tasks mmlu_it_llama_3.1_instruct \
|
| 730 |
--fewshot_as_multiturn \
|
| 731 |
--apply_chat_template \
|
|
|
|
| 737 |
```
|
| 738 |
lm_eval \
|
| 739 |
--model vllm \
|
| 740 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
|
| 741 |
--tasks mmlu_de_llama_3.1_instruct \
|
| 742 |
--fewshot_as_multiturn \
|
| 743 |
--apply_chat_template \
|
|
|
|
| 749 |
```
|
| 750 |
lm_eval \
|
| 751 |
--model vllm \
|
| 752 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
|
| 753 |
--tasks mmlu_fr_llama_3.1_instruct \
|
| 754 |
--fewshot_as_multiturn \
|
| 755 |
--apply_chat_template \
|
|
|
|
| 761 |
```
|
| 762 |
lm_eval \
|
| 763 |
--model vllm \
|
| 764 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
|
| 765 |
--tasks mmlu_hi_llama_3.1_instruct \
|
| 766 |
--fewshot_as_multiturn \
|
| 767 |
--apply_chat_template \
|
|
|
|
| 773 |
```
|
| 774 |
lm_eval \
|
| 775 |
--model vllm \
|
| 776 |
+
--model_args pretrained="RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16",dtype=auto,max_model_len=3850,max_gen_toks=10,tensor_parallel_size=1 \
|
| 777 |
--tasks mmlu_th_llama_3.1_instruct \
|
| 778 |
--fewshot_as_multiturn \
|
| 779 |
--apply_chat_template \
|
|
|
|
| 785 |
##### Generation
|
| 786 |
```
|
| 787 |
python3 codegen/generate.py \
|
| 788 |
+
--model RedHatAI/Meta-Llama-3.1-8B-Instruct-quantized.w4a16 \
|
| 789 |
--bs 16 \
|
| 790 |
--temperature 0.2 \
|
| 791 |
--n_samples 50 \
|