msj19 commited on
Commit
4883124
·
verified ·
1 Parent(s): ea68eab

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes. See the raw diff for the full change set.
Files changed (50) hide show
  1. opencompass/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py +15 -0
  2. opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_llama_70b.py +20 -0
  3. opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_llama_8b.py +20 -0
  4. opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_qwen_14b.py +20 -0
  5. opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_qwen_1_5b.py +20 -0
  6. opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_qwen_32b.py +20 -0
  7. opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_qwen_7b.py +20 -0
  8. opencompass/configs/models/deepseek/lmdeploy_deepseek_series.py +23 -0
  9. opencompass/configs/models/deepseek/lmdeploy_deepseek_v2.py +18 -0
  10. opencompass/configs/models/deepseek/lmdeploy_deepseek_v2_5.py +21 -0
  11. opencompass/configs/models/deepseek/lmdeploy_deepseek_v2_5_1210.py +21 -0
  12. opencompass/configs/models/deepseek/lmdeploy_deepseek_v2_lite.py +20 -0
  13. opencompass/configs/models/deepseek/vllm_deepseek_67b_chat.py +13 -0
  14. opencompass/configs/models/deepseek/vllm_deepseek_7b_chat.py +13 -0
  15. opencompass/configs/models/deepseek/vllm_deepseek_moe_16b_base.py +15 -0
  16. opencompass/configs/models/deepseek/vllm_deepseek_moe_16b_chat.py +13 -0
  17. opencompass/configs/models/deltanet/delta_net-1.3B.py +27 -0
  18. opencompass/configs/models/deltanet/gdn-1.3B.py +24 -0
  19. opencompass/configs/models/deltanet/gla-1.3B.py +28 -0
  20. opencompass/configs/models/deltanet/hgrn2-1.3B.py +24 -0
  21. opencompass/configs/models/deltanet/mask_deltanet-1.3B.py +24 -0
  22. opencompass/configs/models/deltanet/mask_gdn-1.3B.py +24 -0
  23. opencompass/configs/models/deltanet/mask_gdn_t-1.3B.py +24 -0
  24. opencompass/configs/models/deltanet/retnet-1.3B.py +24 -0
  25. opencompass/configs/models/deltanet/transformer++-1.3B.py +28 -0
  26. opencompass/configs/models/falcon/hf_falcon_40b.py +12 -0
  27. opencompass/configs/models/falcon/hf_falcon_7b.py +12 -0
  28. opencompass/configs/models/gemini/gemini_1_5_flash.py +22 -0
  29. opencompass/configs/models/gemini/gemini_1_5_pro.py +22 -0
  30. opencompass/configs/models/gemini/gemini_pro.py +22 -0
  31. opencompass/configs/models/gemma/hf_gemma2_27b.py +15 -0
  32. opencompass/configs/models/gemma/hf_gemma2_27b_it.py +16 -0
  33. opencompass/configs/models/gemma/hf_gemma2_2b.py +15 -0
  34. opencompass/configs/models/gemma/hf_gemma2_2b_it.py +16 -0
  35. opencompass/configs/models/gemma/hf_gemma2_9b.py +15 -0
  36. opencompass/configs/models/gemma/hf_gemma2_9b_it.py +16 -0
  37. opencompass/configs/models/gemma/hf_gemma_2b.py +12 -0
  38. opencompass/configs/models/gemma/hf_gemma_2b_it.py +12 -0
  39. opencompass/configs/models/gemma/hf_gemma_7b.py +12 -0
  40. opencompass/configs/models/gemma/hf_gemma_7b_it.py +12 -0
  41. opencompass/configs/models/gemma/lmdeploy_gemma_27b.py +17 -0
  42. opencompass/configs/models/gemma/lmdeploy_gemma_27b_it.py +17 -0
  43. opencompass/configs/models/gemma/lmdeploy_gemma_9b.py +17 -0
  44. opencompass/configs/models/gemma/lmdeploy_gemma_9b_it.py +17 -0
  45. opencompass/configs/models/gemma/vllm_gemma_2b.py +15 -0
  46. opencompass/configs/models/gemma/vllm_gemma_2b_it.py +14 -0
  47. opencompass/configs/models/gemma/vllm_gemma_3_12b_it.py +16 -0
  48. opencompass/configs/models/gemma/vllm_gemma_3_27b_it.py +16 -0
  49. opencompass/configs/models/gemma/vllm_gemma_3_4b_it.py +17 -0
  50. opencompass/configs/models/gemma/vllm_gemma_7b.py +15 -0
opencompass/configs/models/deepseek/lmdeploy_deepseek_7b_chat.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from opencompass.models import TurboMindModelwithChatTemplate
2
+
3
+ models = [
4
+ dict(
5
+ type=TurboMindModelwithChatTemplate,
6
+ abbr='deepseek-7b-chat-turbomind',
7
+ path='deepseek-ai/deepseek-llm-7b-chat',
8
+ engine_config=dict(max_batch_size=16, tp=1),
9
+ gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
10
+ max_seq_len=8192,
11
+ max_out_len=2048,
12
+ batch_size=16,
13
+ run_cfg=dict(num_gpus=1),
14
+ )
15
+ ]
opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_llama_70b.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for DeepSeek-R1-Distill-Llama-70B."""
from opencompass.models import TurboMindModelwithChatTemplate
from opencompass.utils.text_postprocessors import extract_non_reasoning_content

# Reasoning model: long 32k budget for both context and generation, and the
# <think> block is stripped from predictions before evaluation.
_SEQ_LEN = 32768

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='deepseek-r1-distill-llama-70b-turbomind',
        path='deepseek-ai/DeepSeek-R1-Distill-Llama-70B',
        engine_config=dict(session_len=_SEQ_LEN, max_batch_size=8, tp=8),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9,
                        max_new_tokens=_SEQ_LEN),
        max_seq_len=_SEQ_LEN,
        max_out_len=_SEQ_LEN,
        batch_size=8,
        run_cfg=dict(num_gpus=8),
        pred_postprocessor=dict(type=extract_non_reasoning_content),
    ),
]
opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_llama_8b.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for DeepSeek-R1-Distill-Llama-8B."""
from opencompass.models import TurboMindModelwithChatTemplate
from opencompass.utils.text_postprocessors import extract_non_reasoning_content

# Reasoning model: 32k context/output; the <think> block is stripped from
# predictions before evaluation.
_SEQ_LEN = 32768

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='deepseek-r1-distill-llama-8b-turbomind',
        path='deepseek-ai/DeepSeek-R1-Distill-Llama-8B',
        engine_config=dict(session_len=_SEQ_LEN, max_batch_size=8, tp=1),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9,
                        max_new_tokens=_SEQ_LEN),
        max_seq_len=_SEQ_LEN,
        max_out_len=_SEQ_LEN,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
        pred_postprocessor=dict(type=extract_non_reasoning_content),
    ),
]
opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_qwen_14b.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for DeepSeek-R1-Distill-Qwen-14B."""
from opencompass.models import TurboMindModelwithChatTemplate
from opencompass.utils.text_postprocessors import extract_non_reasoning_content

# Reasoning model: 32k context/output; the <think> block is stripped from
# predictions before evaluation.
_SEQ_LEN = 32768

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='deepseek-r1-distill-qwen-14b-turbomind',
        path='deepseek-ai/DeepSeek-R1-Distill-Qwen-14B',
        engine_config=dict(session_len=_SEQ_LEN, max_batch_size=16, tp=2),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9,
                        max_new_tokens=_SEQ_LEN),
        max_seq_len=_SEQ_LEN,
        max_out_len=_SEQ_LEN,
        batch_size=16,
        run_cfg=dict(num_gpus=2),
        pred_postprocessor=dict(type=extract_non_reasoning_content),
    ),
]
opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_qwen_1_5b.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for DeepSeek-R1-Distill-Qwen-1.5B."""
from opencompass.models import TurboMindModelwithChatTemplate
from opencompass.utils.text_postprocessors import extract_non_reasoning_content

# Reasoning model: 32k context/output; the <think> block is stripped from
# predictions before evaluation.
_SEQ_LEN = 32768

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='deepseek-r1-distill-qwen-1_5b-turbomind',
        path='deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B',
        engine_config=dict(session_len=_SEQ_LEN, max_batch_size=16, tp=1),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9,
                        max_new_tokens=_SEQ_LEN),
        max_seq_len=_SEQ_LEN,
        max_out_len=_SEQ_LEN,
        batch_size=16,
        run_cfg=dict(num_gpus=1),
        pred_postprocessor=dict(type=extract_non_reasoning_content),
    ),
]
opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_qwen_32b.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for DeepSeek-R1-Distill-Qwen-32B."""
from opencompass.models import TurboMindModelwithChatTemplate
from opencompass.utils.text_postprocessors import extract_non_reasoning_content

# Reasoning model: 32k context/output; the <think> block is stripped from
# predictions before evaluation.
_SEQ_LEN = 32768

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='deepseek-r1-distill-qwen-32b-turbomind',
        path='deepseek-ai/DeepSeek-R1-Distill-Qwen-32B',
        engine_config=dict(session_len=_SEQ_LEN, max_batch_size=16, tp=4),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9,
                        max_new_tokens=_SEQ_LEN),
        max_seq_len=_SEQ_LEN,
        max_out_len=_SEQ_LEN,
        batch_size=16,
        run_cfg=dict(num_gpus=4),
        pred_postprocessor=dict(type=extract_non_reasoning_content),
    ),
]
opencompass/configs/models/deepseek/lmdeploy_deepseek_r1_distill_qwen_7b.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for DeepSeek-R1-Distill-Qwen-7B."""
from opencompass.models import TurboMindModelwithChatTemplate
from opencompass.utils.text_postprocessors import extract_non_reasoning_content

# Reasoning model: 32k context/output; the <think> block is stripped from
# predictions before evaluation.
_SEQ_LEN = 32768

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='deepseek-r1-distill-qwen-7b-turbomind',
        path='deepseek-ai/DeepSeek-R1-Distill-Qwen-7B',
        engine_config=dict(session_len=_SEQ_LEN, max_batch_size=8, tp=1),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9,
                        max_new_tokens=_SEQ_LEN),
        max_seq_len=_SEQ_LEN,
        max_out_len=_SEQ_LEN,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
        pred_postprocessor=dict(type=extract_non_reasoning_content),
    ),
]
opencompass/configs/models/deepseek/lmdeploy_deepseek_series.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (PyTorch engine) configs for the DeepSeek-LLM base series."""
from opencompass.models import LmdeployPytorchModel

# (abbr, HF path, tensor-parallel degree / GPUs per model)
settings = [
    ('deepseek-7b-base-hf', 'deepseek-ai/deepseek-llm-7b-base', 1),
    ('deepseek-67b-base-hf', 'deepseek-ai/deepseek-llm-67b-base', 4),
]

models = [
    dict(
        type=LmdeployPytorchModel,
        abbr=abbr,
        path=path,
        engine_config=dict(session_len=2048, max_batch_size=16, tp=num_gpus),
        # top_k=1 keeps decoding deterministic regardless of temperature/top_p.
        gen_config=dict(top_k=1, temperature=1, top_p=0.9, max_new_tokens=1024),
        max_out_len=1024,
        max_seq_len=2048,
        batch_size=16,
        concurrency=16,
        run_cfg=dict(num_gpus=num_gpus),
    )
    for abbr, path, num_gpus in settings
]
opencompass/configs/models/deepseek/lmdeploy_deepseek_v2.py ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# flake8: noqa
"""LMDeploy (TurboMind) config for DeepSeek-V2 (236B MoE)."""
from mmengine.config import read_base
from opencompass.models import TurboMindModel

lmdeploy_deepseek_v2_model = [
    dict(
        type=TurboMindModel,
        abbr='deepseek-v2-turbomind',
        path='deepseek-ai/DeepSeek-V2',
        # KV cache capped at 70% of free GPU memory to leave headroom
        # for the large MoE weights.
        engine_config=dict(session_len=7168, max_batch_size=4, tp=8,
                           cache_max_entry_count=0.7),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
        max_seq_len=7168,
        max_out_len=2048,
        batch_size=4,
        run_cfg=dict(num_gpus=8),
    ),
]
opencompass/configs/models/deepseek/lmdeploy_deepseek_v2_5.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy config for DeepSeek-V2.5 (served via the PyTorch backend)."""
from opencompass.models import TurboMindModelwithChatTemplate

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='deepseek-v2_5-turbomind',
        path='deepseek-ai/DeepSeek-V2.5',
        backend='pytorch',
        # KV cache capped at 70% of free GPU memory.
        engine_config=dict(session_len=7168,
                           max_batch_size=4,
                           tp=8,
                           cache_max_entry_count=0.7),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
        max_seq_len=7168,
        max_out_len=2048,
        batch_size=4,
        run_cfg=dict(num_gpus=8),
    ),
]
opencompass/configs/models/deepseek/lmdeploy_deepseek_v2_5_1210.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy config for DeepSeek-V2.5-1210 (served via the PyTorch backend)."""
from opencompass.models import TurboMindModelwithChatTemplate

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='deepseek-v2_5-1210-turbomind',
        path='deepseek-ai/DeepSeek-V2.5-1210',
        backend='pytorch',
        # KV cache capped at 70% of free GPU memory.
        engine_config=dict(session_len=7168,
                           max_batch_size=4,
                           tp=8,
                           cache_max_entry_count=0.7),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
        max_seq_len=7168,
        max_out_len=2048,
        batch_size=4,
        run_cfg=dict(num_gpus=8),
    ),
]
opencompass/configs/models/deepseek/lmdeploy_deepseek_v2_lite.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for DeepSeek-V2-Lite-Chat."""
from opencompass.models import TurboMindModelwithChatTemplate

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='deepseek-v2_lite-chat-turbomind',
        path='deepseek-ai/DeepSeek-V2-Lite-Chat',
        # KV cache capped at 70% of free GPU memory.
        engine_config=dict(session_len=7168,
                           max_batch_size=4,
                           tp=2,
                           cache_max_entry_count=0.7),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9),
        max_seq_len=7168,
        max_out_len=2048,
        batch_size=4,
        run_cfg=dict(num_gpus=2),
    ),
]
opencompass/configs/models/deepseek/vllm_deepseek_67b_chat.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""vLLM config for DeepSeek-LLM 67B Chat."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='deepseek-67b-chat-vllm',
        path='deepseek-ai/deepseek-llm-67b-chat',
        max_out_len=1024,
        batch_size=16,
        # 4-way tensor parallelism; run_cfg reserves the matching GPU count.
        model_kwargs=dict(tensor_parallel_size=4),
        run_cfg=dict(num_gpus=4),
    ),
]
opencompass/configs/models/deepseek/vllm_deepseek_7b_chat.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""vLLM config for DeepSeek-LLM 7B Chat."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='deepseek-7b-chat-vllm',
        path='deepseek-ai/deepseek-llm-7b-chat',
        max_out_len=1024,
        batch_size=16,
        model_kwargs=dict(tensor_parallel_size=1),
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/deepseek/vllm_deepseek_moe_16b_base.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""vLLM config for deepseek-moe-16b-base (completion model)."""
from opencompass.models import VLLM

models = [
    dict(
        type=VLLM,
        abbr='deepseek-moe-16b-base-vllm',
        path='deepseek-ai/deepseek-moe-16b-base',
        # 60% GPU-memory utilisation leaves headroom on shared machines.
        model_kwargs=dict(tensor_parallel_size=1, gpu_memory_utilization=0.6),
        max_out_len=1024,
        max_seq_len=8192,
        batch_size=16,
        generation_kwargs=dict(temperature=0),  # greedy decoding
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/deepseek/vllm_deepseek_moe_16b_chat.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""vLLM config for deepseek-moe-16b-chat."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='deepseek-moe-16b-chat-vllm',
        path='deepseek-ai/deepseek-moe-16b-chat',
        # 60% GPU-memory utilisation leaves headroom on shared machines.
        model_kwargs=dict(tensor_parallel_size=1, gpu_memory_utilization=0.6),
        max_out_len=1024,
        batch_size=16,
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/deltanet/delta_net-1.3B.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for a local DeltaNet-1.3B (100B tokens) checkpoint."""
from opencompass.models import HuggingFaceBaseModel

# Local checkpoint directory; tokenizer is co-located with the weights.
_CKPT = '/mnt/jfzn/msj/delta_net-1.3B-100B'

models = [
    dict(
        type=HuggingFaceBaseModel,
        path=_CKPT,
        tokenizer_path=_CKPT,
        # Left padding/truncation so generation continues from the prompt end.
        tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
        model_kwargs=dict(
            torch_dtype='torch.bfloat16',
            device_map='auto',
            trust_remote_code=True,
        ),
        max_seq_len=2048,
        abbr='delta_net',
        batch_size=128,
        run_cfg=dict(num_gpus=1),  # number of GPUs the model is deployed on
    ),
]
opencompass/configs/models/deltanet/gdn-1.3B.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for a local Gated DeltaNet 1B checkpoint."""
from opencompass.models import HuggingFaceBaseModel

# Local checkpoint directory; tokenizer is co-located with the weights.
_CKPT = '/mnt/jfzn/msj/train_exp/gdn_1B_a800'

models = [
    dict(
        type=HuggingFaceBaseModel,
        path=_CKPT,
        tokenizer_path=_CKPT,
        # Left padding/truncation so generation continues from the prompt end.
        tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
        model_kwargs=dict(
            torch_dtype='torch.bfloat16',
            device_map='auto',
            trust_remote_code=True,
        ),
        max_seq_len=2048,
        abbr='gated_deltanet',
        batch_size=128,
        run_cfg=dict(num_gpus=1),  # number of GPUs the model is deployed on
    ),
]
opencompass/configs/models/deltanet/gla-1.3B.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for a local GLA-1.3B (100B tokens) checkpoint."""
from opencompass.models import HuggingFaceBaseModel

# Local checkpoint directory; tokenizer is co-located with the weights.
_CKPT = '/mnt/jfzn/msj/gla-1.3B-100B'

models = [
    dict(
        type=HuggingFaceBaseModel,
        path=_CKPT,
        tokenizer_path=_CKPT,
        # Left padding/truncation so generation continues from the prompt end.
        tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
        model_kwargs=dict(
            torch_dtype='torch.bfloat16',
            device_map='auto',
            trust_remote_code=True,
        ),
        max_seq_len=2048,
        abbr='gla',
        batch_size=128,
        run_cfg=dict(num_gpus=1),  # number of GPUs the model is deployed on
    ),
]
opencompass/configs/models/deltanet/hgrn2-1.3B.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for a local HGRN2-1.3B (100B tokens) checkpoint."""
from opencompass.models import HuggingFaceBaseModel

# Local checkpoint directory; tokenizer is co-located with the weights.
_CKPT = '/mnt/jfzn/msj/download_model/hgrn2-1.3B-100B'

models = [
    dict(
        type=HuggingFaceBaseModel,
        path=_CKPT,
        tokenizer_path=_CKPT,
        # Left padding/truncation so generation continues from the prompt end.
        tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
        model_kwargs=dict(
            torch_dtype='torch.bfloat16',
            device_map='auto',
            trust_remote_code=True,
        ),
        max_seq_len=2048,
        abbr='hgrn2',
        batch_size=128,
        run_cfg=dict(num_gpus=1),  # number of GPUs the model is deployed on
    ),
]
opencompass/configs/models/deltanet/mask_deltanet-1.3B.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for a local masked-DeltaNet 1B checkpoint."""
from opencompass.models import HuggingFaceBaseModel

models = [
    dict(
        type=HuggingFaceBaseModel,
        path='/mnt/jfzn/msj/train_exp/mask_deltanet_1B_rank4',
        # NOTE(review): tokenizer_path points at the mask_gdn checkpoint, not
        # this model's own directory — possibly a deliberate shared tokenizer,
        # possibly a copy-paste slip; confirm with the checkpoint owner.
        tokenizer_path='/mnt/jfzn/msj/train_exp/mask_gdn_1B_hrr-rank4',
        # Left padding/truncation so generation continues from the prompt end.
        tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
        model_kwargs=dict(
            torch_dtype='torch.bfloat16',
            device_map='auto',
            trust_remote_code=True,
        ),
        max_seq_len=2048,
        abbr='mask_deltanet',
        batch_size=128,
        run_cfg=dict(num_gpus=1),  # number of GPUs the model is deployed on
    ),
]
opencompass/configs/models/deltanet/mask_gdn-1.3B.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for a local masked Gated-DeltaNet 1B checkpoint."""
from opencompass.models import HuggingFaceBaseModel

# Local checkpoint directory; tokenizer is co-located with the weights.
_CKPT = '/mnt/jfzn/msj/train_exp/mask_gdn_1B_hrr-rank4'

models = [
    dict(
        type=HuggingFaceBaseModel,
        path=_CKPT,
        tokenizer_path=_CKPT,
        # Left padding/truncation so generation continues from the prompt end.
        tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
        model_kwargs=dict(
            torch_dtype='torch.bfloat16',
            device_map='auto',
            trust_remote_code=True,
        ),
        max_seq_len=2048,
        abbr='mask_gdn',
        batch_size=128,
        run_cfg=dict(num_gpus=1),  # number of GPUs the model is deployed on
    ),
]
opencompass/configs/models/deltanet/mask_gdn_t-1.3B.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for a local masked Gated-DeltaNet 1B (byt) checkpoint."""
from opencompass.models import HuggingFaceBaseModel

# Local checkpoint directory; tokenizer is co-located with the weights.
_CKPT = '/mnt/jfzn/msj/train_exp/mask_gdn_1B_hrr4_byt'

models = [
    dict(
        type=HuggingFaceBaseModel,
        path=_CKPT,
        tokenizer_path=_CKPT,
        # Left padding/truncation so generation continues from the prompt end.
        tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
        model_kwargs=dict(
            torch_dtype='torch.bfloat16',
            device_map='auto',
            trust_remote_code=True,
        ),
        max_seq_len=2048,
        # Fixed: was 'mask_gdn', which duplicates the abbr used by
        # mask_gdn-1.3B.py and would make the two configs' results collide.
        abbr='mask_gdn_t',
        batch_size=128,
        run_cfg=dict(num_gpus=1),  # number of GPUs the model is deployed on
    ),
]
opencompass/configs/models/deltanet/retnet-1.3B.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for a local RetNet-1.3B (100B tokens) checkpoint."""
from opencompass.models import HuggingFaceBaseModel

# Local checkpoint directory; tokenizer is co-located with the weights.
_CKPT = '/mnt/jfzn/msj/download_model/retnet-1.3B-100B'

models = [
    dict(
        type=HuggingFaceBaseModel,
        path=_CKPT,
        tokenizer_path=_CKPT,
        # Left padding/truncation so generation continues from the prompt end.
        tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
        model_kwargs=dict(
            torch_dtype='torch.bfloat16',
            device_map='auto',
            trust_remote_code=True,
        ),
        max_seq_len=2048,
        abbr='retnet',
        batch_size=128,
        run_cfg=dict(num_gpus=1),  # number of GPUs the model is deployed on
    ),
]
opencompass/configs/models/deltanet/transformer++-1.3B.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for a local Transformer++ 1.3B (100B tokens) baseline."""
from opencompass.models import HuggingFaceBaseModel

# Local checkpoint directory; tokenizer is co-located with the weights.
_CKPT = '/mnt/jfzn/msj/transformer-1.3B-100B'

models = [
    dict(
        type=HuggingFaceBaseModel,
        path=_CKPT,
        tokenizer_path=_CKPT,
        # Left padding/truncation so generation continues from the prompt end.
        tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
        model_kwargs=dict(
            torch_dtype='torch.bfloat16',
            device_map='auto',
            trust_remote_code=True,
        ),
        max_seq_len=2048,
        abbr='transformer',
        batch_size=128,
        run_cfg=dict(num_gpus=1),  # number of GPUs the model is deployed on
    ),
]
opencompass/configs/models/falcon/hf_falcon_40b.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Falcon-40B (completion model)."""
from opencompass.models import HuggingFaceBaseModel

models = [
    dict(
        type=HuggingFaceBaseModel,
        abbr='falcon-40b-hf',
        path='tiiuae/falcon-40b',
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=4),
    ),
]
opencompass/configs/models/falcon/hf_falcon_7b.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Falcon-7B (completion model)."""
from opencompass.models import HuggingFaceBaseModel

models = [
    dict(
        type=HuggingFaceBaseModel,
        abbr='falcon-7b-hf',
        path='tiiuae/falcon-7b',
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/gemini/gemini_1_5_flash.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""API config for Google Gemini 1.5 Flash."""
from opencompass.models import Gemini

# Map OpenCompass roles onto the Gemini chat API roles.
api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

models = [
    dict(
        abbr='gemini-1.5-flash',
        type=Gemini,
        path='gemini-1.5-flash',
        # 'ENV' means the key is read from $GEMINI_API_KEY; a literal key
        # may be written here instead.
        key='ENV',
        meta_template=api_meta_template,
        query_per_second=15,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        temperature=1,
    ),
]
opencompass/configs/models/gemini/gemini_1_5_pro.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""API config for Google Gemini 1.5 Pro."""
from opencompass.models import Gemini

# Map OpenCompass roles onto the Gemini chat API roles.
api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

models = [
    dict(
        abbr='gemini-1.5-pro',
        type=Gemini,
        path='gemini-1.5-pro',
        # 'ENV' means the key is read from $GEMINI_API_KEY; a literal key
        # may be written here instead.
        key='ENV',
        meta_template=api_meta_template,
        query_per_second=2,  # Pro tier: lower rate limit than Flash
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        temperature=1,
    ),
]
opencompass/configs/models/gemini/gemini_pro.py ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""API config for Google Gemini Pro."""
from opencompass.models import Gemini

# Map OpenCompass roles onto the Gemini chat API roles.
api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

models = [
    dict(
        abbr='gemini',
        type=Gemini,
        path='gemini-pro',
        # 'ENV' means the key is read from $GEMINI_API_KEY; a literal key
        # may be written here instead.
        key='ENV',
        meta_template=api_meta_template,
        query_per_second=16,
        max_out_len=100,
        max_seq_len=2048,
        batch_size=1,
        temperature=1,
    ),
]
opencompass/configs/models/gemma/hf_gemma2_27b.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Gemma-2 27B (completion model)."""
from opencompass.models import HuggingFaceBaseModel

models = [
    dict(
        type=HuggingFaceBaseModel,
        abbr='gemma2-27b-hf',
        path='google/gemma-2-27b',
        max_out_len=1024,
        batch_size=4,
        run_cfg=dict(num_gpus=2),
        # Load weights in bfloat16 to halve memory vs fp32.
        model_kwargs=dict(torch_dtype='torch.bfloat16'),
    ),
]
opencompass/configs/models/gemma/hf_gemma2_27b_it.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Gemma-2 27B instruction-tuned."""
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='gemma2-27b-it-hf',
        path='google/gemma-2-27b-it',
        max_out_len=2048,
        batch_size=1,
        run_cfg=dict(num_gpus=2),
        # Gemma's chat template ends turns with this token.
        stop_words=['<end_of_turn>'],
        # Load weights in bfloat16 to halve memory vs fp32.
        model_kwargs=dict(torch_dtype='torch.bfloat16'),
    ),
]
opencompass/configs/models/gemma/hf_gemma2_2b.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Gemma-2 2B (completion model)."""
from opencompass.models import HuggingFaceBaseModel

models = [
    dict(
        type=HuggingFaceBaseModel,
        abbr='gemma2-2b-hf',
        path='google/gemma-2-2b',
        max_out_len=1024,
        batch_size=4,
        run_cfg=dict(num_gpus=1),
        # Load weights in bfloat16 to halve memory vs fp32.
        model_kwargs=dict(torch_dtype='torch.bfloat16'),
    ),
]
opencompass/configs/models/gemma/hf_gemma2_2b_it.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Gemma-2 2B instruction-tuned."""
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='gemma2-2b-it-hf',
        path='google/gemma-2-2b-it',
        max_out_len=2048,
        batch_size=1,
        run_cfg=dict(num_gpus=1),
        # Gemma's chat template ends turns with this token.
        stop_words=['<end_of_turn>'],
        # Load weights in bfloat16 to halve memory vs fp32.
        model_kwargs=dict(torch_dtype='torch.bfloat16'),
    ),
]
opencompass/configs/models/gemma/hf_gemma2_9b.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Gemma-2 9B (completion model)."""
from opencompass.models import HuggingFaceBaseModel

models = [
    dict(
        type=HuggingFaceBaseModel,
        abbr='gemma2-9b-hf',
        path='google/gemma-2-9b',
        max_out_len=1024,
        batch_size=4,
        run_cfg=dict(num_gpus=1),
        # Load weights in bfloat16 to halve memory vs fp32.
        model_kwargs=dict(torch_dtype='torch.bfloat16'),
    ),
]
opencompass/configs/models/gemma/hf_gemma2_9b_it.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Gemma-2 9B instruction-tuned."""
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='gemma2-9b-it-hf',
        path='google/gemma-2-9b-it',
        max_out_len=2048,
        batch_size=1,
        run_cfg=dict(num_gpus=1),
        # Gemma's chat template ends turns with this token.
        stop_words=['<end_of_turn>'],
        # Load weights in bfloat16 to halve memory vs fp32.
        model_kwargs=dict(torch_dtype='torch.bfloat16'),
    ),
]
opencompass/configs/models/gemma/hf_gemma_2b.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Gemma 2B (completion model)."""
from opencompass.models import HuggingFaceBaseModel

models = [
    dict(
        type=HuggingFaceBaseModel,
        abbr='gemma-2b-hf',
        path='google/gemma-2b',
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/gemma/hf_gemma_2b_it.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Gemma 2B instruction-tuned."""
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='gemma-2b-it-hf',
        path='google/gemma-2b-it',
        max_out_len=1024,
        batch_size=1,
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/gemma/hf_gemma_7b.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Gemma 7B (completion model)."""
from opencompass.models import HuggingFaceBaseModel

models = [
    dict(
        type=HuggingFaceBaseModel,
        abbr='gemma-7b-hf',
        path='google/gemma-7b',
        max_out_len=1024,
        batch_size=8,
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/gemma/hf_gemma_7b_it.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""HuggingFace config for Gemma 7B instruction-tuned."""
from opencompass.models import HuggingFacewithChatTemplate

models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='gemma-7b-it-hf',
        path='google/gemma-7b-it',
        max_out_len=1024,
        batch_size=1,
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/gemma/lmdeploy_gemma_27b.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for Gemma-2 27B (completion model)."""
from opencompass.models import TurboMindModel

models = [
    dict(
        type=TurboMindModel,
        abbr='gemma-2-27b-turbomind',
        path='google/gemma-2-27b',
        engine_config=dict(session_len=16384, max_batch_size=16, tp=2),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9,
                        max_new_tokens=4096),
        max_seq_len=16384,
        max_out_len=4096,
        batch_size=16,
        run_cfg=dict(num_gpus=2),
    ),
]
opencompass/configs/models/gemma/lmdeploy_gemma_27b_it.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for Gemma-2 27B instruction-tuned."""
from opencompass.models import TurboMindModelwithChatTemplate

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='gemma-2-27b-it-turbomind',
        path='google/gemma-2-27b-it',
        # NOTE(review): tp=1/num_gpus=1 here, while the base-27b config uses
        # tp=2/num_gpus=2 — confirm a single GPU actually fits this model.
        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9,
                        max_new_tokens=4096),
        max_seq_len=16384,
        max_out_len=4096,
        batch_size=16,
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/gemma/lmdeploy_gemma_9b.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for Gemma-2 9B (completion model)."""
from opencompass.models import TurboMindModel

models = [
    dict(
        type=TurboMindModel,
        abbr='gemma-2-9b-turbomind',
        path='google/gemma-2-9b',
        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9,
                        max_new_tokens=4096),
        max_seq_len=16384,
        max_out_len=4096,
        batch_size=16,
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/gemma/lmdeploy_gemma_9b_it.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""LMDeploy (TurboMind) config for Gemma-2 9B instruction-tuned."""
from opencompass.models import TurboMindModelwithChatTemplate

models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='gemma-2-9b-it-turbomind',
        path='google/gemma-2-9b-it',
        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
        # Effectively greedy decoding.
        gen_config=dict(top_k=1, temperature=1e-6, top_p=0.9,
                        max_new_tokens=4096),
        max_seq_len=16384,
        max_out_len=4096,
        batch_size=16,
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/gemma/vllm_gemma_2b.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""vLLM config for Gemma 2B (completion model)."""
from opencompass.models import VLLM

models = [
    dict(
        type=VLLM,
        abbr='gemma-2b-vllm',
        path='google/gemma-2b',
        # Half the GPU memory is enough for a 2B model.
        model_kwargs=dict(tensor_parallel_size=1, gpu_memory_utilization=0.5),
        max_out_len=1024,
        max_seq_len=8192,
        batch_size=16,
        generation_kwargs=dict(temperature=0),  # greedy decoding
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/gemma/vllm_gemma_2b_it.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""vLLM config for Gemma 2B instruction-tuned."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='gemma-2b-it-vllm',
        path='google/gemma-2b-it',
        # Half the GPU memory is enough for a 2B model.
        model_kwargs=dict(tensor_parallel_size=1, gpu_memory_utilization=0.5),
        max_out_len=1024,
        batch_size=16,
        generation_kwargs=dict(temperature=0),  # greedy decoding
        run_cfg=dict(num_gpus=1),
    ),
]
opencompass/configs/models/gemma/vllm_gemma_3_12b_it.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""vLLM config for Gemma-3 12B instruction-tuned (long-context setup)."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='gemma-3-12b-it-vllm',
        path='google/gemma-3-12b-it',
        model_kwargs=dict(
            tensor_parallel_size=4,
            # Linear RoPE scaling x8 to extend the usable context window.
            rope_scaling={'factor': 8.0, 'rope_type': 'linear'},
        ),
        # NOTE(review): no max_seq_len here, unlike the 4b-it config
        # (140000) — confirm whether that omission is intentional.
        max_out_len=4096,
        batch_size=1,
        generation_kwargs=dict(temperature=0),  # greedy decoding
        run_cfg=dict(num_gpus=4),
    ),
]
opencompass/configs/models/gemma/vllm_gemma_3_27b_it.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""vLLM config for Gemma-3 27B instruction-tuned (long-context setup)."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='gemma-3-27b-it-vllm',
        path='google/gemma-3-27b-it',
        model_kwargs=dict(
            tensor_parallel_size=4,
            # Linear RoPE scaling x8 to extend the usable context window.
            rope_scaling={'factor': 8.0, 'rope_type': 'linear'},
        ),
        # NOTE(review): no max_seq_len here, unlike the 4b-it config
        # (140000) — confirm whether that omission is intentional.
        max_out_len=4096,
        batch_size=1,
        generation_kwargs=dict(temperature=0),  # greedy decoding
        run_cfg=dict(num_gpus=4),
    ),
]
opencompass/configs/models/gemma/vllm_gemma_3_4b_it.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""vLLM config for Gemma-3 4B instruction-tuned (long-context setup)."""
from opencompass.models import VLLMwithChatTemplate

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='gemma-3-4b-it-vllm',
        path='google/gemma-3-4b-it',
        model_kwargs=dict(
            tensor_parallel_size=2,
            # Linear RoPE scaling x8 to extend the usable context window.
            rope_scaling={'factor': 8.0, 'rope_type': 'linear'},
        ),
        max_seq_len=140000,
        max_out_len=4096,
        batch_size=1,
        generation_kwargs=dict(temperature=0),  # greedy decoding
        run_cfg=dict(num_gpus=2),
    ),
]
opencompass/configs/models/gemma/vllm_gemma_7b.py ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""vLLM config for Gemma 7B (completion model)."""
from opencompass.models import VLLM

models = [
    dict(
        type=VLLM,
        abbr='gemma-7b-vllm',
        path='google/gemma-7b',
        # Half the GPU memory leaves headroom on shared machines.
        model_kwargs=dict(tensor_parallel_size=1, gpu_memory_utilization=0.5),
        max_out_len=1024,
        max_seq_len=8192,
        batch_size=16,
        generation_kwargs=dict(temperature=0),  # greedy decoding
        run_cfg=dict(num_gpus=1),
    ),
]