msj19 commited on
Commit
8082566
·
verified ·
1 Parent(s): dc367ce

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. examples/eval_academic_leaderboard_202412.py +129 -0
  2. examples/eval_academic_leaderboard_202502.py +137 -0
  3. examples/eval_babilong.py +51 -0
  4. examples/eval_bench_intern_s1.py +169 -0
  5. examples/eval_cascade_evaluator.py +130 -0
  6. examples/eval_chat_agent.py +67 -0
  7. examples/eval_chat_demo.py +14 -0
  8. examples/eval_chat_last.py +35 -0
  9. examples/eval_chatml_datasets.py +51 -0
  10. examples/eval_chembench.py +23 -0
  11. examples/eval_chinese_simpleqa.py +73 -0
  12. examples/eval_cibench.py +154 -0
  13. examples/eval_claude.py +19 -0
  14. examples/eval_codeagent.py +52 -0
  15. examples/eval_codebench_full.py +155 -0
  16. examples/eval_compassarena_subjectivebench_bradleyterry.py +119 -0
  17. examples/eval_contamination.py +21 -0
  18. examples/eval_corebench_2409_longcontext.py +127 -0
  19. examples/eval_corebench_2409_subjective.py +123 -0
  20. examples/eval_edgellm_demo.py +65 -0
  21. examples/eval_gpt3.5.py +38 -0
  22. examples/eval_hellobench.py +106 -0
  23. examples/eval_internlm2_chat_keyset.py +46 -0
  24. examples/eval_internlm2_keyset.py +24 -0
  25. examples/eval_internlm3_math500_thinking.py +120 -0
  26. examples/eval_internlm_chat_lmdeploy_apiserver.py +58 -0
  27. examples/eval_internlm_flames_chat.py +116 -0
  28. examples/eval_internlm_lmdeploy_apiserver.py +43 -0
  29. examples/eval_internlm_math_chat.py +17 -0
  30. examples/eval_lightllm.py +52 -0
  31. examples/eval_math_llm_judge_internal.py +43 -0
  32. examples/eval_mathbench.py +41 -0
  33. examples/eval_modelscope_datasets.py +112 -0
  34. examples/eval_qwen_7b.py +58 -0
  35. examples/eval_ruler_fix_tokenizer.py +38 -0
  36. examples/eval_subjective_alpacaeval_official.py +72 -0
  37. requirements/vllm.txt +1 -0
  38. tmp/08b1e522-33ea-430a-ba78-4d273bf09a88_params.py +1424 -0
  39. tmp/0954e290-fcd0-400c-8c58-f14a577dc5e4_params.py +1424 -0
  40. tmp/0985e09b-75af-404f-ac0c-079c3aa085fb_params.py +0 -0
  41. tmp/09d7374d-16f6-44e6-a2fa-f4925f8fb3fc_params.py +56 -0
  42. tmp/0a5aa083-12c4-41a8-92db-57a728f50ed5_params.py +0 -0
  43. tmp/0bd141af-ea86-420f-b26c-b2890fc57de2_params.py +56 -0
  44. tmp/0c3d2c0a-49a1-40b1-b0b6-3d32b7381062_params.py +1420 -0
  45. tmp/0d03fed5-a949-4dc0-815b-cf2f740d6181_params.py +53 -0
  46. tmp/0d2ff363-9d6a-489c-b18d-e978d436a065_params.py +0 -0
  47. tmp/10481e04-ca08-4f83-972f-e8fccc958b91_params.py +61 -0
  48. tmp/104a1807-a194-4864-99ea-1a9fe1a47bac_params.py +0 -0
  49. tmp/11308d03-3ab0-43b0-9f06-64b71c4140c1_params.py +55 -0
  50. tmp/1405e46f-8be4-462d-a794-3b47ef9839c2_params.py +1424 -0
examples/eval_academic_leaderboard_202412.py ADDED
@@ -0,0 +1,129 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""OpenCompass config: academic leaderboard evaluation (2024-12 edition).

Executed as an mmengine config module; the framework picks up the
conventional top-level names `datasets`, `summarizer`, `models`,
`infer`, `eval` and `work_dir`.
"""
import os.path as osp  # NOTE(review): appears unused in this config — confirm before removing

from mmengine.config import read_base

from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
from opencompass.runners import LocalRunner, VOLCRunner  # VOLCRunner kept for optional cloud runs
from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask

#######################################################################
#                     PART 0  Essential Configs                       #
#######################################################################
with read_base():
    # Datasets Part
    # Knowledge
    # Math
    from opencompass.configs.datasets.aime2024.aime2024_gen_6e39a4 import \
        aime2024_datasets
    from opencompass.configs.datasets.bbh.bbh_0shot_nocot_gen_925fc4 import \
        bbh_datasets
    # General Reasoning
    from opencompass.configs.datasets.gpqa.gpqa_openai_simple_evals_gen_5aeece import \
        gpqa_datasets
    from opencompass.configs.datasets.humaneval.humaneval_openai_sample_evals_gen_dcae0e import \
        humaneval_datasets
    # Instruction Following
    from opencompass.configs.datasets.IFEval.IFEval_gen_353ae7 import \
        ifeval_datasets
    from opencompass.configs.datasets.livecodebench.livecodebench_gen_a4f90b import \
        LCBCodeGeneration_dataset
    from opencompass.configs.datasets.math.math_prm800k_500_0shot_cot_gen import \
        math_datasets
    from opencompass.configs.datasets.mmlu_pro.mmlu_pro_0shot_cot_gen_08c1de import \
        mmlu_pro_datasets
    # Model List
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
        models as hf_internlm2_5_7b_chat_model
    # Summary Groups
    from opencompass.configs.summarizers.groups.bbh import bbh_summary_groups
    from opencompass.configs.summarizers.groups.mmlu_pro import \
        mmlu_pro_summary_groups

#######################################################################
#                     PART 1  Datasets List                           #
#######################################################################
# datasets list for evaluation
# Only take LCB generation for evaluation.
# Collects every `*_datasets` list imported above by name convention,
# so the variable names of the imports are load-bearing.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')),
               []) + [LCBCodeGeneration_dataset]

#######################################################################
#                     PART 2  Dataset Summarizer                      #
#######################################################################

# 'core_average' aggregates one headline metric per benchmark.
core_summary_groups = [
    {
        'name':
        'core_average',
        'subsets': [
            ['IFEval', 'Prompt-level-strict-accuracy'],
            ['bbh', 'naive_average'],
            ['math_prm800k_500', 'accuracy'],
            ['aime2024', 'accuracy'],
            ['GPQA_diamond', 'accuracy'],
            ['mmlu_pro', 'naive_average'],
            ['openai_humaneval', 'humaneval_pass@1'],
            ['lcb_code_generation', 'pass@1'],
        ],
    },
]

# Report layout: '' entries render as blank separator rows, bare strings
# as section headers, [abbr, metric] pairs as metric rows.
summarizer = dict(
    dataset_abbrs=[
        ['core_average', 'naive_average'],
        '',
        'Instruction Following',
        ['IFEval', 'Prompt-level-strict-accuracy'],
        '',
        'General Reasoning',
        ['bbh', 'naive_average'],
        ['GPQA_diamond', 'accuracy'],
        '',
        'Math Calculation',
        ['math_prm800k_500', 'accuracy'],
        ['aime2024', 'accuracy'],
        '',
        'Knowledge',
        ['mmlu_pro', 'naive_average'],
        '',
        'Code',
        ['openai_humaneval', 'humaneval_pass@1'],
        ['lcb_code_generation', 'pass@1'],
    ],
    # Merge every `*_summary_groups` list defined above (again by name).
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')], []),
)

#######################################################################
#                     PART 3  Models List                             #
#######################################################################

# Collect every `*_model` list imported above.
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])

#######################################################################
#                     PART 4  Inference/Evaluation Configuration      #
#######################################################################

# Local Runner
infer = dict(
    partitioner=dict(type=NumWorkerPartitioner, num_worker=8),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,
        retry=0,  # Modify if needed
        task=dict(type=OpenICLInferTask),
    ),
)

# eval with local runner
eval = dict(
    partitioner=dict(type=NaivePartitioner, n=10),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                task=dict(type=OpenICLEvalTask)),
)

#######################################################################
#                     PART 5  Utils Configuration                     #
#######################################################################
work_dir = './outputs/oc_academic_202412'
examples/eval_academic_leaderboard_202502.py ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# flake8: noqa
"""OpenCompass config: academic leaderboard evaluation (2025-02 edition).

Same structure as the 2024-12 leaderboard config, but with LLM-as-judge
dataset variants and a larger generation budget per sample.
"""

from mmengine.config import read_base

from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
from opencompass.runners import LocalRunner, VOLCRunner  # VOLCRunner kept for optional cloud runs
from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask

#######################################################################
#                     PART 0  Essential Configs                       #
#######################################################################
with read_base():
    # Datasets Part
    # Knowledge
    # Math
    from opencompass.configs.datasets.aime2024.aime2024_0shot_nocot_genericllmeval_academic_gen import \
        aime2024_datasets
    from opencompass.configs.datasets.bbh.bbh_0shot_nocot_academic_gen import \
        bbh_datasets
    # General Reasoning
    from opencompass.configs.datasets.gpqa.gpqa_openai_simple_evals_gen_5aeece import \
        gpqa_datasets
    from opencompass.configs.datasets.humaneval.humaneval_openai_sample_evals_gen_dcae0e import \
        humaneval_datasets
    # Instruction Following
    from opencompass.configs.datasets.IFEval.IFEval_gen_353ae7 import \
        ifeval_datasets
    from opencompass.configs.datasets.livecodebench.livecodebench_gen_a4f90b import \
        LCBCodeGeneration_dataset
    from opencompass.configs.datasets.math.math_prm800k_500_0shot_cot_gen import \
        math_datasets
    from opencompass.configs.datasets.mmlu_pro.mmlu_pro_0shot_cot_gen_08c1de import \
        mmlu_pro_datasets
    # Model List
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
        models as hf_internlm2_5_7b_chat_model
    # Summary Groups
    from opencompass.configs.summarizers.groups.bbh import bbh_summary_groups
    from opencompass.configs.summarizers.groups.mmlu_pro import \
        mmlu_pro_summary_groups

#######################################################################
#                     PART 1  Datasets List                           #
#######################################################################
# datasets list for evaluation
# Only take LCB generation for evaluation.
# Collects every `*_datasets` list imported above by name convention.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')),
               []) + [LCBCodeGeneration_dataset]

# LLM judge config: using LLM to evaluate predictions.
# Left empty here — fill in the judge model before running; an empty
# dict relies on the dataset configs' own defaults.
judge_cfg = dict()
for dataset in datasets:
    # Allow long chain-of-thought generations.
    dataset['infer_cfg']['inferencer']['max_out_len'] = 32768
    # Only override judge_cfg where the evaluator actually declares one.
    if 'judge_cfg' in dataset['eval_cfg']['evaluator']:
        dataset['eval_cfg']['evaluator']['judge_cfg'] = judge_cfg


#######################################################################
#                     PART 2  Dataset Summarizer                      #
#######################################################################

# 'core_average' aggregates one headline metric per benchmark.
core_summary_groups = [
    {
        'name':
        'core_average',
        'subsets': [
            ['IFEval', 'Prompt-level-strict-accuracy'],
            ['bbh', 'naive_average'],
            ['math_prm800k_500', 'accuracy'],
            ['aime2024', 'accuracy'],
            ['GPQA_diamond', 'accuracy'],
            ['mmlu_pro', 'naive_average'],
            ['openai_humaneval', 'humaneval_pass@1'],
            ['lcb_code_generation', 'pass@1'],
        ],
    },
]

# Report layout: '' entries are separators, bare strings section headers.
summarizer = dict(
    dataset_abbrs=[
        ['core_average', 'naive_average'],
        '',
        'Instruction Following',
        ['IFEval', 'Prompt-level-strict-accuracy'],
        '',
        'General Reasoning',
        ['bbh', 'naive_average'],
        ['GPQA_diamond', 'accuracy'],
        '',
        'Math Calculation',
        ['math_prm800k_500', 'accuracy'],
        ['aime2024', 'accuracy'],
        '',
        'Knowledge',
        ['mmlu_pro', 'naive_average'],
        '',
        'Code',
        ['openai_humaneval', 'humaneval_pass@1'],
        ['lcb_code_generation', 'pass@1'],
    ],
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')], []),
)

#######################################################################
#                     PART 3  Models List                             #
#######################################################################

# Collect every `*_model` list imported above.
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])

#######################################################################
#                     PART 4  Inference/Evaluation Configuration      #
#######################################################################

# Local Runner
infer = dict(
    partitioner=dict(type=NumWorkerPartitioner, num_worker=8),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,
        retry=0,  # Modify if needed
        task=dict(type=OpenICLInferTask),
    ),
)

# eval with local runner
eval = dict(
    partitioner=dict(type=NaivePartitioner, n=10),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                task=dict(type=OpenICLEvalTask)),
)

#######################################################################
#                     PART 5  Utils Configuration                     #
#######################################################################
work_dir = './outputs/oc_academic_202502'
examples/eval_babilong.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""OpenCompass config: BABILong long-context benchmark (0k–256k)."""
from mmengine.config import read_base

with read_base():
    # Models
    # Datasets
    from opencompass.configs.datasets.babilong.babilong_0k_gen import \
        babiLong_0k_datasets
    from opencompass.configs.datasets.babilong.babilong_4k_gen import \
        babiLong_4k_datasets
    from opencompass.configs.datasets.babilong.babilong_16k_gen import \
        babiLong_16k_datasets
    from opencompass.configs.datasets.babilong.babilong_32k_gen import \
        babiLong_32k_datasets
    from opencompass.configs.datasets.babilong.babilong_128k_gen import \
        babiLong_128k_datasets
    from opencompass.configs.datasets.babilong.babilong_256k_gen import \
        babiLong_256k_datasets
    from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
        models as lmdeploy_internlm2_5_7b_chat_model
    from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import \
        models as lmdeploy_llama3_1_8b_instruct_model
    from opencompass.configs.models.mistral.lmdeploy_ministral_8b_instruct_2410 import \
        models as lmdeploy_ministral_8b_instruct_2410_model
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import \
        models as lmdeploy_qwen2_5_7b_instruct_model
    from opencompass.configs.summarizers.groups.babilong import \
        babilong_summary_groups

# Collect every `*_datasets` list imported above by name convention.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

# Collect every `*_model` list, then widen each model's context window to
# 1M tokens and spread it over 4 GPUs for the long-context splits.
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
for model in models:
    model['engine_config']['session_len'] = 1024 * 1024
    model['max_seq_len'] = 1024 * 1024
    model['engine_config']['tp'] = 4  # tensor parallelism degree
    model['run_cfg']['num_gpus'] = 4

summarizer = dict(
    dataset_abbrs=[
        'babilong_0k',
        'babilong_4k',
        'babilong_16k',
        'babilong_32k',
        'babilong_128k',
        'babilong_256k',
    ],
    summary_groups=sum(
        [v for k, v in locals().items() if k.endswith('_summary_groups')], []),
)

work_dir = './outputs/babilong'
examples/eval_bench_intern_s1.py ADDED
@@ -0,0 +1,169 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# flake8: noqa
"""OpenCompass config: Intern-S1 science/academic benchmark suite.

Covers math (AIME 2025), reasoning (GPQA), knowledge (MMLU-Pro),
instruction following (IFEval) and scientific benchmarks (SmolInstruct,
ChemBench, Matbench, ProteinLMBench), many scored by an LLM judge.
"""

from mmengine.config import read_base

from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask


#######################################################################
#                     PART 0  Essential Configs                       #
#######################################################################
with read_base():
    # Datasets
    from opencompass.configs.datasets.aime2025.aime2025_cascade_eval_gen_5e9f4f import aime2025_datasets
    from opencompass.configs.datasets.gpqa.gpqa_cascade_eval_gen_772ea0 import (
        gpqa_datasets,
    )
    from opencompass.configs.datasets.mmlu_pro.mmlu_pro_0shot_nocot_genericllmeval_gen_08c1de import (
        mmlu_pro_datasets,
    )
    from opencompass.configs.datasets.IFEval.IFEval_gen_353ae7 import (
        ifeval_datasets,
    )
    from opencompass.configs.datasets.SmolInstruct.smolinstruct_0shot_instruct_gen import (
        smolinstruct_datasets_0shot_instruct as smolinstruct_datasets,
    )
    from opencompass.configs.datasets.ChemBench.ChemBench_llmjudge_gen_c584cf import (
        chembench_datasets,
    )
    from opencompass.configs.datasets.matbench.matbench_llm_judge_gen_0e9276 import (
        matbench_datasets,
    )
    from opencompass.configs.datasets.ProteinLMBench.ProteinLMBench_llmjudge_gen_a67965 import (
        proteinlmbench_datasets,
    )

    # Summary Groups
    from opencompass.configs.summarizers.groups.mmlu_pro import (
        mmlu_pro_summary_groups,
    )

    # Models
    from opencompass.configs.models.interns1.intern_s1 import \
        models as interns1_model

#######################################################################
#                     PART 1  Datasets List                           #
#######################################################################
# datasets list for evaluation
# Collects every `*_datasets` list imported above by name convention.

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')),
               [])

# LLM judge config: using LLM to evaluate predictions.
# Left empty here — fill in the judge model before running.
judge_cfg = dict()

for item in datasets:
    # Large output budget for long reasoning traces.
    item['infer_cfg']['inferencer']['max_out_len'] = 65536
    # Plain LLM-judge evaluators carry judge_cfg at the top level...
    if 'judge_cfg' in item['eval_cfg']['evaluator']:
        item['eval_cfg']['evaluator']['judge_cfg'] = judge_cfg
    # ...cascade evaluators nest it under their llm_evaluator.
    if 'llm_evaluator' in item['eval_cfg']['evaluator'].keys() and 'judge_cfg' in item['eval_cfg']['evaluator']['llm_evaluator']:
        item['eval_cfg']['evaluator']['llm_evaluator']['judge_cfg'] = judge_cfg


#######################################################################
#                     PART 2  Dataset Summarizer                      #
#######################################################################

# Merge every `*_summary_groups` list imported above.
summary_groups = sum(
    [v for k, v in locals().items() if k.endswith('_summary_groups')], []
)

# Add an aggregate group averaging the ChemBench sub-tasks.
summary_groups.extend(
    [
        {
            'name': 'ChemBench',
            'subsets': [
                'ChemBench_Name_Conversion',
                'ChemBench_Property_Prediction',
                'ChemBench_Mol2caption',
                'ChemBench_Caption2mol',
                'ChemBench_Product_Prediction',
                'ChemBench_Retrosynthesis',
                'ChemBench_Yield_Prediction',
                'ChemBench_Temperature_Prediction',
            ],
        },
    ]
)

# Report layout: '' entries are separators, bare strings section headers.
summarizer = dict(
    dataset_abbrs=[
        'Knowledge',
        ['mmlu_pro', 'accuracy'],
        '',
        'Instruction Following',
        ['IFEval', 'Prompt-level-strict-accuracy'],
        '',
        'General Reasoning',
        ['GPQA_diamond', 'accuracy'],
        '',
        'Math Calculation',
        ['aime2025', 'accuracy'],
        '',
        'Academic',
        ['ChemBench', 'naive_average'],
        ['ProteinLMBench', 'accuracy'],
        '',
        'SmolInstruct',
        ['NC-I2F-0shot-instruct', 'score'],
        ['NC-I2S-0shot-instruct', 'score'],
        ['NC-S2F-0shot-instruct', 'score'],
        ['NC-S2I-0shot-instruct', 'score'],
        ['PP-ESOL-0shot-instruct', 'score'],
        ['PP-Lipo-0shot-instruct', 'score'],
        ['PP-BBBP-0shot-instruct', 'accuracy'],
        ['PP-ClinTox-0shot-instruct', 'accuracy'],
        ['PP-HIV-0shot-instruct', 'accuracy'],
        ['PP-SIDER-0shot-instruct', 'accuracy'],
        ['MC-0shot-instruct', 'score'],
        ['MG-0shot-instruct', 'score'],
        ['FS-0shot-instruct', 'score'],
        ['RS-0shot-instruct', 'score'],
        '',
        ['matbench_expt_gap', 'mae'],
        ['matbench_steels', 'mae'],
        ['matbench_expt_is_metal', 'accuracy'],
        ['matbench_glass', 'accuracy'],
        '',
    ],
    summary_groups=summary_groups,
)

#######################################################################
#                     PART 3  Models List                             #
#######################################################################

# Collect every `*_model` list imported above.
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])

#######################################################################
#                     PART 4  Inference/Evaluation Configuration      #
#######################################################################

# infer with local runner
infer = dict(
    partitioner=dict(type=NumWorkerPartitioner, num_worker=8),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,
        retry=0,  # Modify if needed
        task=dict(type=OpenICLInferTask),
    ),
)

# eval with local runner
eval = dict(
    partitioner=dict(type=NaivePartitioner, n=10),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                task=dict(type=OpenICLEvalTask)),
)

#######################################################################
#                     PART 5  Utils Configuration                     #
#######################################################################

work_dir = './outputs/oc_bench_intern_s1'
examples/eval_cascade_evaluator.py ADDED
@@ -0,0 +1,130 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""OpenCompass config: MATH-500 scored by a cascade evaluator.

The cascade first applies a rule-based math verifier; samples it marks
wrong are re-judged by an LLM grader (parallel=False → sequential).
"""
from mmengine.config import read_base

from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
# NOTE(review): the three imports below duplicate the three above —
# harmless, but could be removed.
from opencompass.openicl.icl_prompt_template import PromptTemplate
from opencompass.openicl.icl_retriever import ZeroRetriever
from opencompass.openicl.icl_inferencer import GenInferencer
from opencompass.evaluator import (
    GenericLLMEvaluator,
    CascadeEvaluator,
    MATHVerifyEvaluator,
)
from opencompass.datasets import generic_llmjudge_postprocess
from opencompass.datasets import (
    MATHDataset,
    math_postprocess_v2,
    normalize_final_answer,
)
#######################################################################
#                     PART 0  Essential Configs                       #
#######################################################################

with read_base():
    # Datasets, Summarizer
    from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import (
        models as lmdeploy_qwen2_5_7b_instruct_model,
    )

# Dataset reader: 'problem' is the prompt column, 'solution' the gold answer.
reader_cfg = dict(input_columns=['problem'], output_column='solution')

# Zero-shot generation with a boxed-answer instruction.
infer_cfg = dict(
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            round=[
                dict(
                    role='HUMAN',
                    prompt='{problem}\nPlease reason step by step, and put your final answer within \\boxed{}.',
                ),
            ]
        ),
    ),
    retriever=dict(type=ZeroRetriever),
    inferencer=dict(type=GenInferencer),
)

########################## Evaluator #################################
# Grading prompt for the LLM judge. {problem}/{solution}/{prediction}
# are filled in per sample by the evaluator.
# NOTE(review): interior indentation of this runtime string was lost in
# extraction — reproduced flush-left; confirm against the upstream file.
GRADER_TEMPLATE = """
Please as a grading expert, judge whether the final answers given by the candidates below are consistent with the standard answers, that is, whether the candidates answered correctly.

Here are some evaluation criteria:
1. Please refer to the given standard answer. You don't need to re-generate the answer to the question because the standard answer has been given. You only need to judge whether the candidate's answer is consistent with the standard answer according to the form of the question. Don't try to answer the original question. You can assume that the standard answer is definitely correct.
2. Because the candidate's answer may be different from the standard answer in the form of expression, before making a judgment, please understand the question and the standard answer first, and then judge whether the candidate's answer is correct, but be careful not to try to answer the original question.
3. Some answers may contain multiple items, such as multiple-choice questions, multiple-select questions, fill-in-the-blank questions, etc. As long as the answer is the same as the standard answer, it is enough. For multiple-select questions and multiple-blank fill-in-the-blank questions, the candidate needs to answer all the corresponding options or blanks correctly to be considered correct.
4. Some answers may be expressed in different ways, such as some answers may be a mathematical expression, some answers may be a textual description, as long as the meaning expressed is the same. And some formulas are expressed in different ways, but they are equivalent and correct.
5. If the prediction is given with \\boxed{}, please ignore the \\boxed{} and only judge whether the candidate's answer is consistent with the standard answer.

Please judge whether the following answers are consistent with the standard answer based on the above criteria. Grade the predicted answer of this new question as one of:
A: CORRECT
B: INCORRECT
Just return the letters "A" or "B", with no text around it.

Here is your task. Simply reply with either CORRECT, INCORRECT. Don't apologize or correct yourself if there was a mistake; we are just trying to grade the answer.


<Original Question Begin>: \n{problem}\n<Original Question End>\n\n
<Gold Target Begin>: \n{solution}\n<Gold Target End>\n\n
<Predicted Answer Begin>: \n{prediction}\n<Predicted End>\n\n

Judging the correctness of candidates' answers:
""".strip()

# LLM-as-judge evaluator; judge_cfg left empty — fill in before running.
llm_judge_evaluator = dict(
    type=GenericLLMEvaluator,
    prompt_template=dict(
        type=PromptTemplate,
        template=dict(
            begin=[
                dict(
                    role='SYSTEM',
                    fallback_role='HUMAN',
                    prompt="You are a helpful assistant who evaluates the correctness and quality of models' outputs.",
                )
            ],
            round=[
                dict(role='HUMAN', prompt=GRADER_TEMPLATE),
            ],
        ),
    ),
    dataset_cfg=dict(
        type=MATHDataset,
        path='opencompass/math',
        file_name='test_prm800k_500.json',
    ),
    judge_cfg=dict(),
)

# Rule-based verifier; cascade falls back to the LLM judge when it fails.
rule_evaluator = dict(type=MATHVerifyEvaluator)
cascade_evaluator = dict(type=CascadeEvaluator,
                         llm_evaluator=llm_judge_evaluator,
                         rule_evaluator=rule_evaluator,
                         parallel=False
                         )
##########################           #################################
eval_cfg = dict()

# Alternative evaluators kept for quick switching:
# eval_cfg['evaluator'] = rule_evaluator
# eval_cfg['evaluator'] = llm_judge_evaluator
eval_cfg['evaluator'] = cascade_evaluator

math_datasets = [
    dict(
        abbr='math_prm800k_500',
        type=MATHDataset,
        path='opencompass/math',
        file_name='test_prm800k_500.json',
        reader_cfg=reader_cfg,
        infer_cfg=infer_cfg,
        eval_cfg=eval_cfg,
    )
]


datasets = math_datasets
models = lmdeploy_qwen2_5_7b_instruct_model


work_dir = 'math_prm800k_500_cascade_evaluator'
examples/eval_chat_agent.py ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""OpenCompass config: GPT-3.5 ReAct agent on math datasets.

Wraps OpenAI GPT-3.5 in a lagent ReAct loop with a Python interpreter
tool for GSM8K / MATH / MathBench agent variants.
"""
from lagent import ReAct
from lagent.agents.react import ReActProtocol
from mmengine.config import read_base

from opencompass.lagent.actions.python_interpreter import PythonInterpreter
from opencompass.models.lagent import LagentAgent
from opencompass.models.openai_api import OpenAI
from opencompass.partitioners import SizePartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask

with read_base():
    from opencompass.configs.datasets.gsm8k.gsm8k_agent_gen_be1606 import \
        gsm8k_datasets
    from opencompass.configs.datasets.math.math_agent_gen_af2293 import \
        math_datasets
    from opencompass.configs.datasets.MathBench.mathbench_agent_gen_568903 import \
        mathbench_agent_datasets
    from opencompass.configs.summarizers.math_agent import summarizer

datasets = []
datasets += gsm8k_datasets
datasets += math_datasets
datasets += mathbench_agent_datasets

# Tool-use instructions given to the agent (ReAct call protocol).
# NOTE(review): indentation inside the code example was lost in
# extraction — restored to 4 spaces; confirm against the upstream file.
system_prompt = """You are a helpful assistant which use tools to solve mathematical reasoning questions. The code must be a function, and the function name must be 'solution'. For mathematics, please use code tool to calculate. The example format is as follows:
```
def solution():
    variable_names_with_real_meaning = func(variable)
    return variable_names_with_real_meaning
```"""

# Markers the ReAct loop uses to parse tool calls out of model output.
protocol = dict(
    type=ReActProtocol,
    action=dict(role='ACTION', begin='Tool:', end='\n'),
    action_input=dict(role='ARGS', begin='Tool Input:', end='\n'),
    finish=dict(role='FINISH', begin='FinalAnswer:', end='\n'),
    call_protocol=system_prompt,
)

models = [
    dict(
        abbr='gpt-3.5-react',
        type=LagentAgent,
        agent_type=ReAct,
        max_turn=3,  # at most 3 think/act rounds per question
        llm=dict(
            type=OpenAI,
            path='gpt-3.5-turbo',
            key='ENV',  # API key read from environment
            query_per_second=1,
            max_seq_len=4096,
        ),
        actions=[
            dict(type=PythonInterpreter),
        ],
        protocol=protocol,
        batch_size=1,
    ),
]

infer = dict(
    partitioner=dict(type=SizePartitioner, max_task_size=1000),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                task=dict(type=OpenICLInferTask)),
)
examples/eval_chat_demo.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""OpenCompass config: minimal chat-model demo (GSM8K + MATH subsets)."""
from mmengine.config import read_base

with read_base():
    from opencompass.configs.datasets.demo.demo_gsm8k_chat_gen import \
        gsm8k_datasets
    from opencompass.configs.datasets.demo.demo_math_chat_gen import \
        math_datasets
    from opencompass.configs.models.hf_internlm.hf_internlm2_chat_1_8b import \
        models as hf_internlm2_chat_1_8b_models
    from opencompass.configs.models.qwen.hf_qwen2_1_5b_instruct import \
        models as hf_qwen2_1_5b_instruct_models

# `datasets` and `models` are the names OpenCompass reads by convention.
datasets = gsm8k_datasets + math_datasets
models = hf_qwen2_1_5b_instruct_models + hf_internlm2_chat_1_8b_models
examples/eval_chat_last.py ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
"""OpenCompass config: GPT-3.5 on GSM8K using ChatInferencer."""
from mmengine.config import read_base

from opencompass.models.openai_api import OpenAI
from opencompass.openicl import ChatInferencer
from opencompass.partitioners import SizePartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask

with read_base():
    from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import \
        gsm8k_datasets as datasets

models = [
    dict(
        abbr='gpt-3.5',
        type=OpenAI,
        path='gpt-3.5-turbo',
        key='ENV',  # API key read from environment
        max_out_len=100,
        max_seq_len=2048,
        batch_size=16,
        run_cfg=dict(num_gpus=1, num_procs=1),
    )
]

for dataset in datasets:
    # Use ChatInferencer instead of GenInferencer
    dataset['infer_cfg']['inferencer'] = dict(type=ChatInferencer)

infer = dict(
    partitioner=dict(type=SizePartitioner, max_task_size=1000),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                task=dict(type=OpenICLInferTask)),
)
examples/eval_chatml_datasets.py ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # flake8: noqa
2
+
3
+ from mmengine.config import read_base
4
+
5
+ from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
6
+ from opencompass.runners import LocalRunner
7
+ from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
8
+
9
+ #######################################################################
10
+ # PART 0 Essential Configs #
11
+ #######################################################################
12
+ with read_base():
13
+
14
+ # Models (add your models here)
15
+ from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import \
16
+ models as hf_internlm2_5_7b_chat_model
17
+
18
+ # Datasets
19
+ from opencompass.configs.chatml_datasets.MaScQA.MaScQA_gen import datasets as MaScQA_chatml
20
+ from opencompass.configs.chatml_datasets.CPsyExam.CPsyExam_gen import datasets as CPsyExam_chatml
21
+
22
+
23
+ models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
24
+
25
+ chatml_datasets = sum(
26
+ (v for k, v in locals().items() if k.endswith('_chatml')),
27
+ [],
28
+ )
29
+
30
+ # Your Judge Model Configs Here
31
+ judge_cfg = dict()
32
+
33
+ for dataset in chatml_datasets:
34
+ if dataset['evaluator']['type'] == 'llm_evaluator':
35
+ dataset['evaluator']['judge_cfg'] = judge_cfg
36
+ if dataset['evaluator']['type'] == 'cascade_evaluator':
37
+ dataset['evaluator']['llm_evaluator']['judge_cfg'] = judge_cfg
38
+
39
+ infer = dict(
40
+ partitioner=dict(type=NumWorkerPartitioner, num_worker=8),
41
+ runner=dict(type=LocalRunner, task=dict(type=OpenICLInferTask)),
42
+ )
43
+
44
+ eval = dict(
45
+ partitioner=dict(type=NaivePartitioner, n=8),
46
+ runner=dict(
47
+ type=LocalRunner, task=dict(type=OpenICLEvalTask), max_num_workers=32
48
+ ),
49
+ )
50
+
51
+ work_dir = 'outputs/ChatML_Datasets'
examples/eval_chembench.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from opencompass.configs.datasets.ChemBench.ChemBench_gen import \
5
+ chembench_datasets
6
+ from opencompass.configs.models.mistral.hf_mistral_7b_instruct_v0_2 import \
7
+ models
8
+
9
+ datasets = [*chembench_datasets]
10
+ models = [*models]
11
+ '''
12
+ dataset version metric mode mistral-7b-instruct-v0.2-hf
13
+ -------------------------------- --------- -------- ------ -----------------------------
14
+ ChemBench_Name_Conversion d4e6a1 accuracy gen 45.43
15
+ ChemBench_Property_Prediction d4e6a1 accuracy gen 47.11
16
+ ChemBench_Mol2caption d4e6a1 accuracy gen 64.21
17
+ ChemBench_Caption2mol d4e6a1 accuracy gen 35.38
18
+ ChemBench_Product_Prediction d4e6a1 accuracy gen 38.67
19
+ ChemBench_Retrosynthesis d4e6a1 accuracy gen 27
20
+ ChemBench_Yield_Prediction d4e6a1 accuracy gen 27
21
+ ChemBench_Temperature_Prediction d4e6a1 accuracy gen 26.73
22
+ ChemBench_Solvent_Prediction d4e6a1 accuracy gen 32.67
23
+ '''
examples/eval_chinese_simpleqa.py ADDED
@@ -0,0 +1,73 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from opencompass.configs.datasets.chinese_simpleqa.chinese_simpleqa_gen import csimpleqa_datasets
5
+
6
+ from opencompass.models import HuggingFacewithChatTemplate
7
+ from opencompass.models.openai_api import OpenAI
8
+ from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
9
+ from opencompass.runners import LocalRunner
10
+ from opencompass.summarizers import DefaultSubjectiveSummarizer
11
+ from opencompass.tasks.subjective_eval import SubjectiveEvalTask
12
+
13
+ # -------------Inference Stage ----------------------------------------
14
+ models = [
15
+ dict(
16
+ type=HuggingFacewithChatTemplate,
17
+ abbr='Qwen2.5-1.5B-Instruct',
18
+ path='Qwen/Qwen2.5-1.5B-Instruct',
19
+ model_kwargs=dict(
20
+ device_map='auto',
21
+ trust_remote_code=True,
22
+ ),
23
+ tokenizer_kwargs=dict(
24
+ padding_side='left',
25
+ truncation_side='left',
26
+ trust_remote_code=True,
27
+ ),
28
+ generation_kwargs=dict(do_sample=True, ),
29
+ max_out_len=200,
30
+ max_seq_len=4096,
31
+ batch_size=8,
32
+ run_cfg=dict(num_gpus=1, num_procs=1),
33
+ )
34
+ ]
35
+
36
+ datasets = sum([v for k, v in locals().items() if ('datasets' in k)], [])
37
+ summarizer = dict(type=DefaultSubjectiveSummarizer)
38
+
39
+ # -------------Evalation Stage ----------------------------------------
40
+
41
+ ## ------------- JudgeLLM Configuration
42
+
43
+ api_meta_template = dict(round=[
44
+ dict(role='SYSTEM', api_role='SYSTEM'),
45
+ dict(role='HUMAN', api_role='HUMAN'),
46
+ dict(role='BOT', api_role='BOT', generate=True),
47
+ ])
48
+ judge_models = [
49
+ dict(
50
+ # GPT4o
51
+ abbr='gpt-4o-0513-global',
52
+ type=OpenAI,
53
+ # gpt-4o
54
+ path='gpt-4o-0513-global',
55
+ key='xxx', # provide OPENAI_API_KEY
56
+ meta_template=api_meta_template,
57
+ query_per_second=16,
58
+ max_out_len=1000,
59
+ batch_size=8,
60
+ retry=3)
61
+ ]
62
+
63
+ ## ------------- Evaluation Configuration
64
+ eval = dict(
65
+ partitioner=dict(type=SubjectiveNaivePartitioner,
66
+ models=models,
67
+ judge_models=judge_models),
68
+ runner=dict(type=LocalRunner,
69
+ max_num_workers=16,
70
+ task=dict(type=SubjectiveEvalTask)),
71
+ )
72
+
73
+ work_dir = 'outputs/chinese_simpleqa/'
examples/eval_cibench.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from copy import deepcopy
2
+
3
+ from lagent import ReAct
4
+ from lagent.agents.react import ReActProtocol
5
+ from mmengine.config import read_base
6
+
7
+ from opencompass.lagent.actions.ipython_interpreter import IPythonInterpreter
8
+ from opencompass.lagent.actions.python_interpreter import PythonInterpreter
9
+ from opencompass.lagent.agents.react import CIReAct
10
+ from opencompass.models import HuggingFaceCausalLM
11
+ from opencompass.models.lagent import CodeAgent, LagentAgent
12
+ from opencompass.partitioners import NaivePartitioner, SizePartitioner
13
+ from opencompass.runners import LocalRunner, SlurmSequentialRunner
14
+ from opencompass.tasks import OpenICLInferTask
15
+
16
+ with read_base():
17
+ # Note that it might occur cuda OOM error for hf model
18
+ from opencompass.configs.datasets.CIBench.CIBench_generation_gen_8ab0dc import \
19
+ cibench_datasets as cibench_datasets_generation
20
+ from opencompass.configs.datasets.CIBench.CIBench_template_gen_e6b12a import \
21
+ cibench_datasets as cibench_datasets_template
22
+ from opencompass.configs.models.hf_llama.lmdeploy_llama3_8b_instruct import \
23
+ models as lmdeploy_llama3_8b_instruct_model
24
+ from opencompass.configs.summarizers.cibench import summarizer
25
+
26
+ # Oracle mode for analysis
27
+ # from opencompass.configs.datasets.CIBench.CIBench_template_oracle_gen_fecda1 import cibench_datasets as cibench_datasets_template_oracle
28
+ # from opencompass.configs.datasets.CIBench.CIBench_generation_oracle_gen_c4a7c1 import cibench_datasets as cibench_datasets_generation_oracle
29
+
30
+ datasets = []
31
+ datasets += cibench_datasets_template
32
+ datasets += cibench_datasets_generation
33
+ # datasets += cibench_datasets_template_oracle
34
+ # datasets += cibench_datasets_generation_oracle
35
+
36
+ _origin_models = sum([v for k, v in locals().items() if k.endswith('_model')],
37
+ [])
38
+
39
+ FORCE_STOP_PROMPT_EN = """You should directly give results based on history information."""
40
+
41
+ FEWSHOT_INSTRUCTION = """\
42
+ You are an assistant who can utilize external tools.
43
+ {tool_description}
44
+ To use a tool, please response with the following format:
45
+ ```
46
+ {thought} Think what you need to solve, do you need to use tools?
47
+ {action} The tool name, should be one of [{action_names}].
48
+ {action_input} The input to the tool that you want to use.
49
+ ```
50
+ The tool will give you response after your response using the following format:
51
+ ```
52
+ {response} the results after call the tool.
53
+ ```
54
+ Therefore DO NOT generate tool response by yourself.
55
+
56
+ Also please follow the guidelines:
57
+ 1. Always use code interpreter to solve the problem.
58
+ 2. The generated codes should always in a markdown code block format.
59
+ 3. The generated codes will be executed in an ipython manner and the results will be cached.
60
+ 4. Your responded code should always be simple and only solves the problem in current step.
61
+
62
+ For example:
63
+
64
+ File url: `xxxx`
65
+ ### Step 1. Load the dataset from the url into a pandas DataFrame named `df`.
66
+
67
+ {thought} We should use `pandas` to solve this step.
68
+ {action} IPythonInterpreter
69
+ {action_input} ```python
70
+ import pandas as pd
71
+ url = "xxxx"
72
+ data = pd.read_csv(url)
73
+ ```
74
+ {response} The code is succeed without any outputs.
75
+
76
+ Let us begin from here!
77
+ """
78
+
79
+ IPYTHON_INTERPRETER_DESCRIPTION = '''\
80
+ It can run Python code in a manner as jupyter notebook. The code must be a valid code that contains only python method.'''
81
+
82
+ actions = [
83
+ dict(type=IPythonInterpreter,
84
+ user_data_dir='./data/cibench_dataset/datasources',
85
+ description=IPYTHON_INTERPRETER_DESCRIPTION)
86
+ ]
87
+ protocol = dict(
88
+ type=ReActProtocol,
89
+ call_protocol=FEWSHOT_INSTRUCTION,
90
+ force_stop=FORCE_STOP_PROMPT_EN,
91
+ finish=dict(role='FINISH', begin='Final Answer:', end='\n'),
92
+ )
93
+
94
+ work_dir = './outputs/cibench/'
95
+
96
+ _agent_models = []
97
+ for m in _origin_models:
98
+ m = deepcopy(m)
99
+ if 'meta_template' in m and 'round' in m['meta_template']:
100
+ round = m['meta_template']['round']
101
+ if all(r['role'].upper() != 'SYSTEM'
102
+ for r in round): # no system round
103
+ if not any('api_role' in r for r in round):
104
+ m['meta_template']['round'].append(
105
+ dict(role='system', begin='System response:', end='\n'))
106
+ else:
107
+ m['meta_template']['round'].append(
108
+ dict(role='system', api_role='SYSTEM'))
109
+ print(
110
+ f'WARNING: adding SYSTEM round in meta_template for {m.get("abbr", None)}'
111
+ )
112
+ _agent_models.append(m)
113
+
114
+ protocol = dict(
115
+ type=ReActProtocol,
116
+ call_protocol=FEWSHOT_INSTRUCTION,
117
+ force_stop=FORCE_STOP_PROMPT_EN,
118
+ finish=dict(role='FINISH', begin='Final Answer:', end='\n'),
119
+ )
120
+
121
+ models = []
122
+ for m in _agent_models:
123
+ m = deepcopy(m)
124
+ origin_abbr = m.pop('abbr')
125
+ abbr = origin_abbr
126
+ m.pop('batch_size', None)
127
+ m.pop('max_out_len', None)
128
+ m.pop('max_seq_len', None)
129
+ run_cfg = m.pop('run_cfg', {})
130
+
131
+ agent_model = dict(
132
+ abbr=abbr,
133
+ summarizer_abbr=origin_abbr,
134
+ type=CodeAgent,
135
+ agent_type=CIReAct,
136
+ max_turn=3,
137
+ llm=m,
138
+ actions=[
139
+ dict(type=IPythonInterpreter,
140
+ user_data_dir='./data/cibench_dataset/datasources',
141
+ description=IPYTHON_INTERPRETER_DESCRIPTION)
142
+ ],
143
+ protocol=protocol,
144
+ batch_size=1,
145
+ run_cfg=run_cfg,
146
+ )
147
+ models.append(agent_model)
148
+
149
+ infer = dict(
150
+ partitioner=dict(type=NaivePartitioner),
151
+ runner=dict(type=LocalRunner,
152
+ max_num_workers=4,
153
+ task=dict(type=OpenICLInferTask)),
154
+ )
examples/eval_claude.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ from opencompass.partitioners import NaivePartitioner
4
+ from opencompass.runners import LocalRunner
5
+ from opencompass.tasks import OpenICLInferTask
6
+
7
+ with read_base():
8
+ # choose a list of datasets
9
+ from opencompass.configs.datasets.collections.chat_medium import datasets
10
+ from opencompass.configs.models.claude.claude import models
11
+ # and output the results in a choosen format
12
+ from opencompass.configs.summarizers.medium import summarizer
13
+
14
+ infer = dict(
15
+ partitioner=dict(type=NaivePartitioner),
16
+ runner=dict(type=LocalRunner,
17
+ max_num_workers=8,
18
+ task=dict(type=OpenICLInferTask)),
19
+ )
examples/eval_codeagent.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ from opencompass.models import HuggingFaceCausalLM, OpenAI
4
+ from opencompass.models.lagent import CodeAgent
5
+ from opencompass.partitioners import SizePartitioner
6
+ from opencompass.runners import LocalRunner
7
+ from opencompass.tasks import OpenICLInferTask
8
+
9
+ with read_base():
10
+ from opencompass.configs.datasets.gsm8k.gsm8k_gen_57b0b1 import \
11
+ gsm8k_datasets
12
+ from opencompass.configs.datasets.math.math_gen_943d32 import math_datasets
13
+
14
+ datasets = []
15
+ datasets += gsm8k_datasets
16
+ datasets += math_datasets
17
+
18
+ models = [
19
+ dict(abbr='gpt-3.5-react',
20
+ type=CodeAgent,
21
+ llm=dict(
22
+ type=OpenAI,
23
+ path='gpt-3.5-turbo',
24
+ key='ENV',
25
+ query_per_second=1,
26
+ max_seq_len=4096,
27
+ ),
28
+ batch_size=8),
29
+ dict(abbr='WizardCoder-Python-13B-V1.0-react',
30
+ type=CodeAgent,
31
+ llm=dict(
32
+ type=HuggingFaceCausalLM,
33
+ path='WizardLM/WizardCoder-Python-13B-V1.0',
34
+ tokenizer_path='WizardLM/WizardCoder-Python-13B-V1.0',
35
+ tokenizer_kwargs=dict(
36
+ padding_side='left',
37
+ truncation_side='left',
38
+ trust_remote_code=True,
39
+ ),
40
+ max_seq_len=2048,
41
+ model_kwargs=dict(trust_remote_code=True, device_map='auto'),
42
+ ),
43
+ batch_size=8,
44
+ run_cfg=dict(num_gpus=2, num_procs=1)),
45
+ ]
46
+
47
+ infer = dict(
48
+ partitioner=dict(type=SizePartitioner, max_task_size=40000),
49
+ runner=dict(type=LocalRunner,
50
+ max_num_workers=16,
51
+ task=dict(type=OpenICLInferTask)),
52
+ )
examples/eval_codebench_full.py ADDED
@@ -0,0 +1,155 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This config is used to test all the code benchmarks
2
+ from mmengine.config import read_base
3
+ import os.path as osp
4
+ from opencompass.runners import LocalRunner, VOLCRunner
5
+ from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
6
+ from opencompass.tasks import OpenICLInferTask, OpenICLEvalTask
7
+
8
+ with read_base():
9
+ # Datasets Part
10
+ # bigcodebench
11
+ from opencompass.configs.datasets.bigcodebench.bigcodebench_full_instruct_gen import (
12
+ bigcodebench_full_instruct_datasets
13
+ )
14
+ from opencompass.configs.datasets.bigcodebench.bigcodebench_hard_instruct_gen import (
15
+ bigcodebench_hard_instruct_datasets
16
+ )
17
+ # livecodebench code generation lite v5
18
+ from opencompass.configs.datasets.livecodebench.livecodebench_time_split_gen_a4f90b import (
19
+ LCB_datasets
20
+ )
21
+ # huamneval series
22
+ from opencompass.configs.datasets.humaneval.humaneval_openai_sample_evals_gen_dcae0e import (
23
+ humaneval_datasets
24
+ )
25
+ from opencompass.configs.datasets.humaneval_pro.humaneval_pro_gen import (
26
+ humanevalpro_datasets
27
+ )
28
+ from opencompass.configs.datasets.humanevalx.humanevalx_gen_620cfa import (
29
+ humanevalx_datasets
30
+ )
31
+ from opencompass.configs.datasets.humaneval_plus.humaneval_plus_gen import (
32
+ humaneval_plus_datasets
33
+ )
34
+ # mbpp series
35
+ from opencompass.configs.datasets.mbpp.mbpp_gen import (
36
+ mbpp_datasets
37
+ )
38
+ from opencompass.configs.datasets.mbpp_pro.mbpp_pro_gen import (
39
+ mbpppro_datasets
40
+ )
41
+ # multipl-e
42
+ from opencompass.configs.datasets.multipl_e.multiple_gen import (
43
+ multiple_datasets
44
+ )
45
+ # ds1000
46
+ from opencompass.configs.datasets.ds1000.ds1000_service_eval_gen_cbc84f import (
47
+ ds1000_datasets
48
+ )
49
+
50
+ # Models Part
51
+ from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import (
52
+ models as lmdeploy_qwen2_5_7b_instruct_model,
53
+ )
54
+
55
+ # Summary Groups
56
+ from opencompass.configs.summarizers.groups.ds1000 import (
57
+ ds1000_summary_groups,
58
+ )
59
+ from opencompass.configs.summarizers.groups.multipl_e import (
60
+ multiple_summary_groups,
61
+ )
62
+ from opencompass.configs.summarizers.groups.humanevalx import (
63
+ humanevalx_summary_groups,
64
+ )
65
+
66
+ # models config
67
+ models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
68
+
69
+ for model in models:
70
+ model['max_seq_len'] = 16384
71
+ model['max_out_len'] = 8192
72
+
73
+ # datasets config
74
+ datasets = sum(
75
+ (v for k, v in locals().items() if k.endswith('_datasets')),
76
+ [],
77
+ )
78
+
79
+ for item in humanevalx_datasets:
80
+ item['eval_cfg']['evaluator'][
81
+ 'ip_address'
82
+ ] = 'codeeval.opencompass.org.cn/humanevalx'
83
+ item['eval_cfg']['evaluator']['port'] = ''
84
+ for item in ds1000_datasets:
85
+ item['eval_cfg']['evaluator'][
86
+ 'ip_address'
87
+ ] = 'codeeval.opencompass.org.cn/ds1000'
88
+ item['eval_cfg']['evaluator']['port'] = ''
89
+
90
+
91
+ for dataset in datasets:
92
+ dataset['infer_cfg']['inferencer']['max_out_len'] = 8192
93
+
94
+
95
+ # summary
96
+ summary_groups = sum(
97
+ [v for k, v in locals().items() if k.endswith('_summary_groups')], []
98
+ )
99
+ summary_groups.append(
100
+ {'name': 'humanevalx',
101
+ 'subsets': ['humanevalx-python', 'humanevalx-cpp', 'humanevalx-java', 'humanevalx-js']}
102
+ )
103
+ summarizer = dict(
104
+ dataset_abbrs = [
105
+ ['bigcodebench_hard_instruct', 'pass@1'],
106
+ ['bigcodebench_full_instruct', 'pass@1'],
107
+ ['lcb_code_generation', 'pass@1'],
108
+ ['openai_humaneval', 'humaneval_pass@1'],
109
+ ['mbpp', 'score'],
110
+ ['humaneval_pro', 'pass@1'],
111
+ ['mbpp_pro', 'pass@1'],
112
+ ['humaneval_plus', 'humaneval_plus_pass@1'],
113
+ ['multiple', 'naive_average'],
114
+ ['humanevalx', 'naive_average'],
115
+ ['ds1000', 'naive_average'],
116
+ '',
117
+ 'humanevalx-python',
118
+ 'humanevalx-cpp',
119
+ 'humanevalx-java',
120
+ 'humanevalx-js',
121
+ '',
122
+ 'ds1000_Pandas',
123
+ 'ds1000_Numpy',
124
+ 'ds1000_Tensorflow',
125
+ 'ds1000_Scipy',
126
+ 'ds1000_Sklearn',
127
+ 'ds1000_Pytorch',
128
+ 'ds1000_Matplotlib',
129
+ '',
130
+ 'humaneval-multiple-cpp',
131
+ 'humaneval-multiple-cs',
132
+ 'humaneval-multiple-go',
133
+ 'humaneval-multiple-java',
134
+ 'humaneval-multiple-rb',
135
+ 'humaneval-multiple-js',
136
+ 'humaneval-multiple-php',
137
+ 'humaneval-multiple-r',
138
+ 'humaneval-multiple-rs',
139
+ 'humaneval-multiple-sh',
140
+ '',
141
+ 'mbpp-multiple-cpp',
142
+ 'mbpp-multiple-cs',
143
+ 'mbpp-multiple-go',
144
+ 'mbpp-multiple-java',
145
+ 'mbpp-multiple-rb',
146
+ 'mbpp-multiple-js',
147
+ 'mbpp-multiple-php',
148
+ 'mbpp-multiple-r',
149
+ 'mbpp-multiple-rs',
150
+ 'mbpp-multiple-sh'
151
+ ],
152
+ summary_groups=summary_groups,
153
+ )
154
+
155
+ work_dir = 'outputs/code'
examples/eval_compassarena_subjectivebench_bradleyterry.py ADDED
@@ -0,0 +1,119 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from opencompass.configs.datasets.subjective.compass_arena_subjective_bench.singleturn.pairwise_bt_judge import (
5
+ compassarena_subjectivebench_bradleyterry_singleturn_datasets, )
6
+ from opencompass.configs.datasets.subjective.compass_arena_subjective_bench.multiturn.pairwise_bt_judge import (
7
+ compassarena_subjectivebench_bradleyterry_multiturn_datasets, )
8
+
9
+ from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import (
10
+ models as lmdeploy_internlm2_5_7b_chat, )
11
+ from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_20b_chat import (
12
+ models as lmdeploy_internlm2_5_20b_chat, )
13
+ from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import (
14
+ models as lmdeploy_llama3_1_8b_instruct, )
15
+ from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_70b_instruct import (
16
+ models as lmdeploy_llama3_1_70b_instruct, )
17
+ from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_0_5b_instruct import (
18
+ models as lmdeploy_qwen2_5_0_5b_instruct, )
19
+ from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_1_5b_instruct import (
20
+ models as lmdeploy_qwen2_5_1_5b_instruct, )
21
+ from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_3b_instruct import (
22
+ models as lmdeploy_qwen2_5_3b_instruct, )
23
+ from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_7b_instruct import (
24
+ models as lmdeploy_qwen2_5_7b_instruct, )
25
+ from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_14b_instruct import (
26
+ models as lmdeploy_qwen2_5_14b_instruct, )
27
+ from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_32b_instruct import (
28
+ models as lmdeploy_qwen2_5_32b_instruct, )
29
+ from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import (
30
+ models as lmdeploy_qwen2_5_72b_instruct, )
31
+ from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import (
32
+ models as lmdeploy_qwen2_7b_instruct, )
33
+
34
+ from opencompass.models import (HuggingFace, HuggingFaceCausalLM,
35
+ HuggingFaceChatGLM3, OpenAI,
36
+ TurboMindModelwithChatTemplate)
37
+ from opencompass.partitioners import NaivePartitioner, SizePartitioner
38
+ from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
39
+ from opencompass.partitioners.sub_num_worker import \
40
+ SubjectiveNumWorkerPartitioner
41
+ from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
42
+ from opencompass.runners import LocalRunner, SlurmSequentialRunner
43
+ from opencompass.summarizers import CompassArenaBradleyTerrySummarizer
44
+ from opencompass.tasks import OpenICLInferTask
45
+ from opencompass.tasks.subjective_eval import SubjectiveEvalTask
46
+
47
+ api_meta_template = dict(round=[
48
+ dict(role='HUMAN', api_role='HUMAN'),
49
+ dict(role='BOT', api_role='BOT', generate=True),
50
+ ])
51
+
52
+ # -------------Inference Stage ----------------------------------------
53
+ models = [
54
+ *lmdeploy_qwen2_5_14b_instruct,
55
+ *lmdeploy_qwen2_5_32b_instruct,
56
+ *lmdeploy_qwen2_5_7b_instruct,
57
+ *lmdeploy_qwen2_7b_instruct,
58
+ ]
59
+
60
+ datasets = [
61
+ *compassarena_subjectivebench_bradleyterry_singleturn_datasets,
62
+ *compassarena_subjectivebench_bradleyterry_multiturn_datasets,
63
+ ]
64
+
65
+ infer = dict(
66
+ partitioner=dict(type=NaivePartitioner),
67
+ runner=dict(type=LocalRunner,
68
+ max_num_workers=16,
69
+ task=dict(type=OpenICLInferTask)),
70
+ )
71
+ # -------------Evalation Stage ----------------------------------------
72
+
73
+ ## ------------- JudgeLLM Configuration
74
+ judge_models = [
75
+ dict(
76
+ type=TurboMindModelwithChatTemplate,
77
+ abbr='CompassJudger-1-32B-Instruct',
78
+ path='opencompass/CompassJudger-1-32B-Instruct',
79
+ engine_config=dict(session_len=16384, max_batch_size=16, tp=4),
80
+ gen_config=dict(top_k=1,
81
+ temperature=1e-6,
82
+ top_p=0.9,
83
+ max_new_tokens=2048),
84
+ max_seq_len=16384,
85
+ max_out_len=2048,
86
+ batch_size=16,
87
+ run_cfg=dict(num_gpus=4),
88
+ )
89
+ ]
90
+
91
+ ## ------------- Evaluation Configuration
92
+ eval = dict(
93
+ partitioner=dict(
94
+ type=SubjectiveNaivePartitioner,
95
+ models=models,
96
+ judge_models=judge_models,
97
+ ),
98
+ runner=dict(type=LocalRunner,
99
+ max_num_workers=16,
100
+ task=dict(type=SubjectiveEvalTask)),
101
+ )
102
+
103
+ ## ------------- Summary Configuration
104
+ # This step fits a Bradley-Terry model (statistical model) with an option
105
+ # to include style features and control variables based on groups
106
+ # (group variables must be available in the input dataset for each observation).
107
+ summarizer = dict(
108
+ type=CompassArenaBradleyTerrySummarizer,
109
+ rating_system='bradleyterry',
110
+ report_pred_win_rates=True,
111
+ num_bootstrap=100,
112
+ num_cpu=None,
113
+ with_control_vars=True,
114
+ normalize_style_features=False,
115
+ odds_ratio=True,
116
+ groups=['difficulty', 'category'],
117
+ )
118
+
119
+ work_dir = 'outputs/compassarena_subjectivebench_bradleyterry/'
examples/eval_contamination.py ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from opencompass.configs.datasets.ARC_c.ARC_c_clean_ppl import \
5
+ ARC_c_datasets
6
+ from opencompass.configs.datasets.ceval.ceval_clean_ppl import \
7
+ ceval_datasets
8
+ from opencompass.configs.datasets.hellaswag.hellaswag_clean_ppl import \
9
+ hellaswag_datasets
10
+ from opencompass.configs.datasets.mmlu.mmlu_clean_ppl import mmlu_datasets
11
+ from opencompass.configs.models.hf_llama.hf_llama2_7b import \
12
+ models as hf_llama2_7b_model
13
+ from opencompass.configs.models.qwen.hf_qwen_7b import \
14
+ models as hf_qwen_7b_model
15
+ from opencompass.configs.models.yi.hf_yi_6b import models as hf_yi_6b_model
16
+ from opencompass.configs.summarizers.contamination import summarizer
17
+
18
+ datasets = [
19
+ *ceval_datasets, *mmlu_datasets, *hellaswag_datasets, *ARC_c_datasets
20
+ ]
21
+ models = [*hf_yi_6b_model, *hf_qwen_7b_model, *hf_llama2_7b_model]
examples/eval_corebench_2409_longcontext.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os.path as osp
2
+ from copy import deepcopy
3
+
4
+ from mmengine.config import read_base
5
+
6
+ from opencompass.models import (HuggingFacewithChatTemplate,
7
+ TurboMindModelwithChatTemplate)
8
+ from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
9
+ from opencompass.runners import DLCRunner, LocalRunner
10
+ from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
11
+
12
+ #######################################################################
13
+ # PART 0 Essential Configs #
14
+ #######################################################################
15
+ with read_base():
16
+ from opencompass.configs.datasets.longbench.longbench import \
17
+ longbench_datasets
18
+ from opencompass.configs.datasets.needlebench.needlebench_8k.needlebench_8k import \
19
+ needlebench_datasets as needlebench_8k_datasets
20
+ from opencompass.configs.datasets.needlebench.needlebench_32k.needlebench_32k import \
21
+ needlebench_datasets as needlebench_32k_datasets
22
+ from opencompass.configs.datasets.needlebench.needlebench_128k.needlebench_128k import \
23
+ needlebench_datasets as needlebench_128k_datasets
24
+ from opencompass.configs.datasets.ruler.ruler_8k_gen import \
25
+ ruler_datasets as ruler_8k_datasets
26
+ from opencompass.configs.datasets.ruler.ruler_32k_gen import \
27
+ ruler_datasets as ruler_32k_datasets
28
+ from opencompass.configs.datasets.ruler.ruler_128k_gen import \
29
+ ruler_datasets as ruler_128k_datasets
30
+ from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat_1m import \
31
+ models as lmdeploy_internlm2_5_7b_1m_chat_model
32
+ from opencompass.configs.models.hf_llama.lmdeploy_llama3_1_8b_instruct import \
33
+ models as llama3_1_8b_instruct_model
34
+ # Instruct models
35
+ from opencompass.configs.models.qwen.lmdeploy_qwen2_7b_instruct import \
36
+ models as lmdeploy_qwen2_7b_instruct_model
37
+ # Summary Groups
38
+ from opencompass.configs.summarizers.groups.longbench import \
39
+ longbench_summary_groups
40
+ from opencompass.configs.summarizers.groups.ruler import \
41
+ ruler_summary_groups
42
+ from opencompass.configs.summarizers.needlebench import (
43
+ needlebench_8k_summarizer, needlebench_32k_summarizer,
44
+ needlebench_128k_summarizer)
45
+
46
+ #######################################################################
47
+ # PART 1 Datasets List #
48
+ #######################################################################
49
+ # datasets list for evaluation
50
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
51
+
52
+ #######################################################################
53
+ # PART 2 Datset Summarizer #
54
+ #######################################################################
55
+ needlebench_8k_summary_groups = needlebench_8k_summarizer['summary_groups']
56
+ needlebench_32k_summary_groups = needlebench_32k_summarizer['summary_groups']
57
+ needlebench_128k_summary_groups = needlebench_128k_summarizer['summary_groups']
58
+
59
+ # Instruct models summarizer
60
+ summarizer = dict(
61
+ dataset_abbrs=[
62
+ ['ruler_8k', 'naive_average'],
63
+ ['ruler_32k', 'naive_average'],
64
+ ['ruler_128k', 'naive_average'],
65
+ ['NeedleBench-Overall-Score-8K', 'weighted_average'],
66
+ ['NeedleBench-Overall-Score-32K', 'weighted_average'],
67
+ ['NeedleBench-Overall-Score-128K', 'weighted_average'],
68
+ ['longbench', 'naive_average'],
69
+ ['longbench_zh', 'naive_average'],
70
+ ['longbench_en', 'naive_average'],
71
+ '',
72
+ 'longbench_single-document-qa',
73
+ 'longbench_multi-document-qa',
74
+ 'longbench_summarization',
75
+ 'longbench_few-shot-learning',
76
+ 'longbench_synthetic-tasks',
77
+ 'longbench_code-completion',
78
+ ],
79
+ summary_groups=sum(
80
+ [v for k, v in locals().items() if k.endswith('_summary_groups')], []),
81
+ )
82
+
83
+ #######################################################################
84
+ # PART 3 Models List #
85
+ #######################################################################
86
+
87
+ lmdeploy_qwen2_7b_instruct_model[0]['max_seq_len'] = 1048576
88
+ lmdeploy_qwen2_7b_instruct_model[0]['engine_config']['session_len'] = 1048576
89
+ lmdeploy_qwen2_7b_instruct_model[0]['engine_config']['tp'] = 4
90
+ lmdeploy_qwen2_7b_instruct_model[0]['engine_config']['rope_scaling_factor'] = 4
91
+ lmdeploy_qwen2_7b_instruct_model[0]['run_cfg']['num_gpus'] = 4
92
+
93
+ llama3_1_8b_instruct_model[0]['max_seq_len'] = 1048576
94
+ llama3_1_8b_instruct_model[0]['engine_config']['session_len'] = 1048576
95
+ llama3_1_8b_instruct_model[0]['engine_config']['tp'] = 4
96
+ llama3_1_8b_instruct_model[0]['engine_config']['rope_scaling_factor'] = 4
97
+ llama3_1_8b_instruct_model[0]['run_cfg']['num_gpus'] = 4
98
+
99
+ models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
100
+
101
+ #######################################################################
102
+ # PART 4 Inference/Evaluation Configuaration #
103
+ #######################################################################
104
+
105
+ # Local Runner
106
+ infer = dict(
107
+ partitioner=dict(type=NumWorkerPartitioner, num_worker=8),
108
+ runner=dict(
109
+ type=LocalRunner,
110
+ max_num_workers=16,
111
+ retry=0, # Modify if needed
112
+ task=dict(type=OpenICLInferTask)),
113
+ )
114
+
115
+ # eval with local runner
116
+ eval = dict(
117
+ partitioner=dict(type=NaivePartitioner, n=10),
118
+ runner=dict(type=LocalRunner,
119
+ max_num_workers=16,
120
+ task=dict(type=OpenICLEvalTask)),
121
+ )
122
+
123
+ #######################################################################
124
+ # PART 5 Utils Configuaration #
125
+ #######################################################################
126
+ base_exp_dir = 'outputs/corebench/'
127
+ work_dir = osp.join(base_exp_dir, 'long_context')
examples/eval_corebench_2409_subjective.py ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
import os.path as osp
from copy import deepcopy

from mmengine.config import read_base

from opencompass.models import (HuggingFacewithChatTemplate,
                                TurboMindModelwithChatTemplate)
from opencompass.models.openai_api import OpenAI, OpenAISDK
from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.runners import DLCRunner, LocalRunner
from opencompass.summarizers import SubjectiveSummarizer
from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
from opencompass.tasks.subjective_eval import SubjectiveEvalTask

#######################################################################
#                     PART 0  Essential Configs                       #
#######################################################################
with read_base():
    # Datasets part
    from opencompass.configs.datasets.subjective.alignbench.alignbench_v1_1_judgeby_critiquellm import \
        alignbench_datasets
    from opencompass.configs.datasets.subjective.arena_hard.arena_hard_compare import \
        arenahard_datasets
    from opencompass.configs.datasets.subjective.multiround.mtbench_single_judge_diff_temp import \
        mtbench_datasets

    # Summarizer
    # Model list (uncomment to pull in extra models under test)
    # from opencompass.configs.models.qwen.lmdeploy_qwen2_1_5b_instruct import models as lmdeploy_qwen2_1_5b_instruct_model
    # from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat import models as hf_internlm2_5_7b_chat_model

#######################################################################
#                     PART 1  Datasets List                           #
#######################################################################
# Flatten every '*_datasets' list imported above into a single list.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

#######################################################################
#                     PART 2  Dataset Summarizer                      #
#######################################################################
summarizer = dict(type=SubjectiveSummarizer, function='subjective')

#######################################################################
#                     PART 3  Models List                             #
#######################################################################
models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='internlm2_5-7b-chat-turbomind',
        path='internlm/internlm2_5-7b-chat',
        engine_config=dict(session_len=16384, max_batch_size=16, tp=1),
        gen_config=dict(top_k=40,
                        temperature=1.0,
                        top_p=0.9,
                        max_new_tokens=4096),
        max_seq_len=16384,
        max_out_len=4096,
        batch_size=16,
        run_cfg=dict(num_gpus=1),
    )
]

# Append any '*_model' lists brought in via read_base above.
models = sum([v for k, v in locals().items() if k.endswith('_model')], models)

#######################################################################
#               PART 4  Inference/Evaluation Configuration            #
#######################################################################

# Inference with the local runner.
infer = dict(
    partitioner=dict(type=NumWorkerPartitioner, num_worker=8),
    runner=dict(
        type=LocalRunner,
        max_num_workers=16,
        retry=0,  # Modify if needed
        task=dict(type=OpenICLInferTask)),
)

# JudgeLLM
api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
])

judge_models = [
    dict(
        type=OpenAISDK,
        abbr='gpt-4o-2024-08-06',
        path='gpt-4o-2024-08-06',
        # openai_api_base=
        # 'http://10.140.1.86:10001/v1',  # Change to your own url if needed.
        key='YOUR_API_KEY',
        retry=10,
        meta_template=api_meta_template,
        rpm_verbose=True,
        query_per_second=1,
        max_out_len=4096,
        max_seq_len=16384,
        batch_size=16,
        temperature=0.01,
        tokenizer_path='gpt-4o-2024-08-06')
]

# Evaluation with the local runner.
eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        models=models,
        judge_models=judge_models,
    ),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                task=dict(type=SubjectiveEvalTask)),
)

#######################################################################
#                     PART 5  Utils Configuration                     #
#######################################################################
base_exp_dir = 'outputs/corebench/'
work_dir = osp.join(base_exp_dir, 'chat_subjective')
examples/eval_edgellm_demo.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from mmengine.config import read_base

with read_base():
    # datasets
    from opencompass.configs.datasets.bbh.bbh_gen import bbh_datasets
    from opencompass.configs.datasets.commonsenseqa.commonsenseqa_7shot_cot_gen_734a22 import \
        commonsenseqa_datasets
    from opencompass.configs.datasets.FewCLUE_chid.FewCLUE_chid_gen import \
        chid_datasets
    from opencompass.configs.datasets.gsm8k.gsm8k_gen import gsm8k_datasets
    from opencompass.configs.datasets.humaneval.humaneval_gen import \
        humaneval_datasets
    from opencompass.configs.datasets.longbench.longbench import \
        longbench_datasets
    from opencompass.configs.datasets.truthfulqa.truthfulqa_gen import \
        truthfulqa_datasets
    # models
    from opencompass.configs.models.hf_llama.hf_llama3_8b import \
        models as hf_llama3_8b_model
    from opencompass.configs.models.others.hf_phi_2 import \
        models as hf_phi_2_model
    from opencompass.configs.models.qwen.hf_qwen2_7b import \
        models as hf_qwen2_7b_model

# Flatten every imported '*_datasets' list (plus any pre-existing
# 'datasets' variable) and every '*_model' list.
datasets = sum([
    v
    for k, v in locals().items() if k.endswith('_datasets') or k == 'datasets'
], [])
models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
work_dir = './outputs/edgellm/'

# Reference results recorded from previous runs:
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# dataset                                       version    metric            mode    phi-2_hf
# -------------------------------------------   ---------  ----------------  ------  ----------
# commonsense_qa                                c946f2     accuracy          gen     65.19
# openai_humaneval                              8e312c     humaneval_pass@1  gen     30.49
# truthful_qa                                   5ddc62     rouge_max         gen     0.08
# truthful_qa                                   5ddc62     rouge_diff        gen     -0.00
# truthful_qa                                   5ddc62     rouge_acc         gen     0.41
# gsm8k                                         1d7fe4     accuracy          gen     62.40
# chid-dev                                      211ee7     accuracy          gen     12.87
# chid-test                                     211ee7     accuracy          gen     14.34
# bbh                                           -          naive_average     gen     59.50

# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# dataset                                       version    metric            mode    Meta-Llama-3-8B_hf
# -------------------------------------------   ---------  ----------------  ------  --------------------
# commonsense_qa                                c946f2     accuracy          gen     70.11
# openai_humaneval                              8e312c     humaneval_pass@1  gen     26.22
# truthful_qa                                   5ddc62     rouge_max         gen     0.07
# truthful_qa                                   5ddc62     rouge_diff        gen     -0.01
# truthful_qa                                   5ddc62     rouge_acc         gen     0.41
# gsm8k                                         1d7fe4     accuracy          gen     55.80
# chid-dev                                      211ee7     accuracy          gen     40.59
# chid-test                                     211ee7     accuracy          gen     36.66
# bbh                                           -          naive_average     gen     61.62
# 20240816_060452
# tabulate format
# ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
# dataset         version    metric      mode    qwen2-7b-hf
# --------------  ---------  ----------  ------  -------------
# commonsense_qa  734a22     accuracy    gen     65.19
# truthful_qa     5ddc62     rouge_max   gen     0.08
# truthful_qa     5ddc62     rouge_diff  gen     -0.02
# truthful_qa     5ddc62     rouge_acc   gen     0.44
examples/eval_gpt3.5.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from mmengine.config import read_base

from opencompass.models import OpenAI
from opencompass.partitioners import NaivePartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask

with read_base():
    # choose a list of datasets
    from opencompass.configs.datasets.collections.chat_medium import datasets
    # and output the results in a chosen format
    from opencompass.configs.summarizers.medium import summarizer

api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
], )

models = [
    dict(
        abbr='GPT-3.5-turbo-0613',
        type=OpenAI,
        path='gpt-3.5-turbo-0613',
        # With 'ENV' the key is read from $OPENAI_API_KEY, but you can
        # write down your key here as well.
        key='ENV',
        meta_template=api_meta_template,
        query_per_second=1,
        max_out_len=2048,
        max_seq_len=4096,
        batch_size=8),
]

infer = dict(
    partitioner=dict(type=NaivePartitioner),
    runner=dict(type=LocalRunner,
                max_num_workers=8,
                task=dict(type=OpenICLInferTask)),
)
examples/eval_hellobench.py ADDED
@@ -0,0 +1,106 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from mmengine.config import read_base

with read_base():
    from opencompass.configs.datasets.subjective.hellobench.hellobench import hellobench_datasets

from opencompass.models import HuggingFacewithChatTemplate, OpenAI
from opencompass.partitioners import NaivePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.runners import LocalRunner
from opencompass.summarizers import DefaultSubjectiveSummarizer
from opencompass.tasks import OpenICLInferTask
from opencompass.tasks.subjective_eval import SubjectiveEvalTask

api_meta_template = dict(round=[
    dict(role='HUMAN', api_role='HUMAN'),
    dict(role='BOT', api_role='BOT', generate=True),
])

# ------------- Inference Stage ---------------------------------------
# Subjective evaluation usually samples from the model.  Make sure every
# model under test uses the same generation parameters (e.g. if you set
# temperature=0.8, set it to 0.8 for all models).
models = [
    dict(
        type=HuggingFacewithChatTemplate,
        abbr='glm-4-9b-chat-hf',
        path='THUDM/glm-4-9b-chat',
        max_out_len=16384,
        generation_kwargs=dict(
            temperature=0.8,
            # For subjective evaluation we suggest enabling sampling at
            # inference time.
            do_sample=True,
        ),
        model_kwargs=dict(
            device_map='auto',
            trust_remote_code=True,
        ),
        batch_size=1,
        run_cfg=dict(num_gpus=2, num_procs=1),
        stop_words=['<|endoftext|>', '<|user|>', '<|observation|>'],
    )
]

datasets = [*hellobench_datasets]  # add datasets you want

infer = dict(
    partitioner=dict(type=NaivePartitioner),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                task=dict(type=OpenICLInferTask)),
)
# ------------- Evaluation Stage --------------------------------------

# ------------- JudgeLLM Configuration
# we recommend using gpt4o-mini as the judge model

# To use an open-source LLM as the judge model instead, uncomment:
# judge_models = [
#     dict(
#         type=HuggingFacewithChatTemplate,
#         abbr='glm-4-9b-chat-hf',
#         path='THUDM/glm-4-9b-chat',
#         max_out_len=16384,
#         generation_kwargs=dict(
#             temperature=0.8,
#             do_sample=True,
#         ),
#         model_kwargs=dict(
#             device_map='auto',
#             trust_remote_code=True,
#         ),
#         batch_size=1,
#         run_cfg=dict(num_gpus=2, num_procs=1),
#         stop_words=['<|endoftext|>', '<|user|>', '<|observation|>'],
#     )
# ]

judge_models = [
    dict(
        abbr='GPT4o',
        type=OpenAI,
        path='gpt-4o',
        # The key will be obtained from $OPENAI_API_KEY, but you can
        # write down your key here as well.
        key='xxxx',
        meta_template=api_meta_template,
        query_per_second=16,
        max_out_len=4096,
        batch_size=1,
        temperature=0.8,
        seed=42,
    )
]

# ------------- Evaluation Configuration
eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        models=models,
        judge_models=judge_models,
    ),
    runner=dict(type=LocalRunner,
                max_num_workers=16,
                task=dict(type=SubjectiveEvalTask)),
)

summarizer = dict(type=DefaultSubjectiveSummarizer)
work_dir = 'outputs/hellobench/'
examples/eval_internlm2_chat_keyset.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from copy import deepcopy

from mmengine.config import read_base

with read_base():
    from opencompass.configs.datasets.agieval.agieval_gen_64afd3 import \
        agieval_datasets
    from opencompass.configs.datasets.bbh.bbh_gen_5b92b0 import bbh_datasets
    from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import \
        gsm8k_datasets
    from opencompass.configs.datasets.humaneval.humaneval_gen_8e312c import \
        humaneval_datasets
    from opencompass.configs.datasets.math.math_evaluatorv2_gen_cecb31 import \
        math_datasets
    from opencompass.configs.datasets.mbpp.deprecated_sanitized_mbpp_gen_1e1056 import \
        sanitized_mbpp_datasets
    from opencompass.configs.datasets.mmlu.mmlu_gen_a484b3 import mmlu_datasets
    from opencompass.configs.models.hf_internlm.hf_internlm2_chat_7b import \
        models as hf_internlm2_chat_7b_model
    from opencompass.configs.models.hf_internlm.hf_internlm2_chat_20b import \
        models as hf_internlm2_chat_20b_model
    from opencompass.configs.summarizers.internlm2_keyset import summarizer

work_dir = './outputs/internlm2-chat-keyset/'

_origin_datasets = sum(
    [v for k, v in locals().items() if k.endswith('_datasets')], [])
_origin_models = sum([v for k, v in locals().items() if k.endswith('_model')],
                     [])

_vanilla_datasets = [deepcopy(d) for d in _origin_datasets]

# Evaluate without a system prompt: drop any SYSTEM round from each
# model's meta template (deep-copied so the imported configs are intact).
_vanilla_models = []
for _cfg in _origin_models:
    _cfg = deepcopy(_cfg)
    if 'meta_template' in _cfg and 'round' in _cfg['meta_template']:
        rounds = _cfg['meta_template']['round']
        if any(r['role'] == 'SYSTEM' for r in rounds):
            print(
                f'WARNING: remove SYSTEM round in meta_template for {_cfg.get("abbr", None)}'
            )
            _cfg['meta_template']['round'] = [
                r for r in rounds if r['role'] != 'SYSTEM'
            ]
    _vanilla_models.append(_cfg)

datasets = _vanilla_datasets
models = _vanilla_models
examples/eval_internlm2_keyset.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from mmengine.config import read_base

with read_base():
    from opencompass.configs.datasets.agieval.agieval_mixed_713d14 import \
        agieval_datasets
    from opencompass.configs.datasets.bbh.bbh_gen_5b92b0 import bbh_datasets
    from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import \
        gsm8k_datasets
    from opencompass.configs.datasets.humaneval.deprecated_humaneval_gen_a82cae import \
        humaneval_datasets
    from opencompass.configs.datasets.math.math_gen_265cce import math_datasets
    from opencompass.configs.datasets.mbpp.deprecated_sanitized_mbpp_gen_1e1056 import \
        sanitized_mbpp_datasets
    from opencompass.configs.datasets.mmlu.mmlu_ppl_ac766d import mmlu_datasets
    from opencompass.configs.models.hf_internlm.hf_internlm2_7b import \
        models as hf_internlm2_7b_model
    from opencompass.configs.models.hf_internlm.hf_internlm2_20b import \
        models as hf_internlm2_20b_model
    from opencompass.configs.summarizers.internlm2_keyset import summarizer

work_dir = './outputs/internlm2-keyset/'

# Gather every imported '*_datasets' and '*_model' list.
datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
models = sum((v for k, v in locals().items() if k.endswith('_model')), [])
examples/eval_internlm3_math500_thinking.py ADDED
@@ -0,0 +1,120 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# To run this example, you need to do the following steps:
# 1. Install latest opencompass
# 2. Start a local server with Qwen2.5-72B-Instruct as LLMJudge server (i.e. using vLLM or LMDeploy)
# 3. Change the judge_cfg openai_api_base to your corresponding local server address
# 4. Start this evaluation by running 'opencompass eval_internlm3_math500_thinking.py'
from mmengine.config import read_base

from opencompass.models import OpenAISDK, VLLMwithChatTemplate

with read_base():
    from opencompass.configs.datasets.math.math_prm800k_500_0shot_nocot_genericllmeval_gen_63a000 import (
        math_datasets,
    )

api_meta_template = dict(
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

# Judge model served behind an OpenAI-compatible endpoint.
judge_cfg = dict(
    abbr='qwen2-5-72b-instruct',
    type=OpenAISDK,
    path='Qwen/Qwen2.5-72B-Instruct',
    key='YOUR_API_KEY',
    openai_api_base=[
        'http://172.30.56.81:23333/v1/',  ### Change to your own server
    ],
    meta_template=api_meta_template,
    query_per_second=16,
    batch_size=16,
    temperature=0.001,
    max_seq_len=32768,
    max_completion_tokens=32768,
)

datasets = sum(
    (v for k, v in locals().items() if k.endswith('_datasets')),
    [],
)
# set max_out_len for inference and plug the judge into each dataset
for item in datasets:
    item['infer_cfg']['inferencer']['max_out_len'] = 16384
    if 'judge_cfg' in item['eval_cfg']['evaluator']:
        item['eval_cfg']['evaluator']['judge_cfg'] = judge_cfg

reasoning_chat_template = """You are an expert mathematician with extensive experience in mathematical competitions. You approach problems through systematic thinking and rigorous reasoning. When solving problems, follow these thought processes:
## Deep Understanding
Take time to fully comprehend the problem before attempting a solution. Consider:
- What is the real question being asked?
- What are the given conditions and what do they tell us?
- Are there any special restrictions or assumptions?
- Which information is crucial and which is supplementary?
## Multi-angle Analysis
Before solving, conduct thorough analysis:
- What mathematical concepts and properties are involved?
- Can you recall similar classic problems or solution methods?
- Would diagrams or tables help visualize the problem?
- Are there special cases that need separate consideration?
## Systematic Thinking
Plan your solution path:
- Propose multiple possible approaches
- Analyze the feasibility and merits of each method
- Choose the most appropriate method and explain why
- Break complex problems into smaller, manageable steps
## Rigorous Proof
During the solution process:
- Provide solid justification for each step
- Include detailed proofs for key conclusions
- Pay attention to logical connections
- Be vigilant about potential oversights
## Repeated Verification
After completing your solution:
- Verify your results satisfy all conditions
- Check for overlooked special cases
- Consider if the solution can be optimized or simplified
- Review your reasoning process
Remember:
1. Take time to think thoroughly rather than rushing to an answer
2. Rigorously prove each key conclusion
3. Keep an open mind and try different approaches
4. Summarize valuable problem-solving methods
5. Maintain healthy skepticism and verify multiple times
Your response should reflect deep mathematical understanding and precise logical thinking, making your solution path and reasoning clear to others.
When you're ready, present your complete solution with:
- Clear problem understanding
- Detailed solution process
- Key insights
- Thorough verification
Focus on clear, logical progression of ideas and thorough explanation of your mathematical reasoning. Provide answers in the same language as the user asking the question, repeat the final answer using a '\\boxed{}' without any units, you have [[8192]] tokens to complete the answer.
"""

reasoning_meta_template = dict(
    begin=dict(
        role='SYSTEM', api_role='SYSTEM', prompt=reasoning_chat_template
    ),
    round=[
        dict(role='HUMAN', api_role='HUMAN'),
        # XXX: all system roles are mapped to human in purpose
        dict(role='BOT', api_role='BOT', generate=True),
    ],
)

models = [
    dict(
        type=VLLMwithChatTemplate,
        abbr='internlm3-8b-instruct-vllm',
        path='internlm/internlm3-8b-instruct',
        model_kwargs=dict(tensor_parallel_size=1),
        generation_kwargs=dict(do_sample=False),  # greedy
        max_seq_len=32768,
        max_out_len=16384,
        batch_size=16,
        run_cfg=dict(num_gpus=1),
        meta_template=reasoning_meta_template,
    )
]

# NOTE(review): this re-assignment keeps only the MATH500 dataset; the
# earlier sum() already contained the same (mutated) dataset objects.
datasets = math_datasets
examples/eval_internlm_chat_lmdeploy_apiserver.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from mmengine.config import read_base

from opencompass.models.turbomind_api import TurboMindAPIModel

with read_base():
    # choose a list of datasets
    from opencompass.configs.datasets.ceval.ceval_gen_5f30c7 import \
        ceval_datasets
    from opencompass.configs.datasets.crowspairs.crowspairs_gen_381af0 import \
        crowspairs_datasets
    from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import \
        gsm8k_datasets
    from opencompass.configs.datasets.mmlu.mmlu_gen_a484b3 import mmlu_datasets
    from opencompass.configs.datasets.race.race_gen_69ee4f import race_datasets
    from opencompass.configs.datasets.SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import \
        WiC_datasets
    from opencompass.configs.datasets.SuperGLUE_WSC.SuperGLUE_WSC_gen_7902a7 import \
        WSC_datasets
    from opencompass.configs.datasets.triviaqa.triviaqa_gen_2121ce import \
        triviaqa_datasets
    # and output the results in a chosen format
    from opencompass.configs.summarizers.medium import summarizer

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

# InternLM(1) chat markup for the api server.
meta_template = dict(round=[
    dict(role='HUMAN', begin='<|User|>:', end='\n'),
    dict(role='BOT', begin='<|Bot|>:', end='<eoa>\n', generate=True),
],
                     eos_token_id=103028)

internlm_chat_20b = dict(
    type=TurboMindAPIModel,
    abbr='internlm-chat-20b-turbomind',
    api_addr='http://0.0.0.0:23333',
    api_key='internlm-chat-20b',  # api_key
    max_out_len=100,
    max_seq_len=2048,
    batch_size=8,
    meta_template=meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
    end_str='<eoa>',
)

# Alternative 7B configuration; swap it into `models` below to use it.
internlm_chat_7b = dict(
    type=TurboMindAPIModel,
    abbr='internlm-chat-7b-turbomind',
    api_addr='http://0.0.0.0:23333',
    # NOTE(review): 'interlm-chat-7b' looks like a typo for
    # 'internlm-chat-7b' — confirm against the served model name.
    api_key='interlm-chat-7b',  # api_key
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    meta_template=meta_template,
    run_cfg=dict(num_gpus=1, num_procs=1),
    end_str='<eoa>',
)

models = [internlm_chat_20b]
examples/eval_internlm_flames_chat.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from mmengine.config import read_base

from opencompass.models import HuggingFaceCausalLM
from opencompass.partitioners import NaivePartitioner
from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
from opencompass.runners import LocalRunner
from opencompass.summarizers import FlamesSummarizer
from opencompass.tasks import OpenICLInferTask
from opencompass.tasks.subjective_eval import SubjectiveEvalTask

# ------------- Inference Stage ---------------------------------------

with read_base():
    from opencompass.configs.datasets.flames.flames_gen import flames_datasets
    from opencompass.configs.models.hf_internlm.hf_internlm2_chat_7b import \
        models

datasets = [*flames_datasets]

# InternLM2 chat markup.
_meta_template = dict(round=[
    dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'),
    dict(role='BOT',
         begin='<|im_start|>assistant\n',
         end='<|im_end|>\n',
         generate=True),
], )

# Replaces the 'models' imported above with an explicit configuration.
models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='internlm2-chat-7b-hf',
        path='internlm/internlm2-chat-7b',
        tokenizer_path='internlm/internlm2-chat-7b',
        model_kwargs=dict(
            trust_remote_code=True,
            device_map='auto',
        ),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            use_fast=False,
            trust_remote_code=True,
        ),
        max_out_len=2048,
        max_seq_len=2048,
        batch_size=8,
        meta_template=_meta_template,
        run_cfg=dict(num_gpus=1, num_procs=1),
        end_str='<|im_end|>',
        generation_kwargs={
            'eos_token_id': [2, 92542],
            'do_sample': True
        },
        batch_padding=True,
    )
]

infer = dict(
    partitioner=dict(type=NaivePartitioner),
    runner=dict(type=LocalRunner,
                max_num_workers=256,
                task=dict(type=OpenICLInferTask)),
)

# ------------- Evaluation Stage --------------------------------------

# ------------- JudgeLLM Configuration --------------------------------
internlm1_chat_template = dict(round=[
    dict(role='HUMAN', begin='<|User|>:', end='\n'),
    dict(role='BOT', begin='<|Bot|>:', end='<eoa>\n', generate=True),
], )

judge_models = [
    dict(
        type=HuggingFaceCausalLM,
        abbr='flames-scorer',
        path='CaasiHUANG/flames-scorer',
        tokenizer_path='CaasiHUANG/flames-scorer',
        model_kwargs=dict(
            trust_remote_code=True,
            device_map='auto',
        ),
        tokenizer_kwargs=dict(
            padding_side='left',
            truncation_side='left',
            use_fast=False,
            trust_remote_code=True,
        ),
        generation_kwargs={'do_sample': True},
        max_out_len=512,
        max_seq_len=4096,
        batch_size=8,
        meta_template=internlm1_chat_template,
        run_cfg=dict(num_gpus=1, num_procs=1),
        end_str='<eoa>',
    )
]

# ------------- Evaluation Configuration ------------------------------
eval = dict(
    partitioner=dict(
        type=SubjectiveNaivePartitioner,
        mode='singlescore',
        models=models,
        judge_models=judge_models,
    ),
    runner=dict(type=LocalRunner,
                max_num_workers=256,
                task=dict(type=SubjectiveEvalTask)),
)

summarizer = dict(type=FlamesSummarizer, judge_type='general')

work_dir = 'outputs/flames/'
examples/eval_internlm_lmdeploy_apiserver.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from mmengine.config import read_base

from opencompass.models.turbomind_api import TurboMindAPIModel

with read_base():
    # choose a list of datasets
    from opencompass.configs.datasets.ceval.ceval_gen_5f30c7 import \
        ceval_datasets
    from opencompass.configs.datasets.gsm8k.gsm8k_gen_1d7fe4 import \
        gsm8k_datasets
    from opencompass.configs.datasets.humaneval.humaneval_gen_8e312c import \
        humaneval_datasets
    from opencompass.configs.datasets.mmlu.mmlu_gen_a484b3 import mmlu_datasets
    from opencompass.configs.datasets.SuperGLUE_WiC.SuperGLUE_WiC_gen_d06864 import \
        WiC_datasets
    from opencompass.configs.datasets.triviaqa.triviaqa_gen_2121ce import \
        triviaqa_datasets
    # and output the results in a chosen format
    from opencompass.configs.summarizers.medium import summarizer

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])

internlm_chat_20b = dict(
    type=TurboMindAPIModel,
    abbr='internlm-chat-20b-turbomind',
    api_addr='http://0.0.0.0:23333',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=8,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

# Alternative 7B configuration; swap it into `models` below to use it.
internlm_chat_7b = dict(
    type=TurboMindAPIModel,
    abbr='internlm-chat-7b-turbomind',
    api_addr='http://0.0.0.0:23333',
    max_out_len=100,
    max_seq_len=2048,
    batch_size=16,
    run_cfg=dict(num_gpus=1, num_procs=1),
)

models = [internlm_chat_20b]
examples/eval_internlm_math_chat.py ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from mmengine.config import read_base

from opencompass.models.huggingface import HuggingFaceCausalLM

with read_base():
    # choose a list of datasets
    from opencompass.configs.datasets.gsm8k.gsm8k_gen import gsm8k_datasets
    from opencompass.configs.datasets.math.math_gen_736506 import math_datasets
    from opencompass.configs.models.hf_internlm.hf_internlm2_chat_math_7b import \
        models as internlm_math_chat_7b_models
    from opencompass.configs.models.hf_internlm.hf_internlm2_chat_math_20b import \
        models as internlm_math_chat_20b_models

datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
# Eval MATH and GSM8K for both InternLM-Math-Chat 7B and 20B.
datasets = [*math_datasets, *gsm8k_datasets]
models = [*internlm_math_chat_7b_models, *internlm_math_chat_20b_models]
examples/eval_lightllm.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from mmengine.config import read_base

from opencompass.models import LightllmAPI
from opencompass.partitioners import NaivePartitioner
from opencompass.runners import LocalRunner
from opencompass.tasks import OpenICLInferTask

with read_base():
    from opencompass.configs.datasets.humaneval.deprecated_humaneval_gen_a82cae import \
        humaneval_datasets
    from opencompass.configs.summarizers.leaderboard import summarizer

datasets = [*humaneval_datasets]
'''
# Prompt template for InternLM2-Chat
# https://github.com/InternLM/InternLM/blob/main/chat/chat_format.md

_meta_template = dict(
    begin='<|im_start|>system\nYou are InternLM2-Chat, a harmless AI assistant<|im_end|>\n',
    round=[
        dict(role='HUMAN', begin='<|im_start|>user\n', end='<|im_end|>\n'),
        dict(role='BOT', begin='<|im_start|>assistant\n', end='<|im_end|>\n', generate=True),
    ]
)
'''

# No chat markup by default; see the template above for InternLM2-Chat.
_meta_template = None

models = [
    dict(
        abbr='LightllmAPI',
        type=LightllmAPI,
        url='http://localhost:1030/generate',
        meta_template=_meta_template,
        batch_size=32,
        max_workers_per_task=128,
        rate_per_worker=1024,
        retry=4,
        generation_kwargs=dict(do_sample=False,
                               ignore_eos=False,
                               max_new_tokens=1024),
    ),
]

infer = dict(
    partitioner=dict(type=NaivePartitioner),
    runner=dict(
        type=LocalRunner,
        max_num_workers=32,
        task=dict(type=OpenICLInferTask),
    ),
)
examples/eval_math_llm_judge_internal.py ADDED
@@ -0,0 +1,43 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from opencompass.configs.datasets.math.math_0shot_llm_judge_v2_gen_31d777 import \
5
+ math_datasets
6
+ # Select a model of interest
7
+ from opencompass.configs.models.qwen2_5.lmdeploy_qwen2_5_72b_instruct import \
8
+ models as qwen2_5_72b_instruct_model
9
+
10
+ eval_model_name = 'eval_model_name'
11
+ postprocessor_model_name = 'postprocessor_model_name'
12
+ eval_model_urls = ['http://0.0.0.0:23333/v1']
13
+ postprocessor_model_urls = ['http://0.0.0.0:23333/v1']
14
+
15
+ datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])
16
+ models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
17
+
18
+ for dataset in datasets:
19
+ dataset['eval_cfg']['evaluator']['model_name'] = eval_model_name
20
+ dataset['eval_cfg']['evaluator']['url'] = eval_model_urls
21
+ dataset['eval_cfg']['evaluator']['post_url'] = postprocessor_model_urls
22
+ dataset['eval_cfg']['evaluator'][
23
+ 'post_model_name'] = postprocessor_model_name
24
+
25
+ # -------------Inference Stage ----------------------------------------
26
+
27
+ from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
28
+ from opencompass.runners import LocalRunner
29
+ from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
30
+
31
+ infer = dict(
32
+ partitioner=dict(type=NumWorkerPartitioner, num_worker=8),
33
+ runner=dict(type=LocalRunner,
34
+ max_num_workers=8,
35
+ task=dict(type=OpenICLInferTask)),
36
+ )
37
+
38
+ eval = dict(
39
+ partitioner=dict(type=NaivePartitioner, n=10),
40
+ runner=dict(type=LocalRunner,
41
+ max_num_workers=256,
42
+ task=dict(type=OpenICLEvalTask)),
43
+ )
examples/eval_mathbench.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+
5
+ # Import models
6
+ # Import datasets
7
+ from opencompass.configs.datasets.MathBench.mathbench_gen import \
8
+ mathbench_datasets
9
+ from opencompass.configs.models.hf_internlm.hf_internlm2_chat_7b import \
10
+ models as internlm2_chat_7b_model
11
+ from opencompass.configs.models.hf_llama.hf_llama3_8b_instruct import \
12
+ models as llama3_8b_instruct_model
13
+ # Import summarizers for display results
14
+ from opencompass.configs.summarizers.groups.mathbench_v1_2024 import \
15
+ summarizer # Grouped results for MathBench-A and MathBench-T separately
16
+
17
+ # from opencompass.configs.summarizers.mathbench_v1 import summarizer # Detailed results for every sub-dataset
18
+ # from opencompass.configs.summarizers.groups.mathbench_v1_2024_lang import summarizer # Grouped results for bilingual results
19
+
20
+ datasets = sum([v for k, v in locals().items() if k.endswith('_datasets')], [])
21
+ models = sum([v for k, v in locals().items() if k.endswith('_model')], [])
22
+
23
+ from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
24
+ from opencompass.runners import LocalRunner
25
+ from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
26
+
27
+ eval = dict(
28
+ partitioner=dict(type=NaivePartitioner, n=8),
29
+ runner=dict(type=LocalRunner,
30
+ max_num_workers=256,
31
+ task=dict(type=OpenICLEvalTask)),
32
+ )
33
+
34
+ infer = dict(
35
+ partitioner=dict(type=NumWorkerPartitioner, num_worker=4),
36
+ runner=dict(type=LocalRunner,
37
+ max_num_workers=256,
38
+ task=dict(type=OpenICLInferTask)),
39
+ )
40
+
41
+ work_dir = './outputs/mathbench_results'
examples/eval_modelscope_datasets.py ADDED
@@ -0,0 +1,112 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # export DATASET_SOURCE='ModelScope' # before run this script
2
+ from datasets import Dataset, DatasetDict
3
+ from mmengine.config import read_base
4
+ from tqdm import tqdm
5
+
6
+ with read_base():
7
+ from opencompass.configs.datasets.agieval.agieval_gen import \
8
+ agieval_datasets as agieval_v2_datasets # ok
9
+ from opencompass.configs.datasets.agieval.agieval_gen_a0c741 import \
10
+ agieval_datasets as agieval_v1_datasets # ok
11
+ from opencompass.configs.datasets.ARC_c.ARC_c_clean_ppl import \
12
+ ARC_c_datasets as ARC_c_clean_datasets # ok
13
+ from opencompass.configs.datasets.ARC_c.ARC_c_gen import \
14
+ ARC_c_datasets # ok
15
+ from opencompass.configs.datasets.ARC_e.ARC_e_gen import \
16
+ ARC_e_datasets # ok
17
+ from opencompass.configs.datasets.bbh.bbh_gen import bbh_datasets
18
+ from opencompass.configs.datasets.ceval.ceval_clean_ppl import \
19
+ ceval_datasets as ceval_clean_datasets # ok
20
+ from opencompass.configs.datasets.ceval.ceval_gen import \
21
+ ceval_datasets # ok
22
+ from opencompass.configs.datasets.CLUE_afqmc.CLUE_afqmc_gen import \
23
+ afqmc_datasets # ok
24
+ from opencompass.configs.datasets.CLUE_cmnli.CLUE_cmnli_gen import \
25
+ cmnli_datasets # ok
26
+ from opencompass.configs.datasets.CLUE_cmnli.CLUE_cmnli_ppl import \
27
+ cmnli_datasets as cmnli_ppl_datasets # ok
28
+ from opencompass.configs.datasets.CLUE_ocnli.CLUE_ocnli_gen import \
29
+ ocnli_datasets # ok
30
+ from opencompass.configs.datasets.cmmlu.cmmlu_gen import \
31
+ cmmlu_datasets # ok
32
+ from opencompass.configs.datasets.commonsenseqa.commonsenseqa_gen import \
33
+ commonsenseqa_datasets # 额外处理gpt
34
+ from opencompass.configs.datasets.GaokaoBench.GaokaoBench_gen import \
35
+ GaokaoBench_datasets # ok
36
+ from opencompass.configs.datasets.GaokaoBench.GaokaoBench_mixed import \
37
+ GaokaoBench_datasets as GaokaoBench_mixed_datasets # ok
38
+ from opencompass.configs.datasets.GaokaoBench.GaokaoBench_no_subjective_gen_4c31db import \
39
+ GaokaoBench_datasets as GaokaoBench_no_subjective_datasets # ok
40
+ from opencompass.configs.datasets.gsm8k.gsm8k_gen import \
41
+ gsm8k_datasets # ok
42
+ from opencompass.configs.datasets.hellaswag.hellaswag_10shot_gen_e42710 import \
43
+ hellaswag_datasets as hellaswag_ice_datasets # ok
44
+ from opencompass.configs.datasets.hellaswag.hellaswag_clean_ppl import \
45
+ hellaswag_datasets as hellaswag_clean_datasets # ok
46
+ from opencompass.configs.datasets.hellaswag.hellaswag_gen import \
47
+ hellaswag_datasets as hellaswag_v2_datasets # ok
48
+ from opencompass.configs.datasets.hellaswag.hellaswag_ppl_9dbb12 import \
49
+ hellaswag_datasets as hellaswag_v1_datasets # ok
50
+ from opencompass.configs.datasets.hellaswag.hellaswag_ppl_a6e128 import \
51
+ hellaswag_datasets as hellaswag_v3_datasets # ok
52
+ from opencompass.configs.datasets.humaneval.humaneval_gen import \
53
+ humaneval_datasets # ok
54
+ from opencompass.configs.datasets.humaneval.humaneval_repeat10_gen_8e312c import \
55
+ humaneval_datasets as humaneval_repeat10_datasets # ok
56
+ from opencompass.configs.datasets.lambada.lambada_gen import \
57
+ lambada_datasets # ok
58
+ from opencompass.configs.datasets.lcsts.lcsts_gen import \
59
+ lcsts_datasets # ok
60
+ from opencompass.configs.datasets.math.math_gen import math_datasets # ok
61
+ from opencompass.configs.datasets.mbpp.mbpp_gen import \
62
+ mbpp_datasets as mbpp_v1_datasets # ok
63
+ from opencompass.configs.datasets.mbpp.mbpp_passk_gen_830460 import \
64
+ mbpp_datasets as mbpp_v2_datasets # ok
65
+ from opencompass.configs.datasets.mbpp.sanitized_mbpp_gen_830460 import \
66
+ sanitized_mbpp_datasets # ok
67
+ from opencompass.configs.datasets.mmlu.mmlu_clean_ppl import \
68
+ mmlu_datasets as mmlu_clean_datasets # ok
69
+ from opencompass.configs.datasets.mmlu.mmlu_gen import mmlu_datasets # ok
70
+ from opencompass.configs.datasets.nq.nq_gen import nq_datasets # ok
71
+ from opencompass.configs.datasets.obqa.obqa_gen import obqa_datasets # ok
72
+ from opencompass.configs.datasets.obqa.obqa_ppl_6aac9e import \
73
+ obqa_datasets as obqa_ppl_datasets # ok
74
+ from opencompass.configs.datasets.piqa.piqa_gen import \
75
+ piqa_datasets as piqa_v2_datasets # ok
76
+ from opencompass.configs.datasets.piqa.piqa_ppl import \
77
+ piqa_datasets as piqa_v1_datasets # ok
78
+ from opencompass.configs.datasets.piqa.piqa_ppl_0cfff2 import \
79
+ piqa_datasets as piqa_v3_datasets # ok
80
+ from opencompass.configs.datasets.race.race_ppl import race_datasets # ok
81
+ from opencompass.configs.datasets.siqa.siqa_gen import \
82
+ siqa_datasets as siqa_v2_datasets # ok
83
+ from opencompass.configs.datasets.siqa.siqa_gen_18632c import \
84
+ siqa_datasets as siqa_v3_datasets # ok
85
+ from opencompass.configs.datasets.siqa.siqa_ppl_42bc6e import \
86
+ siqa_datasets as siqa_ppl_datasets # ok
87
+ from opencompass.configs.datasets.storycloze.storycloze_gen import \
88
+ storycloze_datasets # ok
89
+ from opencompass.configs.datasets.storycloze.storycloze_ppl import \
90
+ storycloze_datasets as storycloze_ppl_datasets # ok
91
+ from opencompass.configs.datasets.strategyqa.strategyqa_gen import \
92
+ strategyqa_datasets
93
+ from opencompass.configs.datasets.summedits.summedits_gen import \
94
+ summedits_datasets as summedits_v2_datasets # ok
95
+ from opencompass.configs.datasets.triviaqa.triviaqa_gen import \
96
+ triviaqa_datasets # ok
97
+ from opencompass.configs.datasets.triviaqa.triviaqa_wiki_1shot_gen_20a989 import \
98
+ triviaqa_datasets as triviaqa_wiki_1shot_datasets # ok
99
+ from opencompass.configs.datasets.tydiqa.tydiqa_gen import \
100
+ tydiqa_datasets # ok
101
+ from opencompass.configs.datasets.winogrande.winogrande_5shot_ll_252f01 import \
102
+ winogrande_datasets as winogrande_5shot_ll_datasets # ok
103
+ from opencompass.configs.datasets.winogrande.winogrande_gen import \
104
+ winogrande_datasets
105
+ from opencompass.configs.datasets.winogrande.winogrande_ll import \
106
+ winogrande_datasets as winogrande_ll_datasets # ok
107
+ from opencompass.configs.datasets.Xsum.Xsum_gen import Xsum_datasets
108
+ from opencompass.configs.models.opt.hf_opt_125m import models
109
+
110
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
111
+ for d in datasets:
112
+ d['reader_cfg'].update({'train_range': '[0:5]', 'test_range': '[0:5]'})
examples/eval_qwen_7b.py ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from opencompass.configs.datasets.collections.leaderboard.qwen import \
5
+ datasets
6
+ from opencompass.configs.models.qwen.hf_qwen_7b import models
7
+ from opencompass.configs.summarizers.leaderboard import summarizer
8
+ '''
9
+ dataset version metric mode qwen-7b-hf
10
+ -------------------------------------- --------- ---------------- ------ ------------
11
+ --------- 考试 Exam --------- - - - -
12
+ ceval - naive_average ppl 58.65
13
+ agieval - naive_average mixed 40.49
14
+ mmlu - naive_average ppl 57.78
15
+ cmmlu - naive_average ppl 58.57
16
+ GaokaoBench - weighted_average mixed 51.76
17
+ ARC-c 72cf91 accuracy gen 83.73
18
+ ARC-e 72cf91 accuracy gen 90.65
19
+ --------- 语言 Language --------- - - - -
20
+ WiC ce62e6 accuracy ppl 51.10
21
+ chid-dev 25f3d3 accuracy ppl 86.63
22
+ afqmc-dev cc328c accuracy ppl 69.00
23
+ WSC 678cb5 accuracy ppl 63.46
24
+ tydiqa-goldp - naive_average gen 19.98
25
+ flores_100 - naive_average gen 3.20
26
+ --------- 知识 Knowledge --------- - - - -
27
+ BoolQ 463fee accuracy ppl 83.00
28
+ commonsense_qa 0d8e25 accuracy ppl 67.49
29
+ triviaqa b6904f score gen 40.45
30
+ nq b6904f score gen 14.16
31
+ --------- 理解 Understanding --------- - - - -
32
+ C3 e6778d accuracy gen 75.29
33
+ race-middle 73bdec accuracy ppl 90.53
34
+ race-high 73bdec accuracy ppl 87.71
35
+ openbookqa_fact fa871c accuracy gen 92.20
36
+ csl_dev 3c4211 accuracy ppl 56.25
37
+ lcsts 0b3969 rouge1 gen 12.38
38
+ Xsum 207e69 rouge1 gen 36.00
39
+ eprstmt-dev 101429 accuracy gen 89.38
40
+ lambada de1af2 accuracy gen 67.88
41
+ --------- 推理 Reasoning --------- - - - -
42
+ cmnli 15e783 accuracy ppl 54.85
43
+ ocnli 1471e7 accuracy gen 42.34
44
+ AX_b 793c72 accuracy gen 58.61
45
+ AX_g c4c886 accuracy gen 69.10
46
+ RTE c4c886 accuracy gen 57.76
47
+ COPA 59f42c accuracy gen 88.00
48
+ ReCoRD 3e0689 score gen 27.78
49
+ hellaswag 06a1e2 accuracy gen 92.47
50
+ piqa 24369d accuracy gen 78.02
51
+ siqa ea30d1 accuracy ppl 75.03
52
+ math 2c0b9e accuracy gen 11.06
53
+ gsm8k 4c7f6e accuracy gen 50.87
54
+ drop 53a0a7 score gen 44.95
55
+ openai_humaneval dd0dff humaneval_pass@1 gen 23.78
56
+ mbpp 60ca11 score gen 31.20
57
+ bbh - naive_average gen 40.03
58
+ '''
examples/eval_ruler_fix_tokenizer.py ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ from opencompass.partitioners import NaivePartitioner, NumWorkerPartitioner
4
+ from opencompass.runners import LocalRunner
5
+ from opencompass.tasks import OpenICLEvalTask, OpenICLInferTask
6
+
7
+ with read_base():
8
+ from opencompass.configs.datasets.ruler.ruler_combined_gen import \
9
+ ruler_combined_datasets
10
+ from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat_1m import \
11
+ models as internlm2_5_7b_chat_1m
12
+ from opencompass.configs.summarizers.groups.ruler import \
13
+ ruler_summary_groups
14
+
15
+ datasets = sum((v for k, v in locals().items() if k.endswith('_datasets')), [])
16
+ models = internlm2_5_7b_chat_1m
17
+ work_dir = './outputs/ruler'
18
+
19
+ infer = dict(
20
+ partitioner=dict(type=NumWorkerPartitioner, num_worker=2),
21
+ runner=dict(type=LocalRunner,
22
+ max_num_workers=16,
23
+ task=dict(type=OpenICLInferTask),
24
+ retry=5),
25
+ )
26
+
27
+ eval = dict(
28
+ partitioner=dict(type=NaivePartitioner),
29
+ runner=dict(type=LocalRunner,
30
+ max_num_workers=32,
31
+ task=dict(type=OpenICLEvalTask)),
32
+ )
33
+
34
+ summarizer = dict(
35
+ dataset_abbrs=['ruler_4k', 'ruler_8k', 'ruler_16k', 'ruler_32k'],
36
+ summary_groups=sum(
37
+ [v for k, v in locals().items() if k.endswith('_summary_groups')], []),
38
+ )
examples/eval_subjective_alpacaeval_official.py ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from mmengine.config import read_base
2
+
3
+ with read_base():
4
+ from opencompass.configs.datasets.subjective.alpaca_eval.alpacav2_judgeby_gpt4 import subjective_datasets as alpacav2
5
+
6
+ from opencompass.models import (HuggingFace, HuggingFaceCausalLM,
7
+ HuggingFaceChatGLM3)
8
+ from opencompass.models.openai_api import OpenAI
9
+ from opencompass.partitioners import NaivePartitioner, SizePartitioner
10
+ from opencompass.partitioners.sub_naive import SubjectiveNaivePartitioner
11
+ from opencompass.partitioners.sub_size import SubjectiveSizePartitioner
12
+ from opencompass.runners import LocalRunner, SlurmSequentialRunner
13
+ from opencompass.summarizers import AlpacaSummarizer
14
+ from opencompass.tasks import OpenICLInferTask
15
+ from opencompass.tasks.outer_eval.alpacaeval import AlpacaEvalTask
16
+
17
+ api_meta_template = dict(
18
+ round=[
19
+ dict(role='HUMAN', api_role='HUMAN'),
20
+ dict(role='BOT', api_role='BOT', generate=True),
21
+ ],
22
+ reserved_roles=[dict(role='SYSTEM', api_role='SYSTEM')],
23
+ )
24
+
25
+ # To run this config, please ensure you have successfully installed `alpaca-eval==0.6` and `scikit-learn==1.5`
26
+
27
+ # -------------Inference Stage ----------------------------------------
28
+
29
+ # For subjective evaluation, we often set do sample for models
30
+ models = [
31
+ dict(
32
+ type=HuggingFaceChatGLM3,
33
+ abbr='chatglm3-6b',
34
+ path='THUDM/chatglm3-6b',
35
+ tokenizer_path='THUDM/chatglm3-6b',
36
+ model_kwargs=dict(
37
+ device_map='auto',
38
+ trust_remote_code=True,
39
+ ),
40
+ tokenizer_kwargs=dict(
41
+ padding_side='left',
42
+ truncation_side='left',
43
+ trust_remote_code=True,
44
+ ),
45
+ generation_kwargs=dict(do_sample=True, ),
46
+ meta_template=api_meta_template,
47
+ max_out_len=2048,
48
+ max_seq_len=4096,
49
+ batch_size=1,
50
+ run_cfg=dict(num_gpus=1, num_procs=1),
51
+ )
52
+ ]
53
+
54
+ datasets = [*alpacav2]
55
+
56
+ # -------------Evaluation Stage ----------------------------------------
57
+
58
+ ## ------------- JudgeLLM Configuration
59
+ gpt4_judge = dict(
60
+ abbr='GPT4-Turbo',
61
+ path='gpt-4-1106-preview',
62
+ key=
63
+ '', # The key will be obtained from $OPENAI_API_KEY, but you can write down your key here as well
64
+ config='weighted_alpaca_eval_gpt4_turbo')
65
+ ## ------------- Evaluation Configuration
66
+ eval = dict(partitioner=dict(type=NaivePartitioner),
67
+ runner=dict(
68
+ type=LocalRunner,
69
+ max_num_workers=256,
70
+ task=dict(type=AlpacaEvalTask, judge_cfg=gpt4_judge),
71
+ ))
72
+ work_dir = 'outputs/alpaca/'
requirements/vllm.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ vllm
tmp/08b1e522-33ea-430a-ba78-4d273bf09a88_params.py ADDED
@@ -0,0 +1,1424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets = [
2
+ [
3
+ dict(
4
+ abbr='LongBench_2wikimqa_5',
5
+ eval_cfg=dict(
6
+ evaluator=dict(
7
+ type='opencompass.datasets.LongBenchF1Evaluator'),
8
+ pred_role='BOT'),
9
+ infer_cfg=dict(
10
+ inferencer=dict(
11
+ max_out_len=32,
12
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
13
+ prompt_template=dict(
14
+ template=dict(round=[
15
+ dict(
16
+ prompt=
17
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
18
+ role='HUMAN'),
19
+ ]),
20
+ type=
21
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
22
+ retriever=dict(
23
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
24
+ name='2wikimqa',
25
+ path='opencompass/Longbench',
26
+ reader_cfg=dict(
27
+ input_columns=[
28
+ 'context',
29
+ 'input',
30
+ ],
31
+ output_column='answers',
32
+ test_range='[125:150]',
33
+ test_split='test',
34
+ train_split='test'),
35
+ type='opencompass.datasets.LongBench2wikimqaDataset'),
36
+ dict(
37
+ abbr='LongBench_hotpotqa_5',
38
+ eval_cfg=dict(
39
+ evaluator=dict(
40
+ type='opencompass.datasets.LongBenchF1Evaluator'),
41
+ pred_role='BOT'),
42
+ infer_cfg=dict(
43
+ inferencer=dict(
44
+ max_out_len=32,
45
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
46
+ prompt_template=dict(
47
+ template=dict(round=[
48
+ dict(
49
+ prompt=
50
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
51
+ role='HUMAN'),
52
+ ]),
53
+ type=
54
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
55
+ retriever=dict(
56
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
57
+ name='hotpotqa',
58
+ path='opencompass/Longbench',
59
+ reader_cfg=dict(
60
+ input_columns=[
61
+ 'context',
62
+ 'input',
63
+ ],
64
+ output_column='answers',
65
+ test_range='[125:150]',
66
+ test_split='test',
67
+ train_split='test'),
68
+ type='opencompass.datasets.LongBenchhotpotqaDataset'),
69
+ dict(
70
+ abbr='LongBench_musique_5',
71
+ eval_cfg=dict(
72
+ evaluator=dict(
73
+ type='opencompass.datasets.LongBenchF1Evaluator'),
74
+ pred_role='BOT'),
75
+ infer_cfg=dict(
76
+ inferencer=dict(
77
+ max_out_len=32,
78
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
79
+ prompt_template=dict(
80
+ template=dict(round=[
81
+ dict(
82
+ prompt=
83
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
84
+ role='HUMAN'),
85
+ ]),
86
+ type=
87
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
88
+ retriever=dict(
89
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
90
+ name='musique',
91
+ path='opencompass/Longbench',
92
+ reader_cfg=dict(
93
+ input_columns=[
94
+ 'context',
95
+ 'input',
96
+ ],
97
+ output_column='answers',
98
+ test_range='[125:150]',
99
+ test_split='test',
100
+ train_split='test'),
101
+ type='opencompass.datasets.LongBenchmusiqueDataset'),
102
+ dict(
103
+ abbr='LongBench_multifieldqa_en_5',
104
+ eval_cfg=dict(
105
+ evaluator=dict(
106
+ type='opencompass.datasets.LongBenchF1Evaluator'),
107
+ pred_role='BOT'),
108
+ infer_cfg=dict(
109
+ inferencer=dict(
110
+ max_out_len=64,
111
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
112
+ prompt_template=dict(
113
+ template=dict(round=[
114
+ dict(
115
+ prompt=
116
+ 'Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
117
+ role='HUMAN'),
118
+ ]),
119
+ type=
120
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
121
+ retriever=dict(
122
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
123
+ name='multifieldqa_en',
124
+ path='opencompass/Longbench',
125
+ reader_cfg=dict(
126
+ input_columns=[
127
+ 'context',
128
+ 'input',
129
+ ],
130
+ output_column='answers',
131
+ test_range='[95:114]',
132
+ test_split='test',
133
+ train_split='test'),
134
+ type='opencompass.datasets.LongBenchmultifieldqa_enDataset'),
135
+ dict(
136
+ abbr='LongBench_multifieldqa_zh_5',
137
+ eval_cfg=dict(
138
+ evaluator=dict(
139
+ language='zh',
140
+ type='opencompass.datasets.LongBenchF1Evaluator'),
141
+ pred_role='BOT'),
142
+ infer_cfg=dict(
143
+ inferencer=dict(
144
+ max_out_len=64,
145
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
146
+ prompt_template=dict(
147
+ template=dict(round=[
148
+ dict(
149
+ prompt=
150
+ '阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:',
151
+ role='HUMAN'),
152
+ ]),
153
+ type=
154
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
155
+ retriever=dict(
156
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
157
+ name='multifieldqa_zh',
158
+ path='opencompass/Longbench',
159
+ reader_cfg=dict(
160
+ input_columns=[
161
+ 'context',
162
+ 'input',
163
+ ],
164
+ output_column='answers',
165
+ test_range='[125:150]',
166
+ test_split='test',
167
+ train_split='test'),
168
+ type='opencompass.datasets.LongBenchmultifieldqa_zhDataset'),
169
+ dict(
170
+ abbr='LongBench_narrativeqa_5',
171
+ eval_cfg=dict(
172
+ evaluator=dict(
173
+ type='opencompass.datasets.LongBenchF1Evaluator'),
174
+ pred_role='BOT'),
175
+ infer_cfg=dict(
176
+ inferencer=dict(
177
+ max_out_len=128,
178
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
179
+ prompt_template=dict(
180
+ template=dict(round=[
181
+ dict(
182
+ prompt=
183
+ 'You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:',
184
+ role='HUMAN'),
185
+ ]),
186
+ type=
187
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
188
+ retriever=dict(
189
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
190
+ name='narrativeqa',
191
+ path='opencompass/Longbench',
192
+ reader_cfg=dict(
193
+ input_columns=[
194
+ 'context',
195
+ 'input',
196
+ ],
197
+ output_column='answers',
198
+ test_range='[125:150]',
199
+ test_split='test',
200
+ train_split='test'),
201
+ type='opencompass.datasets.LongBenchnarrativeqaDataset'),
202
+ dict(
203
+ abbr='LongBench_qasper_5',
204
+ eval_cfg=dict(
205
+ evaluator=dict(
206
+ type='opencompass.datasets.LongBenchF1Evaluator'),
207
+ pred_role='BOT'),
208
+ infer_cfg=dict(
209
+ inferencer=dict(
210
+ max_out_len=32,
211
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
212
+ prompt_template=dict(
213
+ template=dict(round=[
214
+ dict(
215
+ prompt=
216
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
217
+ role='HUMAN'),
218
+ ]),
219
+ type=
220
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
221
+ retriever=dict(
222
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
223
+ name='qasper',
224
+ path='opencompass/Longbench',
225
+ reader_cfg=dict(
226
+ input_columns=[
227
+ 'context',
228
+ 'input',
229
+ ],
230
+ output_column='answers',
231
+ test_range='[125:150]',
232
+ test_split='test',
233
+ train_split='test'),
234
+ type='opencompass.datasets.LongBenchqasperDataset'),
235
+ dict(
236
+ abbr='LongBench_triviaqa_5',
237
+ eval_cfg=dict(
238
+ evaluator=dict(
239
+ type='opencompass.datasets.LongBenchF1Evaluator'),
240
+ pred_postprocessor=dict(
241
+ type='opencompass.datasets.triviaqa_postprocess'),
242
+ pred_role='BOT'),
243
+ infer_cfg=dict(
244
+ inferencer=dict(
245
+ max_out_len=32,
246
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
247
+ prompt_template=dict(
248
+ template=dict(round=[
249
+ dict(
250
+ prompt=
251
+ 'Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}',
252
+ role='HUMAN'),
253
+ ]),
254
+ type=
255
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
256
+ retriever=dict(
257
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
258
+ name='triviaqa',
259
+ path='opencompass/Longbench',
260
+ reader_cfg=dict(
261
+ input_columns=[
262
+ 'context',
263
+ 'input',
264
+ ],
265
+ output_column='answers',
266
+ test_range='[125:150]',
267
+ test_split='test',
268
+ train_split='test'),
269
+ type='opencompass.datasets.LongBenchtriviaqaDataset'),
270
+ dict(
271
+ abbr='LongBench_gov_report_5',
272
+ eval_cfg=dict(
273
+ evaluator=dict(
274
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
275
+ pred_role='BOT'),
276
+ infer_cfg=dict(
277
+ inferencer=dict(
278
+ max_out_len=512,
279
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
280
+ prompt_template=dict(
281
+ template=dict(round=[
282
+ dict(
283
+ prompt=
284
+ 'You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
285
+ role='HUMAN'),
286
+ ]),
287
+ type=
288
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
289
+ retriever=dict(
290
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
291
+ name='gov_report',
292
+ path='opencompass/Longbench',
293
+ reader_cfg=dict(
294
+ input_columns=[
295
+ 'context',
296
+ ],
297
+ output_column='answers',
298
+ test_range='[125:150]',
299
+ test_split='test',
300
+ train_split='test'),
301
+ type='opencompass.datasets.LongBenchgov_reportDataset'),
302
+ dict(
303
+ abbr='LongBench_qmsum_5',
304
+ eval_cfg=dict(
305
+ evaluator=dict(
306
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
307
+ pred_role='BOT'),
308
+ infer_cfg=dict(
309
+ inferencer=dict(
310
+ max_out_len=512,
311
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
312
+ prompt_template=dict(
313
+ template=dict(round=[
314
+ dict(
315
+ prompt=
316
+ 'You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:',
317
+ role='HUMAN'),
318
+ ]),
319
+ type=
320
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
321
+ retriever=dict(
322
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
323
+ name='qmsum',
324
+ path='opencompass/Longbench',
325
+ reader_cfg=dict(
326
+ input_columns=[
327
+ 'context',
328
+ 'input',
329
+ ],
330
+ output_column='answers',
331
+ test_range='[125:150]',
332
+ test_split='test',
333
+ train_split='test'),
334
+ type='opencompass.datasets.LongBenchqmsumDataset'),
335
+ dict(
336
+ abbr='LongBench_vcsum_5',
337
+ eval_cfg=dict(
338
+ evaluator=dict(
339
+ language='zh',
340
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
341
+ pred_role='BOT'),
342
+ infer_cfg=dict(
343
+ inferencer=dict(
344
+ max_out_len=512,
345
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
346
+ prompt_template=dict(
347
+ template=dict(round=[
348
+ dict(
349
+ prompt=
350
+ '下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:',
351
+ role='HUMAN'),
352
+ ]),
353
+ type=
354
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
355
+ retriever=dict(
356
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
357
+ name='vcsum',
358
+ path='opencompass/Longbench',
359
+ reader_cfg=dict(
360
+ input_columns=[
361
+ 'context',
362
+ ],
363
+ output_column='answers',
364
+ test_range='[125:150]',
365
+ test_split='test',
366
+ train_split='test'),
367
+ type='opencompass.datasets.LongBenchvcsumDataset'),
368
+ dict(
369
+ abbr='LongBench_dureader_5',
370
+ eval_cfg=dict(
371
+ evaluator=dict(
372
+ language='zh',
373
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
374
+ pred_role='BOT'),
375
+ infer_cfg=dict(
376
+ inferencer=dict(
377
+ max_out_len=128,
378
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
379
+ prompt_template=dict(
380
+ template=dict(round=[
381
+ dict(
382
+ prompt=
383
+ '请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
384
+ role='HUMAN'),
385
+ ]),
386
+ type=
387
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
388
+ retriever=dict(
389
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
390
+ name='dureader',
391
+ path='opencompass/Longbench',
392
+ reader_cfg=dict(
393
+ input_columns=[
394
+ 'context',
395
+ 'input',
396
+ ],
397
+ output_column='answers',
398
+ test_range='[125:150]',
399
+ test_split='test',
400
+ train_split='test'),
401
+ type='opencompass.datasets.LongBenchdureaderDataset'),
402
+ dict(
403
+ abbr='LongBench_lcc_5',
404
+ eval_cfg=dict(
405
+ evaluator=dict(
406
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
407
+ pred_role='BOT'),
408
+ infer_cfg=dict(
409
+ inferencer=dict(
410
+ max_out_len=64,
411
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
412
+ prompt_template=dict(
413
+ template=dict(round=[
414
+ dict(
415
+ prompt=
416
+ 'Please complete the code given below. \n{context}Next line of code:\n',
417
+ role='HUMAN'),
418
+ ]),
419
+ type=
420
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
421
+ retriever=dict(
422
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
423
+ name='lcc',
424
+ path='opencompass/Longbench',
425
+ reader_cfg=dict(
426
+ input_columns=[
427
+ 'context',
428
+ ],
429
+ output_column='answers',
430
+ test_range='[315:378]',
431
+ test_split='test',
432
+ train_split='test'),
433
+ type='opencompass.datasets.LongBenchlccDataset'),
434
+ dict(
435
+ abbr='LongBench_repobench-p_5',
436
+ eval_cfg=dict(
437
+ evaluator=dict(
438
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
439
+ pred_role='BOT'),
440
+ infer_cfg=dict(
441
+ inferencer=dict(
442
+ max_out_len=64,
443
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
444
+ prompt_template=dict(
445
+ template=dict(round=[
446
+ dict(
447
+ prompt=
448
+ 'Please complete the code given below. \n{context}{input}Next line of code:\n',
449
+ role='HUMAN'),
450
+ ]),
451
+ type=
452
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
453
+ retriever=dict(
454
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
455
+ name='repobench-p',
456
+ path='opencompass/Longbench',
457
+ reader_cfg=dict(
458
+ input_columns=[
459
+ 'context',
460
+ 'input',
461
+ ],
462
+ output_column='answers',
463
+ test_range='[315:378]',
464
+ test_split='test',
465
+ train_split='test'),
466
+ type='opencompass.datasets.LongBenchrepobenchDataset'),
467
+ dict(
468
+ abbr='LongBench_passage_retrieval_en_5',
469
+ eval_cfg=dict(
470
+ evaluator=dict(
471
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
472
+ pred_role='BOT'),
473
+ infer_cfg=dict(
474
+ inferencer=dict(
475
+ max_out_len=32,
476
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
477
+ prompt_template=dict(
478
+ template=dict(round=[
479
+ dict(
480
+ prompt=
481
+ 'Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like "Paragraph 1", "Paragraph 2", etc.\n\nThe answer is: ',
482
+ role='HUMAN'),
483
+ ]),
484
+ type=
485
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
486
+ retriever=dict(
487
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
488
+ name='passage_retrieval_en',
489
+ path='opencompass/Longbench',
490
+ reader_cfg=dict(
491
+ input_columns=[
492
+ 'context',
493
+ 'input',
494
+ ],
495
+ output_column='answers',
496
+ test_range='[125:150]',
497
+ test_split='test',
498
+ train_split='test'),
499
+ type='opencompass.datasets.LongBenchpassage_retrieval_enDataset'),
500
+ dict(
501
+ abbr='LongBench_passage_retrieval_zh_5',
502
+ eval_cfg=dict(
503
+ evaluator=dict(
504
+ language='zh',
505
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
506
+ pred_role='BOT'),
507
+ infer_cfg=dict(
508
+ inferencer=dict(
509
+ max_out_len=32,
510
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
511
+ prompt_template=dict(
512
+ template=dict(round=[
513
+ dict(
514
+ prompt=
515
+ '以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是"段落1","段落2"等格式\n\n答案是:',
516
+ role='HUMAN'),
517
+ ]),
518
+ type=
519
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
520
+ retriever=dict(
521
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
522
+ name='passage_retrieval_zh',
523
+ path='opencompass/Longbench',
524
+ reader_cfg=dict(
525
+ input_columns=[
526
+ 'context',
527
+ 'input',
528
+ ],
529
+ output_column='answers',
530
+ test_range='[125:150]',
531
+ test_split='test',
532
+ train_split='test'),
533
+ type='opencompass.datasets.LongBenchpassage_retrieval_zhDataset'),
534
+ dict(
535
+ abbr='LongBench_passage_count_5',
536
+ eval_cfg=dict(
537
+ evaluator=dict(
538
+ type='opencompass.datasets.LongBenchCountEvaluator'),
539
+ pred_role='BOT'),
540
+ infer_cfg=dict(
541
+ inferencer=dict(
542
+ max_out_len=32,
543
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
544
+ prompt_template=dict(
545
+ template=dict(round=[
546
+ dict(
547
+ prompt=
548
+ 'There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ',
549
+ role='HUMAN'),
550
+ ]),
551
+ type=
552
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
553
+ retriever=dict(
554
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
555
+ name='passage_count',
556
+ path='opencompass/Longbench',
557
+ reader_cfg=dict(
558
+ input_columns=[
559
+ 'context',
560
+ 'input',
561
+ ],
562
+ output_column='answers',
563
+ test_range='[125:150]',
564
+ test_split='test',
565
+ train_split='test'),
566
+ type='opencompass.datasets.LongBenchpassage_countDataset'),
567
+ dict(
568
+ abbr='LongBench_trec_5',
569
+ eval_cfg=dict(
570
+ evaluator=dict(
571
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
572
+ ),
573
+ pred_postprocessor=dict(
574
+ type='opencompass.datasets.trec_postprocess'),
575
+ pred_role='BOT'),
576
+ infer_cfg=dict(
577
+ inferencer=dict(
578
+ max_out_len=64,
579
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
580
+ prompt_template=dict(
581
+ template=dict(round=[
582
+ dict(
583
+ prompt=
584
+ 'Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}',
585
+ role='HUMAN'),
586
+ ]),
587
+ type=
588
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
589
+ retriever=dict(
590
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
591
+ name='trec',
592
+ path='opencompass/Longbench',
593
+ reader_cfg=dict(
594
+ input_columns=[
595
+ 'context',
596
+ 'input',
597
+ ],
598
+ output_column='all_labels',
599
+ test_range='[125:150]',
600
+ test_split='test',
601
+ train_split='test'),
602
+ type='opencompass.datasets.LongBenchtrecDataset'),
603
+ dict(
604
+ abbr='LongBench_lsht_5',
605
+ eval_cfg=dict(
606
+ evaluator=dict(
607
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
608
+ ),
609
+ pred_postprocessor=dict(
610
+ type='opencompass.datasets.lsht_postprocess'),
611
+ pred_role='BOT'),
612
+ infer_cfg=dict(
613
+ inferencer=dict(
614
+ max_out_len=64,
615
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
616
+ prompt_template=dict(
617
+ template=dict(round=[
618
+ dict(
619
+ prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
620
+ role='HUMAN'),
621
+ ]),
622
+ type=
623
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
624
+ retriever=dict(
625
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
626
+ name='lsht',
627
+ path='opencompass/Longbench',
628
+ reader_cfg=dict(
629
+ input_columns=[
630
+ 'context',
631
+ 'input',
632
+ ],
633
+ output_column='all_labels',
634
+ test_range='[125:150]',
635
+ test_split='test',
636
+ train_split='test'),
637
+ type='opencompass.datasets.LongBenchlshtDataset'),
638
+ dict(
639
+ abbr='LongBench_multi_news_5',
640
+ eval_cfg=dict(
641
+ evaluator=dict(
642
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
643
+ pred_role='BOT'),
644
+ infer_cfg=dict(
645
+ inferencer=dict(
646
+ max_out_len=512,
647
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
648
+ prompt_template=dict(
649
+ template=dict(round=[
650
+ dict(
651
+ prompt=
652
+ 'You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n',
653
+ role='HUMAN'),
654
+ ]),
655
+ type=
656
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
657
+ retriever=dict(
658
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
659
+ name='multi_news',
660
+ path='opencompass/Longbench',
661
+ reader_cfg=dict(
662
+ input_columns=[
663
+ 'context',
664
+ ],
665
+ output_column='answers',
666
+ test_range='[125:150]',
667
+ test_split='test',
668
+ train_split='test'),
669
+ type='opencompass.datasets.LongBenchmulti_newsDataset'),
670
+ dict(
671
+ abbr='LongBench_samsum_5',
672
+ eval_cfg=dict(
673
+ evaluator=dict(
674
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
675
+ pred_postprocessor=dict(
676
+ type='opencompass.datasets.samsum_postprocess'),
677
+ pred_role='BOT'),
678
+ infer_cfg=dict(
679
+ inferencer=dict(
680
+ max_out_len=128,
681
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
682
+ prompt_template=dict(
683
+ template=dict(round=[
684
+ dict(
685
+ prompt=
686
+ 'Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}',
687
+ role='HUMAN'),
688
+ ]),
689
+ type=
690
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
691
+ retriever=dict(
692
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
693
+ name='samsum',
694
+ path='opencompass/Longbench',
695
+ reader_cfg=dict(
696
+ input_columns=[
697
+ 'context',
698
+ 'input',
699
+ ],
700
+ output_column='answers',
701
+ test_range='[125:150]',
702
+ test_split='test',
703
+ train_split='test'),
704
+ type='opencompass.datasets.LongBenchsamsumDataset'),
705
+ dict(
706
+ abbr='LongBench_2wikimqa_5',
707
+ eval_cfg=dict(
708
+ evaluator=dict(
709
+ type='opencompass.datasets.LongBenchF1Evaluator'),
710
+ pred_role='BOT'),
711
+ infer_cfg=dict(
712
+ inferencer=dict(
713
+ max_out_len=32,
714
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
715
+ prompt_template=dict(
716
+ template=dict(round=[
717
+ dict(
718
+ prompt=
719
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
720
+ role='HUMAN'),
721
+ ]),
722
+ type=
723
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
724
+ retriever=dict(
725
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
726
+ name='2wikimqa',
727
+ path='opencompass/Longbench',
728
+ reader_cfg=dict(
729
+ input_columns=[
730
+ 'context',
731
+ 'input',
732
+ ],
733
+ output_column='answers',
734
+ test_range='[125:150]',
735
+ test_split='test',
736
+ train_split='test'),
737
+ type='opencompass.datasets.LongBench2wikimqaDataset'),
738
+ dict(
739
+ abbr='LongBench_hotpotqa_5',
740
+ eval_cfg=dict(
741
+ evaluator=dict(
742
+ type='opencompass.datasets.LongBenchF1Evaluator'),
743
+ pred_role='BOT'),
744
+ infer_cfg=dict(
745
+ inferencer=dict(
746
+ max_out_len=32,
747
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
748
+ prompt_template=dict(
749
+ template=dict(round=[
750
+ dict(
751
+ prompt=
752
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
753
+ role='HUMAN'),
754
+ ]),
755
+ type=
756
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
757
+ retriever=dict(
758
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
759
+ name='hotpotqa',
760
+ path='opencompass/Longbench',
761
+ reader_cfg=dict(
762
+ input_columns=[
763
+ 'context',
764
+ 'input',
765
+ ],
766
+ output_column='answers',
767
+ test_range='[125:150]',
768
+ test_split='test',
769
+ train_split='test'),
770
+ type='opencompass.datasets.LongBenchhotpotqaDataset'),
771
+ dict(
772
+ abbr='LongBench_musique_5',
773
+ eval_cfg=dict(
774
+ evaluator=dict(
775
+ type='opencompass.datasets.LongBenchF1Evaluator'),
776
+ pred_role='BOT'),
777
+ infer_cfg=dict(
778
+ inferencer=dict(
779
+ max_out_len=32,
780
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
781
+ prompt_template=dict(
782
+ template=dict(round=[
783
+ dict(
784
+ prompt=
785
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
786
+ role='HUMAN'),
787
+ ]),
788
+ type=
789
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
790
+ retriever=dict(
791
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
792
+ name='musique',
793
+ path='opencompass/Longbench',
794
+ reader_cfg=dict(
795
+ input_columns=[
796
+ 'context',
797
+ 'input',
798
+ ],
799
+ output_column='answers',
800
+ test_range='[125:150]',
801
+ test_split='test',
802
+ train_split='test'),
803
+ type='opencompass.datasets.LongBenchmusiqueDataset'),
804
+ dict(
805
+ abbr='LongBench_multifieldqa_en_5',
806
+ eval_cfg=dict(
807
+ evaluator=dict(
808
+ type='opencompass.datasets.LongBenchF1Evaluator'),
809
+ pred_role='BOT'),
810
+ infer_cfg=dict(
811
+ inferencer=dict(
812
+ max_out_len=64,
813
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
814
+ prompt_template=dict(
815
+ template=dict(round=[
816
+ dict(
817
+ prompt=
818
+ 'Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
819
+ role='HUMAN'),
820
+ ]),
821
+ type=
822
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
823
+ retriever=dict(
824
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
825
+ name='multifieldqa_en',
826
+ path='opencompass/Longbench',
827
+ reader_cfg=dict(
828
+ input_columns=[
829
+ 'context',
830
+ 'input',
831
+ ],
832
+ output_column='answers',
833
+ test_range='[95:114]',
834
+ test_split='test',
835
+ train_split='test'),
836
+ type='opencompass.datasets.LongBenchmultifieldqa_enDataset'),
837
+ dict(
838
+ abbr='LongBench_multifieldqa_zh_5',
839
+ eval_cfg=dict(
840
+ evaluator=dict(
841
+ language='zh',
842
+ type='opencompass.datasets.LongBenchF1Evaluator'),
843
+ pred_role='BOT'),
844
+ infer_cfg=dict(
845
+ inferencer=dict(
846
+ max_out_len=64,
847
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
848
+ prompt_template=dict(
849
+ template=dict(round=[
850
+ dict(
851
+ prompt=
852
+ '阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:',
853
+ role='HUMAN'),
854
+ ]),
855
+ type=
856
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
857
+ retriever=dict(
858
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
859
+ name='multifieldqa_zh',
860
+ path='opencompass/Longbench',
861
+ reader_cfg=dict(
862
+ input_columns=[
863
+ 'context',
864
+ 'input',
865
+ ],
866
+ output_column='answers',
867
+ test_range='[125:150]',
868
+ test_split='test',
869
+ train_split='test'),
870
+ type='opencompass.datasets.LongBenchmultifieldqa_zhDataset'),
871
+ dict(
872
+ abbr='LongBench_narrativeqa_5',
873
+ eval_cfg=dict(
874
+ evaluator=dict(
875
+ type='opencompass.datasets.LongBenchF1Evaluator'),
876
+ pred_role='BOT'),
877
+ infer_cfg=dict(
878
+ inferencer=dict(
879
+ max_out_len=128,
880
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
881
+ prompt_template=dict(
882
+ template=dict(round=[
883
+ dict(
884
+ prompt=
885
+ 'You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:',
886
+ role='HUMAN'),
887
+ ]),
888
+ type=
889
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
890
+ retriever=dict(
891
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
892
+ name='narrativeqa',
893
+ path='opencompass/Longbench',
894
+ reader_cfg=dict(
895
+ input_columns=[
896
+ 'context',
897
+ 'input',
898
+ ],
899
+ output_column='answers',
900
+ test_range='[125:150]',
901
+ test_split='test',
902
+ train_split='test'),
903
+ type='opencompass.datasets.LongBenchnarrativeqaDataset'),
904
+ dict(
905
+ abbr='LongBench_qasper_5',
906
+ eval_cfg=dict(
907
+ evaluator=dict(
908
+ type='opencompass.datasets.LongBenchF1Evaluator'),
909
+ pred_role='BOT'),
910
+ infer_cfg=dict(
911
+ inferencer=dict(
912
+ max_out_len=32,
913
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
914
+ prompt_template=dict(
915
+ template=dict(round=[
916
+ dict(
917
+ prompt=
918
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
919
+ role='HUMAN'),
920
+ ]),
921
+ type=
922
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
923
+ retriever=dict(
924
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
925
+ name='qasper',
926
+ path='opencompass/Longbench',
927
+ reader_cfg=dict(
928
+ input_columns=[
929
+ 'context',
930
+ 'input',
931
+ ],
932
+ output_column='answers',
933
+ test_range='[125:150]',
934
+ test_split='test',
935
+ train_split='test'),
936
+ type='opencompass.datasets.LongBenchqasperDataset'),
937
+ dict(
938
+ abbr='LongBench_triviaqa_5',
939
+ eval_cfg=dict(
940
+ evaluator=dict(
941
+ type='opencompass.datasets.LongBenchF1Evaluator'),
942
+ pred_postprocessor=dict(
943
+ type='opencompass.datasets.triviaqa_postprocess'),
944
+ pred_role='BOT'),
945
+ infer_cfg=dict(
946
+ inferencer=dict(
947
+ max_out_len=32,
948
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
949
+ prompt_template=dict(
950
+ template=dict(round=[
951
+ dict(
952
+ prompt=
953
+ 'Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}',
954
+ role='HUMAN'),
955
+ ]),
956
+ type=
957
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
958
+ retriever=dict(
959
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
960
+ name='triviaqa',
961
+ path='opencompass/Longbench',
962
+ reader_cfg=dict(
963
+ input_columns=[
964
+ 'context',
965
+ 'input',
966
+ ],
967
+ output_column='answers',
968
+ test_range='[125:150]',
969
+ test_split='test',
970
+ train_split='test'),
971
+ type='opencompass.datasets.LongBenchtriviaqaDataset'),
972
+ dict(
973
+ abbr='LongBench_gov_report_5',
974
+ eval_cfg=dict(
975
+ evaluator=dict(
976
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
977
+ pred_role='BOT'),
978
+ infer_cfg=dict(
979
+ inferencer=dict(
980
+ max_out_len=512,
981
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
982
+ prompt_template=dict(
983
+ template=dict(round=[
984
+ dict(
985
+ prompt=
986
+ 'You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
987
+ role='HUMAN'),
988
+ ]),
989
+ type=
990
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
991
+ retriever=dict(
992
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
993
+ name='gov_report',
994
+ path='opencompass/Longbench',
995
+ reader_cfg=dict(
996
+ input_columns=[
997
+ 'context',
998
+ ],
999
+ output_column='answers',
1000
+ test_range='[125:150]',
1001
+ test_split='test',
1002
+ train_split='test'),
1003
+ type='opencompass.datasets.LongBenchgov_reportDataset'),
1004
+ dict(
1005
+ abbr='LongBench_qmsum_5',
1006
+ eval_cfg=dict(
1007
+ evaluator=dict(
1008
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1009
+ pred_role='BOT'),
1010
+ infer_cfg=dict(
1011
+ inferencer=dict(
1012
+ max_out_len=512,
1013
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1014
+ prompt_template=dict(
1015
+ template=dict(round=[
1016
+ dict(
1017
+ prompt=
1018
+ 'You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:',
1019
+ role='HUMAN'),
1020
+ ]),
1021
+ type=
1022
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1023
+ retriever=dict(
1024
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1025
+ name='qmsum',
1026
+ path='opencompass/Longbench',
1027
+ reader_cfg=dict(
1028
+ input_columns=[
1029
+ 'context',
1030
+ 'input',
1031
+ ],
1032
+ output_column='answers',
1033
+ test_range='[125:150]',
1034
+ test_split='test',
1035
+ train_split='test'),
1036
+ type='opencompass.datasets.LongBenchqmsumDataset'),
1037
+ dict(
1038
+ abbr='LongBench_vcsum_5',
1039
+ eval_cfg=dict(
1040
+ evaluator=dict(
1041
+ language='zh',
1042
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1043
+ pred_role='BOT'),
1044
+ infer_cfg=dict(
1045
+ inferencer=dict(
1046
+ max_out_len=512,
1047
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1048
+ prompt_template=dict(
1049
+ template=dict(round=[
1050
+ dict(
1051
+ prompt=
1052
+ '下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:',
1053
+ role='HUMAN'),
1054
+ ]),
1055
+ type=
1056
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1057
+ retriever=dict(
1058
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1059
+ name='vcsum',
1060
+ path='opencompass/Longbench',
1061
+ reader_cfg=dict(
1062
+ input_columns=[
1063
+ 'context',
1064
+ ],
1065
+ output_column='answers',
1066
+ test_range='[125:150]',
1067
+ test_split='test',
1068
+ train_split='test'),
1069
+ type='opencompass.datasets.LongBenchvcsumDataset'),
1070
+ dict(
1071
+ abbr='LongBench_dureader_5',
1072
+ eval_cfg=dict(
1073
+ evaluator=dict(
1074
+ language='zh',
1075
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1076
+ pred_role='BOT'),
1077
+ infer_cfg=dict(
1078
+ inferencer=dict(
1079
+ max_out_len=128,
1080
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1081
+ prompt_template=dict(
1082
+ template=dict(round=[
1083
+ dict(
1084
+ prompt=
1085
+ '请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
1086
+ role='HUMAN'),
1087
+ ]),
1088
+ type=
1089
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1090
+ retriever=dict(
1091
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1092
+ name='dureader',
1093
+ path='opencompass/Longbench',
1094
+ reader_cfg=dict(
1095
+ input_columns=[
1096
+ 'context',
1097
+ 'input',
1098
+ ],
1099
+ output_column='answers',
1100
+ test_range='[125:150]',
1101
+ test_split='test',
1102
+ train_split='test'),
1103
+ type='opencompass.datasets.LongBenchdureaderDataset'),
1104
+ dict(
1105
+ abbr='LongBench_lcc_5',
1106
+ eval_cfg=dict(
1107
+ evaluator=dict(
1108
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
1109
+ pred_role='BOT'),
1110
+ infer_cfg=dict(
1111
+ inferencer=dict(
1112
+ max_out_len=64,
1113
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1114
+ prompt_template=dict(
1115
+ template=dict(round=[
1116
+ dict(
1117
+ prompt=
1118
+ 'Please complete the code given below. \n{context}Next line of code:\n',
1119
+ role='HUMAN'),
1120
+ ]),
1121
+ type=
1122
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1123
+ retriever=dict(
1124
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1125
+ name='lcc',
1126
+ path='opencompass/Longbench',
1127
+ reader_cfg=dict(
1128
+ input_columns=[
1129
+ 'context',
1130
+ ],
1131
+ output_column='answers',
1132
+ test_range='[315:378]',
1133
+ test_split='test',
1134
+ train_split='test'),
1135
+ type='opencompass.datasets.LongBenchlccDataset'),
1136
+ dict(
1137
+ abbr='LongBench_repobench-p_5',
1138
+ eval_cfg=dict(
1139
+ evaluator=dict(
1140
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
1141
+ pred_role='BOT'),
1142
+ infer_cfg=dict(
1143
+ inferencer=dict(
1144
+ max_out_len=64,
1145
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1146
+ prompt_template=dict(
1147
+ template=dict(round=[
1148
+ dict(
1149
+ prompt=
1150
+ 'Please complete the code given below. \n{context}{input}Next line of code:\n',
1151
+ role='HUMAN'),
1152
+ ]),
1153
+ type=
1154
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1155
+ retriever=dict(
1156
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1157
+ name='repobench-p',
1158
+ path='opencompass/Longbench',
1159
+ reader_cfg=dict(
1160
+ input_columns=[
1161
+ 'context',
1162
+ 'input',
1163
+ ],
1164
+ output_column='answers',
1165
+ test_range='[315:378]',
1166
+ test_split='test',
1167
+ train_split='test'),
1168
+ type='opencompass.datasets.LongBenchrepobenchDataset'),
1169
+ dict(
1170
+ abbr='LongBench_passage_retrieval_en_5',
1171
+ eval_cfg=dict(
1172
+ evaluator=dict(
1173
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
1174
+ pred_role='BOT'),
1175
+ infer_cfg=dict(
1176
+ inferencer=dict(
1177
+ max_out_len=32,
1178
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1179
+ prompt_template=dict(
1180
+ template=dict(round=[
1181
+ dict(
1182
+ prompt=
1183
+ 'Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like "Paragraph 1", "Paragraph 2", etc.\n\nThe answer is: ',
1184
+ role='HUMAN'),
1185
+ ]),
1186
+ type=
1187
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1188
+ retriever=dict(
1189
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1190
+ name='passage_retrieval_en',
1191
+ path='opencompass/Longbench',
1192
+ reader_cfg=dict(
1193
+ input_columns=[
1194
+ 'context',
1195
+ 'input',
1196
+ ],
1197
+ output_column='answers',
1198
+ test_range='[125:150]',
1199
+ test_split='test',
1200
+ train_split='test'),
1201
+ type='opencompass.datasets.LongBenchpassage_retrieval_enDataset'),
1202
+ dict(
1203
+ abbr='LongBench_passage_retrieval_zh_5',
1204
+ eval_cfg=dict(
1205
+ evaluator=dict(
1206
+ language='zh',
1207
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
1208
+ pred_role='BOT'),
1209
+ infer_cfg=dict(
1210
+ inferencer=dict(
1211
+ max_out_len=32,
1212
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1213
+ prompt_template=dict(
1214
+ template=dict(round=[
1215
+ dict(
1216
+ prompt=
1217
+ '以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是"段落1","段落2"等格式\n\n答案是:',
1218
+ role='HUMAN'),
1219
+ ]),
1220
+ type=
1221
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1222
+ retriever=dict(
1223
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1224
+ name='passage_retrieval_zh',
1225
+ path='opencompass/Longbench',
1226
+ reader_cfg=dict(
1227
+ input_columns=[
1228
+ 'context',
1229
+ 'input',
1230
+ ],
1231
+ output_column='answers',
1232
+ test_range='[125:150]',
1233
+ test_split='test',
1234
+ train_split='test'),
1235
+ type='opencompass.datasets.LongBenchpassage_retrieval_zhDataset'),
1236
+ dict(
1237
+ abbr='LongBench_passage_count_5',
1238
+ eval_cfg=dict(
1239
+ evaluator=dict(
1240
+ type='opencompass.datasets.LongBenchCountEvaluator'),
1241
+ pred_role='BOT'),
1242
+ infer_cfg=dict(
1243
+ inferencer=dict(
1244
+ max_out_len=32,
1245
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1246
+ prompt_template=dict(
1247
+ template=dict(round=[
1248
+ dict(
1249
+ prompt=
1250
+ 'There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ',
1251
+ role='HUMAN'),
1252
+ ]),
1253
+ type=
1254
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1255
+ retriever=dict(
1256
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1257
+ name='passage_count',
1258
+ path='opencompass/Longbench',
1259
+ reader_cfg=dict(
1260
+ input_columns=[
1261
+ 'context',
1262
+ 'input',
1263
+ ],
1264
+ output_column='answers',
1265
+ test_range='[125:150]',
1266
+ test_split='test',
1267
+ train_split='test'),
1268
+ type='opencompass.datasets.LongBenchpassage_countDataset'),
1269
+ dict(
1270
+ abbr='LongBench_trec_5',
1271
+ eval_cfg=dict(
1272
+ evaluator=dict(
1273
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
1274
+ ),
1275
+ pred_postprocessor=dict(
1276
+ type='opencompass.datasets.trec_postprocess'),
1277
+ pred_role='BOT'),
1278
+ infer_cfg=dict(
1279
+ inferencer=dict(
1280
+ max_out_len=64,
1281
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1282
+ prompt_template=dict(
1283
+ template=dict(round=[
1284
+ dict(
1285
+ prompt=
1286
+ 'Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}',
1287
+ role='HUMAN'),
1288
+ ]),
1289
+ type=
1290
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1291
+ retriever=dict(
1292
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1293
+ name='trec',
1294
+ path='opencompass/Longbench',
1295
+ reader_cfg=dict(
1296
+ input_columns=[
1297
+ 'context',
1298
+ 'input',
1299
+ ],
1300
+ output_column='all_labels',
1301
+ test_range='[125:150]',
1302
+ test_split='test',
1303
+ train_split='test'),
1304
+ type='opencompass.datasets.LongBenchtrecDataset'),
1305
+ dict(
1306
+ abbr='LongBench_lsht_5',
1307
+ eval_cfg=dict(
1308
+ evaluator=dict(
1309
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
1310
+ ),
1311
+ pred_postprocessor=dict(
1312
+ type='opencompass.datasets.lsht_postprocess'),
1313
+ pred_role='BOT'),
1314
+ infer_cfg=dict(
1315
+ inferencer=dict(
1316
+ max_out_len=64,
1317
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1318
+ prompt_template=dict(
1319
+ template=dict(round=[
1320
+ dict(
1321
+ prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
1322
+ role='HUMAN'),
1323
+ ]),
1324
+ type=
1325
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1326
+ retriever=dict(
1327
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1328
+ name='lsht',
1329
+ path='opencompass/Longbench',
1330
+ reader_cfg=dict(
1331
+ input_columns=[
1332
+ 'context',
1333
+ 'input',
1334
+ ],
1335
+ output_column='all_labels',
1336
+ test_range='[125:150]',
1337
+ test_split='test',
1338
+ train_split='test'),
1339
+ type='opencompass.datasets.LongBenchlshtDataset'),
1340
+ dict(
1341
+ abbr='LongBench_multi_news_5',
1342
+ eval_cfg=dict(
1343
+ evaluator=dict(
1344
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1345
+ pred_role='BOT'),
1346
+ infer_cfg=dict(
1347
+ inferencer=dict(
1348
+ max_out_len=512,
1349
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1350
+ prompt_template=dict(
1351
+ template=dict(round=[
1352
+ dict(
1353
+ prompt=
1354
+ 'You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n',
1355
+ role='HUMAN'),
1356
+ ]),
1357
+ type=
1358
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1359
+ retriever=dict(
1360
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1361
+ name='multi_news',
1362
+ path='opencompass/Longbench',
1363
+ reader_cfg=dict(
1364
+ input_columns=[
1365
+ 'context',
1366
+ ],
1367
+ output_column='answers',
1368
+ test_range='[125:150]',
1369
+ test_split='test',
1370
+ train_split='test'),
1371
+ type='opencompass.datasets.LongBenchmulti_newsDataset'),
1372
+ dict(
1373
+ abbr='LongBench_samsum_5',
1374
+ eval_cfg=dict(
1375
+ evaluator=dict(
1376
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1377
+ pred_postprocessor=dict(
1378
+ type='opencompass.datasets.samsum_postprocess'),
1379
+ pred_role='BOT'),
1380
+ infer_cfg=dict(
1381
+ inferencer=dict(
1382
+ max_out_len=128,
1383
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1384
+ prompt_template=dict(
1385
+ template=dict(round=[
1386
+ dict(
1387
+ prompt=
1388
+ 'Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}',
1389
+ role='HUMAN'),
1390
+ ]),
1391
+ type=
1392
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1393
+ retriever=dict(
1394
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1395
+ name='samsum',
1396
+ path='opencompass/Longbench',
1397
+ reader_cfg=dict(
1398
+ input_columns=[
1399
+ 'context',
1400
+ 'input',
1401
+ ],
1402
+ output_column='answers',
1403
+ test_range='[125:150]',
1404
+ test_split='test',
1405
+ train_split='test'),
1406
+ type='opencompass.datasets.LongBenchsamsumDataset'),
1407
+ ],
1408
+ ]
1409
+ models = [
1410
+ dict(
1411
+ abbr='delta_net',
1412
+ batch_size=128,
1413
+ max_seq_len=2048,
1414
+ model_kwargs=dict(
1415
+ device_map='auto',
1416
+ torch_dtype='torch.bfloat16',
1417
+ trust_remote_code=True),
1418
+ path='/mnt/jfzn/msj/delta_net-1.3B-100B',
1419
+ run_cfg=dict(num_gpus=1),
1420
+ tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
1421
+ tokenizer_path='/mnt/jfzn/msj/delta_net-1.3B-100B',
1422
+ type='opencompass.models.HuggingFaceBaseModel'),
1423
+ ]
1424
+ work_dir = 'outputs/default/20251127_202918'
tmp/0954e290-fcd0-400c-8c58-f14a577dc5e4_params.py ADDED
@@ -0,0 +1,1424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets = [
2
+ [
3
+ dict(
4
+ abbr='LongBench_2wikimqa_0',
5
+ eval_cfg=dict(
6
+ evaluator=dict(
7
+ type='opencompass.datasets.LongBenchF1Evaluator'),
8
+ pred_role='BOT'),
9
+ infer_cfg=dict(
10
+ inferencer=dict(
11
+ max_out_len=32,
12
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
13
+ prompt_template=dict(
14
+ template=dict(round=[
15
+ dict(
16
+ prompt=
17
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
18
+ role='HUMAN'),
19
+ ]),
20
+ type=
21
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
22
+ retriever=dict(
23
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
24
+ name='2wikimqa',
25
+ path='opencompass/Longbench',
26
+ reader_cfg=dict(
27
+ input_columns=[
28
+ 'context',
29
+ 'input',
30
+ ],
31
+ output_column='answers',
32
+ test_range='[0:25]',
33
+ test_split='test',
34
+ train_split='test'),
35
+ type='opencompass.datasets.LongBench2wikimqaDataset'),
36
+ dict(
37
+ abbr='LongBench_hotpotqa_0',
38
+ eval_cfg=dict(
39
+ evaluator=dict(
40
+ type='opencompass.datasets.LongBenchF1Evaluator'),
41
+ pred_role='BOT'),
42
+ infer_cfg=dict(
43
+ inferencer=dict(
44
+ max_out_len=32,
45
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
46
+ prompt_template=dict(
47
+ template=dict(round=[
48
+ dict(
49
+ prompt=
50
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
51
+ role='HUMAN'),
52
+ ]),
53
+ type=
54
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
55
+ retriever=dict(
56
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
57
+ name='hotpotqa',
58
+ path='opencompass/Longbench',
59
+ reader_cfg=dict(
60
+ input_columns=[
61
+ 'context',
62
+ 'input',
63
+ ],
64
+ output_column='answers',
65
+ test_range='[0:25]',
66
+ test_split='test',
67
+ train_split='test'),
68
+ type='opencompass.datasets.LongBenchhotpotqaDataset'),
69
+ dict(
70
+ abbr='LongBench_musique_0',
71
+ eval_cfg=dict(
72
+ evaluator=dict(
73
+ type='opencompass.datasets.LongBenchF1Evaluator'),
74
+ pred_role='BOT'),
75
+ infer_cfg=dict(
76
+ inferencer=dict(
77
+ max_out_len=32,
78
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
79
+ prompt_template=dict(
80
+ template=dict(round=[
81
+ dict(
82
+ prompt=
83
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
84
+ role='HUMAN'),
85
+ ]),
86
+ type=
87
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
88
+ retriever=dict(
89
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
90
+ name='musique',
91
+ path='opencompass/Longbench',
92
+ reader_cfg=dict(
93
+ input_columns=[
94
+ 'context',
95
+ 'input',
96
+ ],
97
+ output_column='answers',
98
+ test_range='[0:25]',
99
+ test_split='test',
100
+ train_split='test'),
101
+ type='opencompass.datasets.LongBenchmusiqueDataset'),
102
+ dict(
103
+ abbr='LongBench_multifieldqa_en_0',
104
+ eval_cfg=dict(
105
+ evaluator=dict(
106
+ type='opencompass.datasets.LongBenchF1Evaluator'),
107
+ pred_role='BOT'),
108
+ infer_cfg=dict(
109
+ inferencer=dict(
110
+ max_out_len=64,
111
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
112
+ prompt_template=dict(
113
+ template=dict(round=[
114
+ dict(
115
+ prompt=
116
+ 'Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
117
+ role='HUMAN'),
118
+ ]),
119
+ type=
120
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
121
+ retriever=dict(
122
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
123
+ name='multifieldqa_en',
124
+ path='opencompass/Longbench',
125
+ reader_cfg=dict(
126
+ input_columns=[
127
+ 'context',
128
+ 'input',
129
+ ],
130
+ output_column='answers',
131
+ test_range='[0:19]',
132
+ test_split='test',
133
+ train_split='test'),
134
+ type='opencompass.datasets.LongBenchmultifieldqa_enDataset'),
135
+ dict(
136
+ abbr='LongBench_multifieldqa_zh_0',
137
+ eval_cfg=dict(
138
+ evaluator=dict(
139
+ language='zh',
140
+ type='opencompass.datasets.LongBenchF1Evaluator'),
141
+ pred_role='BOT'),
142
+ infer_cfg=dict(
143
+ inferencer=dict(
144
+ max_out_len=64,
145
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
146
+ prompt_template=dict(
147
+ template=dict(round=[
148
+ dict(
149
+ prompt=
150
+ '阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:',
151
+ role='HUMAN'),
152
+ ]),
153
+ type=
154
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
155
+ retriever=dict(
156
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
157
+ name='multifieldqa_zh',
158
+ path='opencompass/Longbench',
159
+ reader_cfg=dict(
160
+ input_columns=[
161
+ 'context',
162
+ 'input',
163
+ ],
164
+ output_column='answers',
165
+ test_range='[0:25]',
166
+ test_split='test',
167
+ train_split='test'),
168
+ type='opencompass.datasets.LongBenchmultifieldqa_zhDataset'),
169
+ dict(
170
+ abbr='LongBench_narrativeqa_0',
171
+ eval_cfg=dict(
172
+ evaluator=dict(
173
+ type='opencompass.datasets.LongBenchF1Evaluator'),
174
+ pred_role='BOT'),
175
+ infer_cfg=dict(
176
+ inferencer=dict(
177
+ max_out_len=128,
178
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
179
+ prompt_template=dict(
180
+ template=dict(round=[
181
+ dict(
182
+ prompt=
183
+ 'You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:',
184
+ role='HUMAN'),
185
+ ]),
186
+ type=
187
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
188
+ retriever=dict(
189
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
190
+ name='narrativeqa',
191
+ path='opencompass/Longbench',
192
+ reader_cfg=dict(
193
+ input_columns=[
194
+ 'context',
195
+ 'input',
196
+ ],
197
+ output_column='answers',
198
+ test_range='[0:25]',
199
+ test_split='test',
200
+ train_split='test'),
201
+ type='opencompass.datasets.LongBenchnarrativeqaDataset'),
202
+ dict(
203
+ abbr='LongBench_qasper_0',
204
+ eval_cfg=dict(
205
+ evaluator=dict(
206
+ type='opencompass.datasets.LongBenchF1Evaluator'),
207
+ pred_role='BOT'),
208
+ infer_cfg=dict(
209
+ inferencer=dict(
210
+ max_out_len=32,
211
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
212
+ prompt_template=dict(
213
+ template=dict(round=[
214
+ dict(
215
+ prompt=
216
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
217
+ role='HUMAN'),
218
+ ]),
219
+ type=
220
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
221
+ retriever=dict(
222
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
223
+ name='qasper',
224
+ path='opencompass/Longbench',
225
+ reader_cfg=dict(
226
+ input_columns=[
227
+ 'context',
228
+ 'input',
229
+ ],
230
+ output_column='answers',
231
+ test_range='[0:25]',
232
+ test_split='test',
233
+ train_split='test'),
234
+ type='opencompass.datasets.LongBenchqasperDataset'),
235
+ dict(
236
+ abbr='LongBench_triviaqa_0',
237
+ eval_cfg=dict(
238
+ evaluator=dict(
239
+ type='opencompass.datasets.LongBenchF1Evaluator'),
240
+ pred_postprocessor=dict(
241
+ type='opencompass.datasets.triviaqa_postprocess'),
242
+ pred_role='BOT'),
243
+ infer_cfg=dict(
244
+ inferencer=dict(
245
+ max_out_len=32,
246
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
247
+ prompt_template=dict(
248
+ template=dict(round=[
249
+ dict(
250
+ prompt=
251
+ 'Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}',
252
+ role='HUMAN'),
253
+ ]),
254
+ type=
255
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
256
+ retriever=dict(
257
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
258
+ name='triviaqa',
259
+ path='opencompass/Longbench',
260
+ reader_cfg=dict(
261
+ input_columns=[
262
+ 'context',
263
+ 'input',
264
+ ],
265
+ output_column='answers',
266
+ test_range='[0:25]',
267
+ test_split='test',
268
+ train_split='test'),
269
+ type='opencompass.datasets.LongBenchtriviaqaDataset'),
270
+ dict(
271
+ abbr='LongBench_gov_report_0',
272
+ eval_cfg=dict(
273
+ evaluator=dict(
274
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
275
+ pred_role='BOT'),
276
+ infer_cfg=dict(
277
+ inferencer=dict(
278
+ max_out_len=512,
279
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
280
+ prompt_template=dict(
281
+ template=dict(round=[
282
+ dict(
283
+ prompt=
284
+ 'You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
285
+ role='HUMAN'),
286
+ ]),
287
+ type=
288
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
289
+ retriever=dict(
290
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
291
+ name='gov_report',
292
+ path='opencompass/Longbench',
293
+ reader_cfg=dict(
294
+ input_columns=[
295
+ 'context',
296
+ ],
297
+ output_column='answers',
298
+ test_range='[0:25]',
299
+ test_split='test',
300
+ train_split='test'),
301
+ type='opencompass.datasets.LongBenchgov_reportDataset'),
302
+ dict(
303
+ abbr='LongBench_qmsum_0',
304
+ eval_cfg=dict(
305
+ evaluator=dict(
306
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
307
+ pred_role='BOT'),
308
+ infer_cfg=dict(
309
+ inferencer=dict(
310
+ max_out_len=512,
311
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
312
+ prompt_template=dict(
313
+ template=dict(round=[
314
+ dict(
315
+ prompt=
316
+ 'You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:',
317
+ role='HUMAN'),
318
+ ]),
319
+ type=
320
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
321
+ retriever=dict(
322
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
323
+ name='qmsum',
324
+ path='opencompass/Longbench',
325
+ reader_cfg=dict(
326
+ input_columns=[
327
+ 'context',
328
+ 'input',
329
+ ],
330
+ output_column='answers',
331
+ test_range='[0:25]',
332
+ test_split='test',
333
+ train_split='test'),
334
+ type='opencompass.datasets.LongBenchqmsumDataset'),
335
+ dict(
336
+ abbr='LongBench_vcsum_0',
337
+ eval_cfg=dict(
338
+ evaluator=dict(
339
+ language='zh',
340
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
341
+ pred_role='BOT'),
342
+ infer_cfg=dict(
343
+ inferencer=dict(
344
+ max_out_len=512,
345
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
346
+ prompt_template=dict(
347
+ template=dict(round=[
348
+ dict(
349
+ prompt=
350
+ '下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:',
351
+ role='HUMAN'),
352
+ ]),
353
+ type=
354
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
355
+ retriever=dict(
356
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
357
+ name='vcsum',
358
+ path='opencompass/Longbench',
359
+ reader_cfg=dict(
360
+ input_columns=[
361
+ 'context',
362
+ ],
363
+ output_column='answers',
364
+ test_range='[0:25]',
365
+ test_split='test',
366
+ train_split='test'),
367
+ type='opencompass.datasets.LongBenchvcsumDataset'),
368
+ dict(
369
+ abbr='LongBench_dureader_0',
370
+ eval_cfg=dict(
371
+ evaluator=dict(
372
+ language='zh',
373
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
374
+ pred_role='BOT'),
375
+ infer_cfg=dict(
376
+ inferencer=dict(
377
+ max_out_len=128,
378
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
379
+ prompt_template=dict(
380
+ template=dict(round=[
381
+ dict(
382
+ prompt=
383
+ '请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
384
+ role='HUMAN'),
385
+ ]),
386
+ type=
387
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
388
+ retriever=dict(
389
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
390
+ name='dureader',
391
+ path='opencompass/Longbench',
392
+ reader_cfg=dict(
393
+ input_columns=[
394
+ 'context',
395
+ 'input',
396
+ ],
397
+ output_column='answers',
398
+ test_range='[0:25]',
399
+ test_split='test',
400
+ train_split='test'),
401
+ type='opencompass.datasets.LongBenchdureaderDataset'),
402
+ dict(
403
+ abbr='LongBench_lcc_0',
404
+ eval_cfg=dict(
405
+ evaluator=dict(
406
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
407
+ pred_role='BOT'),
408
+ infer_cfg=dict(
409
+ inferencer=dict(
410
+ max_out_len=64,
411
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
412
+ prompt_template=dict(
413
+ template=dict(round=[
414
+ dict(
415
+ prompt=
416
+ 'Please complete the code given below. \n{context}Next line of code:\n',
417
+ role='HUMAN'),
418
+ ]),
419
+ type=
420
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
421
+ retriever=dict(
422
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
423
+ name='lcc',
424
+ path='opencompass/Longbench',
425
+ reader_cfg=dict(
426
+ input_columns=[
427
+ 'context',
428
+ ],
429
+ output_column='answers',
430
+ test_range='[0:63]',
431
+ test_split='test',
432
+ train_split='test'),
433
+ type='opencompass.datasets.LongBenchlccDataset'),
434
+ dict(
435
+ abbr='LongBench_repobench-p_0',
436
+ eval_cfg=dict(
437
+ evaluator=dict(
438
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
439
+ pred_role='BOT'),
440
+ infer_cfg=dict(
441
+ inferencer=dict(
442
+ max_out_len=64,
443
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
444
+ prompt_template=dict(
445
+ template=dict(round=[
446
+ dict(
447
+ prompt=
448
+ 'Please complete the code given below. \n{context}{input}Next line of code:\n',
449
+ role='HUMAN'),
450
+ ]),
451
+ type=
452
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
453
+ retriever=dict(
454
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
455
+ name='repobench-p',
456
+ path='opencompass/Longbench',
457
+ reader_cfg=dict(
458
+ input_columns=[
459
+ 'context',
460
+ 'input',
461
+ ],
462
+ output_column='answers',
463
+ test_range='[0:63]',
464
+ test_split='test',
465
+ train_split='test'),
466
+ type='opencompass.datasets.LongBenchrepobenchDataset'),
467
+ dict(
468
+ abbr='LongBench_passage_retrieval_en_0',
469
+ eval_cfg=dict(
470
+ evaluator=dict(
471
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
472
+ pred_role='BOT'),
473
+ infer_cfg=dict(
474
+ inferencer=dict(
475
+ max_out_len=32,
476
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
477
+ prompt_template=dict(
478
+ template=dict(round=[
479
+ dict(
480
+ prompt=
481
+ 'Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like "Paragraph 1", "Paragraph 2", etc.\n\nThe answer is: ',
482
+ role='HUMAN'),
483
+ ]),
484
+ type=
485
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
486
+ retriever=dict(
487
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
488
+ name='passage_retrieval_en',
489
+ path='opencompass/Longbench',
490
+ reader_cfg=dict(
491
+ input_columns=[
492
+ 'context',
493
+ 'input',
494
+ ],
495
+ output_column='answers',
496
+ test_range='[0:25]',
497
+ test_split='test',
498
+ train_split='test'),
499
+ type='opencompass.datasets.LongBenchpassage_retrieval_enDataset'),
500
+ dict(
501
+ abbr='LongBench_passage_retrieval_zh_0',
502
+ eval_cfg=dict(
503
+ evaluator=dict(
504
+ language='zh',
505
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
506
+ pred_role='BOT'),
507
+ infer_cfg=dict(
508
+ inferencer=dict(
509
+ max_out_len=32,
510
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
511
+ prompt_template=dict(
512
+ template=dict(round=[
513
+ dict(
514
+ prompt=
515
+ '以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是"段落1","段落2"等格式\n\n答案是:',
516
+ role='HUMAN'),
517
+ ]),
518
+ type=
519
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
520
+ retriever=dict(
521
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
522
+ name='passage_retrieval_zh',
523
+ path='opencompass/Longbench',
524
+ reader_cfg=dict(
525
+ input_columns=[
526
+ 'context',
527
+ 'input',
528
+ ],
529
+ output_column='answers',
530
+ test_range='[0:25]',
531
+ test_split='test',
532
+ train_split='test'),
533
+ type='opencompass.datasets.LongBenchpassage_retrieval_zhDataset'),
534
+ dict(
535
+ abbr='LongBench_passage_count_0',
536
+ eval_cfg=dict(
537
+ evaluator=dict(
538
+ type='opencompass.datasets.LongBenchCountEvaluator'),
539
+ pred_role='BOT'),
540
+ infer_cfg=dict(
541
+ inferencer=dict(
542
+ max_out_len=32,
543
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
544
+ prompt_template=dict(
545
+ template=dict(round=[
546
+ dict(
547
+ prompt=
548
+ 'There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ',
549
+ role='HUMAN'),
550
+ ]),
551
+ type=
552
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
553
+ retriever=dict(
554
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
555
+ name='passage_count',
556
+ path='opencompass/Longbench',
557
+ reader_cfg=dict(
558
+ input_columns=[
559
+ 'context',
560
+ 'input',
561
+ ],
562
+ output_column='answers',
563
+ test_range='[0:25]',
564
+ test_split='test',
565
+ train_split='test'),
566
+ type='opencompass.datasets.LongBenchpassage_countDataset'),
567
+ dict(
568
+ abbr='LongBench_trec_0',
569
+ eval_cfg=dict(
570
+ evaluator=dict(
571
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
572
+ ),
573
+ pred_postprocessor=dict(
574
+ type='opencompass.datasets.trec_postprocess'),
575
+ pred_role='BOT'),
576
+ infer_cfg=dict(
577
+ inferencer=dict(
578
+ max_out_len=64,
579
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
580
+ prompt_template=dict(
581
+ template=dict(round=[
582
+ dict(
583
+ prompt=
584
+ 'Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}',
585
+ role='HUMAN'),
586
+ ]),
587
+ type=
588
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
589
+ retriever=dict(
590
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
591
+ name='trec',
592
+ path='opencompass/Longbench',
593
+ reader_cfg=dict(
594
+ input_columns=[
595
+ 'context',
596
+ 'input',
597
+ ],
598
+ output_column='all_labels',
599
+ test_range='[0:25]',
600
+ test_split='test',
601
+ train_split='test'),
602
+ type='opencompass.datasets.LongBenchtrecDataset'),
603
+ dict(
604
+ abbr='LongBench_lsht_0',
605
+ eval_cfg=dict(
606
+ evaluator=dict(
607
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
608
+ ),
609
+ pred_postprocessor=dict(
610
+ type='opencompass.datasets.lsht_postprocess'),
611
+ pred_role='BOT'),
612
+ infer_cfg=dict(
613
+ inferencer=dict(
614
+ max_out_len=64,
615
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
616
+ prompt_template=dict(
617
+ template=dict(round=[
618
+ dict(
619
+ prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
620
+ role='HUMAN'),
621
+ ]),
622
+ type=
623
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
624
+ retriever=dict(
625
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
626
+ name='lsht',
627
+ path='opencompass/Longbench',
628
+ reader_cfg=dict(
629
+ input_columns=[
630
+ 'context',
631
+ 'input',
632
+ ],
633
+ output_column='all_labels',
634
+ test_range='[0:25]',
635
+ test_split='test',
636
+ train_split='test'),
637
+ type='opencompass.datasets.LongBenchlshtDataset'),
638
+ dict(
639
+ abbr='LongBench_multi_news_0',
640
+ eval_cfg=dict(
641
+ evaluator=dict(
642
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
643
+ pred_role='BOT'),
644
+ infer_cfg=dict(
645
+ inferencer=dict(
646
+ max_out_len=512,
647
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
648
+ prompt_template=dict(
649
+ template=dict(round=[
650
+ dict(
651
+ prompt=
652
+ 'You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n',
653
+ role='HUMAN'),
654
+ ]),
655
+ type=
656
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
657
+ retriever=dict(
658
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
659
+ name='multi_news',
660
+ path='opencompass/Longbench',
661
+ reader_cfg=dict(
662
+ input_columns=[
663
+ 'context',
664
+ ],
665
+ output_column='answers',
666
+ test_range='[0:25]',
667
+ test_split='test',
668
+ train_split='test'),
669
+ type='opencompass.datasets.LongBenchmulti_newsDataset'),
670
+ dict(
671
+ abbr='LongBench_samsum_0',
672
+ eval_cfg=dict(
673
+ evaluator=dict(
674
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
675
+ pred_postprocessor=dict(
676
+ type='opencompass.datasets.samsum_postprocess'),
677
+ pred_role='BOT'),
678
+ infer_cfg=dict(
679
+ inferencer=dict(
680
+ max_out_len=128,
681
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
682
+ prompt_template=dict(
683
+ template=dict(round=[
684
+ dict(
685
+ prompt=
686
+ 'Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}',
687
+ role='HUMAN'),
688
+ ]),
689
+ type=
690
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
691
+ retriever=dict(
692
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
693
+ name='samsum',
694
+ path='opencompass/Longbench',
695
+ reader_cfg=dict(
696
+ input_columns=[
697
+ 'context',
698
+ 'input',
699
+ ],
700
+ output_column='answers',
701
+ test_range='[0:25]',
702
+ test_split='test',
703
+ train_split='test'),
704
+ type='opencompass.datasets.LongBenchsamsumDataset'),
705
+ dict(
706
+ abbr='LongBench_2wikimqa_0',
707
+ eval_cfg=dict(
708
+ evaluator=dict(
709
+ type='opencompass.datasets.LongBenchF1Evaluator'),
710
+ pred_role='BOT'),
711
+ infer_cfg=dict(
712
+ inferencer=dict(
713
+ max_out_len=32,
714
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
715
+ prompt_template=dict(
716
+ template=dict(round=[
717
+ dict(
718
+ prompt=
719
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
720
+ role='HUMAN'),
721
+ ]),
722
+ type=
723
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
724
+ retriever=dict(
725
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
726
+ name='2wikimqa',
727
+ path='opencompass/Longbench',
728
+ reader_cfg=dict(
729
+ input_columns=[
730
+ 'context',
731
+ 'input',
732
+ ],
733
+ output_column='answers',
734
+ test_range='[0:25]',
735
+ test_split='test',
736
+ train_split='test'),
737
+ type='opencompass.datasets.LongBench2wikimqaDataset'),
738
+ dict(
739
+ abbr='LongBench_hotpotqa_0',
740
+ eval_cfg=dict(
741
+ evaluator=dict(
742
+ type='opencompass.datasets.LongBenchF1Evaluator'),
743
+ pred_role='BOT'),
744
+ infer_cfg=dict(
745
+ inferencer=dict(
746
+ max_out_len=32,
747
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
748
+ prompt_template=dict(
749
+ template=dict(round=[
750
+ dict(
751
+ prompt=
752
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
753
+ role='HUMAN'),
754
+ ]),
755
+ type=
756
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
757
+ retriever=dict(
758
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
759
+ name='hotpotqa',
760
+ path='opencompass/Longbench',
761
+ reader_cfg=dict(
762
+ input_columns=[
763
+ 'context',
764
+ 'input',
765
+ ],
766
+ output_column='answers',
767
+ test_range='[0:25]',
768
+ test_split='test',
769
+ train_split='test'),
770
+ type='opencompass.datasets.LongBenchhotpotqaDataset'),
771
+ dict(
772
+ abbr='LongBench_musique_0',
773
+ eval_cfg=dict(
774
+ evaluator=dict(
775
+ type='opencompass.datasets.LongBenchF1Evaluator'),
776
+ pred_role='BOT'),
777
+ infer_cfg=dict(
778
+ inferencer=dict(
779
+ max_out_len=32,
780
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
781
+ prompt_template=dict(
782
+ template=dict(round=[
783
+ dict(
784
+ prompt=
785
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
786
+ role='HUMAN'),
787
+ ]),
788
+ type=
789
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
790
+ retriever=dict(
791
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
792
+ name='musique',
793
+ path='opencompass/Longbench',
794
+ reader_cfg=dict(
795
+ input_columns=[
796
+ 'context',
797
+ 'input',
798
+ ],
799
+ output_column='answers',
800
+ test_range='[0:25]',
801
+ test_split='test',
802
+ train_split='test'),
803
+ type='opencompass.datasets.LongBenchmusiqueDataset'),
804
+ dict(
805
+ abbr='LongBench_multifieldqa_en_0',
806
+ eval_cfg=dict(
807
+ evaluator=dict(
808
+ type='opencompass.datasets.LongBenchF1Evaluator'),
809
+ pred_role='BOT'),
810
+ infer_cfg=dict(
811
+ inferencer=dict(
812
+ max_out_len=64,
813
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
814
+ prompt_template=dict(
815
+ template=dict(round=[
816
+ dict(
817
+ prompt=
818
+ 'Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
819
+ role='HUMAN'),
820
+ ]),
821
+ type=
822
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
823
+ retriever=dict(
824
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
825
+ name='multifieldqa_en',
826
+ path='opencompass/Longbench',
827
+ reader_cfg=dict(
828
+ input_columns=[
829
+ 'context',
830
+ 'input',
831
+ ],
832
+ output_column='answers',
833
+ test_range='[0:19]',
834
+ test_split='test',
835
+ train_split='test'),
836
+ type='opencompass.datasets.LongBenchmultifieldqa_enDataset'),
837
+ dict(
838
+ abbr='LongBench_multifieldqa_zh_0',
839
+ eval_cfg=dict(
840
+ evaluator=dict(
841
+ language='zh',
842
+ type='opencompass.datasets.LongBenchF1Evaluator'),
843
+ pred_role='BOT'),
844
+ infer_cfg=dict(
845
+ inferencer=dict(
846
+ max_out_len=64,
847
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
848
+ prompt_template=dict(
849
+ template=dict(round=[
850
+ dict(
851
+ prompt=
852
+ '阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:',
853
+ role='HUMAN'),
854
+ ]),
855
+ type=
856
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
857
+ retriever=dict(
858
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
859
+ name='multifieldqa_zh',
860
+ path='opencompass/Longbench',
861
+ reader_cfg=dict(
862
+ input_columns=[
863
+ 'context',
864
+ 'input',
865
+ ],
866
+ output_column='answers',
867
+ test_range='[0:25]',
868
+ test_split='test',
869
+ train_split='test'),
870
+ type='opencompass.datasets.LongBenchmultifieldqa_zhDataset'),
871
+ dict(
872
+ abbr='LongBench_narrativeqa_0',
873
+ eval_cfg=dict(
874
+ evaluator=dict(
875
+ type='opencompass.datasets.LongBenchF1Evaluator'),
876
+ pred_role='BOT'),
877
+ infer_cfg=dict(
878
+ inferencer=dict(
879
+ max_out_len=128,
880
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
881
+ prompt_template=dict(
882
+ template=dict(round=[
883
+ dict(
884
+ prompt=
885
+ 'You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:',
886
+ role='HUMAN'),
887
+ ]),
888
+ type=
889
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
890
+ retriever=dict(
891
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
892
+ name='narrativeqa',
893
+ path='opencompass/Longbench',
894
+ reader_cfg=dict(
895
+ input_columns=[
896
+ 'context',
897
+ 'input',
898
+ ],
899
+ output_column='answers',
900
+ test_range='[0:25]',
901
+ test_split='test',
902
+ train_split='test'),
903
+ type='opencompass.datasets.LongBenchnarrativeqaDataset'),
904
+ dict(
905
+ abbr='LongBench_qasper_0',
906
+ eval_cfg=dict(
907
+ evaluator=dict(
908
+ type='opencompass.datasets.LongBenchF1Evaluator'),
909
+ pred_role='BOT'),
910
+ infer_cfg=dict(
911
+ inferencer=dict(
912
+ max_out_len=32,
913
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
914
+ prompt_template=dict(
915
+ template=dict(round=[
916
+ dict(
917
+ prompt=
918
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
919
+ role='HUMAN'),
920
+ ]),
921
+ type=
922
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
923
+ retriever=dict(
924
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
925
+ name='qasper',
926
+ path='opencompass/Longbench',
927
+ reader_cfg=dict(
928
+ input_columns=[
929
+ 'context',
930
+ 'input',
931
+ ],
932
+ output_column='answers',
933
+ test_range='[0:25]',
934
+ test_split='test',
935
+ train_split='test'),
936
+ type='opencompass.datasets.LongBenchqasperDataset'),
937
+ dict(
938
+ abbr='LongBench_triviaqa_0',
939
+ eval_cfg=dict(
940
+ evaluator=dict(
941
+ type='opencompass.datasets.LongBenchF1Evaluator'),
942
+ pred_postprocessor=dict(
943
+ type='opencompass.datasets.triviaqa_postprocess'),
944
+ pred_role='BOT'),
945
+ infer_cfg=dict(
946
+ inferencer=dict(
947
+ max_out_len=32,
948
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
949
+ prompt_template=dict(
950
+ template=dict(round=[
951
+ dict(
952
+ prompt=
953
+ 'Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}',
954
+ role='HUMAN'),
955
+ ]),
956
+ type=
957
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
958
+ retriever=dict(
959
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
960
+ name='triviaqa',
961
+ path='opencompass/Longbench',
962
+ reader_cfg=dict(
963
+ input_columns=[
964
+ 'context',
965
+ 'input',
966
+ ],
967
+ output_column='answers',
968
+ test_range='[0:25]',
969
+ test_split='test',
970
+ train_split='test'),
971
+ type='opencompass.datasets.LongBenchtriviaqaDataset'),
972
+ dict(
973
+ abbr='LongBench_gov_report_0',
974
+ eval_cfg=dict(
975
+ evaluator=dict(
976
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
977
+ pred_role='BOT'),
978
+ infer_cfg=dict(
979
+ inferencer=dict(
980
+ max_out_len=512,
981
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
982
+ prompt_template=dict(
983
+ template=dict(round=[
984
+ dict(
985
+ prompt=
986
+ 'You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
987
+ role='HUMAN'),
988
+ ]),
989
+ type=
990
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
991
+ retriever=dict(
992
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
993
+ name='gov_report',
994
+ path='opencompass/Longbench',
995
+ reader_cfg=dict(
996
+ input_columns=[
997
+ 'context',
998
+ ],
999
+ output_column='answers',
1000
+ test_range='[0:25]',
1001
+ test_split='test',
1002
+ train_split='test'),
1003
+ type='opencompass.datasets.LongBenchgov_reportDataset'),
1004
+ dict(
1005
+ abbr='LongBench_qmsum_0',
1006
+ eval_cfg=dict(
1007
+ evaluator=dict(
1008
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1009
+ pred_role='BOT'),
1010
+ infer_cfg=dict(
1011
+ inferencer=dict(
1012
+ max_out_len=512,
1013
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1014
+ prompt_template=dict(
1015
+ template=dict(round=[
1016
+ dict(
1017
+ prompt=
1018
+ 'You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:',
1019
+ role='HUMAN'),
1020
+ ]),
1021
+ type=
1022
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1023
+ retriever=dict(
1024
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1025
+ name='qmsum',
1026
+ path='opencompass/Longbench',
1027
+ reader_cfg=dict(
1028
+ input_columns=[
1029
+ 'context',
1030
+ 'input',
1031
+ ],
1032
+ output_column='answers',
1033
+ test_range='[0:25]',
1034
+ test_split='test',
1035
+ train_split='test'),
1036
+ type='opencompass.datasets.LongBenchqmsumDataset'),
1037
+ dict(
1038
+ abbr='LongBench_vcsum_0',
1039
+ eval_cfg=dict(
1040
+ evaluator=dict(
1041
+ language='zh',
1042
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1043
+ pred_role='BOT'),
1044
+ infer_cfg=dict(
1045
+ inferencer=dict(
1046
+ max_out_len=512,
1047
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1048
+ prompt_template=dict(
1049
+ template=dict(round=[
1050
+ dict(
1051
+ prompt=
1052
+ '下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:',
1053
+ role='HUMAN'),
1054
+ ]),
1055
+ type=
1056
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1057
+ retriever=dict(
1058
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1059
+ name='vcsum',
1060
+ path='opencompass/Longbench',
1061
+ reader_cfg=dict(
1062
+ input_columns=[
1063
+ 'context',
1064
+ ],
1065
+ output_column='answers',
1066
+ test_range='[0:25]',
1067
+ test_split='test',
1068
+ train_split='test'),
1069
+ type='opencompass.datasets.LongBenchvcsumDataset'),
1070
+ dict(
1071
+ abbr='LongBench_dureader_0',
1072
+ eval_cfg=dict(
1073
+ evaluator=dict(
1074
+ language='zh',
1075
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1076
+ pred_role='BOT'),
1077
+ infer_cfg=dict(
1078
+ inferencer=dict(
1079
+ max_out_len=128,
1080
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1081
+ prompt_template=dict(
1082
+ template=dict(round=[
1083
+ dict(
1084
+ prompt=
1085
+ '请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
1086
+ role='HUMAN'),
1087
+ ]),
1088
+ type=
1089
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1090
+ retriever=dict(
1091
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1092
+ name='dureader',
1093
+ path='opencompass/Longbench',
1094
+ reader_cfg=dict(
1095
+ input_columns=[
1096
+ 'context',
1097
+ 'input',
1098
+ ],
1099
+ output_column='answers',
1100
+ test_range='[0:25]',
1101
+ test_split='test',
1102
+ train_split='test'),
1103
+ type='opencompass.datasets.LongBenchdureaderDataset'),
1104
+ dict(
1105
+ abbr='LongBench_lcc_0',
1106
+ eval_cfg=dict(
1107
+ evaluator=dict(
1108
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
1109
+ pred_role='BOT'),
1110
+ infer_cfg=dict(
1111
+ inferencer=dict(
1112
+ max_out_len=64,
1113
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1114
+ prompt_template=dict(
1115
+ template=dict(round=[
1116
+ dict(
1117
+ prompt=
1118
+ 'Please complete the code given below. \n{context}Next line of code:\n',
1119
+ role='HUMAN'),
1120
+ ]),
1121
+ type=
1122
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1123
+ retriever=dict(
1124
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1125
+ name='lcc',
1126
+ path='opencompass/Longbench',
1127
+ reader_cfg=dict(
1128
+ input_columns=[
1129
+ 'context',
1130
+ ],
1131
+ output_column='answers',
1132
+ test_range='[0:63]',
1133
+ test_split='test',
1134
+ train_split='test'),
1135
+ type='opencompass.datasets.LongBenchlccDataset'),
1136
+ dict(
1137
+ abbr='LongBench_repobench-p_0',
1138
+ eval_cfg=dict(
1139
+ evaluator=dict(
1140
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
1141
+ pred_role='BOT'),
1142
+ infer_cfg=dict(
1143
+ inferencer=dict(
1144
+ max_out_len=64,
1145
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1146
+ prompt_template=dict(
1147
+ template=dict(round=[
1148
+ dict(
1149
+ prompt=
1150
+ 'Please complete the code given below. \n{context}{input}Next line of code:\n',
1151
+ role='HUMAN'),
1152
+ ]),
1153
+ type=
1154
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1155
+ retriever=dict(
1156
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1157
+ name='repobench-p',
1158
+ path='opencompass/Longbench',
1159
+ reader_cfg=dict(
1160
+ input_columns=[
1161
+ 'context',
1162
+ 'input',
1163
+ ],
1164
+ output_column='answers',
1165
+ test_range='[0:63]',
1166
+ test_split='test',
1167
+ train_split='test'),
1168
+ type='opencompass.datasets.LongBenchrepobenchDataset'),
1169
+ dict(
1170
+ abbr='LongBench_passage_retrieval_en_0',
1171
+ eval_cfg=dict(
1172
+ evaluator=dict(
1173
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
1174
+ pred_role='BOT'),
1175
+ infer_cfg=dict(
1176
+ inferencer=dict(
1177
+ max_out_len=32,
1178
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1179
+ prompt_template=dict(
1180
+ template=dict(round=[
1181
+ dict(
1182
+ prompt=
1183
+ 'Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like "Paragraph 1", "Paragraph 2", etc.\n\nThe answer is: ',
1184
+ role='HUMAN'),
1185
+ ]),
1186
+ type=
1187
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1188
+ retriever=dict(
1189
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1190
+ name='passage_retrieval_en',
1191
+ path='opencompass/Longbench',
1192
+ reader_cfg=dict(
1193
+ input_columns=[
1194
+ 'context',
1195
+ 'input',
1196
+ ],
1197
+ output_column='answers',
1198
+ test_range='[0:25]',
1199
+ test_split='test',
1200
+ train_split='test'),
1201
+ type='opencompass.datasets.LongBenchpassage_retrieval_enDataset'),
1202
+ dict(
1203
+ abbr='LongBench_passage_retrieval_zh_0',
1204
+ eval_cfg=dict(
1205
+ evaluator=dict(
1206
+ language='zh',
1207
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
1208
+ pred_role='BOT'),
1209
+ infer_cfg=dict(
1210
+ inferencer=dict(
1211
+ max_out_len=32,
1212
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1213
+ prompt_template=dict(
1214
+ template=dict(round=[
1215
+ dict(
1216
+ prompt=
1217
+ '以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是"段落1","段落2"等格式\n\n答案是:',
1218
+ role='HUMAN'),
1219
+ ]),
1220
+ type=
1221
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1222
+ retriever=dict(
1223
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1224
+ name='passage_retrieval_zh',
1225
+ path='opencompass/Longbench',
1226
+ reader_cfg=dict(
1227
+ input_columns=[
1228
+ 'context',
1229
+ 'input',
1230
+ ],
1231
+ output_column='answers',
1232
+ test_range='[0:25]',
1233
+ test_split='test',
1234
+ train_split='test'),
1235
+ type='opencompass.datasets.LongBenchpassage_retrieval_zhDataset'),
1236
+ dict(
1237
+ abbr='LongBench_passage_count_0',
1238
+ eval_cfg=dict(
1239
+ evaluator=dict(
1240
+ type='opencompass.datasets.LongBenchCountEvaluator'),
1241
+ pred_role='BOT'),
1242
+ infer_cfg=dict(
1243
+ inferencer=dict(
1244
+ max_out_len=32,
1245
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1246
+ prompt_template=dict(
1247
+ template=dict(round=[
1248
+ dict(
1249
+ prompt=
1250
+ 'There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ',
1251
+ role='HUMAN'),
1252
+ ]),
1253
+ type=
1254
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1255
+ retriever=dict(
1256
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1257
+ name='passage_count',
1258
+ path='opencompass/Longbench',
1259
+ reader_cfg=dict(
1260
+ input_columns=[
1261
+ 'context',
1262
+ 'input',
1263
+ ],
1264
+ output_column='answers',
1265
+ test_range='[0:25]',
1266
+ test_split='test',
1267
+ train_split='test'),
1268
+ type='opencompass.datasets.LongBenchpassage_countDataset'),
1269
+ dict(
1270
+ abbr='LongBench_trec_0',
1271
+ eval_cfg=dict(
1272
+ evaluator=dict(
1273
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
1274
+ ),
1275
+ pred_postprocessor=dict(
1276
+ type='opencompass.datasets.trec_postprocess'),
1277
+ pred_role='BOT'),
1278
+ infer_cfg=dict(
1279
+ inferencer=dict(
1280
+ max_out_len=64,
1281
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1282
+ prompt_template=dict(
1283
+ template=dict(round=[
1284
+ dict(
1285
+ prompt=
1286
+ 'Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}',
1287
+ role='HUMAN'),
1288
+ ]),
1289
+ type=
1290
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1291
+ retriever=dict(
1292
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1293
+ name='trec',
1294
+ path='opencompass/Longbench',
1295
+ reader_cfg=dict(
1296
+ input_columns=[
1297
+ 'context',
1298
+ 'input',
1299
+ ],
1300
+ output_column='all_labels',
1301
+ test_range='[0:25]',
1302
+ test_split='test',
1303
+ train_split='test'),
1304
+ type='opencompass.datasets.LongBenchtrecDataset'),
1305
+ dict(
1306
+ abbr='LongBench_lsht_0',
1307
+ eval_cfg=dict(
1308
+ evaluator=dict(
1309
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
1310
+ ),
1311
+ pred_postprocessor=dict(
1312
+ type='opencompass.datasets.lsht_postprocess'),
1313
+ pred_role='BOT'),
1314
+ infer_cfg=dict(
1315
+ inferencer=dict(
1316
+ max_out_len=64,
1317
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1318
+ prompt_template=dict(
1319
+ template=dict(round=[
1320
+ dict(
1321
+ prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
1322
+ role='HUMAN'),
1323
+ ]),
1324
+ type=
1325
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1326
+ retriever=dict(
1327
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1328
+ name='lsht',
1329
+ path='opencompass/Longbench',
1330
+ reader_cfg=dict(
1331
+ input_columns=[
1332
+ 'context',
1333
+ 'input',
1334
+ ],
1335
+ output_column='all_labels',
1336
+ test_range='[0:25]',
1337
+ test_split='test',
1338
+ train_split='test'),
1339
+ type='opencompass.datasets.LongBenchlshtDataset'),
1340
+ dict(
1341
+ abbr='LongBench_multi_news_0',
1342
+ eval_cfg=dict(
1343
+ evaluator=dict(
1344
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1345
+ pred_role='BOT'),
1346
+ infer_cfg=dict(
1347
+ inferencer=dict(
1348
+ max_out_len=512,
1349
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1350
+ prompt_template=dict(
1351
+ template=dict(round=[
1352
+ dict(
1353
+ prompt=
1354
+ 'You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n',
1355
+ role='HUMAN'),
1356
+ ]),
1357
+ type=
1358
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1359
+ retriever=dict(
1360
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1361
+ name='multi_news',
1362
+ path='opencompass/Longbench',
1363
+ reader_cfg=dict(
1364
+ input_columns=[
1365
+ 'context',
1366
+ ],
1367
+ output_column='answers',
1368
+ test_range='[0:25]',
1369
+ test_split='test',
1370
+ train_split='test'),
1371
+ type='opencompass.datasets.LongBenchmulti_newsDataset'),
1372
+ dict(
1373
+ abbr='LongBench_samsum_0',
1374
+ eval_cfg=dict(
1375
+ evaluator=dict(
1376
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1377
+ pred_postprocessor=dict(
1378
+ type='opencompass.datasets.samsum_postprocess'),
1379
+ pred_role='BOT'),
1380
+ infer_cfg=dict(
1381
+ inferencer=dict(
1382
+ max_out_len=128,
1383
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1384
+ prompt_template=dict(
1385
+ template=dict(round=[
1386
+ dict(
1387
+ prompt=
1388
+ 'Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}',
1389
+ role='HUMAN'),
1390
+ ]),
1391
+ type=
1392
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1393
+ retriever=dict(
1394
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1395
+ name='samsum',
1396
+ path='opencompass/Longbench',
1397
+ reader_cfg=dict(
1398
+ input_columns=[
1399
+ 'context',
1400
+ 'input',
1401
+ ],
1402
+ output_column='answers',
1403
+ test_range='[0:25]',
1404
+ test_split='test',
1405
+ train_split='test'),
1406
+ type='opencompass.datasets.LongBenchsamsumDataset'),
1407
+ ],
1408
+ ]
1409
+ models = [
1410
+ dict(
1411
+ abbr='delta_net',
1412
+ batch_size=128,
1413
+ max_seq_len=2048,
1414
+ model_kwargs=dict(
1415
+ device_map='auto',
1416
+ torch_dtype='torch.bfloat16',
1417
+ trust_remote_code=True),
1418
+ path='/mnt/jfzn/msj/delta_net-1.3B-100B',
1419
+ run_cfg=dict(num_gpus=1),
1420
+ tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
1421
+ tokenizer_path='/mnt/jfzn/msj/delta_net-1.3B-100B',
1422
+ type='opencompass.models.HuggingFaceBaseModel'),
1423
+ ]
1424
+ work_dir = 'outputs/default/20251127_202918'
tmp/0985e09b-75af-404f-ac0c-079c3aa085fb_params.py ADDED
File without changes
tmp/09d7374d-16f6-44e6-a2fa-f4925f8fb3fc_params.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets = [
2
+ [
3
+ dict(
4
+ abbr='LongBench_trec',
5
+ eval_cfg=dict(
6
+ evaluator=dict(
7
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
8
+ ),
9
+ pred_postprocessor=dict(
10
+ type='opencompass.datasets.trec_postprocess'),
11
+ pred_role='BOT'),
12
+ infer_cfg=dict(
13
+ inferencer=dict(
14
+ max_out_len=64,
15
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
16
+ prompt_template=dict(
17
+ template=dict(round=[
18
+ dict(
19
+ prompt=
20
+ 'Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}',
21
+ role='HUMAN'),
22
+ ]),
23
+ type=
24
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
25
+ retriever=dict(
26
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
27
+ name='trec',
28
+ path='opencompass/Longbench',
29
+ reader_cfg=dict(
30
+ input_columns=[
31
+ 'context',
32
+ 'input',
33
+ ],
34
+ output_column='all_labels',
35
+ test_split='test',
36
+ train_split='test'),
37
+ type='opencompass.datasets.LongBenchtrecDataset'),
38
+ ],
39
+ ]
40
+ eval = dict(runner=dict(task=dict(dump_details=True)))
41
+ models = [
42
+ dict(
43
+ abbr='gated_deltanet',
44
+ batch_size=128,
45
+ max_seq_len=2048,
46
+ model_kwargs=dict(
47
+ device_map='auto',
48
+ torch_dtype='torch.bfloat16',
49
+ trust_remote_code=True),
50
+ path='download_model/hgrn2-1.3B-100B',
51
+ run_cfg=dict(num_gpus=1),
52
+ tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
53
+ tokenizer_path='download_model/hgrn2-1.3B-100B',
54
+ type='opencompass.models.HuggingFaceBaseModel'),
55
+ ]
56
+ work_dir = 'outputs/default/20251219_163447'
tmp/0a5aa083-12c4-41a8-92db-57a728f50ed5_params.py ADDED
File without changes
tmp/0bd141af-ea86-420f-b26c-b2890fc57de2_params.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets = [
2
+ [
3
+ dict(
4
+ abbr='LongBench_trec',
5
+ eval_cfg=dict(
6
+ evaluator=dict(
7
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
8
+ ),
9
+ pred_postprocessor=dict(
10
+ type='opencompass.datasets.trec_postprocess'),
11
+ pred_role='BOT'),
12
+ infer_cfg=dict(
13
+ inferencer=dict(
14
+ max_out_len=64,
15
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
16
+ prompt_template=dict(
17
+ template=dict(round=[
18
+ dict(
19
+ prompt=
20
+ 'Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}',
21
+ role='HUMAN'),
22
+ ]),
23
+ type=
24
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
25
+ retriever=dict(
26
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
27
+ name='trec',
28
+ path='opencompass/Longbench',
29
+ reader_cfg=dict(
30
+ input_columns=[
31
+ 'context',
32
+ 'input',
33
+ ],
34
+ output_column='all_labels',
35
+ test_split='test',
36
+ train_split='test'),
37
+ type='opencompass.datasets.LongBenchtrecDataset'),
38
+ ],
39
+ ]
40
+ eval = dict(runner=dict(task=dict(dump_details=True)))
41
+ models = [
42
+ dict(
43
+ abbr='gated_deltanet',
44
+ batch_size=128,
45
+ max_seq_len=2048,
46
+ model_kwargs=dict(
47
+ device_map='auto',
48
+ torch_dtype='torch.bfloat16',
49
+ trust_remote_code=True),
50
+ path='download_model/hgrn2-1.3B-100B',
51
+ run_cfg=dict(num_gpus=1),
52
+ tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
53
+ tokenizer_path='download_model/hgrn2-1.3B-100B',
54
+ type='opencompass.models.HuggingFaceBaseModel'),
55
+ ]
56
+ work_dir = 'outputs/default/20251219_164057'
tmp/0c3d2c0a-49a1-40b1-b0b6-3d32b7381062_params.py ADDED
@@ -0,0 +1,1420 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets = [
2
+ [
3
+ dict(
4
+ abbr='LongBench_2wikimqa_6',
5
+ eval_cfg=dict(
6
+ evaluator=dict(
7
+ type='opencompass.datasets.LongBenchF1Evaluator'),
8
+ pred_role='BOT'),
9
+ infer_cfg=dict(
10
+ inferencer=dict(
11
+ max_out_len=32,
12
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
13
+ prompt_template=dict(
14
+ template=dict(round=[
15
+ dict(
16
+ prompt=
17
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
18
+ role='HUMAN'),
19
+ ]),
20
+ type=
21
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
22
+ retriever=dict(
23
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
24
+ name='2wikimqa',
25
+ path='opencompass/Longbench',
26
+ reader_cfg=dict(
27
+ input_columns=[
28
+ 'context',
29
+ 'input',
30
+ ],
31
+ output_column='answers',
32
+ test_range='[150:175]',
33
+ test_split='test',
34
+ train_split='test'),
35
+ type='opencompass.datasets.LongBench2wikimqaDataset'),
36
+ dict(
37
+ abbr='LongBench_hotpotqa_6',
38
+ eval_cfg=dict(
39
+ evaluator=dict(
40
+ type='opencompass.datasets.LongBenchF1Evaluator'),
41
+ pred_role='BOT'),
42
+ infer_cfg=dict(
43
+ inferencer=dict(
44
+ max_out_len=32,
45
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
46
+ prompt_template=dict(
47
+ template=dict(round=[
48
+ dict(
49
+ prompt=
50
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
51
+ role='HUMAN'),
52
+ ]),
53
+ type=
54
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
55
+ retriever=dict(
56
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
57
+ name='hotpotqa',
58
+ path='opencompass/Longbench',
59
+ reader_cfg=dict(
60
+ input_columns=[
61
+ 'context',
62
+ 'input',
63
+ ],
64
+ output_column='answers',
65
+ test_range='[150:175]',
66
+ test_split='test',
67
+ train_split='test'),
68
+ type='opencompass.datasets.LongBenchhotpotqaDataset'),
69
+ dict(
70
+ abbr='LongBench_musique_6',
71
+ eval_cfg=dict(
72
+ evaluator=dict(
73
+ type='opencompass.datasets.LongBenchF1Evaluator'),
74
+ pred_role='BOT'),
75
+ infer_cfg=dict(
76
+ inferencer=dict(
77
+ max_out_len=32,
78
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
79
+ prompt_template=dict(
80
+ template=dict(round=[
81
+ dict(
82
+ prompt=
83
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
84
+ role='HUMAN'),
85
+ ]),
86
+ type=
87
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
88
+ retriever=dict(
89
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
90
+ name='musique',
91
+ path='opencompass/Longbench',
92
+ reader_cfg=dict(
93
+ input_columns=[
94
+ 'context',
95
+ 'input',
96
+ ],
97
+ output_column='answers',
98
+ test_range='[150:175]',
99
+ test_split='test',
100
+ train_split='test'),
101
+ type='opencompass.datasets.LongBenchmusiqueDataset'),
102
+ dict(
103
+ abbr='LongBench_multifieldqa_en_6',
104
+ eval_cfg=dict(
105
+ evaluator=dict(
106
+ type='opencompass.datasets.LongBenchF1Evaluator'),
107
+ pred_role='BOT'),
108
+ infer_cfg=dict(
109
+ inferencer=dict(
110
+ max_out_len=64,
111
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
112
+ prompt_template=dict(
113
+ template=dict(round=[
114
+ dict(
115
+ prompt=
116
+ 'Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
117
+ role='HUMAN'),
118
+ ]),
119
+ type=
120
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
121
+ retriever=dict(
122
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
123
+ name='multifieldqa_en',
124
+ path='opencompass/Longbench',
125
+ reader_cfg=dict(
126
+ input_columns=[
127
+ 'context',
128
+ 'input',
129
+ ],
130
+ output_column='answers',
131
+ test_range='[114:133]',
132
+ test_split='test',
133
+ train_split='test'),
134
+ type='opencompass.datasets.LongBenchmultifieldqa_enDataset'),
135
+ dict(
136
+ abbr='LongBench_multifieldqa_zh_6',
137
+ eval_cfg=dict(
138
+ evaluator=dict(
139
+ language='zh',
140
+ type='opencompass.datasets.LongBenchF1Evaluator'),
141
+ pred_role='BOT'),
142
+ infer_cfg=dict(
143
+ inferencer=dict(
144
+ max_out_len=64,
145
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
146
+ prompt_template=dict(
147
+ template=dict(round=[
148
+ dict(
149
+ prompt=
150
+ '阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:',
151
+ role='HUMAN'),
152
+ ]),
153
+ type=
154
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
155
+ retriever=dict(
156
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
157
+ name='multifieldqa_zh',
158
+ path='opencompass/Longbench',
159
+ reader_cfg=dict(
160
+ input_columns=[
161
+ 'context',
162
+ 'input',
163
+ ],
164
+ output_column='answers',
165
+ test_range='[150:175]',
166
+ test_split='test',
167
+ train_split='test'),
168
+ type='opencompass.datasets.LongBenchmultifieldqa_zhDataset'),
169
+ dict(
170
+ abbr='LongBench_narrativeqa_6',
171
+ eval_cfg=dict(
172
+ evaluator=dict(
173
+ type='opencompass.datasets.LongBenchF1Evaluator'),
174
+ pred_role='BOT'),
175
+ infer_cfg=dict(
176
+ inferencer=dict(
177
+ max_out_len=128,
178
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
179
+ prompt_template=dict(
180
+ template=dict(round=[
181
+ dict(
182
+ prompt=
183
+ 'You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:',
184
+ role='HUMAN'),
185
+ ]),
186
+ type=
187
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
188
+ retriever=dict(
189
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
190
+ name='narrativeqa',
191
+ path='opencompass/Longbench',
192
+ reader_cfg=dict(
193
+ input_columns=[
194
+ 'context',
195
+ 'input',
196
+ ],
197
+ output_column='answers',
198
+ test_range='[150:175]',
199
+ test_split='test',
200
+ train_split='test'),
201
+ type='opencompass.datasets.LongBenchnarrativeqaDataset'),
202
+ dict(
203
+ abbr='LongBench_qasper_6',
204
+ eval_cfg=dict(
205
+ evaluator=dict(
206
+ type='opencompass.datasets.LongBenchF1Evaluator'),
207
+ pred_role='BOT'),
208
+ infer_cfg=dict(
209
+ inferencer=dict(
210
+ max_out_len=32,
211
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
212
+ prompt_template=dict(
213
+ template=dict(round=[
214
+ dict(
215
+ prompt=
216
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
217
+ role='HUMAN'),
218
+ ]),
219
+ type=
220
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
221
+ retriever=dict(
222
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
223
+ name='qasper',
224
+ path='opencompass/Longbench',
225
+ reader_cfg=dict(
226
+ input_columns=[
227
+ 'context',
228
+ 'input',
229
+ ],
230
+ output_column='answers',
231
+ test_range='[150:175]',
232
+ test_split='test',
233
+ train_split='test'),
234
+ type='opencompass.datasets.LongBenchqasperDataset'),
235
+ dict(
236
+ abbr='LongBench_triviaqa_6',
237
+ eval_cfg=dict(
238
+ evaluator=dict(
239
+ type='opencompass.datasets.LongBenchF1Evaluator'),
240
+ pred_postprocessor=dict(
241
+ type='opencompass.datasets.triviaqa_postprocess'),
242
+ pred_role='BOT'),
243
+ infer_cfg=dict(
244
+ inferencer=dict(
245
+ max_out_len=32,
246
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
247
+ prompt_template=dict(
248
+ template=dict(round=[
249
+ dict(
250
+ prompt=
251
+ 'Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}',
252
+ role='HUMAN'),
253
+ ]),
254
+ type=
255
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
256
+ retriever=dict(
257
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
258
+ name='triviaqa',
259
+ path='opencompass/Longbench',
260
+ reader_cfg=dict(
261
+ input_columns=[
262
+ 'context',
263
+ 'input',
264
+ ],
265
+ output_column='answers',
266
+ test_range='[150:175]',
267
+ test_split='test',
268
+ train_split='test'),
269
+ type='opencompass.datasets.LongBenchtriviaqaDataset'),
270
+ dict(
271
+ abbr='LongBench_gov_report_6',
272
+ eval_cfg=dict(
273
+ evaluator=dict(
274
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
275
+ pred_role='BOT'),
276
+ infer_cfg=dict(
277
+ inferencer=dict(
278
+ max_out_len=512,
279
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
280
+ prompt_template=dict(
281
+ template=dict(round=[
282
+ dict(
283
+ prompt=
284
+ 'You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
285
+ role='HUMAN'),
286
+ ]),
287
+ type=
288
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
289
+ retriever=dict(
290
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
291
+ name='gov_report',
292
+ path='opencompass/Longbench',
293
+ reader_cfg=dict(
294
+ input_columns=[
295
+ 'context',
296
+ ],
297
+ output_column='answers',
298
+ test_range='[150:175]',
299
+ test_split='test',
300
+ train_split='test'),
301
+ type='opencompass.datasets.LongBenchgov_reportDataset'),
302
+ dict(
303
+ abbr='LongBench_qmsum_6',
304
+ eval_cfg=dict(
305
+ evaluator=dict(
306
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
307
+ pred_role='BOT'),
308
+ infer_cfg=dict(
309
+ inferencer=dict(
310
+ max_out_len=512,
311
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
312
+ prompt_template=dict(
313
+ template=dict(round=[
314
+ dict(
315
+ prompt=
316
+ 'You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:',
317
+ role='HUMAN'),
318
+ ]),
319
+ type=
320
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
321
+ retriever=dict(
322
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
323
+ name='qmsum',
324
+ path='opencompass/Longbench',
325
+ reader_cfg=dict(
326
+ input_columns=[
327
+ 'context',
328
+ 'input',
329
+ ],
330
+ output_column='answers',
331
+ test_range='[150:175]',
332
+ test_split='test',
333
+ train_split='test'),
334
+ type='opencompass.datasets.LongBenchqmsumDataset'),
335
+ dict(
336
+ abbr='LongBench_vcsum_6',
337
+ eval_cfg=dict(
338
+ evaluator=dict(
339
+ language='zh',
340
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
341
+ pred_role='BOT'),
342
+ infer_cfg=dict(
343
+ inferencer=dict(
344
+ max_out_len=512,
345
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
346
+ prompt_template=dict(
347
+ template=dict(round=[
348
+ dict(
349
+ prompt=
350
+ '下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:',
351
+ role='HUMAN'),
352
+ ]),
353
+ type=
354
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
355
+ retriever=dict(
356
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
357
+ name='vcsum',
358
+ path='opencompass/Longbench',
359
+ reader_cfg=dict(
360
+ input_columns=[
361
+ 'context',
362
+ ],
363
+ output_column='answers',
364
+ test_range='[150:175]',
365
+ test_split='test',
366
+ train_split='test'),
367
+ type='opencompass.datasets.LongBenchvcsumDataset'),
368
+ dict(
369
+ abbr='LongBench_dureader_6',
370
+ eval_cfg=dict(
371
+ evaluator=dict(
372
+ language='zh',
373
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
374
+ pred_role='BOT'),
375
+ infer_cfg=dict(
376
+ inferencer=dict(
377
+ max_out_len=128,
378
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
379
+ prompt_template=dict(
380
+ template=dict(round=[
381
+ dict(
382
+ prompt=
383
+ '请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
384
+ role='HUMAN'),
385
+ ]),
386
+ type=
387
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
388
+ retriever=dict(
389
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
390
+ name='dureader',
391
+ path='opencompass/Longbench',
392
+ reader_cfg=dict(
393
+ input_columns=[
394
+ 'context',
395
+ 'input',
396
+ ],
397
+ output_column='answers',
398
+ test_range='[150:175]',
399
+ test_split='test',
400
+ train_split='test'),
401
+ type='opencompass.datasets.LongBenchdureaderDataset'),
402
+ dict(
403
+ abbr='LongBench_lcc_6',
404
+ eval_cfg=dict(
405
+ evaluator=dict(
406
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
407
+ pred_role='BOT'),
408
+ infer_cfg=dict(
409
+ inferencer=dict(
410
+ max_out_len=64,
411
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
412
+ prompt_template=dict(
413
+ template=dict(round=[
414
+ dict(
415
+ prompt=
416
+ 'Please complete the code given below. \n{context}Next line of code:\n',
417
+ role='HUMAN'),
418
+ ]),
419
+ type=
420
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
421
+ retriever=dict(
422
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
423
+ name='lcc',
424
+ path='opencompass/Longbench',
425
+ reader_cfg=dict(
426
+ input_columns=[
427
+ 'context',
428
+ ],
429
+ output_column='answers',
430
+ test_range='[378:441]',
431
+ test_split='test',
432
+ train_split='test'),
433
+ type='opencompass.datasets.LongBenchlccDataset'),
434
+ dict(
435
+ abbr='LongBench_repobench-p_6',
436
+ eval_cfg=dict(
437
+ evaluator=dict(
438
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
439
+ pred_role='BOT'),
440
+ infer_cfg=dict(
441
+ inferencer=dict(
442
+ max_out_len=64,
443
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
444
+ prompt_template=dict(
445
+ template=dict(round=[
446
+ dict(
447
+ prompt=
448
+ 'Please complete the code given below. \n{context}{input}Next line of code:\n',
449
+ role='HUMAN'),
450
+ ]),
451
+ type=
452
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
453
+ retriever=dict(
454
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
455
+ name='repobench-p',
456
+ path='opencompass/Longbench',
457
+ reader_cfg=dict(
458
+ input_columns=[
459
+ 'context',
460
+ 'input',
461
+ ],
462
+ output_column='answers',
463
+ test_range='[378:441]',
464
+ test_split='test',
465
+ train_split='test'),
466
+ type='opencompass.datasets.LongBenchrepobenchDataset'),
467
+ dict(
468
+ abbr='LongBench_passage_retrieval_en_6',
469
+ eval_cfg=dict(
470
+ evaluator=dict(
471
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
472
+ pred_role='BOT'),
473
+ infer_cfg=dict(
474
+ inferencer=dict(
475
+ max_out_len=32,
476
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
477
+ prompt_template=dict(
478
+ template=dict(round=[
479
+ dict(
480
+ prompt=
481
+ 'Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like "Paragraph 1", "Paragraph 2", etc.\n\nThe answer is: ',
482
+ role='HUMAN'),
483
+ ]),
484
+ type=
485
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
486
+ retriever=dict(
487
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
488
+ name='passage_retrieval_en',
489
+ path='opencompass/Longbench',
490
+ reader_cfg=dict(
491
+ input_columns=[
492
+ 'context',
493
+ 'input',
494
+ ],
495
+ output_column='answers',
496
+ test_range='[150:175]',
497
+ test_split='test',
498
+ train_split='test'),
499
+ type='opencompass.datasets.LongBenchpassage_retrieval_enDataset'),
500
+ dict(
501
+ abbr='LongBench_passage_retrieval_zh_6',
502
+ eval_cfg=dict(
503
+ evaluator=dict(
504
+ language='zh',
505
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
506
+ pred_role='BOT'),
507
+ infer_cfg=dict(
508
+ inferencer=dict(
509
+ max_out_len=32,
510
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
511
+ prompt_template=dict(
512
+ template=dict(round=[
513
+ dict(
514
+ prompt=
515
+ '以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是"段落1","段落2"等格式\n\n答案是:',
516
+ role='HUMAN'),
517
+ ]),
518
+ type=
519
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
520
+ retriever=dict(
521
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
522
+ name='passage_retrieval_zh',
523
+ path='opencompass/Longbench',
524
+ reader_cfg=dict(
525
+ input_columns=[
526
+ 'context',
527
+ 'input',
528
+ ],
529
+ output_column='answers',
530
+ test_range='[150:175]',
531
+ test_split='test',
532
+ train_split='test'),
533
+ type='opencompass.datasets.LongBenchpassage_retrieval_zhDataset'),
534
+ dict(
535
+ abbr='LongBench_passage_count_6',
536
+ eval_cfg=dict(
537
+ evaluator=dict(
538
+ type='opencompass.datasets.LongBenchCountEvaluator'),
539
+ pred_role='BOT'),
540
+ infer_cfg=dict(
541
+ inferencer=dict(
542
+ max_out_len=32,
543
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
544
+ prompt_template=dict(
545
+ template=dict(round=[
546
+ dict(
547
+ prompt=
548
+ 'There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ',
549
+ role='HUMAN'),
550
+ ]),
551
+ type=
552
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
553
+ retriever=dict(
554
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
555
+ name='passage_count',
556
+ path='opencompass/Longbench',
557
+ reader_cfg=dict(
558
+ input_columns=[
559
+ 'context',
560
+ 'input',
561
+ ],
562
+ output_column='answers',
563
+ test_range='[150:175]',
564
+ test_split='test',
565
+ train_split='test'),
566
+ type='opencompass.datasets.LongBenchpassage_countDataset'),
567
+ dict(
568
+ abbr='LongBench_trec_6',
569
+ eval_cfg=dict(
570
+ evaluator=dict(
571
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
572
+ ),
573
+ pred_postprocessor=dict(
574
+ type='opencompass.datasets.trec_postprocess'),
575
+ pred_role='BOT'),
576
+ infer_cfg=dict(
577
+ inferencer=dict(
578
+ max_out_len=64,
579
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
580
+ prompt_template=dict(
581
+ template=dict(round=[
582
+ dict(
583
+ prompt=
584
+ 'Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}',
585
+ role='HUMAN'),
586
+ ]),
587
+ type=
588
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
589
+ retriever=dict(
590
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
591
+ name='trec',
592
+ path='opencompass/Longbench',
593
+ reader_cfg=dict(
594
+ input_columns=[
595
+ 'context',
596
+ 'input',
597
+ ],
598
+ output_column='all_labels',
599
+ test_range='[150:175]',
600
+ test_split='test',
601
+ train_split='test'),
602
+ type='opencompass.datasets.LongBenchtrecDataset'),
603
+ dict(
604
+ abbr='LongBench_lsht_6',
605
+ eval_cfg=dict(
606
+ evaluator=dict(
607
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
608
+ ),
609
+ pred_postprocessor=dict(
610
+ type='opencompass.datasets.lsht_postprocess'),
611
+ pred_role='BOT'),
612
+ infer_cfg=dict(
613
+ inferencer=dict(
614
+ max_out_len=64,
615
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
616
+ prompt_template=dict(
617
+ template=dict(round=[
618
+ dict(
619
+ prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
620
+ role='HUMAN'),
621
+ ]),
622
+ type=
623
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
624
+ retriever=dict(
625
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
626
+ name='lsht',
627
+ path='opencompass/Longbench',
628
+ reader_cfg=dict(
629
+ input_columns=[
630
+ 'context',
631
+ 'input',
632
+ ],
633
+ output_column='all_labels',
634
+ test_range='[150:175]',
635
+ test_split='test',
636
+ train_split='test'),
637
+ type='opencompass.datasets.LongBenchlshtDataset'),
638
+ dict(
639
+ abbr='LongBench_multi_news_6',
640
+ eval_cfg=dict(
641
+ evaluator=dict(
642
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
643
+ pred_role='BOT'),
644
+ infer_cfg=dict(
645
+ inferencer=dict(
646
+ max_out_len=512,
647
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
648
+ prompt_template=dict(
649
+ template=dict(round=[
650
+ dict(
651
+ prompt=
652
+ 'You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n',
653
+ role='HUMAN'),
654
+ ]),
655
+ type=
656
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
657
+ retriever=dict(
658
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
659
+ name='multi_news',
660
+ path='opencompass/Longbench',
661
+ reader_cfg=dict(
662
+ input_columns=[
663
+ 'context',
664
+ ],
665
+ output_column='answers',
666
+ test_range='[150:175]',
667
+ test_split='test',
668
+ train_split='test'),
669
+ type='opencompass.datasets.LongBenchmulti_newsDataset'),
670
+ dict(
671
+ abbr='LongBench_samsum_6',
672
+ eval_cfg=dict(
673
+ evaluator=dict(
674
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
675
+ pred_postprocessor=dict(
676
+ type='opencompass.datasets.samsum_postprocess'),
677
+ pred_role='BOT'),
678
+ infer_cfg=dict(
679
+ inferencer=dict(
680
+ max_out_len=128,
681
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
682
+ prompt_template=dict(
683
+ template=dict(round=[
684
+ dict(
685
+ prompt=
686
+ 'Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}',
687
+ role='HUMAN'),
688
+ ]),
689
+ type=
690
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
691
+ retriever=dict(
692
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
693
+ name='samsum',
694
+ path='opencompass/Longbench',
695
+ reader_cfg=dict(
696
+ input_columns=[
697
+ 'context',
698
+ 'input',
699
+ ],
700
+ output_column='answers',
701
+ test_range='[150:175]',
702
+ test_split='test',
703
+ train_split='test'),
704
+ type='opencompass.datasets.LongBenchsamsumDataset'),
705
+ dict(
706
+ abbr='LongBench_2wikimqa_6',
707
+ eval_cfg=dict(
708
+ evaluator=dict(
709
+ type='opencompass.datasets.LongBenchF1Evaluator'),
710
+ pred_role='BOT'),
711
+ infer_cfg=dict(
712
+ inferencer=dict(
713
+ max_out_len=32,
714
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
715
+ prompt_template=dict(
716
+ template=dict(round=[
717
+ dict(
718
+ prompt=
719
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
720
+ role='HUMAN'),
721
+ ]),
722
+ type=
723
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
724
+ retriever=dict(
725
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
726
+ name='2wikimqa',
727
+ path='opencompass/Longbench',
728
+ reader_cfg=dict(
729
+ input_columns=[
730
+ 'context',
731
+ 'input',
732
+ ],
733
+ output_column='answers',
734
+ test_range='[150:175]',
735
+ test_split='test',
736
+ train_split='test'),
737
+ type='opencompass.datasets.LongBench2wikimqaDataset'),
738
+ dict(
739
+ abbr='LongBench_hotpotqa_6',
740
+ eval_cfg=dict(
741
+ evaluator=dict(
742
+ type='opencompass.datasets.LongBenchF1Evaluator'),
743
+ pred_role='BOT'),
744
+ infer_cfg=dict(
745
+ inferencer=dict(
746
+ max_out_len=32,
747
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
748
+ prompt_template=dict(
749
+ template=dict(round=[
750
+ dict(
751
+ prompt=
752
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
753
+ role='HUMAN'),
754
+ ]),
755
+ type=
756
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
757
+ retriever=dict(
758
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
759
+ name='hotpotqa',
760
+ path='opencompass/Longbench',
761
+ reader_cfg=dict(
762
+ input_columns=[
763
+ 'context',
764
+ 'input',
765
+ ],
766
+ output_column='answers',
767
+ test_range='[150:175]',
768
+ test_split='test',
769
+ train_split='test'),
770
+ type='opencompass.datasets.LongBenchhotpotqaDataset'),
771
+ dict(
772
+ abbr='LongBench_musique_6',
773
+ eval_cfg=dict(
774
+ evaluator=dict(
775
+ type='opencompass.datasets.LongBenchF1Evaluator'),
776
+ pred_role='BOT'),
777
+ infer_cfg=dict(
778
+ inferencer=dict(
779
+ max_out_len=32,
780
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
781
+ prompt_template=dict(
782
+ template=dict(round=[
783
+ dict(
784
+ prompt=
785
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
786
+ role='HUMAN'),
787
+ ]),
788
+ type=
789
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
790
+ retriever=dict(
791
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
792
+ name='musique',
793
+ path='opencompass/Longbench',
794
+ reader_cfg=dict(
795
+ input_columns=[
796
+ 'context',
797
+ 'input',
798
+ ],
799
+ output_column='answers',
800
+ test_range='[150:175]',
801
+ test_split='test',
802
+ train_split='test'),
803
+ type='opencompass.datasets.LongBenchmusiqueDataset'),
804
+ dict(
805
+ abbr='LongBench_multifieldqa_en_6',
806
+ eval_cfg=dict(
807
+ evaluator=dict(
808
+ type='opencompass.datasets.LongBenchF1Evaluator'),
809
+ pred_role='BOT'),
810
+ infer_cfg=dict(
811
+ inferencer=dict(
812
+ max_out_len=64,
813
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
814
+ prompt_template=dict(
815
+ template=dict(round=[
816
+ dict(
817
+ prompt=
818
+ 'Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
819
+ role='HUMAN'),
820
+ ]),
821
+ type=
822
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
823
+ retriever=dict(
824
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
825
+ name='multifieldqa_en',
826
+ path='opencompass/Longbench',
827
+ reader_cfg=dict(
828
+ input_columns=[
829
+ 'context',
830
+ 'input',
831
+ ],
832
+ output_column='answers',
833
+ test_range='[114:133]',
834
+ test_split='test',
835
+ train_split='test'),
836
+ type='opencompass.datasets.LongBenchmultifieldqa_enDataset'),
837
+ dict(
838
+ abbr='LongBench_multifieldqa_zh_6',
839
+ eval_cfg=dict(
840
+ evaluator=dict(
841
+ language='zh',
842
+ type='opencompass.datasets.LongBenchF1Evaluator'),
843
+ pred_role='BOT'),
844
+ infer_cfg=dict(
845
+ inferencer=dict(
846
+ max_out_len=64,
847
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
848
+ prompt_template=dict(
849
+ template=dict(round=[
850
+ dict(
851
+ prompt=
852
+ '阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:',
853
+ role='HUMAN'),
854
+ ]),
855
+ type=
856
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
857
+ retriever=dict(
858
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
859
+ name='multifieldqa_zh',
860
+ path='opencompass/Longbench',
861
+ reader_cfg=dict(
862
+ input_columns=[
863
+ 'context',
864
+ 'input',
865
+ ],
866
+ output_column='answers',
867
+ test_range='[150:175]',
868
+ test_split='test',
869
+ train_split='test'),
870
+ type='opencompass.datasets.LongBenchmultifieldqa_zhDataset'),
871
+ dict(
872
+ abbr='LongBench_narrativeqa_6',
873
+ eval_cfg=dict(
874
+ evaluator=dict(
875
+ type='opencompass.datasets.LongBenchF1Evaluator'),
876
+ pred_role='BOT'),
877
+ infer_cfg=dict(
878
+ inferencer=dict(
879
+ max_out_len=128,
880
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
881
+ prompt_template=dict(
882
+ template=dict(round=[
883
+ dict(
884
+ prompt=
885
+ 'You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:',
886
+ role='HUMAN'),
887
+ ]),
888
+ type=
889
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
890
+ retriever=dict(
891
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
892
+ name='narrativeqa',
893
+ path='opencompass/Longbench',
894
+ reader_cfg=dict(
895
+ input_columns=[
896
+ 'context',
897
+ 'input',
898
+ ],
899
+ output_column='answers',
900
+ test_range='[150:175]',
901
+ test_split='test',
902
+ train_split='test'),
903
+ type='opencompass.datasets.LongBenchnarrativeqaDataset'),
904
+ dict(
905
+ abbr='LongBench_qasper_6',
906
+ eval_cfg=dict(
907
+ evaluator=dict(
908
+ type='opencompass.datasets.LongBenchF1Evaluator'),
909
+ pred_role='BOT'),
910
+ infer_cfg=dict(
911
+ inferencer=dict(
912
+ max_out_len=32,
913
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
914
+ prompt_template=dict(
915
+ template=dict(round=[
916
+ dict(
917
+ prompt=
918
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
919
+ role='HUMAN'),
920
+ ]),
921
+ type=
922
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
923
+ retriever=dict(
924
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
925
+ name='qasper',
926
+ path='opencompass/Longbench',
927
+ reader_cfg=dict(
928
+ input_columns=[
929
+ 'context',
930
+ 'input',
931
+ ],
932
+ output_column='answers',
933
+ test_range='[150:175]',
934
+ test_split='test',
935
+ train_split='test'),
936
+ type='opencompass.datasets.LongBenchqasperDataset'),
937
+ dict(
938
+ abbr='LongBench_triviaqa_6',
939
+ eval_cfg=dict(
940
+ evaluator=dict(
941
+ type='opencompass.datasets.LongBenchF1Evaluator'),
942
+ pred_postprocessor=dict(
943
+ type='opencompass.datasets.triviaqa_postprocess'),
944
+ pred_role='BOT'),
945
+ infer_cfg=dict(
946
+ inferencer=dict(
947
+ max_out_len=32,
948
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
949
+ prompt_template=dict(
950
+ template=dict(round=[
951
+ dict(
952
+ prompt=
953
+ 'Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}',
954
+ role='HUMAN'),
955
+ ]),
956
+ type=
957
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
958
+ retriever=dict(
959
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
960
+ name='triviaqa',
961
+ path='opencompass/Longbench',
962
+ reader_cfg=dict(
963
+ input_columns=[
964
+ 'context',
965
+ 'input',
966
+ ],
967
+ output_column='answers',
968
+ test_range='[150:175]',
969
+ test_split='test',
970
+ train_split='test'),
971
+ type='opencompass.datasets.LongBenchtriviaqaDataset'),
972
+ dict(
973
+ abbr='LongBench_gov_report_6',
974
+ eval_cfg=dict(
975
+ evaluator=dict(
976
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
977
+ pred_role='BOT'),
978
+ infer_cfg=dict(
979
+ inferencer=dict(
980
+ max_out_len=512,
981
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
982
+ prompt_template=dict(
983
+ template=dict(round=[
984
+ dict(
985
+ prompt=
986
+ 'You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
987
+ role='HUMAN'),
988
+ ]),
989
+ type=
990
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
991
+ retriever=dict(
992
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
993
+ name='gov_report',
994
+ path='opencompass/Longbench',
995
+ reader_cfg=dict(
996
+ input_columns=[
997
+ 'context',
998
+ ],
999
+ output_column='answers',
1000
+ test_range='[150:175]',
1001
+ test_split='test',
1002
+ train_split='test'),
1003
+ type='opencompass.datasets.LongBenchgov_reportDataset'),
1004
+ dict(
1005
+ abbr='LongBench_qmsum_6',
1006
+ eval_cfg=dict(
1007
+ evaluator=dict(
1008
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1009
+ pred_role='BOT'),
1010
+ infer_cfg=dict(
1011
+ inferencer=dict(
1012
+ max_out_len=512,
1013
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1014
+ prompt_template=dict(
1015
+ template=dict(round=[
1016
+ dict(
1017
+ prompt=
1018
+ 'You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:',
1019
+ role='HUMAN'),
1020
+ ]),
1021
+ type=
1022
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1023
+ retriever=dict(
1024
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1025
+ name='qmsum',
1026
+ path='opencompass/Longbench',
1027
+ reader_cfg=dict(
1028
+ input_columns=[
1029
+ 'context',
1030
+ 'input',
1031
+ ],
1032
+ output_column='answers',
1033
+ test_range='[150:175]',
1034
+ test_split='test',
1035
+ train_split='test'),
1036
+ type='opencompass.datasets.LongBenchqmsumDataset'),
1037
+ dict(
1038
+ abbr='LongBench_vcsum_6',
1039
+ eval_cfg=dict(
1040
+ evaluator=dict(
1041
+ language='zh',
1042
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1043
+ pred_role='BOT'),
1044
+ infer_cfg=dict(
1045
+ inferencer=dict(
1046
+ max_out_len=512,
1047
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1048
+ prompt_template=dict(
1049
+ template=dict(round=[
1050
+ dict(
1051
+ prompt=
1052
+ '下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:',
1053
+ role='HUMAN'),
1054
+ ]),
1055
+ type=
1056
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1057
+ retriever=dict(
1058
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1059
+ name='vcsum',
1060
+ path='opencompass/Longbench',
1061
+ reader_cfg=dict(
1062
+ input_columns=[
1063
+ 'context',
1064
+ ],
1065
+ output_column='answers',
1066
+ test_range='[150:175]',
1067
+ test_split='test',
1068
+ train_split='test'),
1069
+ type='opencompass.datasets.LongBenchvcsumDataset'),
1070
+ dict(
1071
+ abbr='LongBench_dureader_6',
1072
+ eval_cfg=dict(
1073
+ evaluator=dict(
1074
+ language='zh',
1075
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1076
+ pred_role='BOT'),
1077
+ infer_cfg=dict(
1078
+ inferencer=dict(
1079
+ max_out_len=128,
1080
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1081
+ prompt_template=dict(
1082
+ template=dict(round=[
1083
+ dict(
1084
+ prompt=
1085
+ '请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
1086
+ role='HUMAN'),
1087
+ ]),
1088
+ type=
1089
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1090
+ retriever=dict(
1091
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1092
+ name='dureader',
1093
+ path='opencompass/Longbench',
1094
+ reader_cfg=dict(
1095
+ input_columns=[
1096
+ 'context',
1097
+ 'input',
1098
+ ],
1099
+ output_column='answers',
1100
+ test_range='[150:175]',
1101
+ test_split='test',
1102
+ train_split='test'),
1103
+ type='opencompass.datasets.LongBenchdureaderDataset'),
1104
+ dict(
1105
+ abbr='LongBench_lcc_6',
1106
+ eval_cfg=dict(
1107
+ evaluator=dict(
1108
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
1109
+ pred_role='BOT'),
1110
+ infer_cfg=dict(
1111
+ inferencer=dict(
1112
+ max_out_len=64,
1113
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1114
+ prompt_template=dict(
1115
+ template=dict(round=[
1116
+ dict(
1117
+ prompt=
1118
+ 'Please complete the code given below. \n{context}Next line of code:\n',
1119
+ role='HUMAN'),
1120
+ ]),
1121
+ type=
1122
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1123
+ retriever=dict(
1124
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1125
+ name='lcc',
1126
+ path='opencompass/Longbench',
1127
+ reader_cfg=dict(
1128
+ input_columns=[
1129
+ 'context',
1130
+ ],
1131
+ output_column='answers',
1132
+ test_range='[378:441]',
1133
+ test_split='test',
1134
+ train_split='test'),
1135
+ type='opencompass.datasets.LongBenchlccDataset'),
1136
+ dict(
1137
+ abbr='LongBench_repobench-p_6',
1138
+ eval_cfg=dict(
1139
+ evaluator=dict(
1140
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
1141
+ pred_role='BOT'),
1142
+ infer_cfg=dict(
1143
+ inferencer=dict(
1144
+ max_out_len=64,
1145
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1146
+ prompt_template=dict(
1147
+ template=dict(round=[
1148
+ dict(
1149
+ prompt=
1150
+ 'Please complete the code given below. \n{context}{input}Next line of code:\n',
1151
+ role='HUMAN'),
1152
+ ]),
1153
+ type=
1154
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1155
+ retriever=dict(
1156
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1157
+ name='repobench-p',
1158
+ path='opencompass/Longbench',
1159
+ reader_cfg=dict(
1160
+ input_columns=[
1161
+ 'context',
1162
+ 'input',
1163
+ ],
1164
+ output_column='answers',
1165
+ test_range='[378:441]',
1166
+ test_split='test',
1167
+ train_split='test'),
1168
+ type='opencompass.datasets.LongBenchrepobenchDataset'),
1169
+ dict(
1170
+ abbr='LongBench_passage_retrieval_en_6',
1171
+ eval_cfg=dict(
1172
+ evaluator=dict(
1173
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
1174
+ pred_role='BOT'),
1175
+ infer_cfg=dict(
1176
+ inferencer=dict(
1177
+ max_out_len=32,
1178
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1179
+ prompt_template=dict(
1180
+ template=dict(round=[
1181
+ dict(
1182
+ prompt=
1183
+ 'Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like "Paragraph 1", "Paragraph 2", etc.\n\nThe answer is: ',
1184
+ role='HUMAN'),
1185
+ ]),
1186
+ type=
1187
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1188
+ retriever=dict(
1189
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1190
+ name='passage_retrieval_en',
1191
+ path='opencompass/Longbench',
1192
+ reader_cfg=dict(
1193
+ input_columns=[
1194
+ 'context',
1195
+ 'input',
1196
+ ],
1197
+ output_column='answers',
1198
+ test_range='[150:175]',
1199
+ test_split='test',
1200
+ train_split='test'),
1201
+ type='opencompass.datasets.LongBenchpassage_retrieval_enDataset'),
1202
+ dict(
1203
+ abbr='LongBench_passage_retrieval_zh_6',
1204
+ eval_cfg=dict(
1205
+ evaluator=dict(
1206
+ language='zh',
1207
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
1208
+ pred_role='BOT'),
1209
+ infer_cfg=dict(
1210
+ inferencer=dict(
1211
+ max_out_len=32,
1212
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1213
+ prompt_template=dict(
1214
+ template=dict(round=[
1215
+ dict(
1216
+ prompt=
1217
+ '以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是"段落1","段落2"等格式\n\n答案是:',
1218
+ role='HUMAN'),
1219
+ ]),
1220
+ type=
1221
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1222
+ retriever=dict(
1223
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1224
+ name='passage_retrieval_zh',
1225
+ path='opencompass/Longbench',
1226
+ reader_cfg=dict(
1227
+ input_columns=[
1228
+ 'context',
1229
+ 'input',
1230
+ ],
1231
+ output_column='answers',
1232
+ test_range='[150:175]',
1233
+ test_split='test',
1234
+ train_split='test'),
1235
+ type='opencompass.datasets.LongBenchpassage_retrieval_zhDataset'),
1236
+ dict(
1237
+ abbr='LongBench_passage_count_6',
1238
+ eval_cfg=dict(
1239
+ evaluator=dict(
1240
+ type='opencompass.datasets.LongBenchCountEvaluator'),
1241
+ pred_role='BOT'),
1242
+ infer_cfg=dict(
1243
+ inferencer=dict(
1244
+ max_out_len=32,
1245
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1246
+ prompt_template=dict(
1247
+ template=dict(round=[
1248
+ dict(
1249
+ prompt=
1250
+ 'There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ',
1251
+ role='HUMAN'),
1252
+ ]),
1253
+ type=
1254
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1255
+ retriever=dict(
1256
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1257
+ name='passage_count',
1258
+ path='opencompass/Longbench',
1259
+ reader_cfg=dict(
1260
+ input_columns=[
1261
+ 'context',
1262
+ 'input',
1263
+ ],
1264
+ output_column='answers',
1265
+ test_range='[150:175]',
1266
+ test_split='test',
1267
+ train_split='test'),
1268
+ type='opencompass.datasets.LongBenchpassage_countDataset'),
1269
+ dict(
1270
+ abbr='LongBench_trec_6',
1271
+ eval_cfg=dict(
1272
+ evaluator=dict(
1273
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
1274
+ ),
1275
+ pred_postprocessor=dict(
1276
+ type='opencompass.datasets.trec_postprocess'),
1277
+ pred_role='BOT'),
1278
+ infer_cfg=dict(
1279
+ inferencer=dict(
1280
+ max_out_len=64,
1281
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1282
+ prompt_template=dict(
1283
+ template=dict(round=[
1284
+ dict(
1285
+ prompt=
1286
+ 'Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}',
1287
+ role='HUMAN'),
1288
+ ]),
1289
+ type=
1290
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1291
+ retriever=dict(
1292
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1293
+ name='trec',
1294
+ path='opencompass/Longbench',
1295
+ reader_cfg=dict(
1296
+ input_columns=[
1297
+ 'context',
1298
+ 'input',
1299
+ ],
1300
+ output_column='all_labels',
1301
+ test_range='[150:175]',
1302
+ test_split='test',
1303
+ train_split='test'),
1304
+ type='opencompass.datasets.LongBenchtrecDataset'),
1305
+ dict(
1306
+ abbr='LongBench_lsht_6',
1307
+ eval_cfg=dict(
1308
+ evaluator=dict(
1309
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
1310
+ ),
1311
+ pred_postprocessor=dict(
1312
+ type='opencompass.datasets.lsht_postprocess'),
1313
+ pred_role='BOT'),
1314
+ infer_cfg=dict(
1315
+ inferencer=dict(
1316
+ max_out_len=64,
1317
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1318
+ prompt_template=dict(
1319
+ template=dict(round=[
1320
+ dict(
1321
+ prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
1322
+ role='HUMAN'),
1323
+ ]),
1324
+ type=
1325
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1326
+ retriever=dict(
1327
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1328
+ name='lsht',
1329
+ path='opencompass/Longbench',
1330
+ reader_cfg=dict(
1331
+ input_columns=[
1332
+ 'context',
1333
+ 'input',
1334
+ ],
1335
+ output_column='all_labels',
1336
+ test_range='[150:175]',
1337
+ test_split='test',
1338
+ train_split='test'),
1339
+ type='opencompass.datasets.LongBenchlshtDataset'),
1340
+ dict(
1341
+ abbr='LongBench_multi_news_6',
1342
+ eval_cfg=dict(
1343
+ evaluator=dict(
1344
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1345
+ pred_role='BOT'),
1346
+ infer_cfg=dict(
1347
+ inferencer=dict(
1348
+ max_out_len=512,
1349
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1350
+ prompt_template=dict(
1351
+ template=dict(round=[
1352
+ dict(
1353
+ prompt=
1354
+ 'You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n',
1355
+ role='HUMAN'),
1356
+ ]),
1357
+ type=
1358
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1359
+ retriever=dict(
1360
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1361
+ name='multi_news',
1362
+ path='opencompass/Longbench',
1363
+ reader_cfg=dict(
1364
+ input_columns=[
1365
+ 'context',
1366
+ ],
1367
+ output_column='answers',
1368
+ test_range='[150:175]',
1369
+ test_split='test',
1370
+ train_split='test'),
1371
+ type='opencompass.datasets.LongBenchmulti_newsDataset'),
1372
+ dict(
1373
+ abbr='LongBench_samsum_6',
1374
+ eval_cfg=dict(
1375
+ evaluator=dict(
1376
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1377
+ pred_postprocessor=dict(
1378
+ type='opencompass.datasets.samsum_postprocess'),
1379
+ pred_role='BOT'),
1380
+ infer_cfg=dict(
1381
+ inferencer=dict(
1382
+ max_out_len=128,
1383
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1384
+ prompt_template=dict(
1385
+ template=dict(round=[
1386
+ dict(
1387
+ prompt=
1388
+ 'Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}',
1389
+ role='HUMAN'),
1390
+ ]),
1391
+ type=
1392
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1393
+ retriever=dict(
1394
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1395
+ name='samsum',
1396
+ path='opencompass/Longbench',
1397
+ reader_cfg=dict(
1398
+ input_columns=[
1399
+ 'context',
1400
+ 'input',
1401
+ ],
1402
+ output_column='answers',
1403
+ test_range='[150:175]',
1404
+ test_split='test',
1405
+ train_split='test'),
1406
+ type='opencompass.datasets.LongBenchsamsumDataset'),
1407
+ ],
1408
+ ]
1409
+ models = [
1410
+ dict(
1411
+ abbr='delta_net-1.3B',
1412
+ batch_size=16,
1413
+ max_out_len=100,
1414
+ max_seq_len=16384,
1415
+ path='/mnt/jfzn/msj/delta_net-1.3B-100B',
1416
+ run_cfg=dict(num_gpus=1),
1417
+ tokenizer_path='/mnt/jfzn/msj/delta_net-1.3B-100B',
1418
+ type='opencompass.models.HuggingFaceCausalLM'),
1419
+ ]
1420
+ work_dir = 'outputs/default/20251127_163453'
tmp/0d03fed5-a949-4dc0-815b-cf2f740d6181_params.py ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets = [
2
+ [
3
+ dict(
4
+ abbr='LongBench_2wikimqa',
5
+ eval_cfg=dict(
6
+ evaluator=dict(
7
+ type='opencompass.datasets.LongBenchF1Evaluator'),
8
+ pred_role='BOT'),
9
+ infer_cfg=dict(
10
+ inferencer=dict(
11
+ max_out_len=32,
12
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
13
+ prompt_template=dict(
14
+ template=dict(round=[
15
+ dict(
16
+ prompt=
17
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
18
+ role='HUMAN'),
19
+ ]),
20
+ type=
21
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
22
+ retriever=dict(
23
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
24
+ name='2wikimqa',
25
+ path='opencompass/Longbench',
26
+ reader_cfg=dict(
27
+ input_columns=[
28
+ 'context',
29
+ 'input',
30
+ ],
31
+ output_column='answers',
32
+ test_split='test',
33
+ train_split='test'),
34
+ type='opencompass.datasets.LongBench2wikimqaDataset'),
35
+ ],
36
+ ]
37
+ eval = dict(runner=dict(task=dict(dump_details=True)))
38
+ models = [
39
+ dict(
40
+ abbr='gated_deltanet',
41
+ batch_size=128,
42
+ max_seq_len=2048,
43
+ model_kwargs=dict(
44
+ device_map='auto',
45
+ torch_dtype='torch.bfloat16',
46
+ trust_remote_code=True),
47
+ path='download_model/hgrn2-1.3B-100B',
48
+ run_cfg=dict(num_gpus=1),
49
+ tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
50
+ tokenizer_path='download_model/hgrn2-1.3B-100B',
51
+ type='opencompass.models.HuggingFaceBaseModel'),
52
+ ]
53
+ work_dir = 'outputs/default/20251219_163447'
tmp/0d2ff363-9d6a-489c-b18d-e978d436a065_params.py ADDED
File without changes
tmp/10481e04-ca08-4f83-972f-e8fccc958b91_params.py ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets = [
2
+ [
3
+ dict(
4
+ abbr='triviaqa_wiki_1shot_0',
5
+ eval_cfg=dict(
6
+ evaluator=dict(type='opencompass.datasets.TriviaQAEvaluator'),
7
+ pred_role='BOT'),
8
+ infer_cfg=dict(
9
+ ice_template=dict(
10
+ template='Q: {question}\nA: {answer}.\n',
11
+ type=
12
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
13
+ inferencer=dict(
14
+ max_out_len=50,
15
+ stopping_criteria=[
16
+ 'Q:',
17
+ '\n',
18
+ ],
19
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
20
+ prompt_template=dict(
21
+ ice_token='</E>',
22
+ template='</E>Q: {question}\nA: ',
23
+ type=
24
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
25
+ retriever=dict(
26
+ fix_id_list=[
27
+ 0,
28
+ ],
29
+ type='opencompass.openicl.icl_retriever.FixKRetriever')),
30
+ path='opencompass/trivia_qa',
31
+ reader_cfg=dict(
32
+ input_columns=[
33
+ 'question',
34
+ ],
35
+ output_column='answer',
36
+ test_range='[0:1000]',
37
+ test_split='validation',
38
+ train_split='train'),
39
+ type='opencompass.datasets.TriviaQADatasetV2'),
40
+ ],
41
+ ]
42
+ models = [
43
+ dict(
44
+ abbr='mask_gdn_1B_hrr-rank4_hf',
45
+ batch_size=8,
46
+ generation_kwargs=dict(),
47
+ max_out_len=256,
48
+ max_seq_len=None,
49
+ model_kwargs=dict(),
50
+ pad_token_id=None,
51
+ path='/mnt/jfzn/msj/train_exp/mask_gdn_1B_hrr-rank4',
52
+ peft_kwargs=dict(),
53
+ peft_path=None,
54
+ run_cfg=dict(num_gpus=1),
55
+ stop_words=[],
56
+ tokenizer_kwargs=dict(),
57
+ tokenizer_path=None,
58
+ type='opencompass.models.huggingface_above_v4_33.HuggingFaceBaseModel'
59
+ ),
60
+ ]
61
+ work_dir = 'outputs/default/20251127_190244'
tmp/104a1807-a194-4864-99ea-1a9fe1a47bac_params.py ADDED
File without changes
tmp/11308d03-3ab0-43b0-9f06-64b71c4140c1_params.py ADDED
@@ -0,0 +1,55 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets = [
2
+ [
3
+ dict(
4
+ abbr='LongBench_lsht',
5
+ eval_cfg=dict(
6
+ evaluator=dict(
7
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
8
+ ),
9
+ pred_postprocessor=dict(
10
+ type='opencompass.datasets.lsht_postprocess'),
11
+ pred_role='BOT'),
12
+ infer_cfg=dict(
13
+ inferencer=dict(
14
+ max_out_len=64,
15
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
16
+ prompt_template=dict(
17
+ template=dict(round=[
18
+ dict(
19
+ prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
20
+ role='HUMAN'),
21
+ ]),
22
+ type=
23
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
24
+ retriever=dict(
25
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
26
+ name='lsht',
27
+ path='opencompass/Longbench',
28
+ reader_cfg=dict(
29
+ input_columns=[
30
+ 'context',
31
+ 'input',
32
+ ],
33
+ output_column='all_labels',
34
+ test_split='test',
35
+ train_split='test'),
36
+ type='opencompass.datasets.LongBenchlshtDataset'),
37
+ ],
38
+ ]
39
+ eval = dict(runner=dict(task=dict(dump_details=True)))
40
+ models = [
41
+ dict(
42
+ abbr='retnet',
43
+ batch_size=128,
44
+ max_seq_len=2048,
45
+ model_kwargs=dict(
46
+ device_map='auto',
47
+ torch_dtype='torch.bfloat16',
48
+ trust_remote_code=True),
49
+ path='/mnt/jfzn/msj/retnet-1.3B-100B',
50
+ run_cfg=dict(num_gpus=1),
51
+ tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
52
+ tokenizer_path='/mnt/jfzn/msj/retnet-1.3B-100B',
53
+ type='opencompass.models.HuggingFaceBaseModel'),
54
+ ]
55
+ work_dir = 'outputs/default/20251207_222645'
tmp/1405e46f-8be4-462d-a794-3b47ef9839c2_params.py ADDED
@@ -0,0 +1,1424 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ datasets = [
2
+ [
3
+ dict(
4
+ abbr='LongBench_2wikimqa_4',
5
+ eval_cfg=dict(
6
+ evaluator=dict(
7
+ type='opencompass.datasets.LongBenchF1Evaluator'),
8
+ pred_role='BOT'),
9
+ infer_cfg=dict(
10
+ inferencer=dict(
11
+ max_out_len=32,
12
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
13
+ prompt_template=dict(
14
+ template=dict(round=[
15
+ dict(
16
+ prompt=
17
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
18
+ role='HUMAN'),
19
+ ]),
20
+ type=
21
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
22
+ retriever=dict(
23
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
24
+ name='2wikimqa',
25
+ path='opencompass/Longbench',
26
+ reader_cfg=dict(
27
+ input_columns=[
28
+ 'context',
29
+ 'input',
30
+ ],
31
+ output_column='answers',
32
+ test_range='[100:125]',
33
+ test_split='test',
34
+ train_split='test'),
35
+ type='opencompass.datasets.LongBench2wikimqaDataset'),
36
+ dict(
37
+ abbr='LongBench_hotpotqa_4',
38
+ eval_cfg=dict(
39
+ evaluator=dict(
40
+ type='opencompass.datasets.LongBenchF1Evaluator'),
41
+ pred_role='BOT'),
42
+ infer_cfg=dict(
43
+ inferencer=dict(
44
+ max_out_len=32,
45
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
46
+ prompt_template=dict(
47
+ template=dict(round=[
48
+ dict(
49
+ prompt=
50
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
51
+ role='HUMAN'),
52
+ ]),
53
+ type=
54
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
55
+ retriever=dict(
56
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
57
+ name='hotpotqa',
58
+ path='opencompass/Longbench',
59
+ reader_cfg=dict(
60
+ input_columns=[
61
+ 'context',
62
+ 'input',
63
+ ],
64
+ output_column='answers',
65
+ test_range='[100:125]',
66
+ test_split='test',
67
+ train_split='test'),
68
+ type='opencompass.datasets.LongBenchhotpotqaDataset'),
69
+ dict(
70
+ abbr='LongBench_musique_4',
71
+ eval_cfg=dict(
72
+ evaluator=dict(
73
+ type='opencompass.datasets.LongBenchF1Evaluator'),
74
+ pred_role='BOT'),
75
+ infer_cfg=dict(
76
+ inferencer=dict(
77
+ max_out_len=32,
78
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
79
+ prompt_template=dict(
80
+ template=dict(round=[
81
+ dict(
82
+ prompt=
83
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
84
+ role='HUMAN'),
85
+ ]),
86
+ type=
87
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
88
+ retriever=dict(
89
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
90
+ name='musique',
91
+ path='opencompass/Longbench',
92
+ reader_cfg=dict(
93
+ input_columns=[
94
+ 'context',
95
+ 'input',
96
+ ],
97
+ output_column='answers',
98
+ test_range='[100:125]',
99
+ test_split='test',
100
+ train_split='test'),
101
+ type='opencompass.datasets.LongBenchmusiqueDataset'),
102
+ dict(
103
+ abbr='LongBench_multifieldqa_en_4',
104
+ eval_cfg=dict(
105
+ evaluator=dict(
106
+ type='opencompass.datasets.LongBenchF1Evaluator'),
107
+ pred_role='BOT'),
108
+ infer_cfg=dict(
109
+ inferencer=dict(
110
+ max_out_len=64,
111
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
112
+ prompt_template=dict(
113
+ template=dict(round=[
114
+ dict(
115
+ prompt=
116
+ 'Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
117
+ role='HUMAN'),
118
+ ]),
119
+ type=
120
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
121
+ retriever=dict(
122
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
123
+ name='multifieldqa_en',
124
+ path='opencompass/Longbench',
125
+ reader_cfg=dict(
126
+ input_columns=[
127
+ 'context',
128
+ 'input',
129
+ ],
130
+ output_column='answers',
131
+ test_range='[76:95]',
132
+ test_split='test',
133
+ train_split='test'),
134
+ type='opencompass.datasets.LongBenchmultifieldqa_enDataset'),
135
+ dict(
136
+ abbr='LongBench_multifieldqa_zh_4',
137
+ eval_cfg=dict(
138
+ evaluator=dict(
139
+ language='zh',
140
+ type='opencompass.datasets.LongBenchF1Evaluator'),
141
+ pred_role='BOT'),
142
+ infer_cfg=dict(
143
+ inferencer=dict(
144
+ max_out_len=64,
145
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
146
+ prompt_template=dict(
147
+ template=dict(round=[
148
+ dict(
149
+ prompt=
150
+ '阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:',
151
+ role='HUMAN'),
152
+ ]),
153
+ type=
154
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
155
+ retriever=dict(
156
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
157
+ name='multifieldqa_zh',
158
+ path='opencompass/Longbench',
159
+ reader_cfg=dict(
160
+ input_columns=[
161
+ 'context',
162
+ 'input',
163
+ ],
164
+ output_column='answers',
165
+ test_range='[100:125]',
166
+ test_split='test',
167
+ train_split='test'),
168
+ type='opencompass.datasets.LongBenchmultifieldqa_zhDataset'),
169
+ dict(
170
+ abbr='LongBench_narrativeqa_4',
171
+ eval_cfg=dict(
172
+ evaluator=dict(
173
+ type='opencompass.datasets.LongBenchF1Evaluator'),
174
+ pred_role='BOT'),
175
+ infer_cfg=dict(
176
+ inferencer=dict(
177
+ max_out_len=128,
178
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
179
+ prompt_template=dict(
180
+ template=dict(round=[
181
+ dict(
182
+ prompt=
183
+ 'You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:',
184
+ role='HUMAN'),
185
+ ]),
186
+ type=
187
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
188
+ retriever=dict(
189
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
190
+ name='narrativeqa',
191
+ path='opencompass/Longbench',
192
+ reader_cfg=dict(
193
+ input_columns=[
194
+ 'context',
195
+ 'input',
196
+ ],
197
+ output_column='answers',
198
+ test_range='[100:125]',
199
+ test_split='test',
200
+ train_split='test'),
201
+ type='opencompass.datasets.LongBenchnarrativeqaDataset'),
202
+ dict(
203
+ abbr='LongBench_qasper_4',
204
+ eval_cfg=dict(
205
+ evaluator=dict(
206
+ type='opencompass.datasets.LongBenchF1Evaluator'),
207
+ pred_role='BOT'),
208
+ infer_cfg=dict(
209
+ inferencer=dict(
210
+ max_out_len=32,
211
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
212
+ prompt_template=dict(
213
+ template=dict(round=[
214
+ dict(
215
+ prompt=
216
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
217
+ role='HUMAN'),
218
+ ]),
219
+ type=
220
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
221
+ retriever=dict(
222
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
223
+ name='qasper',
224
+ path='opencompass/Longbench',
225
+ reader_cfg=dict(
226
+ input_columns=[
227
+ 'context',
228
+ 'input',
229
+ ],
230
+ output_column='answers',
231
+ test_range='[100:125]',
232
+ test_split='test',
233
+ train_split='test'),
234
+ type='opencompass.datasets.LongBenchqasperDataset'),
235
+ dict(
236
+ abbr='LongBench_triviaqa_4',
237
+ eval_cfg=dict(
238
+ evaluator=dict(
239
+ type='opencompass.datasets.LongBenchF1Evaluator'),
240
+ pred_postprocessor=dict(
241
+ type='opencompass.datasets.triviaqa_postprocess'),
242
+ pred_role='BOT'),
243
+ infer_cfg=dict(
244
+ inferencer=dict(
245
+ max_out_len=32,
246
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
247
+ prompt_template=dict(
248
+ template=dict(round=[
249
+ dict(
250
+ prompt=
251
+ 'Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}',
252
+ role='HUMAN'),
253
+ ]),
254
+ type=
255
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
256
+ retriever=dict(
257
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
258
+ name='triviaqa',
259
+ path='opencompass/Longbench',
260
+ reader_cfg=dict(
261
+ input_columns=[
262
+ 'context',
263
+ 'input',
264
+ ],
265
+ output_column='answers',
266
+ test_range='[100:125]',
267
+ test_split='test',
268
+ train_split='test'),
269
+ type='opencompass.datasets.LongBenchtriviaqaDataset'),
270
+ dict(
271
+ abbr='LongBench_gov_report_4',
272
+ eval_cfg=dict(
273
+ evaluator=dict(
274
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
275
+ pred_role='BOT'),
276
+ infer_cfg=dict(
277
+ inferencer=dict(
278
+ max_out_len=512,
279
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
280
+ prompt_template=dict(
281
+ template=dict(round=[
282
+ dict(
283
+ prompt=
284
+ 'You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
285
+ role='HUMAN'),
286
+ ]),
287
+ type=
288
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
289
+ retriever=dict(
290
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
291
+ name='gov_report',
292
+ path='opencompass/Longbench',
293
+ reader_cfg=dict(
294
+ input_columns=[
295
+ 'context',
296
+ ],
297
+ output_column='answers',
298
+ test_range='[100:125]',
299
+ test_split='test',
300
+ train_split='test'),
301
+ type='opencompass.datasets.LongBenchgov_reportDataset'),
302
+ dict(
303
+ abbr='LongBench_qmsum_4',
304
+ eval_cfg=dict(
305
+ evaluator=dict(
306
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
307
+ pred_role='BOT'),
308
+ infer_cfg=dict(
309
+ inferencer=dict(
310
+ max_out_len=512,
311
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
312
+ prompt_template=dict(
313
+ template=dict(round=[
314
+ dict(
315
+ prompt=
316
+ 'You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:',
317
+ role='HUMAN'),
318
+ ]),
319
+ type=
320
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
321
+ retriever=dict(
322
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
323
+ name='qmsum',
324
+ path='opencompass/Longbench',
325
+ reader_cfg=dict(
326
+ input_columns=[
327
+ 'context',
328
+ 'input',
329
+ ],
330
+ output_column='answers',
331
+ test_range='[100:125]',
332
+ test_split='test',
333
+ train_split='test'),
334
+ type='opencompass.datasets.LongBenchqmsumDataset'),
335
+ dict(
336
+ abbr='LongBench_vcsum_4',
337
+ eval_cfg=dict(
338
+ evaluator=dict(
339
+ language='zh',
340
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
341
+ pred_role='BOT'),
342
+ infer_cfg=dict(
343
+ inferencer=dict(
344
+ max_out_len=512,
345
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
346
+ prompt_template=dict(
347
+ template=dict(round=[
348
+ dict(
349
+ prompt=
350
+ '下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:',
351
+ role='HUMAN'),
352
+ ]),
353
+ type=
354
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
355
+ retriever=dict(
356
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
357
+ name='vcsum',
358
+ path='opencompass/Longbench',
359
+ reader_cfg=dict(
360
+ input_columns=[
361
+ 'context',
362
+ ],
363
+ output_column='answers',
364
+ test_range='[100:125]',
365
+ test_split='test',
366
+ train_split='test'),
367
+ type='opencompass.datasets.LongBenchvcsumDataset'),
368
+ dict(
369
+ abbr='LongBench_dureader_4',
370
+ eval_cfg=dict(
371
+ evaluator=dict(
372
+ language='zh',
373
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
374
+ pred_role='BOT'),
375
+ infer_cfg=dict(
376
+ inferencer=dict(
377
+ max_out_len=128,
378
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
379
+ prompt_template=dict(
380
+ template=dict(round=[
381
+ dict(
382
+ prompt=
383
+ '请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
384
+ role='HUMAN'),
385
+ ]),
386
+ type=
387
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
388
+ retriever=dict(
389
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
390
+ name='dureader',
391
+ path='opencompass/Longbench',
392
+ reader_cfg=dict(
393
+ input_columns=[
394
+ 'context',
395
+ 'input',
396
+ ],
397
+ output_column='answers',
398
+ test_range='[100:125]',
399
+ test_split='test',
400
+ train_split='test'),
401
+ type='opencompass.datasets.LongBenchdureaderDataset'),
402
+ dict(
403
+ abbr='LongBench_lcc_4',
404
+ eval_cfg=dict(
405
+ evaluator=dict(
406
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
407
+ pred_role='BOT'),
408
+ infer_cfg=dict(
409
+ inferencer=dict(
410
+ max_out_len=64,
411
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
412
+ prompt_template=dict(
413
+ template=dict(round=[
414
+ dict(
415
+ prompt=
416
+ 'Please complete the code given below. \n{context}Next line of code:\n',
417
+ role='HUMAN'),
418
+ ]),
419
+ type=
420
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
421
+ retriever=dict(
422
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
423
+ name='lcc',
424
+ path='opencompass/Longbench',
425
+ reader_cfg=dict(
426
+ input_columns=[
427
+ 'context',
428
+ ],
429
+ output_column='answers',
430
+ test_range='[252:315]',
431
+ test_split='test',
432
+ train_split='test'),
433
+ type='opencompass.datasets.LongBenchlccDataset'),
434
+ dict(
435
+ abbr='LongBench_repobench-p_4',
436
+ eval_cfg=dict(
437
+ evaluator=dict(
438
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
439
+ pred_role='BOT'),
440
+ infer_cfg=dict(
441
+ inferencer=dict(
442
+ max_out_len=64,
443
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
444
+ prompt_template=dict(
445
+ template=dict(round=[
446
+ dict(
447
+ prompt=
448
+ 'Please complete the code given below. \n{context}{input}Next line of code:\n',
449
+ role='HUMAN'),
450
+ ]),
451
+ type=
452
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
453
+ retriever=dict(
454
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
455
+ name='repobench-p',
456
+ path='opencompass/Longbench',
457
+ reader_cfg=dict(
458
+ input_columns=[
459
+ 'context',
460
+ 'input',
461
+ ],
462
+ output_column='answers',
463
+ test_range='[252:315]',
464
+ test_split='test',
465
+ train_split='test'),
466
+ type='opencompass.datasets.LongBenchrepobenchDataset'),
467
+ dict(
468
+ abbr='LongBench_passage_retrieval_en_4',
469
+ eval_cfg=dict(
470
+ evaluator=dict(
471
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
472
+ pred_role='BOT'),
473
+ infer_cfg=dict(
474
+ inferencer=dict(
475
+ max_out_len=32,
476
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
477
+ prompt_template=dict(
478
+ template=dict(round=[
479
+ dict(
480
+ prompt=
481
+ 'Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like "Paragraph 1", "Paragraph 2", etc.\n\nThe answer is: ',
482
+ role='HUMAN'),
483
+ ]),
484
+ type=
485
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
486
+ retriever=dict(
487
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
488
+ name='passage_retrieval_en',
489
+ path='opencompass/Longbench',
490
+ reader_cfg=dict(
491
+ input_columns=[
492
+ 'context',
493
+ 'input',
494
+ ],
495
+ output_column='answers',
496
+ test_range='[100:125]',
497
+ test_split='test',
498
+ train_split='test'),
499
+ type='opencompass.datasets.LongBenchpassage_retrieval_enDataset'),
500
+ dict(
501
+ abbr='LongBench_passage_retrieval_zh_4',
502
+ eval_cfg=dict(
503
+ evaluator=dict(
504
+ language='zh',
505
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
506
+ pred_role='BOT'),
507
+ infer_cfg=dict(
508
+ inferencer=dict(
509
+ max_out_len=32,
510
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
511
+ prompt_template=dict(
512
+ template=dict(round=[
513
+ dict(
514
+ prompt=
515
+ '以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是"段落1","段落2"等格式\n\n答案是:',
516
+ role='HUMAN'),
517
+ ]),
518
+ type=
519
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
520
+ retriever=dict(
521
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
522
+ name='passage_retrieval_zh',
523
+ path='opencompass/Longbench',
524
+ reader_cfg=dict(
525
+ input_columns=[
526
+ 'context',
527
+ 'input',
528
+ ],
529
+ output_column='answers',
530
+ test_range='[100:125]',
531
+ test_split='test',
532
+ train_split='test'),
533
+ type='opencompass.datasets.LongBenchpassage_retrieval_zhDataset'),
534
+ dict(
535
+ abbr='LongBench_passage_count_4',
536
+ eval_cfg=dict(
537
+ evaluator=dict(
538
+ type='opencompass.datasets.LongBenchCountEvaluator'),
539
+ pred_role='BOT'),
540
+ infer_cfg=dict(
541
+ inferencer=dict(
542
+ max_out_len=32,
543
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
544
+ prompt_template=dict(
545
+ template=dict(round=[
546
+ dict(
547
+ prompt=
548
+ 'There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ',
549
+ role='HUMAN'),
550
+ ]),
551
+ type=
552
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
553
+ retriever=dict(
554
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
555
+ name='passage_count',
556
+ path='opencompass/Longbench',
557
+ reader_cfg=dict(
558
+ input_columns=[
559
+ 'context',
560
+ 'input',
561
+ ],
562
+ output_column='answers',
563
+ test_range='[100:125]',
564
+ test_split='test',
565
+ train_split='test'),
566
+ type='opencompass.datasets.LongBenchpassage_countDataset'),
567
+ dict(
568
+ abbr='LongBench_trec_4',
569
+ eval_cfg=dict(
570
+ evaluator=dict(
571
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
572
+ ),
573
+ pred_postprocessor=dict(
574
+ type='opencompass.datasets.trec_postprocess'),
575
+ pred_role='BOT'),
576
+ infer_cfg=dict(
577
+ inferencer=dict(
578
+ max_out_len=64,
579
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
580
+ prompt_template=dict(
581
+ template=dict(round=[
582
+ dict(
583
+ prompt=
584
+ 'Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}',
585
+ role='HUMAN'),
586
+ ]),
587
+ type=
588
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
589
+ retriever=dict(
590
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
591
+ name='trec',
592
+ path='opencompass/Longbench',
593
+ reader_cfg=dict(
594
+ input_columns=[
595
+ 'context',
596
+ 'input',
597
+ ],
598
+ output_column='all_labels',
599
+ test_range='[100:125]',
600
+ test_split='test',
601
+ train_split='test'),
602
+ type='opencompass.datasets.LongBenchtrecDataset'),
603
+ dict(
604
+ abbr='LongBench_lsht_4',
605
+ eval_cfg=dict(
606
+ evaluator=dict(
607
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
608
+ ),
609
+ pred_postprocessor=dict(
610
+ type='opencompass.datasets.lsht_postprocess'),
611
+ pred_role='BOT'),
612
+ infer_cfg=dict(
613
+ inferencer=dict(
614
+ max_out_len=64,
615
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
616
+ prompt_template=dict(
617
+ template=dict(round=[
618
+ dict(
619
+ prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
620
+ role='HUMAN'),
621
+ ]),
622
+ type=
623
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
624
+ retriever=dict(
625
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
626
+ name='lsht',
627
+ path='opencompass/Longbench',
628
+ reader_cfg=dict(
629
+ input_columns=[
630
+ 'context',
631
+ 'input',
632
+ ],
633
+ output_column='all_labels',
634
+ test_range='[100:125]',
635
+ test_split='test',
636
+ train_split='test'),
637
+ type='opencompass.datasets.LongBenchlshtDataset'),
638
+ dict(
639
+ abbr='LongBench_multi_news_4',
640
+ eval_cfg=dict(
641
+ evaluator=dict(
642
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
643
+ pred_role='BOT'),
644
+ infer_cfg=dict(
645
+ inferencer=dict(
646
+ max_out_len=512,
647
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
648
+ prompt_template=dict(
649
+ template=dict(round=[
650
+ dict(
651
+ prompt=
652
+ 'You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n',
653
+ role='HUMAN'),
654
+ ]),
655
+ type=
656
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
657
+ retriever=dict(
658
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
659
+ name='multi_news',
660
+ path='opencompass/Longbench',
661
+ reader_cfg=dict(
662
+ input_columns=[
663
+ 'context',
664
+ ],
665
+ output_column='answers',
666
+ test_range='[100:125]',
667
+ test_split='test',
668
+ train_split='test'),
669
+ type='opencompass.datasets.LongBenchmulti_newsDataset'),
670
+ dict(
671
+ abbr='LongBench_samsum_4',
672
+ eval_cfg=dict(
673
+ evaluator=dict(
674
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
675
+ pred_postprocessor=dict(
676
+ type='opencompass.datasets.samsum_postprocess'),
677
+ pred_role='BOT'),
678
+ infer_cfg=dict(
679
+ inferencer=dict(
680
+ max_out_len=128,
681
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
682
+ prompt_template=dict(
683
+ template=dict(round=[
684
+ dict(
685
+ prompt=
686
+ 'Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}',
687
+ role='HUMAN'),
688
+ ]),
689
+ type=
690
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
691
+ retriever=dict(
692
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
693
+ name='samsum',
694
+ path='opencompass/Longbench',
695
+ reader_cfg=dict(
696
+ input_columns=[
697
+ 'context',
698
+ 'input',
699
+ ],
700
+ output_column='answers',
701
+ test_range='[100:125]',
702
+ test_split='test',
703
+ train_split='test'),
704
+ type='opencompass.datasets.LongBenchsamsumDataset'),
705
+ dict(
706
+ abbr='LongBench_2wikimqa_4',
707
+ eval_cfg=dict(
708
+ evaluator=dict(
709
+ type='opencompass.datasets.LongBenchF1Evaluator'),
710
+ pred_role='BOT'),
711
+ infer_cfg=dict(
712
+ inferencer=dict(
713
+ max_out_len=32,
714
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
715
+ prompt_template=dict(
716
+ template=dict(round=[
717
+ dict(
718
+ prompt=
719
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
720
+ role='HUMAN'),
721
+ ]),
722
+ type=
723
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
724
+ retriever=dict(
725
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
726
+ name='2wikimqa',
727
+ path='opencompass/Longbench',
728
+ reader_cfg=dict(
729
+ input_columns=[
730
+ 'context',
731
+ 'input',
732
+ ],
733
+ output_column='answers',
734
+ test_range='[100:125]',
735
+ test_split='test',
736
+ train_split='test'),
737
+ type='opencompass.datasets.LongBench2wikimqaDataset'),
738
+ dict(
739
+ abbr='LongBench_hotpotqa_4',
740
+ eval_cfg=dict(
741
+ evaluator=dict(
742
+ type='opencompass.datasets.LongBenchF1Evaluator'),
743
+ pred_role='BOT'),
744
+ infer_cfg=dict(
745
+ inferencer=dict(
746
+ max_out_len=32,
747
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
748
+ prompt_template=dict(
749
+ template=dict(round=[
750
+ dict(
751
+ prompt=
752
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
753
+ role='HUMAN'),
754
+ ]),
755
+ type=
756
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
757
+ retriever=dict(
758
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
759
+ name='hotpotqa',
760
+ path='opencompass/Longbench',
761
+ reader_cfg=dict(
762
+ input_columns=[
763
+ 'context',
764
+ 'input',
765
+ ],
766
+ output_column='answers',
767
+ test_range='[100:125]',
768
+ test_split='test',
769
+ train_split='test'),
770
+ type='opencompass.datasets.LongBenchhotpotqaDataset'),
771
+ dict(
772
+ abbr='LongBench_musique_4',
773
+ eval_cfg=dict(
774
+ evaluator=dict(
775
+ type='opencompass.datasets.LongBenchF1Evaluator'),
776
+ pred_role='BOT'),
777
+ infer_cfg=dict(
778
+ inferencer=dict(
779
+ max_out_len=32,
780
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
781
+ prompt_template=dict(
782
+ template=dict(round=[
783
+ dict(
784
+ prompt=
785
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
786
+ role='HUMAN'),
787
+ ]),
788
+ type=
789
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
790
+ retriever=dict(
791
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
792
+ name='musique',
793
+ path='opencompass/Longbench',
794
+ reader_cfg=dict(
795
+ input_columns=[
796
+ 'context',
797
+ 'input',
798
+ ],
799
+ output_column='answers',
800
+ test_range='[100:125]',
801
+ test_split='test',
802
+ train_split='test'),
803
+ type='opencompass.datasets.LongBenchmusiqueDataset'),
804
+ dict(
805
+ abbr='LongBench_multifieldqa_en_4',
806
+ eval_cfg=dict(
807
+ evaluator=dict(
808
+ type='opencompass.datasets.LongBenchF1Evaluator'),
809
+ pred_role='BOT'),
810
+ infer_cfg=dict(
811
+ inferencer=dict(
812
+ max_out_len=64,
813
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
814
+ prompt_template=dict(
815
+ template=dict(round=[
816
+ dict(
817
+ prompt=
818
+ 'Read the following text and answer briefly.\n\n{context}\n\nNow, answer the following question based on the above text, only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
819
+ role='HUMAN'),
820
+ ]),
821
+ type=
822
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
823
+ retriever=dict(
824
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
825
+ name='multifieldqa_en',
826
+ path='opencompass/Longbench',
827
+ reader_cfg=dict(
828
+ input_columns=[
829
+ 'context',
830
+ 'input',
831
+ ],
832
+ output_column='answers',
833
+ test_range='[76:95]',
834
+ test_split='test',
835
+ train_split='test'),
836
+ type='opencompass.datasets.LongBenchmultifieldqa_enDataset'),
837
+ dict(
838
+ abbr='LongBench_multifieldqa_zh_4',
839
+ eval_cfg=dict(
840
+ evaluator=dict(
841
+ language='zh',
842
+ type='opencompass.datasets.LongBenchF1Evaluator'),
843
+ pred_role='BOT'),
844
+ infer_cfg=dict(
845
+ inferencer=dict(
846
+ max_out_len=64,
847
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
848
+ prompt_template=dict(
849
+ template=dict(round=[
850
+ dict(
851
+ prompt=
852
+ '阅读以下文字并用中文简短回答:\n\n{context}\n\n现在请基于上面的文章回答下面的问题,只告诉我答案,不要输出任何其他字词。\n\n问题:{input}\n回答:',
853
+ role='HUMAN'),
854
+ ]),
855
+ type=
856
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
857
+ retriever=dict(
858
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
859
+ name='multifieldqa_zh',
860
+ path='opencompass/Longbench',
861
+ reader_cfg=dict(
862
+ input_columns=[
863
+ 'context',
864
+ 'input',
865
+ ],
866
+ output_column='answers',
867
+ test_range='[100:125]',
868
+ test_split='test',
869
+ train_split='test'),
870
+ type='opencompass.datasets.LongBenchmultifieldqa_zhDataset'),
871
+ dict(
872
+ abbr='LongBench_narrativeqa_4',
873
+ eval_cfg=dict(
874
+ evaluator=dict(
875
+ type='opencompass.datasets.LongBenchF1Evaluator'),
876
+ pred_role='BOT'),
877
+ infer_cfg=dict(
878
+ inferencer=dict(
879
+ max_out_len=128,
880
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
881
+ prompt_template=dict(
882
+ template=dict(round=[
883
+ dict(
884
+ prompt=
885
+ 'You are given a story, which can be either a novel or a movie script, and a question. Answer the question as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nStory: {context}\n\nNow, answer the question based on the story as concisely as you can, using a single phrase if possible. Do not provide any explanation.\n\nQuestion: {input}\n\nAnswer:',
886
+ role='HUMAN'),
887
+ ]),
888
+ type=
889
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
890
+ retriever=dict(
891
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
892
+ name='narrativeqa',
893
+ path='opencompass/Longbench',
894
+ reader_cfg=dict(
895
+ input_columns=[
896
+ 'context',
897
+ 'input',
898
+ ],
899
+ output_column='answers',
900
+ test_range='[100:125]',
901
+ test_split='test',
902
+ train_split='test'),
903
+ type='opencompass.datasets.LongBenchnarrativeqaDataset'),
904
+ dict(
905
+ abbr='LongBench_qasper_4',
906
+ eval_cfg=dict(
907
+ evaluator=dict(
908
+ type='opencompass.datasets.LongBenchF1Evaluator'),
909
+ pred_role='BOT'),
910
+ infer_cfg=dict(
911
+ inferencer=dict(
912
+ max_out_len=32,
913
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
914
+ prompt_template=dict(
915
+ template=dict(round=[
916
+ dict(
917
+ prompt=
918
+ 'Answer the question based on the given passages. Only give me the answer and do not output any other words.\n\nThe following are given passages.\n{context}\n\nAnswer the question based on the given passages. Only give me the answer and do not output any other words.\n\nQuestion: {input}\nAnswer:',
919
+ role='HUMAN'),
920
+ ]),
921
+ type=
922
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
923
+ retriever=dict(
924
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
925
+ name='qasper',
926
+ path='opencompass/Longbench',
927
+ reader_cfg=dict(
928
+ input_columns=[
929
+ 'context',
930
+ 'input',
931
+ ],
932
+ output_column='answers',
933
+ test_range='[100:125]',
934
+ test_split='test',
935
+ train_split='test'),
936
+ type='opencompass.datasets.LongBenchqasperDataset'),
937
+ dict(
938
+ abbr='LongBench_triviaqa_4',
939
+ eval_cfg=dict(
940
+ evaluator=dict(
941
+ type='opencompass.datasets.LongBenchF1Evaluator'),
942
+ pred_postprocessor=dict(
943
+ type='opencompass.datasets.triviaqa_postprocess'),
944
+ pred_role='BOT'),
945
+ infer_cfg=dict(
946
+ inferencer=dict(
947
+ max_out_len=32,
948
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
949
+ prompt_template=dict(
950
+ template=dict(round=[
951
+ dict(
952
+ prompt=
953
+ 'Answer the question based on the given passage. Only give me the answer and do not output any other words. The following are some examples.\n\n{context}\n\n{input}',
954
+ role='HUMAN'),
955
+ ]),
956
+ type=
957
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
958
+ retriever=dict(
959
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
960
+ name='triviaqa',
961
+ path='opencompass/Longbench',
962
+ reader_cfg=dict(
963
+ input_columns=[
964
+ 'context',
965
+ 'input',
966
+ ],
967
+ output_column='answers',
968
+ test_range='[100:125]',
969
+ test_split='test',
970
+ train_split='test'),
971
+ type='opencompass.datasets.LongBenchtriviaqaDataset'),
972
+ dict(
973
+ abbr='LongBench_gov_report_4',
974
+ eval_cfg=dict(
975
+ evaluator=dict(
976
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
977
+ pred_role='BOT'),
978
+ infer_cfg=dict(
979
+ inferencer=dict(
980
+ max_out_len=512,
981
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
982
+ prompt_template=dict(
983
+ template=dict(round=[
984
+ dict(
985
+ prompt=
986
+ 'You are given a report by a government agency. Write a one-page summary of the report.\n\nReport:\n{context}\n\nNow, write a one-page summary of the report.\n\nSummary:',
987
+ role='HUMAN'),
988
+ ]),
989
+ type=
990
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
991
+ retriever=dict(
992
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
993
+ name='gov_report',
994
+ path='opencompass/Longbench',
995
+ reader_cfg=dict(
996
+ input_columns=[
997
+ 'context',
998
+ ],
999
+ output_column='answers',
1000
+ test_range='[100:125]',
1001
+ test_split='test',
1002
+ train_split='test'),
1003
+ type='opencompass.datasets.LongBenchgov_reportDataset'),
1004
+ dict(
1005
+ abbr='LongBench_qmsum_4',
1006
+ eval_cfg=dict(
1007
+ evaluator=dict(
1008
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1009
+ pred_role='BOT'),
1010
+ infer_cfg=dict(
1011
+ inferencer=dict(
1012
+ max_out_len=512,
1013
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1014
+ prompt_template=dict(
1015
+ template=dict(round=[
1016
+ dict(
1017
+ prompt=
1018
+ 'You are given a meeting transcript and a query containing a question or instruction. Answer the query in one or more sentences.\n\nTranscript:\n{context}\n\nNow, answer the query based on the above meeting transcript in one or more sentences.\n\nQuery: {input}\nAnswer:',
1019
+ role='HUMAN'),
1020
+ ]),
1021
+ type=
1022
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1023
+ retriever=dict(
1024
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1025
+ name='qmsum',
1026
+ path='opencompass/Longbench',
1027
+ reader_cfg=dict(
1028
+ input_columns=[
1029
+ 'context',
1030
+ 'input',
1031
+ ],
1032
+ output_column='answers',
1033
+ test_range='[100:125]',
1034
+ test_split='test',
1035
+ train_split='test'),
1036
+ type='opencompass.datasets.LongBenchqmsumDataset'),
1037
+ dict(
1038
+ abbr='LongBench_vcsum_4',
1039
+ eval_cfg=dict(
1040
+ evaluator=dict(
1041
+ language='zh',
1042
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1043
+ pred_role='BOT'),
1044
+ infer_cfg=dict(
1045
+ inferencer=dict(
1046
+ max_out_len=512,
1047
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1048
+ prompt_template=dict(
1049
+ template=dict(round=[
1050
+ dict(
1051
+ prompt=
1052
+ '下面有一段会议记录,请你阅读后,写一段总结,总结会议的内容。\n会议记录:\n{context}\n\n会议总结:',
1053
+ role='HUMAN'),
1054
+ ]),
1055
+ type=
1056
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1057
+ retriever=dict(
1058
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1059
+ name='vcsum',
1060
+ path='opencompass/Longbench',
1061
+ reader_cfg=dict(
1062
+ input_columns=[
1063
+ 'context',
1064
+ ],
1065
+ output_column='answers',
1066
+ test_range='[100:125]',
1067
+ test_split='test',
1068
+ train_split='test'),
1069
+ type='opencompass.datasets.LongBenchvcsumDataset'),
1070
+ dict(
1071
+ abbr='LongBench_dureader_4',
1072
+ eval_cfg=dict(
1073
+ evaluator=dict(
1074
+ language='zh',
1075
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1076
+ pred_role='BOT'),
1077
+ infer_cfg=dict(
1078
+ inferencer=dict(
1079
+ max_out_len=128,
1080
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1081
+ prompt_template=dict(
1082
+ template=dict(round=[
1083
+ dict(
1084
+ prompt=
1085
+ '请基于给定的文章回答下述问题。\n\n文章:{context}\n\n请基于上述文章回答下面的问题。\n\n问题:{input}\n回答:',
1086
+ role='HUMAN'),
1087
+ ]),
1088
+ type=
1089
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1090
+ retriever=dict(
1091
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1092
+ name='dureader',
1093
+ path='opencompass/Longbench',
1094
+ reader_cfg=dict(
1095
+ input_columns=[
1096
+ 'context',
1097
+ 'input',
1098
+ ],
1099
+ output_column='answers',
1100
+ test_range='[100:125]',
1101
+ test_split='test',
1102
+ train_split='test'),
1103
+ type='opencompass.datasets.LongBenchdureaderDataset'),
1104
+ dict(
1105
+ abbr='LongBench_lcc_4',
1106
+ eval_cfg=dict(
1107
+ evaluator=dict(
1108
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
1109
+ pred_role='BOT'),
1110
+ infer_cfg=dict(
1111
+ inferencer=dict(
1112
+ max_out_len=64,
1113
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1114
+ prompt_template=dict(
1115
+ template=dict(round=[
1116
+ dict(
1117
+ prompt=
1118
+ 'Please complete the code given below. \n{context}Next line of code:\n',
1119
+ role='HUMAN'),
1120
+ ]),
1121
+ type=
1122
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1123
+ retriever=dict(
1124
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1125
+ name='lcc',
1126
+ path='opencompass/Longbench',
1127
+ reader_cfg=dict(
1128
+ input_columns=[
1129
+ 'context',
1130
+ ],
1131
+ output_column='answers',
1132
+ test_range='[252:315]',
1133
+ test_split='test',
1134
+ train_split='test'),
1135
+ type='opencompass.datasets.LongBenchlccDataset'),
1136
+ dict(
1137
+ abbr='LongBench_repobench-p_4',
1138
+ eval_cfg=dict(
1139
+ evaluator=dict(
1140
+ type='opencompass.datasets.LongBenchCodeSimEvaluator'),
1141
+ pred_role='BOT'),
1142
+ infer_cfg=dict(
1143
+ inferencer=dict(
1144
+ max_out_len=64,
1145
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1146
+ prompt_template=dict(
1147
+ template=dict(round=[
1148
+ dict(
1149
+ prompt=
1150
+ 'Please complete the code given below. \n{context}{input}Next line of code:\n',
1151
+ role='HUMAN'),
1152
+ ]),
1153
+ type=
1154
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1155
+ retriever=dict(
1156
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1157
+ name='repobench-p',
1158
+ path='opencompass/Longbench',
1159
+ reader_cfg=dict(
1160
+ input_columns=[
1161
+ 'context',
1162
+ 'input',
1163
+ ],
1164
+ output_column='answers',
1165
+ test_range='[252:315]',
1166
+ test_split='test',
1167
+ train_split='test'),
1168
+ type='opencompass.datasets.LongBenchrepobenchDataset'),
1169
+ dict(
1170
+ abbr='LongBench_passage_retrieval_en_4',
1171
+ eval_cfg=dict(
1172
+ evaluator=dict(
1173
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
1174
+ pred_role='BOT'),
1175
+ infer_cfg=dict(
1176
+ inferencer=dict(
1177
+ max_out_len=32,
1178
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1179
+ prompt_template=dict(
1180
+ template=dict(round=[
1181
+ dict(
1182
+ prompt=
1183
+ 'Here are 30 paragraphs from Wikipedia, along with an abstract. Please determine which paragraph the abstract is from.\n\n{context}\n\nThe following is an abstract.\n\n{input}\n\nPlease enter the number of the paragraph that the abstract is from. The answer format must be like "Paragraph 1", "Paragraph 2", etc.\n\nThe answer is: ',
1184
+ role='HUMAN'),
1185
+ ]),
1186
+ type=
1187
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1188
+ retriever=dict(
1189
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1190
+ name='passage_retrieval_en',
1191
+ path='opencompass/Longbench',
1192
+ reader_cfg=dict(
1193
+ input_columns=[
1194
+ 'context',
1195
+ 'input',
1196
+ ],
1197
+ output_column='answers',
1198
+ test_range='[100:125]',
1199
+ test_split='test',
1200
+ train_split='test'),
1201
+ type='opencompass.datasets.LongBenchpassage_retrieval_enDataset'),
1202
+ dict(
1203
+ abbr='LongBench_passage_retrieval_zh_4',
1204
+ eval_cfg=dict(
1205
+ evaluator=dict(
1206
+ language='zh',
1207
+ type='opencompass.datasets.LongBenchRetrievalEvaluator'),
1208
+ pred_role='BOT'),
1209
+ infer_cfg=dict(
1210
+ inferencer=dict(
1211
+ max_out_len=32,
1212
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1213
+ prompt_template=dict(
1214
+ template=dict(round=[
1215
+ dict(
1216
+ prompt=
1217
+ '以下是若干段落文字,以及其中一个段落的摘要。请确定给定的摘要出自哪一段。\n\n{context}\n\n下面是一个摘要\n\n{input}\n\n请输入摘要所属段落的编号。答案格式必须是"段落1","段落2"等格式\n\n答案是:',
1218
+ role='HUMAN'),
1219
+ ]),
1220
+ type=
1221
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1222
+ retriever=dict(
1223
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1224
+ name='passage_retrieval_zh',
1225
+ path='opencompass/Longbench',
1226
+ reader_cfg=dict(
1227
+ input_columns=[
1228
+ 'context',
1229
+ 'input',
1230
+ ],
1231
+ output_column='answers',
1232
+ test_range='[100:125]',
1233
+ test_split='test',
1234
+ train_split='test'),
1235
+ type='opencompass.datasets.LongBenchpassage_retrieval_zhDataset'),
1236
+ dict(
1237
+ abbr='LongBench_passage_count_4',
1238
+ eval_cfg=dict(
1239
+ evaluator=dict(
1240
+ type='opencompass.datasets.LongBenchCountEvaluator'),
1241
+ pred_role='BOT'),
1242
+ infer_cfg=dict(
1243
+ inferencer=dict(
1244
+ max_out_len=32,
1245
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1246
+ prompt_template=dict(
1247
+ template=dict(round=[
1248
+ dict(
1249
+ prompt=
1250
+ 'There are some paragraphs below sourced from Wikipedia. Some of them may be duplicates. Please carefully read these paragraphs and determine how many unique paragraphs there are after removing duplicates. In other words, how many non-repeating paragraphs are there in total?\n\n{context}\n\nPlease enter the final count of unique paragraphs after removing duplicates. The output format should only contain the number, such as 1, 2, 3, and so on.\n\nThe final answer is: ',
1251
+ role='HUMAN'),
1252
+ ]),
1253
+ type=
1254
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1255
+ retriever=dict(
1256
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1257
+ name='passage_count',
1258
+ path='opencompass/Longbench',
1259
+ reader_cfg=dict(
1260
+ input_columns=[
1261
+ 'context',
1262
+ 'input',
1263
+ ],
1264
+ output_column='answers',
1265
+ test_range='[100:125]',
1266
+ test_split='test',
1267
+ train_split='test'),
1268
+ type='opencompass.datasets.LongBenchpassage_countDataset'),
1269
+ dict(
1270
+ abbr='LongBench_trec_4',
1271
+ eval_cfg=dict(
1272
+ evaluator=dict(
1273
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
1274
+ ),
1275
+ pred_postprocessor=dict(
1276
+ type='opencompass.datasets.trec_postprocess'),
1277
+ pred_role='BOT'),
1278
+ infer_cfg=dict(
1279
+ inferencer=dict(
1280
+ max_out_len=64,
1281
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1282
+ prompt_template=dict(
1283
+ template=dict(round=[
1284
+ dict(
1285
+ prompt=
1286
+ 'Please determine the type of the question below. Here are some examples of questions.\n\n{context}\n{input}',
1287
+ role='HUMAN'),
1288
+ ]),
1289
+ type=
1290
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1291
+ retriever=dict(
1292
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1293
+ name='trec',
1294
+ path='opencompass/Longbench',
1295
+ reader_cfg=dict(
1296
+ input_columns=[
1297
+ 'context',
1298
+ 'input',
1299
+ ],
1300
+ output_column='all_labels',
1301
+ test_range='[100:125]',
1302
+ test_split='test',
1303
+ train_split='test'),
1304
+ type='opencompass.datasets.LongBenchtrecDataset'),
1305
+ dict(
1306
+ abbr='LongBench_lsht_4',
1307
+ eval_cfg=dict(
1308
+ evaluator=dict(
1309
+ type='opencompass.datasets.LongBenchClassificationEvaluator'
1310
+ ),
1311
+ pred_postprocessor=dict(
1312
+ type='opencompass.datasets.lsht_postprocess'),
1313
+ pred_role='BOT'),
1314
+ infer_cfg=dict(
1315
+ inferencer=dict(
1316
+ max_out_len=64,
1317
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1318
+ prompt_template=dict(
1319
+ template=dict(round=[
1320
+ dict(
1321
+ prompt='请判断给定新闻的类别,下面是一些例子。\n\n{context}\n{input}',
1322
+ role='HUMAN'),
1323
+ ]),
1324
+ type=
1325
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1326
+ retriever=dict(
1327
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1328
+ name='lsht',
1329
+ path='opencompass/Longbench',
1330
+ reader_cfg=dict(
1331
+ input_columns=[
1332
+ 'context',
1333
+ 'input',
1334
+ ],
1335
+ output_column='all_labels',
1336
+ test_range='[100:125]',
1337
+ test_split='test',
1338
+ train_split='test'),
1339
+ type='opencompass.datasets.LongBenchlshtDataset'),
1340
+ dict(
1341
+ abbr='LongBench_multi_news_4',
1342
+ eval_cfg=dict(
1343
+ evaluator=dict(
1344
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1345
+ pred_role='BOT'),
1346
+ infer_cfg=dict(
1347
+ inferencer=dict(
1348
+ max_out_len=512,
1349
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1350
+ prompt_template=dict(
1351
+ template=dict(round=[
1352
+ dict(
1353
+ prompt=
1354
+ 'You are given several news passages. Write a one-page summary of all news. \n\nNews:\n{context}\n\nNow, write a one-page summary of all the news.\n\nSummary:\n',
1355
+ role='HUMAN'),
1356
+ ]),
1357
+ type=
1358
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1359
+ retriever=dict(
1360
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1361
+ name='multi_news',
1362
+ path='opencompass/Longbench',
1363
+ reader_cfg=dict(
1364
+ input_columns=[
1365
+ 'context',
1366
+ ],
1367
+ output_column='answers',
1368
+ test_range='[100:125]',
1369
+ test_split='test',
1370
+ train_split='test'),
1371
+ type='opencompass.datasets.LongBenchmulti_newsDataset'),
1372
+ dict(
1373
+ abbr='LongBench_samsum_4',
1374
+ eval_cfg=dict(
1375
+ evaluator=dict(
1376
+ type='opencompass.datasets.LongBenchRougeEvaluator'),
1377
+ pred_postprocessor=dict(
1378
+ type='opencompass.datasets.samsum_postprocess'),
1379
+ pred_role='BOT'),
1380
+ infer_cfg=dict(
1381
+ inferencer=dict(
1382
+ max_out_len=128,
1383
+ type='opencompass.openicl.icl_inferencer.GenInferencer'),
1384
+ prompt_template=dict(
1385
+ template=dict(round=[
1386
+ dict(
1387
+ prompt=
1388
+ 'Summarize the dialogue into a few short sentences. The following are some examples.\n\n{context}\n\n{input}',
1389
+ role='HUMAN'),
1390
+ ]),
1391
+ type=
1392
+ 'opencompass.openicl.icl_prompt_template.PromptTemplate'),
1393
+ retriever=dict(
1394
+ type='opencompass.openicl.icl_retriever.ZeroRetriever')),
1395
+ name='samsum',
1396
+ path='opencompass/Longbench',
1397
+ reader_cfg=dict(
1398
+ input_columns=[
1399
+ 'context',
1400
+ 'input',
1401
+ ],
1402
+ output_column='answers',
1403
+ test_range='[100:125]',
1404
+ test_split='test',
1405
+ train_split='test'),
1406
+ type='opencompass.datasets.LongBenchsamsumDataset'),
1407
+ ],
1408
+ ]
1409
+ models = [
1410
+ dict(
1411
+ abbr='delta_net',
1412
+ batch_size=128,
1413
+ max_seq_len=2048,
1414
+ model_kwargs=dict(
1415
+ device_map='auto',
1416
+ torch_dtype='torch.bfloat16',
1417
+ trust_remote_code=True),
1418
+ path='/mnt/jfzn/msj/delta_net-1.3B-100B',
1419
+ run_cfg=dict(num_gpus=1),
1420
+ tokenizer_kwargs=dict(padding_side='left', truncation_side='left'),
1421
+ tokenizer_path='/mnt/jfzn/msj/delta_net-1.3B-100B',
1422
+ type='opencompass.models.HuggingFaceBaseModel'),
1423
+ ]
1424
+ work_dir = 'outputs/default/20251127_221150'