# opencompass/.github/scripts/eval_regression_chat_longtext_fullbench.py
# msj19's picture
# Add files using upload-large-folder tool
# 9b40ad5 verified
from mmengine.config import read_base
from opencompass.models import TurboMindModelwithChatTemplate
with read_base():
from opencompass.configs.datasets.babilong.babilong_256k_gen import \
babiLong_256k_datasets # noqa: F401, E501
from opencompass.configs.datasets.longbench.longbench import \
longbench_datasets # noqa: F401, E501
from opencompass.configs.datasets.needlebench.needlebench_128k.needlebench_128k import \
needlebench_datasets as needlebench_128k_datasets # noqa: F401, E501
from opencompass.configs.datasets.ruler.ruler_128k_gen import \
ruler_datasets as ruler_128k_datasets # noqa: F401, E501
from opencompass.configs.models.hf_internlm.lmdeploy_internlm2_5_7b_chat_1m import \
models as lmdeploy_internlm2_5_7b_chat_1m_model # noqa: F401, E501
# Summary Groups
from opencompass.configs.summarizers.groups.babilong import \
babilong_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.longbench import \
longbench_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.groups.ruler import \
ruler_summary_groups # noqa: F401, E501
from opencompass.configs.summarizers.needlebench import \
needlebench_128k_summarizer # noqa: F401, E501
from ...rjob import eval, infer # noqa: F401, E501
# Model under test: Qwen/Qwen3-8B wrapped in TurboMindModelwithChatTemplate,
# configured with YaRN RoPE scaling for the long-context datasets above.
models = [
    dict(
        type=TurboMindModelwithChatTemplate,
        abbr='qwen-3-8b-fullbench',
        path='Qwen/Qwen3-8B',
        engine_config=dict(
            # LMDeploy's engine config names this field `hf_overrides`
            # (plural). The previous `hf_override` spelling does not match
            # that field, so the rope-scaling override presumably never
            # reached the engine -- TODO confirm against the installed
            # lmdeploy version.
            # NOTE(review): factor * original_max_position_embeddings
            # = 4.0 * 32768 = 131072, which is below session_len (264192);
            # confirm the factor is intended.
            hf_overrides=dict(
                rope_scaling=dict(
                    rope_type='yarn',
                    factor=4.0,
                    original_max_position_embeddings=32768,
                ),
            ),
            session_len=264192,
            max_batch_size=1,
        ),
        # Sampling is enabled; generation is capped at 2048 new tokens.
        gen_config=dict(do_sample=True, max_new_tokens=2048),
        max_seq_len=264192,
        max_out_len=2048,
        batch_size=1,
        run_cfg=dict(num_gpus=1),
    )
]
# Collect one representative dataset per imported group: the first element
# of every non-empty list bound to a name ending in '_datasets'.
# This must stay a comprehension: a plain module-level `for` over
# locals().items() would bind its loop variables into the very dict being
# iterated and raise "dictionary changed size during iteration".
datasets = [
    group[0] for name, group in locals().items()
    if name.endswith('_datasets') and isinstance(group, list) and group
]

# Restrict every selected dataset to the '[0:16]' test range.
for d in datasets:
    d['reader_cfg']['test_range'] = '[0:16]'