# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from contextlib import nullcontext
from typing import List, Union
from evalscope.constants import EvalBackend, EvalType
from evalscope.run import TaskConfig, run_task
from evalscope.summarizer import Summarizer
from swift.utils import append_to_jsonl, get_logger
from .. import MediaResource
from ..argument import EvalArguments
from ..base import SwiftPipeline
from ..infer import run_deploy
logger = get_logger()
class SwiftEval(SwiftPipeline):
    """Evaluation pipeline: serve the model (or reuse an external service URL)
    and run one of the supported eval backends (Native / OpenCompass /
    VLMEvalKit) against the OpenAI-compatible `chat/completions` endpoint.
    """
    args_class = EvalArguments
    args: args_class

    def run(self):
        """Run the evaluation and return the report (optionally appending it to jsonl)."""
        args = self.args
        report = {}
        # When an external service URL is supplied, skip the local deployment.
        needs_deploy = not args.eval_url
        server_ctx = run_deploy(args, return_url=True) if needs_deploy else nullcontext()
        with server_ctx as deployed_url:
            service_url = args.eval_url or deployed_url
            endpoint = f"{service_url.rstrip('/')}/chat/completions"
            cfg = self.get_task_cfg(args.eval_dataset, args.eval_backend, endpoint)
            report[args.eval_backend] = self.get_task_result(cfg)
        # Attach run metadata alongside the per-backend results.
        report.update({
            'time': args.time,
            'model': args.model,
            'adapters': args.adapters,
            'result_path': args.result_path,
            'eval_output_dir': args.eval_output_dir,
            'eval_limit': args.eval_limit
        })
        if args.result_jsonl:
            append_to_jsonl(args.result_jsonl, report)
            logger.info(f'The eval result have been saved to result_jsonl: `{args.result_jsonl}`.')
        return report

    def get_task_result(self, task_cfg: TaskConfig):
        """Execute ``task_cfg`` and normalize the backend-specific summary.

        Returns a ``{dataset: {metric: value}}`` mapping for the OpenCompass
        and VLMEvalKit backends; any other backend returns the raw reports.
        """
        run_task(task_cfg=task_cfg)
        reports = Summarizer.get_report_from_cfg(task_cfg=task_cfg)
        backend = task_cfg.eval_backend
        if backend == EvalBackend.OPEN_COMPASS:
            model_key = self.args.model_suffix
            # A '-' entry marks a dataset that produced no score for this model.
            return {
                report['dataset']: {report['metric']: report[model_key]}
                for report in reports if report[model_key] != '-'
            }
        if backend == EvalBackend.VLM_EVAL_KIT:
            result = {}
            for report in reports:
                # The single key presumably looks like `<prefix>_<dataset>_<metric>`.
                pieces = next(iter(report)).rsplit('_', 2)
                if len(pieces) == 3:
                    dataset, metric = pieces[1], pieces[2]
                else:
                    dataset, metric = '-', '-'
                result[dataset] = {metric: next(iter(report.values()))}
            return result
        return reports

    def get_task_cfg(self, dataset: List[str], eval_backend: str, url: str):
        """Build the backend-specific TaskConfig for ``dataset`` served at ``url``."""
        assert eval_backend in {EvalBackend.NATIVE, EvalBackend.OPEN_COMPASS, EvalBackend.VLM_EVAL_KIT}
        if eval_backend == EvalBackend.VLM_EVAL_KIT:
            return self.get_vlmeval_task_cfg(dataset, url)
        if eval_backend != EvalBackend.OPEN_COMPASS:
            return self.get_native_task_cfg(dataset, url)
        # OpenCompass expects its datasets under a `data` folder in the cwd.
        if self.args.local_dataset:
            if not os.path.exists('data'):
                local_dir = MediaResource.download(
                    'https://modelscope.cn/datasets/'
                    'opencompass/OpenCompassDataComplete/'
                    'resolve/master/OpenCompassData-complete-20240207.zip', 'OpenCompassData')
                os.symlink(os.path.join(local_dir, 'data'), 'data')
            elif not os.path.exists(os.path.join('data', 'CMB')):
                # A pre-existing `data` dir that we did not create would be clobbered.
                raise RuntimeError('Opencompass need a `data` folder in your work dir('
                                   'which will be created automatically by swift eval), '
                                   'but a local path named `data` already exists, '
                                   'please consider moving the dir to another location.')
        return self.get_opencompass_task_cfg(dataset, url)

    def get_native_task_cfg(self, dataset: List[str], url: str):
        """TaskConfig for the native evalscope backend (service mode)."""
        args = self.args
        return TaskConfig(
            model=args.model_suffix,
            eval_type=EvalType.SERVICE,
            api_url=url,
            api_key=args.api_key or 'EMPTY',
            datasets=dataset,
            work_dir=os.path.join(args.eval_output_dir, 'native'),
            limit=args.eval_limit,
            eval_batch_size=args.eval_num_proc,
            dataset_args=args.dataset_args,
            generation_config=args.eval_generation_config,
            **args.extra_eval_args)

    def get_opencompass_task_cfg(self, dataset: List[str], url: str):
        """TaskConfig for the OpenCompass backend."""
        args = self.args
        work_dir = os.path.join(args.eval_output_dir, 'opencompass')
        model_cfg = {
            'path': args.model_suffix,
            'openai_api_base': url,
            'key': args.api_key or 'EMPTY',
            'is_chat': args.use_chat_template,
        }
        return TaskConfig(
            eval_backend=EvalBackend.OPEN_COMPASS,
            eval_config={
                'datasets': dataset,
                'batch_size': args.eval_num_proc,
                'work_dir': work_dir,
                'models': [model_cfg],
                'limit': args.eval_limit,
            },
            work_dir=work_dir)

    def get_vlmeval_task_cfg(self, dataset: List[str], url: str):
        """TaskConfig for the VLMEvalKit backend."""
        args = self.args
        work_dir = os.path.join(args.eval_output_dir, 'vlmeval')
        model_cfg = {
            'type': args.model_suffix,
            'name': 'CustomAPIModel',
            'api_base': url,
            'key': args.api_key or 'EMPTY',
            **args.eval_generation_config,
        }
        return TaskConfig(
            eval_backend=EvalBackend.VLM_EVAL_KIT,
            eval_config={
                'data': dataset,
                'model': [model_cfg],
                'nproc': args.eval_num_proc,
                'limit': args.eval_limit,
            },
            work_dir=work_dir)
def eval_main(args: Union[List[str], EvalArguments, None] = None):
    """CLI/programmatic entry point: build the eval pipeline and run it."""
    pipeline = SwiftEval(args)
    return pipeline.main()