import os
from contextlib import nullcontext
from typing import List, Union

from evalscope.constants import EvalBackend, EvalType
from evalscope.run import TaskConfig, run_task
from evalscope.summarizer import Summarizer

from swift.utils import append_to_jsonl, get_logger
from .. import MediaResource
from ..argument import EvalArguments
from ..base import SwiftPipeline
from ..infer import run_deploy

logger = get_logger()


class SwiftEval(SwiftPipeline):
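    """Evaluation pipeline: serves the model (or reuses an existing `eval_url`)
    and dispatches the run to an EvalScope backend (Native, OpenCompass, or
    VLMEvalKit)."""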

    args_class = EvalArguments
    args: args_class

    def run(self):
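        """Run the evaluation and return the aggregated report dict."""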
        args = self.args
        eval_report = {}
        # Reuse an external service when `eval_url` is given; otherwise deploy
        # the model locally for the duration of the evaluation.
        deploy_context = nullcontext() if args.eval_url else run_deploy(args, return_url=True)
        with deploy_context as base_url:
            base_url = args.eval_url or base_url
            url = f"{base_url.rstrip('/')}/chat/completions"

            task_cfg = self.get_task_cfg(args.eval_dataset, args.eval_backend, url)
            result = self.get_task_result(task_cfg)
            eval_report[args.eval_backend] = result

        eval_report.update({
            'time': args.time,
            'model': args.model,
            'adapters': args.adapters,
            'result_path': args.result_path,
            'eval_output_dir': args.eval_output_dir,
            'eval_limit': args.eval_limit
        })

        if args.result_jsonl:
            append_to_jsonl(args.result_jsonl, eval_report)
            logger.info(f'The eval result has been saved to result_jsonl: `{args.result_jsonl}`.')
        return eval_report

    def get_task_result(self, task_cfg: TaskConfig):
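        """Run the task and normalize the backend-specific reports into
        `{dataset: {metric: value}}`."""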
        run_task(task_cfg=task_cfg)
        reports = Summarizer.get_report_from_cfg(task_cfg=task_cfg)
        result = {}
        if task_cfg.eval_backend == EvalBackend.OPEN_COMPASS:
            for report in reports:
                if report[self.args.model_suffix] != '-':
                    result[report['dataset']] = {report['metric']: report[self.args.model_suffix]}
        elif task_cfg.eval_backend == EvalBackend.VLM_EVAL_KIT:
            for report in reports:
                # Report keys look like `<model>_<dataset>_<metric>`; split from the right.
                split_key = next(iter(report)).rsplit('_', 2)
                if len(split_key) == 3:
                    _, dataset, metric = split_key
                else:
                    dataset, metric = '-', '-'
                result[dataset] = {metric: list(report.values())[0]}
        else:
            result = reports
        return result

    def get_task_cfg(self, dataset: List[str], eval_backend: str, url: str):
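        """Build the TaskConfig for the selected backend, preparing local
        OpenCompass data when `local_dataset` is set."""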
        assert eval_backend in {EvalBackend.NATIVE, EvalBackend.OPEN_COMPASS, EvalBackend.VLM_EVAL_KIT}
        if eval_backend == EvalBackend.OPEN_COMPASS:
            if self.args.local_dataset:
                if os.path.exists('data'):
                    if not os.path.exists(os.path.join('data', 'CMB')):
                        raise RuntimeError('OpenCompass needs a `data` folder in your working directory ('
                                           'which will be created automatically by `swift eval`), '
                                           'but a local path named `data` already exists; '
                                           'please consider moving that directory to another location.')
                else:
                    local_dir = MediaResource.download(
                        'https://modelscope.cn/datasets/'
                        'opencompass/OpenCompassDataComplete/'
                        'resolve/master/OpenCompassData-complete-20240207.zip', 'OpenCompassData')
                    # Expose the downloaded dataset as `./data`, where OpenCompass expects it.
                    os.symlink(os.path.join(local_dir, 'data'), 'data')

            task_cfg = self.get_opencompass_task_cfg(dataset, url)
        elif eval_backend == EvalBackend.VLM_EVAL_KIT:
            task_cfg = self.get_vlmeval_task_cfg(dataset, url)
        else:
            task_cfg = self.get_native_task_cfg(dataset, url)
        return task_cfg

    def get_native_task_cfg(self, dataset: List[str], url: str):
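        """TaskConfig for the native EvalScope backend, evaluating in service mode."""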
        args = self.args
        work_dir = os.path.join(args.eval_output_dir, 'native')
        return TaskConfig(
            model=args.model_suffix,
            eval_type=EvalType.SERVICE,
            api_url=url,
            api_key=args.api_key or 'EMPTY',
            datasets=dataset,
            work_dir=work_dir,
            limit=args.eval_limit,
            eval_batch_size=args.eval_num_proc,
            dataset_args=args.dataset_args,
            generation_config=args.eval_generation_config,
            **args.extra_eval_args)

    def get_opencompass_task_cfg(self, dataset: List[str], url: str):
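        """TaskConfig for the OpenCompass backend."""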
        args = self.args
        work_dir = os.path.join(args.eval_output_dir, 'opencompass')
        return TaskConfig(
            eval_backend=EvalBackend.OPEN_COMPASS,
            eval_config={
                'datasets': dataset,
                'batch_size': args.eval_num_proc,
                'work_dir': work_dir,
                'models': [{
                    'path': args.model_suffix,
                    'openai_api_base': url,
                    'key': args.api_key or 'EMPTY',
                    'is_chat': args.use_chat_template
                }],
                'limit': args.eval_limit
            },
            work_dir=work_dir)

    def get_vlmeval_task_cfg(self, dataset: List[str], url: str):
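        """TaskConfig for the VLMEvalKit (multimodal) backend."""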
        args = self.args
        work_dir = os.path.join(args.eval_output_dir, 'vlmeval')
        return TaskConfig(
            eval_backend=EvalBackend.VLM_EVAL_KIT,
            eval_config={
                'data': dataset,
                'model': [{
                    'type': args.model_suffix,
                    'name': 'CustomAPIModel',
                    'api_base': url,
                    'key': args.api_key or 'EMPTY',
                    **args.eval_generation_config
                }],
                'nproc': args.eval_num_proc,
                'limit': args.eval_limit
            },
            work_dir=work_dir)


def eval_main(args: Union[List[str], EvalArguments, None] = None):
    return SwiftEval(args).main()
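

# A minimal usage sketch; the model name and dataset below are illustrative,
# not prescribed by this module:
#
#     from swift.llm import EvalArguments, eval_main
#     eval_main(EvalArguments(
#         model='Qwen/Qwen2-7B-Instruct', eval_backend='Native', eval_dataset=['gsm8k']))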