# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from contextlib import nullcontext
from typing import List, Union

from evalscope.constants import EvalBackend, EvalType
from evalscope.run import TaskConfig, run_task
from evalscope.summarizer import Summarizer

from swift.utils import append_to_jsonl, get_logger

from .. import MediaResource
from ..argument import EvalArguments
from ..base import SwiftPipeline
from ..infer import run_deploy

logger = get_logger()


class SwiftEval(SwiftPipeline):
    args_class = EvalArguments
    args: args_class

    def run(self):
        args = self.args
        eval_report = {}
        # If no eval_url is given, deploy the model locally and evaluate against that server.
        deploy_context = nullcontext() if args.eval_url else run_deploy(args, return_url=True)
        with deploy_context as base_url:
            base_url = args.eval_url or base_url
            url = f"{base_url.rstrip('/')}/chat/completions"
            task_cfg = self.get_task_cfg(args.eval_dataset, args.eval_backend, url)
            result = self.get_task_result(task_cfg)
            eval_report[args.eval_backend] = result
        eval_report.update({
            'time': args.time,
            'model': args.model,
            'adapters': args.adapters,
            'result_path': args.result_path,
            'eval_output_dir': args.eval_output_dir,
            'eval_limit': args.eval_limit
        })
        if args.result_jsonl:
            append_to_jsonl(args.result_jsonl, eval_report)
            logger.info(f'The eval result has been saved to result_jsonl: `{args.result_jsonl}`.')
        return eval_report
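
    # The returned report (a sketch; the per-backend payload depends on what the
    # backend's Summarizer produces) looks roughly like:
    #   {'Native': {...}, 'time': '...', 'model': '...', 'adapters': [...],
    #    'result_path': '...', 'eval_output_dir': '...', 'eval_limit': 10}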

    def get_task_result(self, task_cfg: TaskConfig):
        run_task(task_cfg=task_cfg)
        reports = Summarizer.get_report_from_cfg(task_cfg=task_cfg)
        result = {}
        if task_cfg.eval_backend == EvalBackend.OPEN_COMPASS:
            # OpenCompass reports one row per dataset; '-' marks a dataset that was not evaluated.
            for report in reports:
                if report[self.args.model_suffix] != '-':
                    result[report['dataset']] = {report['metric']: report[self.args.model_suffix]}
        elif task_cfg.eval_backend == EvalBackend.VLM_EVAL_KIT:
            # VLMEvalKit report keys are expected to end in `_<dataset>_<metric>`;
            # recover those two parts with a right split.
            for report in reports:
                split_key = next(iter(report)).rsplit('_', 2)
                if len(split_key) == 3:
                    _, dataset, metric = split_key
                else:
                    dataset, metric = '-', '-'
                result[dataset] = {metric: list(report.values())[0]}
        else:
            result = reports
        return result
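
    # For OpenCompass / VLMEvalKit the normalized result is a nested mapping of
    # dataset -> {metric: value}, e.g. (illustrative values, not real output):
    #   {'MMBench_DEV_EN': {'acc': 0.71}, 'ceval': {'score': 0.55}}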

    def get_task_cfg(self, dataset: List[str], eval_backend: str, url: str):
        assert eval_backend in {EvalBackend.NATIVE, EvalBackend.OPEN_COMPASS, EvalBackend.VLM_EVAL_KIT}
        if eval_backend == EvalBackend.OPEN_COMPASS:
            if self.args.local_dataset:
                if os.path.exists('data'):
                    if not os.path.exists(os.path.join('data', 'CMB')):
                        raise RuntimeError('OpenCompass needs a `data` folder in your working dir '
                                           '(which will be created automatically by swift eval), '
                                           'but a local path named `data` already exists; '
                                           'please consider moving the dir to another location.')
                else:
                    local_dir = MediaResource.download(
                        'https://modelscope.cn/datasets/'
                        'opencompass/OpenCompassDataComplete/'
                        'resolve/master/OpenCompassData-complete-20240207.zip', 'OpenCompassData')
                    os.symlink(os.path.join(local_dir, 'data'), 'data')
            task_cfg = self.get_opencompass_task_cfg(dataset, url)
        elif eval_backend == EvalBackend.VLM_EVAL_KIT:
            task_cfg = self.get_vlmeval_task_cfg(dataset, url)
        else:
            task_cfg = self.get_native_task_cfg(dataset, url)
        return task_cfg

    def get_native_task_cfg(self, dataset: List[str], url: str):
        args = self.args
        work_dir = os.path.join(args.eval_output_dir, 'native')
        return TaskConfig(
            model=args.model_suffix,
            eval_type=EvalType.SERVICE,
            api_url=url,
            api_key=args.api_key or 'EMPTY',
            datasets=dataset,
            work_dir=work_dir,
            limit=args.eval_limit,
            eval_batch_size=args.eval_num_proc,
            dataset_args=args.dataset_args,
            generation_config=args.eval_generation_config,
            **args.extra_eval_args)

    def get_opencompass_task_cfg(self, dataset: List[str], url: str):
        args = self.args
        work_dir = os.path.join(args.eval_output_dir, 'opencompass')
        return TaskConfig(
            eval_backend=EvalBackend.OPEN_COMPASS,
            eval_config={
                'datasets': dataset,
                'batch_size': args.eval_num_proc,
                'work_dir': work_dir,
                'models': [{
                    'path': args.model_suffix,
                    'openai_api_base': url,
                    'key': args.api_key or 'EMPTY',
                    'is_chat': args.use_chat_template
                }],
                'limit': args.eval_limit
            },
            work_dir=work_dir)

    def get_vlmeval_task_cfg(self, dataset: List[str], url: str):
        args = self.args
        work_dir = os.path.join(args.eval_output_dir, 'vlmeval')
        return TaskConfig(
            eval_backend=EvalBackend.VLM_EVAL_KIT,
            eval_config={
                'data': dataset,
                'model': [{
                    'type': args.model_suffix,
                    'name': 'CustomAPIModel',
                    'api_base': url,
                    'key': args.api_key or 'EMPTY',
                    **args.eval_generation_config
                }],
                'nproc': args.eval_num_proc,
                'limit': args.eval_limit
            },
            work_dir=work_dir)


def eval_main(args: Union[List[str], EvalArguments, None] = None):
    return SwiftEval(args).main()
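
# Example usage (a minimal sketch; the import path, model id and dataset name are
# illustrative assumptions, not values guaranteed by this module):
#
#     from swift.llm import EvalArguments, eval_main  # assumed public export
#
#     report = eval_main(EvalArguments(
#         model='Qwen/Qwen2-7B-Instruct',  # hypothetical model id
#         eval_backend='Native',           # i.e. EvalBackend.NATIVE
#         eval_dataset=['gsm8k'],
#         eval_limit=10,
#     ))
#     print(report['Native'])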