# File size: 1,849 Bytes
# Revision: 7feac49
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
import os

# Set before any of the test functions perform their (function-local) swift
# imports, so the variable is already in the environment at that point.
# NOTE(review): presumably pins the run to GPU 0 — confirm for your machine.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Inference backend shared by test_eval_native, test_eval_llm and
# test_eval_url; test_eval_mllm passes its own backend ('pt') instead.
infer_backend = 'vllm'


def test_eval_native():
    """Run ``eval_main`` on the 'arc' dataset with the 'Native' eval backend.

    Uses Qwen/Qwen2.5-0.5B-Instruct with the module-level ``infer_backend``,
    passing ``eval_limit=10`` plus explicit generation and extra eval options.
    """
    from swift.llm import EvalArguments, eval_main

    generation_config = {'max_new_tokens': 128, 'temperature': 0.1}
    extra_args = {'stream': True, 'ignore_errors': True}

    args = EvalArguments(
        model='Qwen/Qwen2.5-0.5B-Instruct',
        eval_dataset='arc',
        infer_backend=infer_backend,
        eval_backend='Native',
        eval_limit=10,
        eval_generation_config=generation_config,
        extra_eval_args=extra_args,
    )
    eval_main(args)


def test_eval_llm():
    """Run ``eval_main`` on the 'arc_c' dataset with the 'OpenCompass' backend.

    Uses Qwen/Qwen2-7B-Instruct with the module-level ``infer_backend`` and
    ``eval_limit=10``.
    """
    from swift.llm import EvalArguments, eval_main

    args = EvalArguments(
        model='Qwen/Qwen2-7B-Instruct',
        eval_dataset='arc_c',
        infer_backend=infer_backend,
        eval_backend='OpenCompass',
        eval_limit=10,
    )
    eval_main(args)


def test_eval_mllm():
    """Run ``eval_main`` on 'realWorldQA' with the 'VLMEvalKit' backend.

    Uses Qwen/Qwen2.5-VL-3B-Instruct. Unlike the other tests in this file, the
    inference backend is fixed to 'pt' rather than the module-level setting.
    """
    from swift.llm import EvalArguments, eval_main

    generation_config = {'max_new_tokens': 128, 'temperature': 0.1}

    args = EvalArguments(
        model='Qwen/Qwen2.5-VL-3B-Instruct',
        eval_dataset=['realWorldQA'],
        infer_backend='pt',
        eval_backend='VLMEvalKit',
        eval_limit=10,
        eval_generation_config=generation_config,
    )
    eval_main(args)


def test_eval_url():
    """Deploy a model with ``run_deploy`` and evaluate it through its URL.

    The deployment uses Qwen/Qwen2-VL-7B-Instruct with the module-level
    ``infer_backend``; the URL yielded by ``run_deploy`` is handed to
    ``eval_main`` via ``eval_url`` together with the 'arc_c' dataset.
    """
    from swift.llm import EvalArguments, eval_main, DeployArguments, run_deploy

    deploy_args = DeployArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        infer_backend=infer_backend,
        verbose=False,
    )

    with run_deploy(deploy_args, return_url=True) as endpoint:
        # NOTE(review): the model name here omits the 'Qwen/' prefix used for
        # deployment — presumably the served model name; confirm.
        eval_args = EvalArguments(
            model='Qwen2-VL-7B-Instruct',
            eval_url=endpoint,
            eval_dataset=['arc_c'],
        )
        eval_main(eval_args)


# Manual entry point: only one scenario is enabled at a time; uncomment the
# line for the scenario you want to run when executing this file directly.
if __name__ == '__main__':
    # test_eval_llm()
    test_eval_mllm()
    # test_eval_url()
    # test_eval_native()