import os import torch os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3' os.environ['SWIFT_DEBUG'] = '1' def _infer_model(pt_engine, system=None, messages=None, videos=None, max_tokens=128): seed_everything(42) request_config = RequestConfig(max_tokens=max_tokens, temperature=0) if messages is None: messages = [] if not messages: if system is not None: messages += [{'role': 'system', 'content': system}] messages += [{'role': 'user', 'content': '你好'}] resp = pt_engine.infer([{'messages': messages}], request_config=request_config) response = resp[0].choices[0].message.content messages += [{'role': 'assistant', 'content': response}, {'role': 'user', 'content': '