import os os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3' def _infer_model(pt_engine, system=None, messages=None, audios=None): seed_everything(42) request_config = RequestConfig(max_tokens=128, temperature=0) if messages is None: messages = [] if system is not None: messages += [{'role': 'system', 'content': system}] messages += [{'role': 'user', 'content': '你好'}] resp = pt_engine.infer([{'messages': messages}], request_config=request_config) response = resp[0].choices[0].message.content messages += [{'role': 'assistant', 'content': response}] messages += [{'role': 'user', 'content': '