| from swift.infer_engine import InferClient, InferRequest, RequestConfig
|
|
|
|
|
def infer_multilora(engine: InferClient, infer_request: InferRequest):
    """Send one request to three served models and print each reply.

    The model names are read from ``engine.models`` and printed first.
    The same ``infer_request`` is then sent, with greedy decoding
    (``temperature=0``) capped at 512 new tokens, to ``models[1]``,
    ``models[0]`` and ``models[2]`` in that order.

    NOTE(review): judging by the printed labels, ``models[0]`` is presumably
    the base model and ``models[1]`` / ``models[2]`` the LoRA adapters;
    confirm against the server's deployment arguments.

    Args:
        engine: Client used to list the served models and run inference.
        infer_request: The request that is replayed against every model.

    Returns:
        None. The replies are only printed.
    """
    models = engine.models
    print(f'models: {models}')

    sampling = RequestConfig(max_tokens=512, temperature=0)

    def _first_reply(model_name):
        # Single-request batch: only the first choice of the first result matters.
        results = engine.infer([infer_request], sampling, model=model_name)
        return results[0].choices[0].message.content

    # Indexing stays lazy (one lookup right before each call) so that a
    # too-short model list fails at the same point it always did.
    response = _first_reply(models[1])
    print(f'lora1-response: {response}')

    response = _first_reply(models[0])
    print(f'response: {response}')

    response = _first_reply(models[2])
    print(f'lora2-response: {response}')
|
|
|
|
|
if __name__ == '__main__':
    # Client aimed at 127.0.0.1:8000.
    # NOTE(review): assumes a deployment server is already listening there
    # and serves at least three models; confirm before running.
    engine = InferClient(host='127.0.0.1', port=8000)

    # One single-turn user prompt, reused for every model.
    user_message = {'role': 'user', 'content': 'who are you?'}
    infer_request = InferRequest(messages=[user_message])

    infer_multilora(engine=engine, infer_request=infer_request)
|
|
|