yuccaaa commited on
Commit
57d2ab6
·
verified ·
1 Parent(s): d0167cc

Upload ms-swift/examples/deploy/lora/client.py with huggingface_hub

Browse files
ms-swift/examples/deploy/lora/client.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from swift.llm import InferClient, InferRequest, RequestConfig
2
+
3
+
4
def infer_multilora(engine: InferClient, infer_request: InferRequest):
    """Send the same request through the base model and two dynamically served LoRA adapters.

    Queries the deploy server for its model list, then runs the request against
    model index 1 (lora1), index 0 (the original base model), and index 2 (lora2),
    printing each response. Assumes the server exposes at least three models in
    that order — TODO confirm against the deploy configuration.
    """
    # Dynamic LoRA: the server advertises the base model plus each loaded adapter.
    available = engine.models
    print(f'models: {available}')
    cfg = RequestConfig(max_tokens=512, temperature=0)

    # (model index, print label) — same call order and output as three inline stanzas.
    for idx, label in ((1, 'lora1-response'), (0, 'response'), (2, 'lora2-response')):
        result = engine.infer([infer_request], cfg, model=available[idx])
        answer = result[0].choices[0].message.content
        print(f'{label}: {answer}')
22
+
23
+
24
if __name__ == '__main__':
    # Connect to a locally running `swift deploy` server and exercise the
    # multi-LoRA demo with a single chat-style request.
    client = InferClient(host='127.0.0.1', port=8000)
    request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}])
    infer_multilora(client, request)