yuccaaa committed on
Commit
ff5a13c
·
verified ·
1 Parent(s): a2912b1

Upload ms-swift/examples/deploy/client/llm/chat/openai_client.py with huggingface_hub

Browse files
ms-swift/examples/deploy/client/llm/chat/openai_client.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
import os

from openai import OpenAI

# Restrict any in-process model deployment to GPU 0. Must be set before CUDA
# is initialized (i.e. before the deploy in the __main__ block below runs).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
7
+
8
+
9
def infer(client, model: str, messages):
    """Run one non-streaming chat completion and print the exchange.

    Args:
        client: An OpenAI-compatible client exposing ``chat.completions.create``.
        model: Identifier of the model to query.
        messages: Chat history; the first entry's ``content`` is echoed as
            the query line.

    Returns:
        The assistant text of the first completion choice.
    """
    completion = client.chat.completions.create(
        model=model, messages=messages, max_tokens=512, temperature=0)
    answer = completion.choices[0].message.content
    print(f"query: {messages[0]['content']}")
    print(f'response: {answer}')
    return answer
16
+
17
+
18
# streaming
def infer_stream(client, model: str, messages):
    """Stream a chat completion, printing tokens as they arrive.

    Args:
        client: An OpenAI-compatible client exposing ``chat.completions.create``.
        model: Identifier of the model to query.
        messages: Chat history to send.

    The final stream chunk typically carries ``delta.content = None`` (and
    some servers emit chunks with an empty ``choices`` list); printing those
    directly would write the literal string 'None' into the output, so empty
    or None deltas are skipped.
    """
    gen = client.chat.completions.create(model=model, messages=messages, stream=True, temperature=0)
    print(f'messages: {messages}\nresponse: ', end='')
    for chunk in gen:
        # Skip end-of-stream / bookkeeping chunks with no printable delta.
        if chunk.choices and chunk.choices[0].delta.content:
            print(chunk.choices[0].delta.content, end='', flush=True)
    print()
25
+
26
+
27
def run_client(host: str = '127.0.0.1', port: int = 8000):
    """Connect to a local OpenAI-compatible server and run a two-turn demo chat.

    Args:
        host: Server host address.
        port: Server port.
    """
    base_url = f'http://{host}:{port}/v1'
    client = OpenAI(api_key='EMPTY', base_url=base_url)
    # Use the first model the server advertises.
    deployed_model = client.models.list().data[0].id
    print(f'model: {deployed_model}')

    history = [{'role': 'user', 'content': 'Where is the capital of Zhejiang?'}]
    first_answer = infer(client, deployed_model, history)
    # Feed the answer back and ask a follow-up, streamed this time.
    history += [
        {'role': 'assistant', 'content': first_answer},
        {'role': 'user', 'content': 'What delicious food is there?'},
    ]
    infer_stream(client, deployed_model, history)
41
+
42
+
43
if __name__ == '__main__':
    from swift.llm import DeployArguments, run_deploy
    # Deploy the model in-process; run_deploy yields the port the server
    # listens on and tears the server down when the context exits.
    deploy_args = DeployArguments(model='Qwen/Qwen2.5-1.5B-Instruct', verbose=False, log_interval=-1)
    with run_deploy(deploy_args) as port:
        run_client(port=port)