yuccaaa committed on
Commit
20addd1
·
verified ·
1 Parent(s): 91ba179

Upload ms-swift/examples/deploy/client/mllm/openai_client.py with huggingface_hub

Browse files
ms-swift/examples/deploy/client/mllm/openai_client.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import Literal

from openai import OpenAI

# Restrict the example to GPU 0; the deployment launched from __main__
# (run_deploy) inherits this environment variable.
# NOTE(review): must be set before any CUDA-using library initializes — confirm
# no earlier import triggers CUDA initialization.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
8
+
9
+
10
def infer(client, model: str, messages):
    """Run one non-streaming chat completion and echo the exchange.

    Args:
        client: an OpenAI-compatible client exposing ``chat.completions.create``.
        model: model id to query.
        messages: OpenAI-format message list; the first entry's ``content``
            is printed as the query.

    Returns:
        The assistant's reply text.
    """
    completion = client.chat.completions.create(
        model=model, messages=messages, max_tokens=512, temperature=0)
    # The demo conversation starts with the user turn, so messages[0] holds
    # the original query text.
    query = messages[0]['content']
    answer = completion.choices[0].message.content
    print(f'query: {query}')
    print(f'response: {answer}')
    return answer
17
+
18
+
19
+ # streaming
20
# streaming
def infer_stream(client, model: str, messages):
    """Run a streaming chat completion, printing tokens as they arrive.

    Fix over the original: streamed chunks may arrive with an empty
    ``choices`` list or with ``delta.content = None`` (e.g. the initial
    role-announcement chunk and the final stop chunk); the original printed
    those literally as the string ``None``.  Such chunks are now skipped.
    As a backward-compatible extension, the accumulated response text is
    returned (the original returned ``None``, which callers ignored).

    Args:
        client: an OpenAI-compatible client exposing ``chat.completions.create``.
        model: model id to query.
        messages: OpenAI-format message list.

    Returns:
        The full response text assembled from the streamed deltas.
    """
    gen = client.chat.completions.create(model=model, messages=messages, stream=True, temperature=0)
    print(f'messages: {messages}\nresponse: ', end='')
    pieces = []
    for chunk in gen:
        # Some chunks carry no choice (keep-alive/usage frames); skip them.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta.content
        # Role-only and terminal chunks have content=None; nothing to print.
        if delta is None:
            continue
        print(delta, end='', flush=True)
        pieces.append(delta)
    print()
    return ''.join(pieces)
26
+
27
+
28
def get_message(mm_type: Literal['text', 'image', 'video', 'audio']):
    """Build a single-turn user message for the requested modality.

    Args:
        mm_type: which media type the message should carry.

    Returns:
        A ``dict`` in the OpenAI multimodal message format
        (``{'role': 'user', 'content': ...}``).

    Raises:
        ValueError: if ``mm_type`` is not one of the supported modalities.
            (The original fell through with ``message`` unbound, raising a
            confusing ``UnboundLocalError`` instead.)
    """
    if mm_type == 'text':
        return {'role': 'user', 'content': 'who are you?'}
    if mm_type == 'image':
        return {
            'role': 'user',
            'content': [{
                'type': 'image',
                'image': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png'
            }, {
                'type': 'text',
                'text': 'How many sheep are there in the picture?'
            }]
        }
    if mm_type == 'video':
        # # use base64
        # import base64
        # with open('baby.mp4', 'rb') as f:
        #     vid_base64 = base64.b64encode(f.read()).decode('utf-8')
        # video = f'data:video/mp4;base64,{vid_base64}'

        # use url
        video = 'https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/baby.mp4'
        return {
            'role': 'user',
            'content': [{
                'type': 'video',
                'video': video
            }, {
                'type': 'text',
                'text': 'Describe this video.'
            }]
        }
    if mm_type == 'audio':
        return {
            'role': 'user',
            'content': [{
                'type': 'audio',
                'audio': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/weather.wav'
            }, {
                'type': 'text',
                'text': 'What does this audio say?'
            }]
        }
    raise ValueError(f'Unsupported mm_type: {mm_type!r}')
76
+
77
+
78
def run_client(host: str = '127.0.0.1', port: int = 8000):
    """Connect to a deployed OpenAI-compatible server and run a short demo.

    The demo is a two-turn conversation: a plain-text question answered
    non-streaming, then a video question answered with streaming output.

    Args:
        host: server host of the deployment.
        port: server port of the deployment.
    """
    base_url = f'http://{host}:{port}/v1'
    client = OpenAI(
        api_key='EMPTY',
        base_url=base_url,
    )
    # The server exposes exactly the deployed model; take the first entry.
    model = client.models.list().data[0].id
    print(f'model: {model}')

    query = 'who are you?'
    messages = [{'role': 'user', 'content': query}]
    # Turn 1: text-only, non-streaming.
    response = infer(client, model, messages)
    # Turn 2: carry the assistant reply forward, then ask about a video,
    # streamed token-by-token.
    messages += [
        {'role': 'assistant', 'content': response},
        get_message(mm_type='video'),
    ]
    infer_stream(client, model, messages)
92
+
93
+
94
if __name__ == '__main__':
    # Deferred import: `swift` is only required when running this example as
    # a script, not when importing its helper functions from elsewhere.
    from swift.llm import run_deploy, DeployArguments
    # run_deploy launches a local deployment of the model and yields the port
    # it serves on; the context manager tears the server down on exit.
    with run_deploy(DeployArguments(model='Qwen/Qwen2.5-VL-3B-Instruct', verbose=False, log_interval=-1)) as port:
        run_client(port=port)