File size: 3,078 Bytes
cb2428f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# Copyright (c) Alibaba, Inc. and its affiliates.
import os
from typing import Literal
from openai import OpenAI
# Pin the demo to GPU 0; must be set before any CUDA context is created
# (here: before the deployment launched in the __main__ block below).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
def infer(client, model: str, messages):
    """Run one non-streaming chat completion, print query/response, return the text.

    Args:
        client: An OpenAI-compatible client instance.
        model: Model id to query.
        messages: Chat history in OpenAI message-dict format.

    Returns:
        The assistant's reply text.
    """
    completion = client.chat.completions.create(
        model=model, messages=messages, max_tokens=512, temperature=0)
    answer = completion.choices[0].message.content
    print(f'query: {messages[0]["content"]}')
    print(f'response: {answer}')
    return answer
# streaming
def infer_stream(client, model: str, messages):
    """Run one streaming chat completion and print deltas as they arrive.

    Args:
        client: An OpenAI-compatible client instance.
        model: Model id to query.
        messages: Chat history in OpenAI message-dict format.
    """
    stream = client.chat.completions.create(
        model=model, messages=messages, stream=True, temperature=0)
    print(f'messages: {messages}\nresponse: ', end='')
    for event in stream:
        # Each streamed chunk carries an incremental text delta.
        print(event.choices[0].delta.content, end='', flush=True)
    print()
def get_message(mm_type: Literal['text', 'image', 'video', 'audio']):
    """Build a single user chat message for the requested modality.

    Args:
        mm_type: Which kind of content the message carries: plain text,
            an image URL, a video URL, or an audio URL.

    Returns:
        A message dict in OpenAI multimodal chat format.

    Raises:
        ValueError: If ``mm_type`` is not one of the supported modalities.
            (Previously an unsupported value fell through all branches and
            raised a confusing ``UnboundLocalError`` at the return.)
    """
    if mm_type == 'text':
        message = {'role': 'user', 'content': 'who are you?'}
    elif mm_type == 'image':
        message = {
            'role':
            'user',
            'content': [{
                'type': 'image',
                'image': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png'
            }, {
                'type': 'text',
                'text': 'How many sheep are there in the picture?'
            }]
        }
    elif mm_type == 'video':
        # # use base64
        # import base64
        # with open('baby.mp4', 'rb') as f:
        #     vid_base64 = base64.b64encode(f.read()).decode('utf-8')
        # video = f'data:video/mp4;base64,{vid_base64}'
        # use url
        video = 'https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/baby.mp4'
        message = {
            'role': 'user',
            'content': [{
                'type': 'video',
                'video': video
            }, {
                'type': 'text',
                'text': 'Describe this video.'
            }]
        }
    elif mm_type == 'audio':
        message = {
            'role':
            'user',
            'content': [{
                'type': 'audio',
                'audio': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/weather.wav'
            }, {
                'type': 'text',
                'text': 'What does this audio say?'
            }]
        }
    else:
        raise ValueError(f'Unsupported mm_type: {mm_type}')
    return message
def run_client(host: str = '127.0.0.1', port: int = 8000):
    """Run a short demo conversation against a deployed OpenAI-compatible server.

    Sends a plain-text query first, then follows up with a multimodal
    (video) question using the streaming API.

    Args:
        host: Server host of the deployment.
        port: Server port of the deployment.
    """
    base_url = f'http://{host}:{port}/v1'
    client = OpenAI(api_key='EMPTY', base_url=base_url)
    # Use the first (and only) model served by the deployment.
    model = client.models.list().data[0].id
    print(f'model: {model}')
    messages = [{'role': 'user', 'content': 'who are you?'}]
    answer = infer(client, model, messages)
    # Extend the history with the assistant's reply plus a video question.
    messages.extend([
        {'role': 'assistant', 'content': answer},
        get_message(mm_type='video'),
    ])
    infer_stream(client, model, messages)
if __name__ == '__main__':
    # Deferred import: swift is only needed when running this file as a script.
    from swift.llm import DeployArguments, run_deploy
    deploy_args = DeployArguments(model='Qwen/Qwen2.5-VL-3B-Instruct', verbose=False, log_interval=-1)
    # run_deploy launches the server and yields the port it is listening on;
    # the server is torn down when the context manager exits.
    with run_deploy(deploy_args) as port:
        run_client(port=port)
|