|
|
|
|
|
import os |
|
|
from typing import Literal |
|
|
|
|
|
from openai import OpenAI |
|
|
|
|
|
# Restrict the deployed server to GPU 0; must be set before any CUDA-using
# library initializes the device (hence done at import time).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
|
|
|
|
|
|
|
|
def infer(client, model: str, messages):
    """Run one non-streaming chat completion and return the reply text.

    Echoes the first message's content as the query and the model's reply
    to stdout, then returns the reply string.
    """
    completion = client.chat.completions.create(
        model=model, messages=messages, max_tokens=512, temperature=0)
    # The first message is taken as the user query for display purposes.
    query = messages[0]['content']
    answer = completion.choices[0].message.content
    print(f'query: {query}')
    print(f'response: {answer}')
    return answer
|
|
|
|
|
|
|
|
|
|
|
def infer_stream(client, model: str, messages):
    """Stream a chat completion, echoing tokens to stdout as they arrive.

    Prints the request messages, then each content delta, and finally
    returns the full concatenated response text.

    Fixes over the previous version:
    - streaming chunks may carry ``delta.content is None`` (e.g. the final
      chunk); these were printed as the literal string 'None' — now skipped.
    - chunks with an empty ``choices`` list (emitted by some servers) would
      have raised IndexError — now skipped.
    - the assembled response is returned (previously ``None``), matching
      the sibling ``infer`` helper; existing callers ignored the return.
    """
    gen = client.chat.completions.create(model=model, messages=messages, stream=True, temperature=0)
    print(f'messages: {messages}\nresponse: ', end='')
    parts = []
    for chunk in gen:
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:  # skip None/empty deltas instead of printing 'None'
            print(content, end='', flush=True)
            parts.append(content)
    print()
    return ''.join(parts)
|
|
|
|
|
|
|
|
def get_message(mm_type: Literal['text', 'image', 'video', 'audio']):
    """Build a sample user message for the given modality.

    Returns an OpenAI-style message dict. For 'text' the content is a plain
    string; for 'image'/'video'/'audio' it is a list with a media part
    (URL to a demo asset) followed by a text question about it.

    Raises:
        ValueError: if ``mm_type`` is not one of the supported modalities.
            (Previously an unknown value crashed with UnboundLocalError at
            the ``return`` statement.)
    """
    if mm_type == 'text':
        return {'role': 'user', 'content': 'who are you?'}
    if mm_type == 'image':
        return {
            'role': 'user',
            'content': [{
                'type': 'image',
                'image': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png'
            }, {
                'type': 'text',
                'text': 'How many sheep are there in the picture?'
            }]
        }
    if mm_type == 'video':
        return {
            'role': 'user',
            'content': [{
                'type': 'video',
                'video': 'https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/baby.mp4'
            }, {
                'type': 'text',
                'text': 'Describe this video.'
            }]
        }
    if mm_type == 'audio':
        return {
            'role': 'user',
            'content': [{
                'type': 'audio',
                'audio': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/weather.wav'
            }, {
                'type': 'text',
                'text': 'What does this audio say?'
            }]
        }
    raise ValueError(f'Unsupported mm_type: {mm_type!r}')
|
|
|
|
|
|
|
|
def run_client(host: str = '127.0.0.1', port: int = 8000):
    """Exercise the deployed server: a text round-trip, then a streamed video query.

    Connects to the OpenAI-compatible endpoint at ``http://host:port/v1``,
    picks the first served model, asks a plain-text question, and follows up
    in the same conversation with a multimodal (video) message streamed back.
    """
    client = OpenAI(api_key='EMPTY', base_url=f'http://{host}:{port}/v1')
    # Use whichever model the server is serving (first entry in the list).
    model = client.models.list().data[0].id
    print(f'model: {model}')

    query = 'who are you?'
    messages = [{'role': 'user', 'content': query}]
    response = infer(client, model, messages)
    # Continue the conversation: record the assistant turn, then add a
    # video question and stream the answer.
    messages += [
        {'role': 'assistant', 'content': response},
        get_message(mm_type='video'),
    ]
    infer_stream(client, model, messages)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    from swift.llm import DeployArguments, run_deploy

    # Spin up a local deployment of the model, run the client demo against
    # it, and let the context manager tear the server down on exit.
    deploy_args = DeployArguments(model='Qwen/Qwen2.5-VL-3B-Instruct', verbose=False, log_interval=-1)
    with run_deploy(deploy_args) as port:
        run_client(port=port)
|
|
|