|
|
|
|
|
import os |
|
|
from typing import Literal |
|
|
|
|
|
from openai import OpenAI |
|
|
|
|
|
# Restrict the deployed server to GPU 0; must be set before any CUDA-using
# library initializes the device (hence done at import time).
os.environ['CUDA_VISIBLE_DEVICES'] = '0'
|
|
|
|
|
|
|
|
def infer(client, model: str, messages):
    """Run one non-streaming chat completion and return the reply text.

    Echoes the first message's content as the query and the model's reply
    to stdout, then returns the reply string.
    """
    completion = client.chat.completions.create(
        model=model, messages=messages, max_tokens=512, temperature=0)
    # The first message is taken as the user query for display purposes.
    query = messages[0]['content']
    answer = completion.choices[0].message.content
    print(f'query: {query}')
    print(f'response: {answer}')
    return answer
|
|
|
|
|
|
|
|
|
|
|
def infer_stream(client, model: str, messages):
    """Stream a chat completion, echoing tokens to stdout as they arrive.

    Prints the request messages, then each content delta, and finally
    returns the full concatenated response text.

    Fixes over the previous version:
    - streaming chunks may carry ``delta.content is None`` (e.g. the final
      chunk); these were printed as the literal string 'None' — now skipped.
    - chunks with an empty ``choices`` list (emitted by some servers) would
      have raised IndexError — now skipped.
    - the assembled response is returned (previously ``None``), matching
      the sibling ``infer`` helper; existing callers ignored the return.
    """
    gen = client.chat.completions.create(model=model, messages=messages, stream=True, temperature=0)
    print(f'messages: {messages}\nresponse: ', end='')
    parts = []
    for chunk in gen:
        if not chunk.choices:
            continue
        content = chunk.choices[0].delta.content
        if content:  # skip None/empty deltas instead of printing 'None'
            print(content, end='', flush=True)
            parts.append(content)
    print()
    return ''.join(parts)
|
|
|
|
|
|
|
|
def get_message(mm_type: Literal['text', 'image', 'video', 'audio']):
    """Build a sample user message for the given modality.

    Returns an OpenAI-style message dict. For 'text' the content is a plain
    string; for 'image'/'video'/'audio' it is a list with a media part
    (URL to a demo asset) followed by a text question about it.

    Raises:
        ValueError: if ``mm_type`` is not one of the supported modalities.
            (Previously an unknown value crashed with UnboundLocalError at
            the ``return`` statement.)
    """
    if mm_type == 'text':
        return {'role': 'user', 'content': 'who are you?'}
    if mm_type == 'image':
        return {
            'role': 'user',
            'content': [{
                'type': 'image',
                'image': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png'
            }, {
                'type': 'text',
                'text': 'How many sheep are there in the picture?'
            }]
        }
    if mm_type == 'video':
        return {
            'role': 'user',
            'content': [{
                'type': 'video',
                'video': 'https://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/baby.mp4'
            }, {
                'type': 'text',
                'text': 'Describe this video.'
            }]
        }
    if mm_type == 'audio':
        return {
            'role': 'user',
            'content': [{
                'type': 'audio',
                'audio': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/weather.wav'
            }, {
                'type': 'text',
                'text': 'What does this audio say?'
            }]
        }
    raise ValueError(f'Unsupported mm_type: {mm_type!r}')
|
|
|
|
|
|
|
|
def run_client(host: str = '127.0.0.1', port: int = 8000):
    """Exercise the deployed server: a text round-trip, then a streamed video query.

    Connects to the OpenAI-compatible endpoint at ``http://host:port/v1``,
    picks the first served model, asks a plain-text question, and follows up
    in the same conversation with a multimodal (video) message streamed back.
    """
    client = OpenAI(api_key='EMPTY', base_url=f'http://{host}:{port}/v1')
    # Use whichever model the server is serving (first entry in the list).
    model = client.models.list().data[0].id
    print(f'model: {model}')

    query = 'who are you?'
    messages = [{'role': 'user', 'content': query}]
    response = infer(client, model, messages)
    # Continue the conversation: record the assistant turn, then add a
    # video question and stream the answer.
    messages += [
        {'role': 'assistant', 'content': response},
        get_message(mm_type='video'),
    ]
    infer_stream(client, model, messages)
|
|
|
|
|
|
|
|
if __name__ == '__main__':
    from swift.llm import DeployArguments, run_deploy

    # Spin up a local deployment of the model, run the client demo against
    # it, and let the context manager tear the server down on exit.
    deploy_args = DeployArguments(model='Qwen/Qwen2.5-VL-3B-Instruct', verbose=False, log_interval=-1)
    with run_deploy(deploy_args) as port:
        run_client(port=port)
|
|
|