BechusRantus
/

injected_thinking

Model card Files Files and versions

injected_thinking / third_party /ms-swift /examples /deploy /embedding /client.py

BechusRantus's picture

Upload folder using huggingface_hub

7134ce7 verified about 2 months ago

history blame contribute delete

1.75 kB

	# Copyright (c) ModelScope Contributors. All rights reserved.
	import os

	from openai import OpenAI

	os.environ['CUDA_VISIBLE_DEVICES'] = '0'


	def infer(client, model: str, messages):
	# You can also use client.embeddings.create
	# But this interface does not support multi-modal medias
	resp = client.chat.completions.create(model=model, messages=messages)
	emb = resp.data[0]['embedding']
	shape = len(emb)
	sample = str(emb)
	if len(emb) > 6:
	sample = str(emb[:3])[:-1] + ', ..., ' + str(emb[-3:])[1:]
	print(f'messages: {messages}')
	print(f'Embedding(shape: [1, {shape}]): {sample}')
	return emb


	def run_client(host: str = '127.0.0.1', port: int = 8000):
	client = OpenAI(
	api_key='EMPTY',
	base_url=f'http://{host}:{port}/v1',
	)
	model = client.models.list().data[0].id
	print(f'model: {model}')

	messages = [{
	'role':
	'user',
	'content': [
	# {
	# 'type': 'image',
	# 'image': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png'
	# },
	{
	'type': 'text',
	'text': 'What is the capital of China?'
	},
	]
	}]
	infer(client, model, messages)


	if __name__ == '__main__':
	from swift import run_deploy, DeployArguments
	with run_deploy(
	DeployArguments(
	model='Qwen/Qwen3-Embedding-0.6B', # GME/GTE models or your checkpoints are also supported
	task_type='embedding',
	infer_backend='vllm',
	verbose=False,
	log_interval=-1)) as port:
	run_client(port=port)