# Copyright (c) ModelScope Contributors. All rights reserved.
import os

from openai import OpenAI

os.environ['CUDA_VISIBLE_DEVICES'] = '0'


def infer(client, model: str, messages):
    # You can also use client.embeddings.create
    # But this interface does not support multi-modal medias
    resp = client.chat.completions.create(model=model, messages=messages)
    emb = resp.data[0]['embedding']
    shape = len(emb)
    sample = str(emb)
    if len(emb) > 6:
        sample = str(emb[:3])[:-1] + ', ..., ' + str(emb[-3:])[1:]
    print(f'messages: {messages}')
    print(f'Embedding(shape: [1, {shape}]): {sample}')
    return emb


def run_client(host: str = '127.0.0.1', port: int = 8000):
    client = OpenAI(
        api_key='EMPTY',
        base_url=f'http://{host}:{port}/v1',
    )
    model = client.models.list().data[0].id
    print(f'model: {model}')

    messages = [{
        'role':
        'user',
        'content': [
            # {
            #   'type': 'image',
            #   'image': 'http://modelscope-open.oss-cn-hangzhou.aliyuncs.com/images/animal.png'
            # },
            {
                'type': 'text',
                'text': 'What is the capital of China?'
            },
        ]
    }]
    infer(client, model, messages)


if __name__ == '__main__':
    from swift import run_deploy, DeployArguments
    with run_deploy(
            DeployArguments(
                model='Qwen/Qwen3-Embedding-0.6B',  # GME/GTE models or your checkpoints are also supported
                task_type='embedding',
                infer_backend='vllm',
                verbose=False,
                log_interval=-1)) as port:
        run_client(port=port)