File size: 2,169 Bytes
9cf08e9 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 | import os
from openai import OpenAI
from modelscope.hub.file_download import model_file_download
from openai._types import NOT_GIVEN, NotGiven
from openai.types.chat import ChatCompletionMessageParam
from openai.types.create_embedding_response import CreateEmbeddingResponse
from typing import Literal
class Qwen3VLEmbedderC:
    """Thin client for an OpenAI-compatible ``/embeddings`` endpoint.

    Each connection setting is resolved in this order: explicit constructor
    argument, then environment variable, then a built-in default.

    Attributes are plain instance fields:
        model_name, base_url, api_key -- the resolved settings.
        client -- the ``OpenAI`` client built from ``base_url`` / ``api_key``.
    """

    def __init__(
        self,
        model_name: str | None = None,
        base_url: str | None = None,
        api_key: str | None = None,
    ) -> None:
        """Resolve the settings and construct the underlying ``OpenAI`` client.

        Falsy arguments (``None`` or ``""``) count as "not provided" and fall
        through to the environment variable / default.

        Args:
            model_name: Model identifier sent with requests. Falls back to
                ``EMBEDDING_MODEL_NAME``, then ``"Qwen3-4B-Instruct-2507"``.
            base_url: Base URL of the API. Falls back to
                ``EMBEDDING_API_BASE_URL``, then ``"http://localhost:8000/v1"``.
            api_key: API key handed to the client. Falls back to
                ``EMBEDDING_API_KEY``, then ``"EMPTY"``.
        """
        self.model_name = model_name or os.getenv(
            "EMBEDDING_MODEL_NAME", "Qwen3-4B-Instruct-2507"
        )
        self.base_url = base_url or os.getenv(
            "EMBEDDING_API_BASE_URL", "http://localhost:8000/v1"
        )
        self.api_key = api_key or os.getenv("EMBEDDING_API_KEY", "EMPTY")
        self.client = OpenAI(base_url=self.base_url, api_key=self.api_key)

    def create_chat_embeddings(
        self,
        messages: list[ChatCompletionMessageParam],
        model: str,
        encoding_format: Literal["base64", "float"] | NotGiven = NOT_GIVEN,
        continue_final_message: bool = False,
        add_special_tokens: bool = False,
    ) -> CreateEmbeddingResponse:
        """POST chat-style ``messages`` to the ``/embeddings`` endpoint.

        Convenience wrapper for vLLM's Chat Embeddings API, an extension of
        OpenAI's Embeddings API that accepts ``messages`` in the request body.
        The request goes through the client's generic ``post`` method because
        the body carries fields beyond the standard embeddings parameters.

        Args:
            messages: Chat messages to embed.
            model: Model identifier placed in the request body.
            encoding_format: ``"base64"`` or ``"float"``; defaults to the
                openai ``NOT_GIVEN`` sentinel.
            continue_final_message: Forwarded verbatim in the request body.
            add_special_tokens: Forwarded verbatim in the request body.

        Returns:
            The response parsed as ``CreateEmbeddingResponse``.
        """
        request_body = {
            "messages": messages,
            "model": model,
            "encoding_format": encoding_format,
            "continue_final_message": continue_final_message,
            "add_special_tokens": add_special_tokens,
        }
        return self.client.post(
            "/embeddings",
            cast_to=CreateEmbeddingResponse,
            body=request_body,
        )

    def embedding_gen(
        self, message_input: list[ChatCompletionMessageParam]
    ) -> list[float]:
        """Embed one chat conversation with the configured model.

        Calls :meth:`create_chat_embeddings` with ``self.model_name``,
        ``encoding_format="float"``, ``continue_final_message=False`` and
        ``add_special_tokens=True``.

        Args:
            message_input: Chat messages to embed.

        Returns:
            The ``embedding`` of the first item in the response's ``data``.

        Raises:
            IndexError: If the response contains no embedding data.
        """
        response = self.create_chat_embeddings(
            messages=message_input,
            model=self.model_name,
            encoding_format="float",
            continue_final_message=False,
            add_special_tokens=True,
        )
        if not response.data:
            # Same exception type that indexing an empty list would raise, so
            # existing callers keep working, but with an actionable message.
            raise IndexError(
                f"Embeddings response for model {self.model_name!r} "
                "contained no data"
            )
        return response.data[0].embedding
|