import os
from typing import Literal

from modelscope.hub.file_download import model_file_download
from openai import OpenAI
from openai._types import NOT_GIVEN, NotGiven
from openai.types.chat import ChatCompletionMessageParam
from openai.types.create_embedding_response import CreateEmbeddingResponse


class Qwen3VLEmbedderC:
    """Thin client for vLLM's OpenAI-compatible Embeddings API.

    Wraps an ``openai.OpenAI`` client pointed at a vLLM server and exposes the
    Chat Embeddings extension (embeddings computed from a list of chat
    messages) plus a convenience helper that returns the raw embedding vector.
    """

    def __init__(
        self,
        model_name: str | None = None,
        base_url: str | None = None,
        api_key: str | None = None,
    ):
        """Initialize the client.

        Each argument, when omitted, falls back to an environment variable and
        then to a hard-coded default:

        Args:
            model_name: Embedding model name. Falls back to
                ``EMBEDDING_MODEL_NAME``, then ``"Qwen3-4B-Instruct-2507"``.
            base_url: OpenAI-compatible API base URL. Falls back to
                ``EMBEDDING_API_BASE_URL``, then ``"http://localhost:8000/v1"``.
            api_key: API key. Falls back to ``EMBEDDING_API_KEY``, then
                ``"EMPTY"`` (vLLM accepts any non-empty key by default).
        """
        self.model_name = model_name or os.getenv("EMBEDDING_MODEL_NAME", "Qwen3-4B-Instruct-2507")
        self.base_url = base_url or os.getenv("EMBEDDING_API_BASE_URL", "http://localhost:8000/v1")
        self.api_key = api_key or os.getenv("EMBEDDING_API_KEY", "EMPTY")
        # No network I/O happens here; the OpenAI client is lazy until a call.
        self.client = OpenAI(base_url=self.base_url, api_key=self.api_key)

    def create_chat_embeddings(
        self,
        messages: list[ChatCompletionMessageParam],
        model: str,
        encoding_format: Literal["base64", "float"] | NotGiven = NOT_GIVEN,
        continue_final_message: bool = False,
        add_special_tokens: bool = False,
    ) -> CreateEmbeddingResponse:
        """Call vLLM's Chat Embeddings API (extension of OpenAI Embeddings).

        Uses the low-level ``client.post`` because the official SDK has no
        method for this vLLM-specific endpoint shape (messages instead of
        plain input text).

        Args:
            messages: Chat messages to embed.
            model: Model name to embed with.
            encoding_format: ``"float"`` or ``"base64"``; server default if
                not given.
            continue_final_message: vLLM option — treat the final message as a
                partial continuation.
            add_special_tokens: vLLM option — whether the server adds special
                tokens during tokenization.

        Returns:
            The parsed ``CreateEmbeddingResponse`` from the server.
        """
        return self.client.post(
            "/embeddings",
            cast_to=CreateEmbeddingResponse,
            body={
                "messages": messages,
                "model": model,
                "encoding_format": encoding_format,
                "continue_final_message": continue_final_message,
                "add_special_tokens": add_special_tokens,
            },
        )

    def embedding_gen(self, message_input: list[ChatCompletionMessageParam]) -> list[float]:
        """Embed chat messages and return the first embedding vector as floats.

        Convenience wrapper over :meth:`create_chat_embeddings` using the
        instance's configured model, float encoding, and special tokens added.

        Args:
            message_input: Chat messages to embed.

        Returns:
            The embedding vector (``response.data[0].embedding``).
        """
        response = self.create_chat_embeddings(
            messages=message_input,
            model=self.model_name,
            encoding_format="float",
            continue_final_message=False,
            add_special_tokens=True,
        )
        return response.data[0].embedding