Spaces:
Runtime error
Runtime error
| from loguru import logger | |
| from openai import AsyncOpenAI | |
| from pydantic import ConfigDict | |
| from typing import Any, Sequence, Self | |
| from ctp_slack_bot.core import ApplicationComponentBase, Settings | |
class EmbeddingsModelService(ApplicationComponentBase):
    """
    Service for embeddings model operations.

    Sends text chunks to the embeddings endpoint of the configured OpenAI
    client and checks the returned vectors against the configured dimension.
    """

    # arbitrary_types_allowed: needed so the AsyncOpenAI client (not a pydantic
    # model) can be declared as a field; frozen: instances are immutable.
    model_config = ConfigDict(arbitrary_types_allowed=True, frozen=True)

    # Application settings; this service reads `embedding_model` and
    # `vector_dimension` from it.
    settings: Settings
    # Asynchronous client used to call the embeddings endpoint.
    open_ai_client: AsyncOpenAI

    async def get_embeddings(self: Self, texts: Sequence[str]) -> Sequence[Sequence[float]]:
        """Get embeddings for a sequence of texts using OpenAI’s API.

        Args:
            texts: Text chunks to embed.

        Returns:
            A tuple with one embedding (itself a tuple of floats) per item in
            the API response; empty when ``texts`` is empty.

        Raises:
            ValueError: If any returned embedding's length differs from
                ``settings.vector_dimension``.
        """
        logger.debug("Creating embeddings for {} text string(s)…", len(texts))

        # Nothing to embed: skip the network round trip entirely.
        if not texts:
            return ()

        response = await self.open_ai_client.embeddings.create(
            model=self.settings.embedding_model,
            input=texts,
            encoding_format="float"  # Ensure we get raw float values.
        )
        embeddings = tuple(tuple(data.embedding) for data in response.data)

        # Validate every vector. The previous `case (first, _)` pattern only
        # matched responses containing exactly two embeddings, so the check was
        # silently skipped for any other batch size.
        expected_dimension = self.settings.vector_dimension
        actual_dimension = next(
            (len(embedding) for embedding in embeddings if len(embedding) != expected_dimension),
            None,
        )
        if actual_dimension is not None:
            logger.error("Embedding dimension mismatch and/or misconfiguration: expected configured dimension {}, but got {}.", expected_dimension, actual_dimension)
            # TODO: raise a more specific type.
            raise ValueError(
                f"Embedding dimension mismatch: expected {expected_dimension}, got {actual_dimension}."
            )

        return embeddings

    # NOTE(review): if the base class declares `name` as a property, a
    # `@property` decorator may be missing here — confirm against
    # ApplicationComponentBase.
    def name(self: Self) -> str:
        """Return the identifier string for this component."""
        return "embeddings_model_service"