Spaces:
Runtime error
Runtime error
| import logging | |
| from typing import List, Optional | |
| #from langchain_openai import OpenAIEmbeddings | |
| from langchain_core.documents import Document | |
| from langchain_huggingface import HuggingFaceEmbeddings | |
| logger = logging.getLogger(__name__) | |
| class EmbeddingManager: | |
| def __init__(self, model_name: str = "sentence-transformers/all-MiniLM-L6-v2"): # text-embedding-3-small | |
| self.model_name = model_name | |
| self.embeddings = None | |
| self._initialize_embeddings() | |
| def _initialize_embeddings(self): | |
| try: | |
| logger.info(f"Initializing embedding model: {self.model_name}") | |
| self.embeddings = HuggingFaceEmbeddings(model=self.model_name, model_kwargs={'device': 'cpu'}) | |
| logger.info("Embedding model initialized successfully") | |
| except Exception as e: | |
| logger.error(f"Error initializing embedding model: {e}") | |
| raise e | |
| def get_embeddings(self) -> HuggingFaceEmbeddings: | |
| if self.embeddings is None: | |
| self._initialize_embeddings() | |
| return self.embeddings | |
| def generate_embeddings(self, texts: List[str]) -> List[List[float]]: | |
| try: | |
| logger.info(f"Generating embeddings for {len(texts)} text(s)") | |
| embeddings = self.embeddings.embed_documents(texts) | |
| logger.info(f"Successfully generated {len(embeddings)} embeddings") | |
| return embeddings | |
| except Exception as e: | |
| logger.error(f"Error generating embeddings: {e}") | |
| raise e | |
| def generate_single_embedding(self, text: str) -> List[float]: | |
| try: | |
| embedding = self.embeddings.embed_query(text) | |
| return embedding | |
| except Exception as e: | |
| logger.error(f"Error generating single embedding: {e}") | |
| raise e | |
| def get_embedding_dimension(self) -> int: | |
| try: | |
| test_embedding = self.generate_single_embedding("test") | |
| return len(test_embedding) | |
| except Exception as e: | |
| logger.error(f"Error getting embedding dimension: {e}") | |
| raise e | |
| def get_model_info(self) -> dict: | |
| return { | |
| 'model_name': self.model_name, | |
| 'dimension': self.get_embedding_dimension(), | |
| 'is_initialized': self.embeddings is not None | |
| } | |