Spaces:
Sleeping
Sleeping
| """Embedding generation service using sentence-transformers""" | |
| from sentence_transformers import SentenceTransformer | |
| from typing import List | |
| import numpy as np | |
| from app.config import settings | |
| from app.utils.logger import setup_logger | |
| logger = setup_logger(__name__) | |
class EmbeddingService:
    """Generate embeddings for text using sentence-transformers.

    The model named by ``settings.EMBEDDING_MODEL`` is loaded once in the
    constructor and reused by every call. Both embedding methods request
    NumPy output with ``normalize_embeddings=True`` and convert the result
    to plain Python lists before returning.
    """

    def __init__(self):
        """Load the configured model and record its embedding dimension."""
        logger.info(f"Loading embedding model: {settings.EMBEDDING_MODEL}")
        self.model = SentenceTransformer(settings.EMBEDDING_MODEL)
        self.dimension = self.model.get_sentence_embedding_dimension()
        logger.info(f"Embedding dimension: {self.dimension}")

    def embed_text(self, text: str) -> List[float]:
        """Generate the embedding for a single text.

        Args:
            text: The text to embed.

        Returns:
            The embedding converted to a Python list via ``ndarray.tolist()``.
        """
        embedding = self.model.encode(
            text,
            convert_to_numpy=True,
            normalize_embeddings=True,
        )
        return embedding.tolist()

    def embed_batch(
        self,
        texts: List[str],
        batch_size: int = 32,
        *,
        show_progress_bar: bool = True,
    ) -> List[List[float]]:
        """Generate embeddings for a batch of texts.

        Args:
            texts: The texts to embed.
            batch_size: Batch size forwarded to the model's ``encode`` call.
            show_progress_bar: Forwarded to ``encode``; defaults to ``True``
                so existing callers keep the progress bar.

        Returns:
            One embedding (a list of floats) per input text, in the order
            produced by ``encode``. An empty input yields an empty list.
        """
        # Nothing to encode: skip the model call (and its progress bar).
        # ``len`` is used rather than truthiness because the log line below
        # already requires ``texts`` to support ``len``.
        if len(texts) == 0:
            return []

        logger.info(f"Embedding {len(texts)} texts...")
        embeddings = self.model.encode(
            texts,
            batch_size=batch_size,
            show_progress_bar=show_progress_bar,
            convert_to_numpy=True,
            normalize_embeddings=True,
        )
        return embeddings.tolist()

    def get_dimension(self) -> int:
        """Return the embedding dimension recorded when the model was loaded."""
        logger.debug(f"Embedding dimension requested: {self.dimension}")
        return self.dimension
# Module-level instance, created when this module is imported; the
# EmbeddingService constructor loads the embedding model at that point.
embedding_service: EmbeddingService = EmbeddingService()