Baktabek's picture
Upload folder using huggingface_hub
409c17a verified
"""
Infrastructure - Sentence Transformers Embedding Service
"""
import asyncio
from concurrent.futures import ThreadPoolExecutor
from typing import List

from sentence_transformers import SentenceTransformer

from app.domain.interfaces import IEmbedder
class SentenceTransformerEmbedder(IEmbedder):
    """Embedding service backed by a ``sentence_transformers`` model.

    The model is loaded once in the constructor. ``SentenceTransformer.encode``
    is a synchronous call, so the async methods hand it to a private
    single-worker thread pool instead of running it on the event loop; the
    loop stays free to serve other coroutines while an encode is in progress.
    """

    def __init__(self, model_name: str = "all-MiniLM-L6-v2"):
        """Load ``model_name`` and record its embedding dimension.

        Args:
            model_name: Name or path passed straight to ``SentenceTransformer``.
        """
        self.model_name = model_name
        self.model = SentenceTransformer(model_name)
        self.dimension = self.model.get_sentence_embedding_dimension()
        # A single worker keeps encode() off the event loop while guaranteeing
        # the model is never entered from two threads at the same time.
        self._executor = ThreadPoolExecutor(
            max_workers=1, thread_name_prefix="st-embedder"
        )

    async def _encode_off_loop(self, inputs, **encode_kwargs):
        """Run ``self.model.encode`` in the worker thread and return ``.tolist()``.

        ``convert_to_numpy=True`` is always passed; ``encode_kwargs`` carries
        any additional keyword arguments for ``encode``.
        """

        def _work():
            vectors = self.model.encode(
                inputs, convert_to_numpy=True, **encode_kwargs
            )
            return vectors.tolist()

        loop = asyncio.get_running_loop()
        return await loop.run_in_executor(self._executor, _work)

    async def embed_text(self, text: str) -> List[float]:
        """Generate the embedding for a single text.

        Args:
            text: The string to embed.

        Returns:
            The embedding converted to a plain Python list.
        """
        return await self._encode_off_loop(text)

    async def embed_texts(self, texts: List[str]) -> List[List[float]]:
        """Generate embeddings for multiple texts.

        Args:
            texts: The strings to embed.

        Returns:
            One embedding list per input text, in the order produced by
            ``encode``. An empty ``texts`` yields an empty list without
            invoking the model.
        """
        if not texts:
            # Nothing to encode; skip the thread hand-off and the model call.
            return []
        return await self._encode_off_loop(texts, show_progress_bar=False)

    def get_dimension(self) -> int:
        """Return the embedding dimension captured at construction time."""
        return self.dimension