"""Shared embedding service: 384-dim vectors for RAG (ingestion and retrieval)."""
| import os | |
| from sentence_transformers import SentenceTransformer | |
# Instruction prefix for BGE models. It is prepended to search queries only;
# documents are embedded without it.
_BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "

# The model is chosen through the EMBEDDING_MODEL environment variable.
# Default: all-MiniLM-L6-v2. BAAI/bge-small-en-v1.5 is the better-quality
# alternative; both produce 384-dimensional vectors.
_MODEL_NAME = os.environ.get(
    "EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
)

# Module-wide model instance; remains None until the model is first loaded.
_model = None
def _get_model() -> SentenceTransformer:
    """Return the shared embedding model, constructing it on first use.

    The instance is stored in the module-level ``_model`` so that later
    calls reuse it instead of building a new ``SentenceTransformer``.
    """
    global _model
    # Fast path: the model was already built by an earlier call.
    if _model is not None:
        return _model
    _model = SentenceTransformer(_MODEL_NAME)
    return _model
def _is_bge_model() -> bool:
    """Tell whether the configured model name contains "bge" (case-insensitive)."""
    lowered_name = _MODEL_NAME.lower()
    return "bge" in lowered_name
def encode(texts: list[str], task: str = "search_document") -> list[list[float]]:
    """
    Embed a batch of texts and return one vector per input, in input order.

    Args:
        texts: Strings to embed. An empty (falsy) value yields ``[]`` without
            loading the model.
        task: "search_query" when embedding queries, "search_document" when
            embedding documents. Only BGE models react to it: their queries
            get the BGE instruction prefix, documents are left untouched.

    Returns:
        A list of plain-Python float lists. The vector length is set by the
        configured model (384 for the models this module documents).
    """
    if not texts:
        return []

    # Load the model before touching the inputs, as the original flow does.
    embedder = _get_model()

    wants_query_prefix = task == "search_query" and _is_bge_model()
    if wants_query_prefix:
        batch = [_BGE_QUERY_PREFIX + text for text in texts]
    else:
        batch = texts

    vectors = embedder.encode(batch, show_progress_bar=False)
    # Convert each embedding row to a plain list of floats.
    return [vector.tolist() for vector in vectors]