NotebookLM-Clone / backend /embedding_service.py
rahulrb99
Add citations, retrieval improvements, chat isolation
a524c65
"""Shared embedding service - 384-dim vectors for RAG (ingestion + retrieval)."""
import os
from sentence_transformers import SentenceTransformer
# Embedding model identifier, overridable through the EMBEDDING_MODEL env var.
# Default is all-MiniLM-L6-v2; BAAI/bge-small-en-v1.5 is an alternative with
# better quality (both produce 384-dim vectors).
_MODEL_NAME = os.environ.get(
    "EMBEDDING_MODEL", "sentence-transformers/all-MiniLM-L6-v2"
)

# BGE models: this instruction is prepended to queries only, never to documents.
_BGE_QUERY_PREFIX = "Represent this sentence for searching relevant passages: "

# Module-wide model instance; None until the model has been loaded.
_model = None
def _get_model() -> SentenceTransformer:
    """Return the module-wide embedding model, constructing it on first use."""
    global _model
    if _model is not None:
        # Already loaded by an earlier call; reuse the same instance.
        return _model
    _model = SentenceTransformer(_MODEL_NAME)
    return _model
def _is_bge_model() -> bool:
    """Report whether the configured model name contains "bge" (case-insensitive)."""
    lowered_name = _MODEL_NAME.lower()
    return "bge" in lowered_name
def encode(texts: list[str], task: str = "search_document") -> list[list[float]]:
    """
    Embed a batch of texts and return one vector per text.

    Args:
        texts: Strings to embed. Normally a list; any other iterable of
            strings (tuple, generator, ...) is materialized into a list first,
            and a single bare string is treated as a one-item batch instead of
            being iterated character by character.
        task: "search_query" for queries, "search_document" for documents.
            Only BGE models are affected: their queries get the retrieval
            instruction prefix, documents never do. Any other value is
            handled like "search_document".

    Returns:
        The embeddings as plain Python float lists (384 dimensions for the
        default all-MiniLM-L6-v2 model). Empty input yields [] without
        loading the model.
    """
    # Falsy input (None, "", [], ()) short-circuits before the model is loaded.
    if not texts:
        return []
    if isinstance(texts, str):
        # A str is itself iterable; wrap it so it is embedded as one text.
        batch = [texts]
    else:
        # Iterators/generators are always truthy, so materialize them and
        # re-check for emptiness once the real length is known.
        batch = list(texts)
        if not batch:
            return []
    model = _get_model()
    if _is_bge_model() and task == "search_query":
        batch = [_BGE_QUERY_PREFIX + text for text in batch]
    vectors = model.encode(batch, show_progress_bar=False)
    return [vector.tolist() for vector in vectors]