File size: 1,009 Bytes
b2fe8d1 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 | """Embedding engine — wraps SentenceTransformer for local inference."""
import numpy as np
from sentence_transformers import SentenceTransformer
from config import EMBEDDING_MODEL, MAX_TEXT_LENGTH
# Module-level cache for the embedding model. Starts as None and is filled in
# by get_model() the first time a model is requested.
_model = None
def get_model():
    """Return the shared SentenceTransformer, building it on first use.

    The instance is stored in the module-level ``_model`` variable, so the
    model named by ``EMBEDDING_MODEL`` is constructed at most once per
    process by this function and reused on every later call.
    """
    global _model
    # Fast path: a previous call already built the model.
    if _model is not None:
        return _model
    _model = SentenceTransformer(EMBEDDING_MODEL)
    return _model
def embed_texts(texts: list, batch_size: int = 32) -> np.ndarray:
    """Encode a list of texts into embeddings.

    Each text is split on whitespace and cut down to its first
    ``MAX_TEXT_LENGTH`` words (the limit counts words, not characters), then
    re-joined with single spaces before being handed to the model.

    Args:
        texts: Strings to embed.
        batch_size: Batch size forwarded to the model's ``encode`` call.

    Returns:
        Whatever ``encode`` produces with ``convert_to_numpy=True``; the
        result is returned as-is, with no dtype cast or normalisation here.
    """
    encoder = get_model()

    clipped = []
    for text in texts:
        words = text.split()[:MAX_TEXT_LENGTH]
        clipped.append(" ".join(words))

    # Only show a progress bar for inputs of more than 50 texts.
    want_progress = len(texts) > 50
    return encoder.encode(
        clipped,
        batch_size=batch_size,
        show_progress_bar=want_progress,
        convert_to_numpy=True,
    )
def embed_query(query: str) -> np.ndarray:
    """Encode a single query string into a float32, L2-normalised embedding.

    The query is split on whitespace and limited to its first
    ``MAX_TEXT_LENGTH`` words, encoded as a one-element batch, cast to
    ``float32`` and then passed through ``faiss.normalize_L2``.

    Args:
        query: The query text.

    Returns:
        The encoded batch containing the single query (presumably shape
        ``(1, dim)`` -- confirm against the model in use), as float32 after
        normalisation.
    """
    encoder = get_model()

    words = query.split()[:MAX_TEXT_LENGTH]
    clipped = " ".join(words)

    raw = encoder.encode([clipped], convert_to_numpy=True)
    unit = raw.astype("float32")

    # faiss is imported here, at the point of use, rather than at module
    # import time.
    import faiss

    # The return value is not used; the array is expected to be normalised
    # in place.
    faiss.normalize_L2(unit)
    return unit
|