parthib07's picture
Upload 52 files
61411b5 verified
raw
history blame contribute delete
890 Bytes
from __future__ import annotations
import logging
from functools import lru_cache
from typing import List
from sentence_transformers import SentenceTransformer
logger = logging.getLogger(__name__)
@lru_cache(maxsize=1)
def _load_embedding_model(model_name: str) -> SentenceTransformer:
    """Load the model named *model_name* and keep the most recent one cached.

    This helper is always called with a single positional argument so that
    the ``lru_cache`` key depends only on the model name, not on how the
    public wrapper was invoked.
    """
    logger.info("Loading embedding model: %s", model_name)
    return SentenceTransformer(model_name)


def get_embedding_model(model_name: str = "sentence-transformers/all-MiniLM-L6-v2") -> SentenceTransformer:
    """
    Return a cached SentenceTransformers model instance.

    Note: loading the model can be slow; caching keeps Streamlit responsive.

    ``lru_cache`` treats ``f()``, ``f("x")`` and ``f(model_name="x")`` as
    distinct cache entries. With a single-slot cache, mixing those call
    styles would evict and reload the same model, so the argument is
    normalised here before it reaches the cached loader.

    Args:
        model_name: Identifier passed to ``SentenceTransformer``.

    Returns:
        The model instance for *model_name*; repeated calls with the same
        name return the same cached object until another name is requested.
    """
    return _load_embedding_model(model_name)


# Keep the cache-management helpers reachable under the public name for any
# caller that relied on them when the decorator sat on this function directly.
get_embedding_model.cache_clear = _load_embedding_model.cache_clear
get_embedding_model.cache_info = _load_embedding_model.cache_info

def embed_texts(texts: List[str], model_name: str = "sentence-transformers/all-MiniLM-L6-v2") -> List[List[float]]:
    """
    Encode *texts* into embedding vectors.

    Args:
        texts: Strings to embed.
        model_name: Identifier of the model to use; forwarded to
            ``get_embedding_model``.

    Returns:
        One list of floats per input text, in input order. Embeddings are
        requested with ``normalize_embeddings=True``. An empty input yields
        an empty list.
    """
    # Nothing to encode: skip the (potentially slow) model load entirely.
    if not texts:
        return []

    model = get_embedding_model(model_name=model_name)
    vectors = model.encode(texts, normalize_embeddings=True)
    # Convert each row to plain Python floats so the result is serialisable.
    return [row.tolist() for row in vectors]