"""Embedding models using local HuggingFace Vietnamese models."""

import torch

from langchain_core.embeddings import Embeddings
from langchain_huggingface import HuggingFaceEmbeddings

from src.config import settings
from src.utils.logging import log_pipeline

# Module-level cache so the embedding model is loaded at most once per process.
_embeddings: Embeddings | None = None


def get_device() -> str:
    """Detect optimal device."""
    # Deliberately pinned to CPU: CUDA/MPS setups were crashing silently.
    # Re-enable the checks below only on a known-good GPU environment.
    # if torch.cuda.is_available():
    #     return "cuda"
    # if torch.backends.mps.is_available():
    #     return "mps"
    return "cpu"


def get_embeddings() -> Embeddings:
    """Get or create embeddings model singleton (local HuggingFace).

    Builds the model on first call (using the device chosen by
    ``get_device()``) and returns the same cached instance afterwards.
    """
    global _embeddings
    if _embeddings is None:
        device = get_device()
        # Normalized embeddings so downstream similarity is cosine-compatible.
        _embeddings = HuggingFaceEmbeddings(
            model_name=settings.embedding_model,
            model_kwargs={"device": device},
            encode_kwargs={"normalize_embeddings": True},
        )
        log_pipeline(f"[Embedding] Loaded: {settings.embedding_model} on {device}")
    return _embeddings