""" embeddings.py ------------- Creates text embeddings using a Sentence Transformer model. What are embeddings? An embedding turns a piece of text into a list of numbers (a vector). Similar texts produce similar vectors, so we can find relevant chunks by comparing vectors — this is the core idea behind RAG retrieval. Default model : all-MiniLM-L6-v2 - Small, fast, and good for general-purpose semantic search. - Produces 384-dimensional vectors. - Runs entirely on your CPU — no GPU needed. To swap to a different model later, just change DEFAULT_MODEL_NAME. """ from langchain_huggingface import HuggingFaceEmbeddings # ── configuration ───────────────────────────────────────────────────────────── # Change this string to switch embedding models, e.g.: # "all-mpnet-base-v2" – higher quality, slightly slower # "paraphrase-MiniLM-L3-v2"– even smaller and faster DEFAULT_MODEL_NAME = "all-MiniLM-L6-v2" # ── factory function ────────────────────────────────────────────────────────── def get_embedding_model(model_name: str = DEFAULT_MODEL_NAME) -> HuggingFaceEmbeddings: """ Load and return a Sentence Transformer embedding model. The model is downloaded once from HuggingFace and cached locally (usually in ~/.cache/huggingface/). Subsequent calls are instant. Parameters ---------- model_name : str The HuggingFace model ID to use for embeddings. Returns ------- HuggingFaceEmbeddings A LangChain-compatible embedding object ready to use with FAISS. """ print(f" Loading embedding model: '{model_name}'...") embeddings = HuggingFaceEmbeddings( model_name=model_name, model_kwargs={"device": "cpu"}, # use "cuda" if you have a GPU encode_kwargs={"normalize_embeddings": True}, ) print(" OK: Embedding model ready.") return embeddings