"""
embeddings.py
-------------
Creates text embeddings using a Sentence Transformer model.

What are embeddings?
  An embedding turns a piece of text into a list of numbers (a vector).
  Similar texts produce similar vectors, so we can find relevant chunks
  by comparing vectors — this is the core idea behind RAG retrieval.

Default model : all-MiniLM-L6-v2
  - Small, fast, and good for general-purpose semantic search.
  - Produces 384-dimensional vectors.
  - Runs entirely on your CPU — no GPU needed.

To swap to a different model later, just change DEFAULT_MODEL_NAME.
"""

from langchain_huggingface import HuggingFaceEmbeddings

# ── configuration ─────────────────────────────────────────────────────────────

# Model ID used by get_embedding_model() when the caller does not pass one.
# Change this string to switch embedding models, e.g.:
#   "all-mpnet-base-v2"        – higher quality, slightly slower
#   "paraphrase-MiniLM-L3-v2"  – even smaller and faster
# NOTE(review): a different model may produce vectors of a different size,
# so any index built with the previous model presumably needs rebuilding —
# confirm before switching.
DEFAULT_MODEL_NAME = "all-MiniLM-L6-v2"


# ── factory function ──────────────────────────────────────────────────────────

def get_embedding_model(
    model_name: str = DEFAULT_MODEL_NAME,
    *,
    device: str = "cpu",
) -> HuggingFaceEmbeddings:
    """
    Load and return a Sentence Transformer embedding model.

    The model files are downloaded from HuggingFace the first time they are
    needed and cached locally (usually in ~/.cache/huggingface/), so later
    calls do not download them again. Every call still constructs a new
    HuggingFaceEmbeddings object, so keep and reuse the returned object
    rather than calling this function repeatedly.

    Parameters
    ----------
    model_name : str
        The HuggingFace model ID to use for embeddings.
    device : str, keyword-only
        Device the model is loaded on. Defaults to "cpu"; pass "cuda" if
        you have a GPU.

    Returns
    -------
    HuggingFaceEmbeddings
        A LangChain-compatible embedding object ready to use with FAISS.
    """
    print(f"  Loading embedding model: '{model_name}'...")
    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        # Forwarded to the underlying model constructor; selects where it runs.
        model_kwargs={"device": device},
        # Ask the encoder to normalize every vector it produces.
        encode_kwargs={"normalize_embeddings": True},
    )
    print("  OK: Embedding model ready.")
    return embeddings