"""Embedder.

Same role as the StudyMate version.

Returns a NumPy array (not a list) so the FAISS VectorStore can read
`embeddings.shape[1]`. Embeddings are L2-normalized so inner-product search
(IndexFlatIP) == cosine similarity.

Default model is the small general-purpose MiniLM (fast, already cached from
StudyMate). For a code project, a code-aware model retrieves better -- swap in
"jinaai/jina-embeddings-v2-base-code" (pass trust_remote_code=True) once the
pipeline works end-to-end.
"""

from typing import Iterable, Union

import numpy as np
from sentence_transformers import SentenceTransformer


class Embedder:
    """Thin wrapper around a SentenceTransformer that yields 2-D embeddings."""

    def __init__(
        self,
        model_name: str = "sentence-transformers/all-MiniLM-L6-v2",
        **kwargs,
    ) -> None:
        """Load the embedding model.

        Args:
            model_name: Model identifier passed to ``SentenceTransformer``.
            **kwargs: Forwarded unchanged to the ``SentenceTransformer``
                constructor (e.g. ``trust_remote_code=True``).
        """
        self.model = SentenceTransformer(model_name, **kwargs)

    def create_embeddings(self, texts: Union[str, Iterable[str]]) -> np.ndarray:
        """Encode ``texts`` into L2-normalized embeddings.

        Args:
            texts: A single string or any iterable of strings. A single
                string is treated as one document.

        Returns:
            A float32 array with one row per input text. An empty input
            yields an array with zero rows whose second dimension is the
            model's reported embedding size, so ``shape[1]`` stays readable.
        """
        # A bare string is iterable: list("abc") would embed 'a', 'b', 'c'
        # as three separate documents instead of one.
        if isinstance(texts, str):
            batch = [texts]
        else:
            batch = list(texts)

        if not batch:
            # encode() on an empty batch does not give a 2-D (0, dim) array,
            # so build one explicitly. The model may report no dimension
            # (None); fall back to 0 columns in that case.
            dim = self.model.get_sentence_embedding_dimension() or 0
            return np.empty((0, dim), dtype=np.float32)

        embeddings = self.model.encode(
            batch,
            convert_to_numpy=True,
            normalize_embeddings=True,
        )
        # FAISS indexes operate on float32 vectors; asarray avoids a copy
        # when the model already produced float32.
        return np.asarray(embeddings, dtype=np.float32)