# query_vector_store.py
"""Query a FAISS vector store with a free-text question.

The script embeds the query with a SentenceTransformer model, searches the
FAISS index stored under ``DATA_DIR`` and prints the best-matching metadata
records.
"""
import json
from pathlib import Path

import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

DATA_DIR = Path("data")
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
TOP_K = 5


def load_index():
    """Read and return the FAISS index stored at ``DATA_DIR/vector_store.index``."""
    return faiss.read_index(str(DATA_DIR / "vector_store.index"))


def load_metadata():
    """Load and return the parsed JSON content of ``DATA_DIR/metadata.json``.

    NOTE(review): ``search`` indexes the result by integer position, so the
    file is presumably a JSON array aligned with the index rows -- confirm
    against the script that builds the store.
    """
    with open(DATA_DIR / "metadata.json", "r", encoding="utf-8") as f:
        return json.load(f)


def embed_query(model, query):
    """Encode *query* with *model* and return an L2-normalised embedding.

    Args:
        model: Object exposing ``encode(list_of_str, convert_to_numpy=True)``
            (a ``SentenceTransformer`` when called from ``search``).
        query: The text to embed.

    Returns:
        A C-contiguous ``float32`` NumPy array holding the normalised
        embedding of the single query.
    """
    emb = model.encode([query], convert_to_numpy=True)
    # faiss.normalize_L2 works in place on contiguous float32 data, so coerce
    # the array *before* normalising rather than after.
    emb = np.ascontiguousarray(emb, dtype=np.float32)
    # Normalise for cosine similarity with IndexFlatIP.
    faiss.normalize_L2(emb)
    return emb


def search(query, top_k=TOP_K):
    """Return up to *top_k* metadata records most similar to *query*.

    The embedding model, the index and the metadata are loaded on every call.

    Args:
        query: Free-text query string.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A list of ``{"score": float, "doc": <metadata entry>}`` dicts in the
        order returned by the index. The list is shorter than *top_k* when
        the index holds fewer vectors than requested.
    """
    model = SentenceTransformer(EMBED_MODEL)
    index = load_index()
    metadata = load_metadata()

    q_emb = embed_query(model, query)
    # scores: similarities, ids: row positions in the index.
    scores, ids = index.search(q_emb, top_k)

    # FAISS pads missing neighbours with id -1; without the filter,
    # metadata[-1] would silently return the last document as a bogus hit.
    return [
        {"score": float(score), "doc": metadata[int(idx)]}
        for score, idx in zip(scores[0], ids[0])
        if idx >= 0
    ]


def main():
    """Prompt for a query on stdin and print the top matches."""
    q = input("Enter your question/query: ").strip()
    res = search(q, top_k=TOP_K)
    if not res:
        print("No results found.")
        return
    for i, r in enumerate(res, 1):
        print(f"\n=== Result {i} (score={r['score']:.4f}) ===")
        print("Source:", r["doc"]["source_file"], "page:", r["doc"]["page"])
        print("Preview:", r["doc"]["text"][:800])


if __name__ == "__main__":
    main()