# smartedu / query_vector_store.py
# Bishal Sharma
# Upload 5 files
# 48e85cb verified
# query_vector_store.py
import json
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from pathlib import Path
# Directory that holds the files read by this script:
# "vector_store.index" (FAISS index) and "metadata.json".
DATA_DIR = Path("data")
# Model name handed to SentenceTransformer when embedding a query.
# NOTE(review): presumably must match the model used to build the index — confirm.
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
# Default number of nearest neighbours requested by search().
TOP_K = 5
def load_index():
    """Read the FAISS index stored at ``DATA_DIR / "vector_store.index"``.

    Returns:
        The index object produced by ``faiss.read_index``.
    """
    index_path = DATA_DIR / "vector_store.index"
    # The path is handed to FAISS as a plain string.
    return faiss.read_index(str(index_path))
def load_metadata():
    """Parse ``DATA_DIR / "metadata.json"`` (decoded as UTF-8) and return it.

    Returns:
        Whatever JSON value the file contains, as decoded by the ``json`` module.
    """
    metadata_path = DATA_DIR / "metadata.json"
    raw_text = metadata_path.read_text(encoding="utf-8")
    return json.loads(raw_text)
def embed_query(model, query):
    """Encode ``query`` into an L2-normalized float32 array for FAISS.

    Args:
        model: Object exposing ``encode(list_of_str, convert_to_numpy=True)``;
            ``search`` passes a ``SentenceTransformer``.
        query: Text to embed.

    Returns:
        A C-contiguous ``float32`` NumPy array whose rows have unit L2 norm.
        Presumably shape ``(1, dim)`` since a single query is encoded — confirm
        against the encoder in use.
    """
    emb = model.encode([query], convert_to_numpy=True)
    # faiss.normalize_L2 operates in place on C-contiguous float32 data, so
    # coerce *before* normalizing rather than relying on the encoder's
    # default output dtype/layout.
    emb = np.ascontiguousarray(emb, dtype=np.float32)
    # normalize for cosine with IndexFlatIP
    faiss.normalize_L2(emb)
    return emb
def search(query, top_k=TOP_K):
    """Return the nearest stored documents for ``query``.

    The embedding model, the FAISS index and the metadata file are all loaded
    anew on every call.

    Args:
        query: Text to search for.
        top_k: Maximum number of neighbours to request from the index.

    Returns:
        A list of ``{"score": float, "doc": <metadata entry>}`` dicts in the
        order FAISS returned them. It may contain fewer than ``top_k`` items
        when the index cannot supply that many neighbours.
    """
    model = SentenceTransformer(EMBED_MODEL)
    index = load_index()
    metadata = load_metadata()
    q_emb = embed_query(model, query)
    # scores: similarities, ids: positions of the matches in the index
    scores, ids = index.search(q_emb.astype('float32'), top_k)
    results = []
    for score, idx in zip(scores[0], ids[0]):
        # FAISS pads missing neighbours with label -1; indexing metadata with
        # -1 would silently return the *last* entry as a bogus hit.
        if idx < 0:
            continue
        results.append({"score": float(score), "doc": metadata[int(idx)]})
    return results
if __name__ == "__main__":
    # Interactive entry point: prompt for a query and print the matches.
    q = input("Enter your question/query: ").strip()
    if not q:
        # Nothing to search for; avoid loading the model and index at all.
        raise SystemExit("No query entered.")
    # Use search()'s default (TOP_K) instead of repeating the literal 5.
    res = search(q)
    for i, r in enumerate(res, 1):
        doc = r["doc"]
        print(f"\n=== Result {i} (score={r['score']:.4f}) ===")
        print("Source:", doc["source_file"], "page:", doc["page"])
        # Only the first 800 characters of the stored text are shown.
        print("Preview:", doc["text"][:800])