File size: 1,443 Bytes
48e85cb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
# query_vector_store.py
import json
import numpy as np
from sentence_transformers import SentenceTransformer
import faiss
from pathlib import Path

DATA_DIR = Path("data")
EMBED_MODEL = "sentence-transformers/all-MiniLM-L6-v2"
TOP_K = 5

def load_index():
    """Read the persisted FAISS index from the data directory."""
    return faiss.read_index(str(DATA_DIR / "vector_store.index"))

def load_metadata():
    """Load the per-vector metadata records (one entry per indexed chunk)."""
    meta_path = DATA_DIR / "metadata.json"
    with open(meta_path, "r", encoding="utf-8") as fh:
        return json.load(fh)

def embed_query(model, query):
    """Encode *query* into a single L2-normalized embedding row.

    Normalizing lets an inner-product index (IndexFlatIP) score by
    cosine similarity.
    """
    vec = model.encode([query], convert_to_numpy=True)
    faiss.normalize_L2(vec)
    return vec

def search(query, top_k=TOP_K):
    """Return up to *top_k* documents most similar to *query*.

    Each result is a dict with the cosine-similarity ``score`` (float)
    and the matching metadata record under ``doc``. May return fewer
    than *top_k* results if the index holds fewer vectors.
    """
    model = SentenceTransformer(EMBED_MODEL)
    index = load_index()
    metadata = load_metadata()

    q_emb = embed_query(model, query)
    # FAISS requires float32 queries; D holds similarities, I the row indices.
    D, I = index.search(q_emb.astype('float32'), top_k)

    results = []
    for score, idx in zip(D[0], I[0]):
        # FAISS pads I with -1 when fewer than top_k vectors exist;
        # metadata[-1] would silently return the LAST record, so skip.
        if idx < 0:
            continue
        results.append({"score": float(score), "doc": metadata[idx]})
    return results

if __name__ == "__main__":
    q = input("Enter your question/query: ").strip()
    for rank, hit in enumerate(search(q, top_k=5), start=1):
        doc = hit["doc"]
        print(f"\n=== Result {rank} (score={hit['score']:.4f}) ===")
        print("Source:", doc["source_file"], "page:", doc["page"])
        print("Preview:", doc["text"][:800])