"""Inspect and query the FAISS index and metadata created by `index_faiss.py`. Usage examples (PowerShell): # List first 10 ids C:/PF/Projects/Rs_mini_project/myenv/Scripts/python.exe backend/query_faiss_fixed.py --list 10 # Show vector info for id 5 C:/PF/Projects/Rs_mini_project/myenv/Scripts/python.exe backend/query_faiss_fixed.py --show-id 5 # Query by text (nearest neighbors) C:/PF/Projects/Rs_mini_project/myenv/Scripts/python.exe backend/query_faiss_fixed.py --query-text "python data science" --k 5 # Query by existing id (find neighbors of vector with id 0) C:/PF/Projects/Rs_mini_project/myenv/Scripts/python.exe backend/query_faiss_fixed.py --query-id 0 --k 5 The script expects the FAISS index at `backend/faiss_index.faiss` and metadata at `backend/faiss_metadata.json` by default. It uses the same embedding model (`all-MiniLM-L6-v2`) to encode text queries. """ from __future__ import annotations import argparse import json from pathlib import Path from typing import Optional import numpy as np from sentence_transformers import SentenceTransformer import faiss DEFAULT_INDEX = Path('faiss_index.faiss') DEFAULT_META = Path('faiss_metadata.json') MODEL_NAME = 'sentence-transformers/all-MiniLM-L6-v2' def load_index_and_meta(index_path: Path = DEFAULT_INDEX, meta_path: Path = DEFAULT_META): if not index_path.exists(): raise FileNotFoundError(f'FAISS index not found: {index_path}') if not meta_path.exists(): raise FileNotFoundError(f'Metadata file not found: {meta_path}') index = faiss.read_index(str(index_path)) with open(meta_path, 'r', encoding='utf-8') as f: metadata = json.load(f) return index, metadata def list_ids(index, metadata, n: int = 20): # Try to read all ids from metadata (keys are strings) ids = sorted(int(k) for k in metadata.keys()) for i in ids[:n]: print('id={} -> Title: {}'.format(i, metadata.get(str(i), ''))) def show_vector(index, idx: int, top_dims: int = 8): # FAISS doesn't provide direct access to stored vectors for all index types. # For IndexIDMap over an IndexFlat, we can reconstruct by searching for exact id. # We'll try to use index.reconstruct if available. try: vec = index.reconstruct(int(idx)) vec = np.array(vec).astype('float32') norm = np.linalg.norm(vec) print('id={} vector norm={:.4f} first {} dims: {}'.format(idx, norm, top_dims, vec[:top_dims].tolist())) except Exception as e: print('Could not reconstruct vector for id {}: {}'.format(idx, e)) def query_by_text(index, metadata, query: str, k: int = 5, model_name: str = MODEL_NAME): model = SentenceTransformer(model_name, device='cpu') q_emb = model.encode([query], convert_to_numpy=True).astype('float32') faiss.normalize_L2(q_emb) D, I = index.search(q_emb, k) D = D[0] I = I[0] print('Query: "{}" -> top {} results:'.format(query, k)) for score, idx in zip(D, I): if idx == -1: continue title = metadata.get(str(int(idx)), '') print(' id={} score={:.4f} title={}'.format(int(idx), float(score), title)) def query_by_id(index, metadata, qid: int, k: int = 5): # Try to reconstruct vector for qid then search try: vec = index.reconstruct(int(qid)).astype('float32') except Exception as e: print('Could not reconstruct vector for id {}: {}'.format(qid, e)) return vec = np.expand_dims(vec, axis=0) faiss.normalize_L2(vec) D, I = index.search(vec, k) D = D[0] I = I[0] print('Neighbors for id={} ->'.format(qid)) for score, idx in zip(D, I): if idx == -1: continue title = metadata.get(str(int(idx)), '') print(' id={} score={:.4f} title={}'.format(int(idx), float(score), title)) def _parse_args(argv=None): p = argparse.ArgumentParser(description='Inspect/query FAISS index') p.add_argument('--index', '-x', default=str(DEFAULT_INDEX)) p.add_argument('--meta', '-m', default=str(DEFAULT_META)) p.add_argument('--list', '-l', nargs='?', const=20, type=int, help='List first N ids (default 20)') p.add_argument('--show-id', type=int, help='Show vector info for given id') p.add_argument('--query-text', type=str, help='Query by text') p.add_argument('--query-id', type=int, help='Query by existing id (find neighbors)') p.add_argument('--k', type=int, default=5, help='Number of neighbors to return') return p.parse_args(argv) def main(argv=None): args = _parse_args(argv) index, metadata = load_index_and_meta(Path(args.index), Path(args.meta)) if args.list is not None: list_ids(index, metadata, args.list) return if args.show_id is not None: show_vector(index, args.show_id) return if args.query_text: query_by_text(index, metadata, args.query_text, k=args.k) return if args.query_id is not None: query_by_id(index, metadata, args.query_id, k=args.k) return print('No action specified. Use --help for usage examples.') if __name__ == '__main__': main()