"""Debug script: check that session-filtered queries skip legacy records.

Inserts one record without a ``session_id`` in its metadata directly into the
vector store's collection, runs a query restricted to a session, reports
whether anything came back, and then removes the inserted record.
"""

import sys
import os
import time

# Add backend to path
sys.path.append(os.path.join(os.getcwd(), 'backend'))

from services.vector_store import VectorStore


def debug_legacy_data():
    """Insert a legacy record, query with a session filter, and print PASS/FAIL.

    The record is written straight to ``store.collection`` so that its
    metadata carries no ``session_id``. The check reports FAIL when the first
    entry of ``results['documents']`` is non-empty, and PASS otherwise.

    The inserted record is deleted in a ``finally`` clause, so it is removed
    even if the query or the result handling raises.
    """
    print("🚀 Starting Debug of Legacy Data...")

    store = VectorStore()
    legacy_id = "legacy_doc_1"

    # 1. Add a document WITHOUT session_id (simulating legacy data).
    # The collection is used directly rather than going through the store's
    # add_document_chunks method, so no session_id ends up in the metadata.
    print("\n1️⃣ Adding legacy document (no session_id)...")
    legacy_text = "This is a legacy document about Ancient Rome."
    legacy_meta = {"source": "history.pdf"}  # No session_id

    # Manually add to collection.
    # NOTE(review): the "passage: " prefix presumably mirrors how VectorStore
    # embeds stored chunks -- confirm against services.vector_store.
    embedding = store.embedding_model.encode(
        [f"passage: {legacy_text}"],
        normalize_embeddings=True,
    )
    store.collection.add(
        embeddings=embedding.tolist(),
        metadatas=[legacy_meta],
        documents=[legacy_text],
        ids=[legacy_id],
    )

    try:
        # 2. Query WITH a session_id
        session_id = "current_user_session"
        print(f"\n2️⃣ Querying with session_id='{session_id}'...")

        results = store.find_similar_chunks("Rome", session_id=session_id, n_results=5)

        print(f" Results: {results['documents']}")

        if results['documents'] and results['documents'][0]:
            print(" ❌ FAIL: Legacy document was returned despite filter!")
        else:
            print(" ✅ PASS: Legacy document was NOT returned.")
    finally:
        # Clean up: always remove the synthetic record so a failed run does
        # not leave test data behind in the collection.
        store.collection.delete(ids=[legacy_id])


if __name__ == "__main__":
    debug_legacy_data()