import sys
import os
import time
# Add backend to path
sys.path.append(os.path.join(os.getcwd(), 'backend'))
from services.vector_store import VectorStore
def debug_legacy_data():
    """Verify that session-filtered queries exclude legacy chunks lacking a session_id.

    Inserts a document directly into the underlying collection with no
    ``session_id`` in its metadata (simulating pre-session legacy data),
    then runs a session-scoped similarity search and reports whether the
    filter correctly hides the legacy document. Prints PASS/FAIL; returns None.
    """
    print("🚀 Starting Debug of Legacy Data...")
    store = VectorStore()

    # 1. Add a document WITHOUT session_id (simulating legacy data).
    # add_document_chunks would presumably stamp a session_id, so we write
    # to the collection directly to reproduce the legacy shape.
    print("\n1️⃣ Adding legacy document (no session_id)...")
    legacy_text = "This is a legacy document about Ancient Rome."
    legacy_meta = {"source": "history.pdf"}  # deliberately no session_id

    # Manually add to the collection. NOTE(review): the "passage: " prefix
    # looks like the e5-style embedding convention — confirm it matches
    # what VectorStore uses internally.
    embedding = store.embedding_model.encode(
        [f"passage: {legacy_text}"], normalize_embeddings=True
    )
    store.collection.add(
        embeddings=embedding.tolist(),
        metadatas=[legacy_meta],
        documents=[legacy_text],
        ids=["legacy_doc_1"],
    )

    try:
        # 2. Query WITH a session_id: the legacy doc must NOT surface.
        session_id = "current_user_session"
        print(f"\n2️⃣ Querying with session_id='{session_id}'...")
        results = store.find_similar_chunks("Rome", session_id=session_id, n_results=5)
        print(f" Results: {results['documents']}")

        if results['documents'] and results['documents'][0]:
            print(" ❌ FAIL: Legacy document was returned despite filter!")
        else:
            print(" ✅ PASS: Legacy document was NOT returned.")
    finally:
        # Clean up even if the query raises, so repeated runs start from a
        # clean collection (original version leaked the doc on failure).
        store.collection.delete(ids=["legacy_doc_1"])


if __name__ == "__main__":
    debug_legacy_data()