Assistant-Web-Educatif / debug_legacy_data.py
hamba-ho's picture
Mise à jour vector_store, verify_isolation et ajout debug_legacy_data
21f8709
import sys
import os
import time
# Add backend to path
sys.path.append(os.path.join(os.getcwd(), 'backend'))
from services.vector_store import VectorStore
def debug_legacy_data():
    """Check whether a document stored without a session_id leaks into a session-filtered query.

    Steps:
      1. Insert one "legacy" document (metadata has no ``session_id``) directly
         into ``store.collection``, bypassing ``add_document_chunks``.
      2. Call ``store.find_similar_chunks`` with a ``session_id`` and print
         whatever it returns.
      3. Print FAIL if any document came back for that session, PASS otherwise.

    The legacy document is always removed afterwards, even when the query or
    the result inspection raises, so a failed run does not leave test data in
    the collection.

    Returns:
        None. The outcome is reported on stdout only.
    """
    print("🚀 Starting Debug of Legacy Data...")
    store = VectorStore()

    # Single source of truth for the id, used by both the insert and the cleanup.
    legacy_id = "legacy_doc_1"

    # 1. Add a document WITHOUT session_id (simulating legacy data).
    # The collection is used directly so that no session_id is attached to
    # the metadata.
    print("\n1️⃣ Adding legacy document (no session_id)...")
    legacy_text = "This is a legacy document about Ancient Rome."
    legacy_meta = {"source": "history.pdf"}  # No session_id

    # NOTE(review): the "passage: " prefix presumably mirrors how VectorStore
    # embeds regular chunks -- confirm against services.vector_store.
    embedding = store.embedding_model.encode(
        [f"passage: {legacy_text}"],
        normalize_embeddings=True,
    )
    store.collection.add(
        embeddings=embedding.tolist(),
        metadatas=[legacy_meta],
        documents=[legacy_text],
        ids=[legacy_id],
    )

    try:
        # 2. Query WITH a session_id
        session_id = "current_user_session"
        print(f"\n2️⃣ Querying with session_id='{session_id}'...")
        results = store.find_similar_chunks("Rome", session_id=session_id, n_results=5)
        print(f" Results: {results['documents']}")

        # The documents for the single query are read from index 0; a
        # non-empty entry there means something was returned for this session.
        if results['documents'] and results['documents'][0]:
            print(" ❌ FAIL: Legacy document was returned despite filter!")
        else:
            print(" ✅ PASS: Legacy document was NOT returned.")
    finally:
        # Clean up unconditionally so a crash above cannot leave the legacy
        # document behind in the collection.
        store.collection.delete(ids=[legacy_id])
# Run the legacy-data check only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    debug_legacy_data()