Spaces:

hamba-ho
/

Assistant-Web-Educatif

Running

Assistant-Web-Educatif / debug_legacy_data.py

Mise à jour vector_store, verify_isolation et ajout debug_legacy_data

21f8709 5 months ago

1.59 kB

	import sys
	import os
	import time

	# Add the 'backend' directory to the import path before importing from
	# `services` below; the import order matters, so keep these lines together.
	# NOTE(review): the path is built from the current working directory, so this
	# presumably expects the script to be launched from the project root - confirm.
	sys.path.append(os.path.join(os.getcwd(), 'backend'))

	from services.vector_store import VectorStore

	def debug_legacy_data():
	    """Check whether a document stored without a session_id leaks into a session-scoped query.

	    Steps:
	      1. Insert one document whose metadata has no ``session_id`` key
	         directly into ``store.collection``.
	      2. Call ``store.find_similar_chunks`` with a ``session_id`` and print
	         PASS when no documents come back, FAIL otherwise.
	      3. Delete the inserted document again, even if step 2 raised.

	    The outcome is only printed; the function returns ``None``.
	    """
	    print("🚀 Starting Debug of Legacy Data...")

	    store = VectorStore()
	    legacy_id = "legacy_doc_1"

	    # 1. Add a document WITHOUT session_id (simulating legacy data).
	    # The collection is written to directly instead of going through the
	    # store's add_document_chunks method, so the metadata stays exactly as
	    # given here (presumably that method would attach a session_id - confirm).
	    print("\n1️⃣ Adding legacy document (no session_id)...")
	    legacy_text = "This is a legacy document about Ancient Rome."
	    legacy_meta = {"source": "history.pdf"}  # No session_id

	    # Manually embed the text with the store's own embedding model.
	    embedding = store.embedding_model.encode(
	        [f"passage: {legacy_text}"],
	        normalize_embeddings=True,
	    )

	    store.collection.add(
	        embeddings=embedding.tolist(),
	        metadatas=[legacy_meta],
	        documents=[legacy_text],
	        ids=[legacy_id],
	    )

	    try:
	        # 2. Query WITH a session_id
	        session_id = "current_user_session"
	        print(f"\n2️⃣ Querying with session_id='{session_id}'...")

	        results = store.find_similar_chunks("Rome", session_id=session_id, n_results=5)

	        print(f" Results: {results['documents']}")

	        # Any returned document counts as a leak: nothing was stored under
	        # this session_id by this script.
	        # NOTE(review): this also fires if the collection already holds real
	        # documents for this session_id - confirm that cannot happen.
	        if results['documents'] and results['documents'][0]:
	            print(" ❌ FAIL: Legacy document was returned despite filter!")
	        else:
	            print(" ✅ PASS: Legacy document was NOT returned.")
	    finally:
	        # Clean up in all cases so a failing query does not leave the test
	        # record behind in the collection.
	        store.collection.delete(ids=[legacy_id])

	# Run the check only when this file is executed as a script, not on import.
	if __name__ == "__main__":
	    debug_legacy_data()