File size: 1,586 Bytes
21f8709
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
import sys
import os
import time

# Add backend to path
sys.path.append(os.path.join(os.getcwd(), 'backend'))

from services.vector_store import VectorStore

def debug_legacy_data():
    """Verify that session-scoped queries exclude legacy (session-less) documents.

    Inserts a document WITHOUT a ``session_id`` directly into the underlying
    collection (bypassing ``add_document_chunks``), then runs a similarity
    query scoped to a session and reports whether the legacy document leaked
    through the filter. The injected test document is always removed
    afterwards, even if the query raises.
    """
    print("🚀 Starting Debug of Legacy Data...")

    store = VectorStore()

    # 1. Add a document WITHOUT session_id (simulating legacy data).
    # We write to the collection directly so no session_id is attached
    # to the metadata.
    print("\n1️⃣ Adding legacy document (no session_id)...")
    legacy_text = "This is a legacy document about Ancient Rome."
    legacy_meta = {"source": "history.pdf"}  # No session_id

    # NOTE(review): the "passage: " prefix looks like the E5-style embedding
    # convention — confirm it matches what VectorStore uses internally.
    embedding = store.embedding_model.encode(
        [f"passage: {legacy_text}"], normalize_embeddings=True
    )

    store.collection.add(
        embeddings=embedding.tolist(),
        metadatas=[legacy_meta],
        documents=[legacy_text],
        ids=["legacy_doc_1"],
    )

    try:
        # 2. Query WITH a session_id — the legacy doc must be filtered out.
        session_id = "current_user_session"
        print(f"\n2️⃣ Querying with session_id='{session_id}'...")

        results = store.find_similar_chunks("Rome", session_id=session_id, n_results=5)

        print(f"   Results: {results['documents']}")

        if results['documents'] and results['documents'][0]:
            print("   ❌ FAIL: Legacy document was returned despite filter!")
        else:
            print("   ✅ PASS: Legacy document was NOT returned.")
    finally:
        # Always clean up the injected legacy document, even on failure,
        # so repeated runs don't accumulate test data in the collection.
        store.collection.delete(ids=["legacy_doc_1"])

if __name__ == "__main__":
    # Run the legacy-data filter check when executed as a script.
    debug_legacy_data()