File size: 3,417 Bytes
9a9f1fb
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
import os
import sys
import time

# Fix path to allow importing from src
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from src.vector_db import UnifiedQdrant
from src.router import LearnedRouter
from src.comparison import ComparisonEngine
from config import COLLECTION_NAME, NUM_CLUSTERS, FRESHNESS_SHARD_ID, MRL_DIMS

# Point this diagnostic at the hosted Qdrant node by overwriting the two
# connection variables in this process's environment (existing values are
# replaced, not merged).
# NOTE(review): the API key is committed in plain text here - consider
# rotating it and sourcing it from a secret store instead.
# NOTE(review): these assignments run after the project imports above; if any
# of those modules read the variables at import time the override would not
# reach them - confirm against config / src.vector_db.
os.environ.update(
    {
        "QDRANT_URL": "https://justmotes-xvector-db-node.hf.space",
        "QDRANT_API_KEY": "xvector_secret_pass_123",
    }
)

def _connect_qdrant():
    """Build and initialize the vector store, then report on the collection.

    Returns the initialized ``UnifiedQdrant`` object, or ``None`` when any
    step raises or the configured collection does not exist. A message is
    printed for every outcome.
    """
    try:
        store = UnifiedQdrant(
            collection_name=COLLECTION_NAME,
            vector_size=384,
            num_clusters=NUM_CLUSTERS,
            freshness_shard_id=FRESHNESS_SHARD_ID,
        )
        print("   - Initializing UnifiedQdrant...")
        store.initialize()

        # Bail out early when the collection is missing.
        if not store.client.collection_exists(COLLECTION_NAME):
            print(f"❌ Collection '{COLLECTION_NAME}' does NOT exist.")
            return None

        details = store.client.get_collection(COLLECTION_NAME)
        print(f"✅ Collection '{COLLECTION_NAME}' exists.")
        print(f"   - Status: {details.status}")
        print(f"   - Points: {details.points_count}")
        if details.points_count == 0:
            print("⚠️ WARNING: Collection is empty! Ingestion might have failed.")
        return store
    except Exception as err:
        print(f"❌ Qdrant Connection Failed: {err}")
        return None


def _load_router(model_path):
    """Load the router from *model_path* and print its cluster count.

    Returns the loaded router, or ``None`` when the file is absent or
    loading/inspection raises.
    """
    if not os.path.exists(model_path):
        print(f"❌ Router file not found at {model_path}")
        return None

    try:
        loaded = LearnedRouter.load(model_path)
        print(f"✅ Router loaded from {model_path}")
        print(f"   - Clusters: {loaded.n_clusters}")
    except Exception as err:
        print(f"❌ Failed to load router: {err}")
        return None
    return loaded


def _exercise_search(store, router):
    """Run one direct and one xVector search and print what came back.

    Any exception is reported together with its traceback; nothing is
    re-raised.
    """
    try:
        comparer = ComparisonEngine(store, router, embedding_model_name="minilm")
        question = "What is the capital of France?"

        print(f"   - Query: '{question}'")

        # Baseline: direct search.
        print("   - Running Direct Search...")
        direct = comparer.direct_search(question)
        print(f"     -> Found {len(direct['results'])} results. Latency: {direct['latency_ms']:.2f}ms")

        # Routed: xVector search.
        print("   - Running xVector Search...")
        routed = comparer.xvector_search(question)
        print(f"     -> Found {len(routed['results'])} results. Latency: {routed['latency_ms']:.2f}ms")
        print(f"     -> Mode: {routed['mode']}")
        print(f"     -> Target Cluster: {routed.get('target_cluster')}")

        # Only the direct-search result count decides the verdict.
        if len(direct['results']) == 0:
            print("⚠️ Search returned 0 results. Data might be missing or embeddings mismatched.")
        else:
            print("✅ Search Logic Verified.")

    except Exception as err:
        print(f"❌ Search Logic Failed: {err}")
        import traceback
        traceback.print_exc()


def run_diagnostic():
    """Run the three-step health check and print a report to stdout.

    Steps, in order: (1) connect to Qdrant and inspect the configured
    collection, (2) load the router from ``models/router_v1.pkl``,
    (3) run a sample query through direct and xVector search. The check
    stops at the first failing step. Always returns ``None``.
    """
    print(">>> Starting Diagnostic Check...")

    # 1. Qdrant connection
    print("\n1. Checking Qdrant Connection...")
    store = _connect_qdrant()
    if store is None:
        return

    # 2. Router model
    print("\n2. Checking Router Model...")
    router = _load_router("models/router_v1.pkl")
    if router is None:
        return

    # 3. Search logic
    print("\n3. Testing Search Logic...")
    _exercise_search(store, router)

# Run the diagnostic only when this file is executed as a script,
# not when it is imported.
if __name__ == "__main__":
    run_diagnostic()