# Source: dashVectorSpace / scripts/diagnostic.py
# Uploaded by justmotes -- commit 9a9f1fb (verified): "Deploy 9-Row Benchmark (via API)"
# File size: 3.42 kB
import os
import sys
import time
# Fix path to allow importing from src
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from src.vector_db import UnifiedQdrant
from src.router import LearnedRouter
from src.comparison import ComparisonEngine
from config import COLLECTION_NAME, NUM_CLUSTERS, FRESHNESS_SHARD_ID, MRL_DIMS
# Force Cloud Config for Diagnostic: point this process at the hosted Qdrant
# node, overwriting whatever QDRANT_URL / QDRANT_API_KEY were already set.
# NOTE(review): the API key is committed in plain text -- consider rotating it
# and supplying it from the environment instead.
# NOTE(review): these overrides execute after the project imports earlier in
# this file; any module that reads these variables at import time would not
# see them -- confirm where they are actually consumed.
os.environ.update(
    {
        "QDRANT_URL": "https://justmotes-xvector-db-node.hf.space",
        "QDRANT_API_KEY": "xvector_secret_pass_123",
    }
)
def _check_qdrant():
    """Connect to Qdrant and confirm the configured collection exists.

    Returns:
        The initialized ``UnifiedQdrant`` handle, or ``None`` when the
        connection fails or the collection does not exist. An empty
        collection only produces a warning; the handle is still returned.
    """
    print("\n1. Checking Qdrant Connection...")
    try:
        db = UnifiedQdrant(
            collection_name=COLLECTION_NAME,
            # NOTE(review): 384 is hard-coded here; presumably it matches the
            # "minilm" embedding model used by the search check -- confirm.
            vector_size=384,
            num_clusters=NUM_CLUSTERS,
            freshness_shard_id=FRESHNESS_SHARD_ID,
        )
        print(" - Initializing UnifiedQdrant...")
        db.initialize()

        # Check specific collection
        if not db.client.collection_exists(COLLECTION_NAME):
            print(f"❌ Collection '{COLLECTION_NAME}' does NOT exist.")
            return None

        info = db.client.get_collection(COLLECTION_NAME)
        print(f"✅ Collection '{COLLECTION_NAME}' exists.")
        print(f" - Status: {info.status}")
        print(f" - Points: {info.points_count}")
        if info.points_count == 0:
            print("⚠️ WARNING: Collection is empty! Ingestion might have failed.")
    except Exception as e:
        # Diagnostic boundary: report any failure instead of crashing.
        print(f"❌ Qdrant Connection Failed: {e}")
        return None
    return db


def _check_router(router_path="models/router_v1.pkl"):
    """Load the learned router from ``router_path``.

    The default path is relative to the current working directory.

    Returns:
        The loaded router, or ``None`` when the file is missing or loading
        raises.
    """
    print("\n2. Checking Router Model...")
    if not os.path.exists(router_path):
        print(f"❌ Router file not found at {router_path}")
        return None
    try:
        router = LearnedRouter.load(router_path)
        print(f"✅ Router loaded from {router_path}")
        print(f" - Clusters: {router.n_clusters}")
    except Exception as e:
        print(f"❌ Failed to load router: {e}")
        return None
    return router


def _check_search(db, router):
    """Run one query through both search paths and report the outcome.

    Returns:
        ``True`` only when both the direct search and the xVector search
        return at least one result; ``False`` when either returns nothing or
        an exception is raised.
    """
    print("\n3. Testing Search Logic...")
    try:
        engine = ComparisonEngine(db, router, embedding_model_name="minilm")
        query = "What is the capital of France?"
        print(f" - Query: '{query}'")

        # Direct Search
        print(" - Running Direct Search...")
        res_direct = engine.direct_search(query)
        direct_hits = len(res_direct['results'])
        print(f" -> Found {direct_hits} results. Latency: {res_direct['latency_ms']:.2f}ms")

        # xVector Search
        print(" - Running xVector Search...")
        res_xvector = engine.xvector_search(query)
        xvector_hits = len(res_xvector['results'])
        print(f" -> Found {xvector_hits} results. Latency: {res_xvector['latency_ms']:.2f}ms")
        print(f" -> Mode: {res_xvector['mode']}")
        print(f" -> Target Cluster: {res_xvector.get('target_cluster')}")

        # Both paths must produce hits; checking only the direct path would
        # report success even when the xVector path returns nothing.
        if direct_hits > 0 and xvector_hits > 0:
            print("✅ Search Logic Verified.")
            return True
        if direct_hits == 0:
            print("⚠️ Search returned 0 results. Data might be missing or embeddings mismatched.")
        else:
            print("⚠️ xVector search returned 0 results while direct search succeeded.")
        return False
    except Exception as e:
        print(f"❌ Search Logic Failed: {e}")
        import traceback
        traceback.print_exc()
        return False


def run_diagnostic() -> bool:
    """Run the three-stage diagnostic: Qdrant, router model, search logic.

    Each stage prints its own progress and result. A failed stage stops the
    run, because the later stages need the objects the earlier ones produce.

    Returns:
        ``True`` when every stage succeeds and both search paths return
        results; ``False`` otherwise.
    """
    print(">>> Starting Diagnostic Check...")

    db = _check_qdrant()
    if db is None:
        return False

    router = _check_router()
    if router is None:
        return False

    return _check_search(db, router)


if __name__ == "__main__":
    # Exit non-zero on failure so shells and CI can detect a failed diagnostic.
    sys.exit(0 if run_diagnostic() else 1)