"""Gradio demo comparing routed ("dashVector") search against direct Qdrant search."""

import html
import os
import time

import gradio as gr
import pandas as pd

from config import COLLECTION_NAME, NUM_CLUSTERS, FRESHNESS_SHARD_ID, MRL_DIMS
from src.comparison import ComparisonEngine
from src.router import LearnedRouter
from src.vector_db import UnifiedQdrant

# --- Initialization ---
print("Initializing dashVectorspace App...")

# 1. Initialize DB
db = UnifiedQdrant(
    collection_name=COLLECTION_NAME,
    vector_size=384,
    num_clusters=NUM_CLUSTERS,
    freshness_shard_id=FRESHNESS_SHARD_ID,
)
db.initialize()

# 2. Initialize Router
ROUTER_PATH = "models/router_v1.pkl"
if os.path.exists(ROUTER_PATH):
    router = LearnedRouter.load(ROUTER_PATH)
else:
    print("WARNING: Router model not found. Creating a DUMMY router for demo UI.")
    router = LearnedRouter(model_type="lightgbm", n_clusters=NUM_CLUSTERS, mrl_dims=MRL_DIMS)
    # Stub prediction so the UI still works without a trained model:
    # every input yields the fixed tuple (0, 0.99).
    router.predict = lambda x: (0, 0.99)

# 3. Initialize Engine
engine = ComparisonEngine(db, router, embedding_model_name="minilm")


# --- UI Logic ---
def _format_metrics(res: dict) -> str:
    """Render one search result's latency and shard count as a table cell."""
    return f"{res['latency_ms']:.1f} ms | {res['shards_searched']} Shards"


def _shard_savings(optimized_shards, baseline_shards) -> str:
    """Return the percentage of shards avoided relative to the baseline.

    Yields "N/A" when the baseline searched zero shards, since the ratio is
    undefined in that case (the original expression raised ZeroDivisionError).
    """
    if not baseline_shards:
        return "N/A"
    return f"{(1 - optimized_shards / baseline_shards) * 100:.1f}%"


def _format_top_result(res_dict: dict) -> str:
    """Return a short, HTML-safe preview of the first hit in ``res_dict["results"]``."""
    hits = res_dict["results"]
    if not hits:
        return "No results found."

    payload = hits[0].payload
    text = payload.get("text", "No text") if payload else "No text"
    # The preview is rendered through gr.HTML, so stored passage text must be
    # escaped to keep it from being interpreted as markup.
    snippet = html.escape(str(text)[:150])
    return f"Top Result: {snippet}..."


def run_comparison(query):
    """Run the direct and routed searches for ``query`` and build the UI outputs.

    Args:
        query: Text entered in the search box.

    Returns:
        A 2-tuple matching the two bound output components:
        a one-row ``pandas.DataFrame`` of benchmark figures (or ``None`` to
        clear the table when the query is blank) and a preview string for the
        HTML component.
    """
    # Exactly two values must be returned, one per output component
    # (results_table, results_html).
    if not query or not query.strip():
        return None, "Please enter a query."

    # Run Searches
    res_direct = engine.direct_search(query)
    res_xvector = engine.xvector_search(query)

    # --- 1. Benchmarking Table Data ---
    # Sketch Cols: Embedding Model | Router | dash Vector (Time, Shards) | Qdrant Search (Time, Shards)
    # Formatted as a Pandas DataFrame for the gr.Dataframe component.
    df = pd.DataFrame({
        "Embedding Model": ["MiniLM-L6-v2"],
        "Router": ["LightGBM"],
        "dashVector (Optimized)": [_format_metrics(res_xvector)],
        "Qdrant (Baseline)": [_format_metrics(res_direct)],
        "Savings": [
            _shard_savings(res_xvector["shards_searched"], res_direct["shards_searched"])
        ],
    })

    # --- 2. Search Results ---
    # Only the top result of each engine is shown, to prove the search works;
    # the table above is the main focus.
    results_preview = (
        f"\ndashVector: {_format_top_result(res_xvector)}"
        f"\nQdrant: {_format_top_result(res_direct)}\n"
    )

    return df, results_preview


# --- Custom CSS for Single Screen Layout ---
# Built from adjacent literals so each rule sits on its own source line.
custom_css = (
    " body { background-color: #0b0f19; color: #e0e0e0; overflow: hidden; }"
    " .gradio-container { max-width: 1200px !important; margin: 0 auto; height: 100vh;"
    " display: flex; flex-direction: column; justify-content: center; }"
    " h1 { font-size: 2.5em; margin-bottom: 0.2em; text-align: center;"
    " background: -webkit-linear-gradient(45deg, #667eea, #764ba2);"
    " -webkit-background-clip: text; -webkit-text-fill-color: transparent; }"
    " .input-box textarea { background: #1a1f2e !important;"
    " border: 1px solid #333 !important; font-size: 1.2em; }"
    " .dataset-box { border: 1px solid #444; padding: 10px 20px; border-radius: 8px;"
    " text-align: center; font-weight: bold; background: #1a1f2e; display: inline-block; }"
    " .scope-box { margin-top: 20px; padding: 15px; border-left: 4px solid #667eea;"
    " background: rgba(102, 126, 234, 0.1); }"
    " .table-wrap { margin-top: 20px; }"
    " .footer-row { margin-top: 40px !important; align-items: center !important; }"
    " footer { display: none !important; }"
    " "
)

# --- Gradio Layout ---
with gr.Blocks(
    title="dashVectorspace",
    theme=gr.themes.Soft(primary_hue="indigo", secondary_hue="slate"),
    css=custom_css,
) as demo:
    # Title
    gr.Markdown("# 🚀 dashVectorspace")

    # Search Section (Centered)
    with gr.Row(elem_id="search-row"):
        with gr.Column(scale=4):
            query_input = gr.Textbox(
                placeholder="Enter your search query here...",
                show_label=False,
                elem_classes="input-box",
                lines=1,
            )
        with gr.Column(scale=1):
            submit_btn = gr.Button("Search", variant="primary", size="lg")

    # Benchmarking Table
    gr.Markdown("### ⚡ Benchmarking Results (Live)")
    results_table = gr.Dataframe(
        headers=["Embedding Model", "Router", "dashVector (Optimized)", "Qdrant (Baseline)", "Savings"],
        datatype=["str", "str", "str", "str", "str"],
        interactive=False,
        elem_classes="table-wrap",
    )

    # Result Preview (empty until a search has been run)
    results_html = gr.HTML()

    # Footer Section: Dataset & Scope
    # NOTE(review): the stylesheet defines .dataset-box and .scope-box, but the
    # footer snippets below carry no wrapper elements using them; confirm
    # whether markup is missing here.
    with gr.Row(elem_classes="footer-row"):
        with gr.Column(scale=1):
            gr.HTML("\nDataset: MS MARCO\n")
        with gr.Column(scale=2):
            gr.HTML("\nProject Scope:\n")

    # Event Listeners: the button click and Enter in the textbox both run the
    # same handler with the same outputs.
    submit_btn.click(
        run_comparison,
        inputs=[query_input],
        outputs=[results_table, results_html],
    )
    query_input.submit(
        run_comparison,
        inputs=[query_input],
        outputs=[results_table, results_html],
    )

if __name__ == "__main__":
    demo.launch()