#!/usr/bin/env python3
"""
HF_Space_hipVS/app.py
=====================
ROCKIT Vision Intelligence — Hugging Face Space

GPU-accelerated multimodal search engine.
  - Embedding: Qwen3-VL-Embedding (GPU) / CLIP (CPU)
  - Search:    CAGRA (hipVS) -> PyTorch -> NumPy
  - UI:        Premium Gradio Demo (Gradio >= 5.7)
"""

import logging
import os
import re
import sys
from pathlib import Path

import gradio as gr

# The project-local modules below live next to this file; make them importable
# regardless of the working directory the script is launched from.
sys.path.insert(0, str(Path(__file__).parent))

# Configure logging before the local imports so anything they log at import
# time uses this format.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s")
logger = logging.getLogger("rockit-vision")

from config import (
    USE_GPU, EMBED_MODEL, EMBED_DIM, LLM_MODEL, LLM_FALLBACK,
    FRAME_EVERY_SEC, HF_TOKEN, HF_DATASET_REPO, AUTO_SEED,
    DEFAULT_PROJECT, DATA_DIR, get_project_dir,
)
from vector_store import get_store, list_projects
from ingest import (
    ingest_images, ingest_videos,
    ingest_single_image, ingest_single_video,
    HAS_FFMPEG,
)
from search import search_images, search_videos
import seed_data


# ── Helpers ───────────────────────────────────────────────────────────────────

# Accepted shape of a normalized project id: starts with a letter or digit and
# contains only lowercase letters, digits, '.', '_' and '-'.
_PROJECT_NAME_RE = re.compile(r"[a-z0-9][a-z0-9._-]*")


def get_system_info(project: str = DEFAULT_PROJECT) -> str:
    """Build the Markdown status panel for *project*.

    Looks up the project's ``image_index`` and ``video_index`` stores and
    renders two Markdown tables: one with hardware/model settings taken from
    ``config`` and one with the item count and hot/cold location of each index.
    """
    img_store = get_store(project, "image_index")
    vid_store = get_store(project, "video_index")
    return "\n".join([
        f"### Project Context: `{project}`\n",
        "| Hardware & Models | Status |",
        "|:---|:---|",
        f"| **GPU Acceleration** | {'🚀 Enabled' if USE_GPU else '🐢 Disabled (CPU)'} |",
        f"| **Search Backend** | {img_store.mode} |",
        f"| **Vision Model** | `{EMBED_MODEL.split('/')[-1]}` ({EMBED_DIM}d) |",
        f"| **Reasoning LLM** | `{LLM_MODEL.split('/')[-1]}` |",
        f"| **Media Engine** | {'ffmpeg detected' if HAS_FFMPEG else 'ffmpeg MISSING'} |",
        "\n| Index Stats | Count | Location |",
        "|:---|:---|:---|",
        f"| Images | {img_store.count} | {('VRAM (Hot)' if img_store.in_vram else 'NVMe (Cold)')} |",
        f"| Video Frames | {vid_store.count} | {('VRAM (Hot)' if vid_store.in_vram else 'NVMe (Cold)')} |",
    ])


def get_projects_list() -> list[str]:
    """Return the known project names, always including ``DEFAULT_PROJECT``.

    A copy of ``list_projects()`` is used so that inserting the default
    project never mutates a list owned by the vector-store module.
    """
    projects = list(list_projects())
    if DEFAULT_PROJECT not in projects:
        projects.insert(0, DEFAULT_PROJECT)
    return projects


def _normalize_project_name(raw: str) -> str | None:
    """Turn user-typed text into a project id, or ``None`` if it is unsafe.

    The text is trimmed, lower-cased and spaces become dashes. The result is
    rejected when it contains ``..`` or any character outside
    ``[a-z0-9._-]`` (so path separators cannot reach the directory helper).
    """
    candidate = raw.strip().lower().replace(" ", "-")
    if ".." in candidate or not _PROJECT_NAME_RE.fullmatch(candidate):
        return None
    return candidate


def _existing_gallery_items(entries, path_key, caption_for):
    """Build ``(path, caption)`` gallery tuples for entries whose file exists.

    Args:
        entries: iterable of result dicts.
        path_key: key under which each dict stores the image path.
        caption_for: callable mapping a result dict to its caption string.
    """
    items = []
    for entry in entries:
        path = entry.get(path_key, "")
        # Skip results whose file is missing on disk.
        if path and os.path.exists(path):
            items.append((path, caption_for(entry)))
    return items


def _image_caption(hit) -> str:
    """Caption for one image search hit: file name plus similarity score."""
    name = hit.get("file_name", "Unknown")
    score = hit.get("score", 0)
    return f"{name} (Score: {score:.3f})"


def _video_caption(match) -> str:
    """Caption for one video match: video name, time range and score.

    Missing ``start``/``end`` values are shown as ``?`` instead of raising.
    """
    name = match.get("video_name", "Unknown")
    time_range = f"{match.get('start', '?')} - {match.get('end', '?')}"
    score = match.get("score", 0)
    return f"{name} @ {time_range} (Score: {score:.3f})"


# ── Callbacks ─────────────────────────────────────────────────────────────────

def handle_image_upload(files, project, progress=gr.Progress()):
    """Embed and index uploaded images one by one.

    Returns a ``(log_text, system_info_markdown)`` pair: one status message per
    file joined by newlines, and the refreshed status panel for *project*.
    """
    if not files:
        return "No files uploaded.", get_system_info(project)

    total = len(files)
    messages = []
    for position, file_path in enumerate(files, start=1):
        progress(position / total, desc=f"Embedding {Path(file_path).name}...")
        # Only the message is surfaced; the success flag is not used here.
        _ok, message = ingest_single_image(file_path, project=project)
        messages.append(message)
    return "\n".join(messages), get_system_info(project)


def handle_video_upload(files, project, progress=gr.Progress()):
    """Extract frames and index uploaded videos.

    The Gradio progress tracker is forwarded to ``ingest_single_video`` as its
    ``progress_callback``. Returns ``(log_text, system_info_markdown)``.
    """
    if not files:
        return "No files uploaded.", get_system_info(project)

    messages = []
    for file_path in files:
        _count, message = ingest_single_video(
            file_path, project=project, progress_callback=progress
        )
        messages.append(message)
    return "\n".join(messages), get_system_info(project)


def handle_batch_ingest(project, progress=gr.Progress()):
    """Re-index all images and videos from the project's data folder.

    Returns ``(summary_text, system_info_markdown)``. Only the counts returned
    by the ingest functions are reported; their detailed logs are not shown.
    """
    img_count, _img_log = ingest_images(project=project, progress_callback=progress)
    vid_count, _vid_log = ingest_videos(project=project, progress_callback=progress)
    log = (
        "=== Batch Ingest Results ===\n\n"
        f"Successfully indexed {img_count} images and {vid_count} video frames "
        f"into project '{project}'."
    )
    return log, get_system_info(project)


def handle_seed(project, progress=gr.Progress()):
    """Download and seed demo data for the selected project.

    Returns the log produced by ``seed_data.run`` and the refreshed status
    panel.
    """
    _count, log = seed_data.run(project=project, progress_callback=progress)
    return log, get_system_info(project)


def handle_clear(project):
    """Purge all vector indexes for the selected project.

    Clears both the ``image_index`` and ``video_index`` stores and returns a
    confirmation message plus the refreshed status panel.
    """
    for index_name in ("image_index", "video_index"):
        get_store(project, index_name).clear()
    return f"All indexes cleared for project '{project}'.", get_system_info(project)


def handle_search(query, mode, top_k, project):
    """Run semantic search and return AI summary + gallery items.

    Args:
        query: free-text query; ``None`` or blank yields a prompt message.
        mode: ``"Image Search"`` selects image search; any other value runs
            video search.
        top_k: number of results requested (coerced to ``int``).
        project: project whose indexes are searched.

    Returns:
        ``(summary, gallery_items, store_info)`` where ``gallery_items`` is a
        list of ``(file_path, caption)`` tuples restricted to files that exist
        on disk.
    """
    # Guard against None as well as blank text, matching handle_create_project.
    if not query or not query.strip():
        return "Please enter a search query.", [], ""

    limit = int(top_k)
    if mode == "Image Search":
        result = search_images(query, project=project, top_k=limit)
        gallery_items = _existing_gallery_items(
            result["results"], "file_path", _image_caption
        )
    else:  # Video Intelligence
        result = search_videos(query, project=project, top_k=limit)
        gallery_items = _existing_gallery_items(
            result["matches"], "representative_frame", _video_caption
        )
    return result["llm_summary"], gallery_items, result["store_info"]


def handle_create_project(name):
    """Create a new named project workspace.

    The typed name is normalized (trimmed, lower-cased, spaces to dashes) and
    validated before being handed to ``get_project_dir``. Returns a status
    message and either an updated project dropdown selecting the new project,
    or ``gr.skip()`` to leave the dropdown untouched when the name is rejected.
    """
    if not name or not name.strip():
        return "Enter a project name.", gr.skip()

    project_id = _normalize_project_name(name)
    if project_id is None:
        return (
            "Invalid project name. Use only letters, digits, '.', '_' and '-', "
            "starting with a letter or digit.",
            gr.skip(),
        )

    # NOTE(review): get_project_dir is presumed to create the directory as a
    # side effect — confirm in config.py.
    get_project_dir(project_id)

    choices = get_projects_list()
    # The dropdown value must be one of its choices; make sure the new project
    # is present even if the project listing does not report it yet.
    if project_id not in choices:
        choices.append(project_id)
    return f"Project '{project_id}' created.", gr.Dropdown(choices=choices, value=project_id)


def refresh_projects():
    """Return updated dropdown choices."""
    return gr.Dropdown(choices=get_projects_list())


# ── CSS ───────────────────────────────────────────────────────────────────────

CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&display=swap');

body { font-family: 'Inter', sans-serif !important; }

.gradio-container {
    max-width: 1300px !important;
    margin: 0 auto !important;
    background-color: #050505 !important;
}

.main-header {
    text-align: center;
    background: linear-gradient(135deg, #0f0f1b 0%, #1a1a2e 100%);
    padding: 3rem 2rem;
    border-radius: 24px;
    margin-bottom: 2rem;
    border: 1px solid rgba(255,255,255,0.05);
    box-shadow: 0 10px 30px rgba(0,0,0,0.5);
    display: flex;
    flex-direction: column;
    align-items: center;
}

.logo-container img {
    max-width: 120px;
    margin-bottom: 1.5rem;
    filter: drop-shadow(0 0 15px rgba(233, 69, 96, 0.4));
}

.main-header h1 {
    background: linear-gradient(90deg, #e94560, #a033ff, #4cc9f0);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 3.2rem !important;
    font-weight: 800 !important;
    margin: 0;
    letter-spacing: -1px;
}

.main-header p.subtitle {
    color: #94a3b8;
    font-size: 1.1rem;
    margin-top: 0.5rem;
}

.card {
    background: #11111b !important;
    border: 1px solid rgba(255,255,255,0.08) !important;
    border-radius: 16px !important;
    padding: 1rem !important;
}

#search-btn {
    background: linear-gradient(135deg, #e94560 0%, #533483 100%) !important;
    border: none !important;
    font-weight: 700 !important;
    color: white !important;
    transition: all 0.3s ease;
}

#search-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(233, 69, 96, 0.4);
}

.stat-box {
    background: rgba(255,255,255,0.03);
    border-radius: 12px;
    padding: 1rem;
    border: 1px solid rgba(255,255,255,0.05);
}

.gallery-container {
    background: #0a0a0f !important;
    border-radius: 12px !important;
}

footer { display: none !important; }
"""


# ── Static Markdown content ───────────────────────────────────────────────────
# Kept at column 0 so the rendered Markdown does not depend on any
# indentation-stripping performed by the UI layer.

_EMBEDDING_OVERVIEW_MD = """
### Direct Multimodal Embedding
ROCKIT doesn't use captioning models. It uses **Vision-Language Models (VLM)** to encode
visual features directly into the same vector space as text. This preserves subtle
details that text captions often lose.
"""

_MEMORY_MANAGEMENT_MD = """
#### Hot/Cold Memory Management
To support dozens of projects on a single GPU, ROCKIT implements an **NVMe-to-VRAM Async Swap**.

- **Cold Store (NVMe):** Indexes are serialized as `.cagra` files.
- **Hot Cache (VRAM):** Active projects are copied into VRAM using pinned-memory DMA.
- **LRU Eviction:** Least recently used indexes are purged from VRAM to make room for new ones.
"""


# ── Build UI ──────────────────────────────────────────────────────────────────

def build_ui():
    """Assemble the Gradio Blocks application and wire up all event handlers.

    Returns the ``gr.Blocks`` instance; the caller is responsible for
    launching it. Optional artwork is only rendered when the corresponding
    file exists relative to the current working directory.
    """
    # NOTE(review): the folder is spelled "assests" here; the strings are kept
    # as-is because they must match the directory name on disk — confirm.
    logo_path = "assests/rockit_logo.png"
    arch_path = "assests/Architecture.svg"
    flow_path = "assests/data_flow.svg"
    gpu_path = "assests/gpu_compute_tiers.svg"

    with gr.Blocks(
        title="ROCKIT Vision Intelligence",
        # Soft() is a named theme preset configured through hue keywords.
        theme=gr.themes.Soft(
            primary_hue="rose",
            secondary_hue="indigo",
            neutral_hue="slate",
        ),
        css=CSS,
    ) as app:

        # ── Header ────────────────────────────────────────────────────────────
        with gr.Column(elem_classes="main-header"):
            if os.path.exists(logo_path):
                gr.Image(
                    logo_path,
                    show_label=False,
                    container=False,
                    width=100,
                    elem_classes="logo-container",
                )
            # The title must be a real <h1> so the `.main-header h1` gradient
            # rule in CSS applies to it.
            gr.HTML("<h1>ROCKIT Vision Intelligence</h1>")
            gr.Markdown(
                "GPU-Accelerated Multimodal Search Platform",
                elem_classes="subtitle",
            )

        # ── Main layout ───────────────────────────────────────────────────────
        with gr.Row():

            # Left sidebar
            with gr.Column(scale=3):
                with gr.Group(elem_classes="card"):
                    gr.Markdown("### 🗂️ Project Selection")
                    with gr.Row():
                        project_select = gr.Dropdown(
                            choices=get_projects_list(),
                            value=DEFAULT_PROJECT,
                            label="Active Workspace",
                            scale=4,
                            interactive=True,
                        )
                        refresh_btn = gr.Button("🔄", scale=1)

                    with gr.Accordion("Create New Project", open=False):
                        new_project_name = gr.Textbox(
                            label="Project ID",
                            placeholder="e.g. security-cam",
                        )
                        create_btn = gr.Button("Initialize Project", variant="secondary")
                        create_status = gr.Markdown()

                with gr.Group(elem_classes="card"):
                    gr.Markdown("### ⚙️ System Status")
                    system_info = gr.Markdown(value=get_system_info())

            # Right content area
            with gr.Column(scale=7):
                with gr.Tabs():

                    # ── Tab 1: Search ─────────────────────────────────────────
                    with gr.Tab("🔍 Search"):
                        with gr.Group(elem_classes="card"):
                            with gr.Row():
                                with gr.Column(scale=4):
                                    query_input = gr.Textbox(
                                        label="Natural Language Query",
                                        placeholder=(
                                            'Try "a cat sitting on a laptop" '
                                            'or "someone running in a park"'
                                        ),
                                        lines=2,
                                    )
                                with gr.Column(scale=1):
                                    search_mode = gr.Radio(
                                        ["Image Search", "Video Intelligence"],
                                        value="Image Search",
                                        label="Search Mode",
                                    )
                                    top_k = gr.Slider(
                                        1, 50, value=12, step=1,
                                        label="Results Count",
                                    )
                            search_btn = gr.Button(
                                "Execute Semantic Search",
                                variant="primary",
                                elem_id="search-btn",
                                size="lg",
                            )

                        gr.Markdown("### 🤖 AI Interpretation")
                        search_summary = gr.Markdown(
                            "*Results will appear here...*",
                            elem_classes="card",
                        )

                        gr.Markdown("### 🖼️ Visual Matches")
                        result_gallery = gr.Gallery(
                            label="Retrieved Media",
                            # columns / rows are given as plain ints.
                            columns=4,
                            rows=2,
                            object_fit="contain",
                            height="auto",
                            elem_classes="gallery-container",
                        )

                        with gr.Accordion("Technical Details", open=False):
                            store_info = gr.Textbox(
                                label="Vector Store Engine",
                                interactive=False,
                            )

                    # ── Tab 2: Ingest Media ───────────────────────────────────
                    with gr.Tab("📤 Ingest Media"):
                        with gr.Row():
                            with gr.Column():
                                with gr.Group(elem_classes="card"):
                                    gr.Markdown("#### 🖼️ Image Ingestion")
                                    img_upload = gr.File(
                                        label="Select Images",
                                        file_types=["image"],
                                        file_count="multiple",
                                    )
                                    img_btn = gr.Button("Embed & Index Images")
                                    img_log = gr.Textbox(
                                        label="Status",
                                        lines=4,
                                        interactive=False,
                                    )

                            with gr.Column():
                                with gr.Group(elem_classes="card"):
                                    gr.Markdown("#### 🎥 Video Intelligence")
                                    vid_upload = gr.File(
                                        label="Select Videos",
                                        file_types=["video"],
                                        file_count="multiple",
                                    )
                                    vid_btn = gr.Button("Extract & Index Frames")
                                    vid_log = gr.Textbox(
                                        label="Status",
                                        lines=4,
                                        interactive=False,
                                    )

                        with gr.Group(elem_classes="card"):
                            gr.Markdown("#### ⚡ Batch Operations")
                            with gr.Row():
                                seed_btn = gr.Button("Seed Demo Data", variant="secondary")
                                batch_btn = gr.Button("Re-index Folder", variant="secondary")
                                clear_btn = gr.Button("Purge All Indexes", variant="stop")
                            action_log = gr.Markdown()

                    # ── Tab 3: How It Works ───────────────────────────────────
                    with gr.Tab("🧠 How It Works"):
                        gr.Markdown(_EMBEDDING_OVERVIEW_MD)

                        with gr.Row():
                            with gr.Column():
                                gr.Markdown("#### 1. System Architecture")
                                if os.path.exists(arch_path):
                                    gr.Image(arch_path, show_label=False)
                            with gr.Column():
                                gr.Markdown("#### 2. Query Flow")
                                if os.path.exists(flow_path):
                                    gr.Image(flow_path, show_label=False)

                        gr.Markdown("---")

                        with gr.Row():
                            with gr.Column():
                                gr.Markdown("#### 3. GPU Acceleration Tiers")
                                if os.path.exists(gpu_path):
                                    gr.Image(gpu_path, show_label=False)
                            with gr.Column():
                                gr.Markdown(_MEMORY_MANAGEMENT_MD)

        # ── Event Bindings ────────────────────────────────────────────────────
        # inputs/outputs are always passed as lists of components.

        # Sidebar controls
        project_select.change(
            fn=get_system_info,
            inputs=[project_select],
            outputs=[system_info],
        )
        refresh_btn.click(
            fn=refresh_projects,
            inputs=[],
            outputs=[project_select],
        )
        create_btn.click(
            fn=handle_create_project,
            inputs=[new_project_name],
            outputs=[create_status, project_select],
        )

        # Search: the button and pressing Enter in the query box share wiring.
        _search_inputs = [query_input, search_mode, top_k, project_select]
        _search_outputs = [search_summary, result_gallery, store_info]
        search_btn.click(
            fn=handle_search,
            inputs=_search_inputs,
            outputs=_search_outputs,
        )
        query_input.submit(
            fn=handle_search,
            inputs=_search_inputs,
            outputs=_search_outputs,
        )

        # Ingest
        img_btn.click(
            fn=handle_image_upload,
            inputs=[img_upload, project_select],
            outputs=[img_log, system_info],
        )
        vid_btn.click(
            fn=handle_video_upload,
            inputs=[vid_upload, project_select],
            outputs=[vid_log, system_info],
        )

        # Batch operations
        seed_btn.click(
            fn=handle_seed,
            inputs=[project_select],
            outputs=[action_log, system_info],
        )
        batch_btn.click(
            fn=handle_batch_ingest,
            inputs=[project_select],
            outputs=[action_log, system_info],
        )
        clear_btn.click(
            fn=handle_clear,
            inputs=[project_select],
            outputs=[action_log, system_info],
        )

    return app


# ── Entry point ───────────────────────────────────────────────────────────────

if __name__ == "__main__":
    # Seeding is best-effort: any failure (including in the is_needed() check)
    # is logged with its traceback and the UI still starts.
    try:
        if seed_data.is_needed():
            logger.info("Auto-seeding default project from HF Dataset...")
            seed_data.run()
    except Exception as e:
        logger.exception("Auto-seeding failed: %s", e)

    app = build_ui()
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)