| |
| """ |
| HF_Space_hipVS/app.py |
| ===================== |
| ROCKIT Vision Intelligence — Hugging Face Space |
| |
| GPU-accelerated multimodal search engine. |
| - Embedding: Qwen3-VL-Embedding (GPU) / CLIP (CPU) |
| - Search: CAGRA (hipVS) -> PyTorch -> NumPy |
| - UI: Premium Gradio Demo (Gradio >= 5.7) |
| """ |
|
|
| import logging |
| import sys |
| import os |
| from pathlib import Path |
| import gradio as gr |
|
|
# Put this file's own directory at the front of the import path so that
# modules located next to it can be imported by plain name.
sys.path.insert(0, str(Path(__file__).parent))

# Process-wide logging: INFO and above, timestamp + logger name + message.
logging.basicConfig(
    format="%(asctime)s [%(name)s] %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("rockit-vision")
|
|
| from config import ( |
| USE_GPU, EMBED_MODEL, EMBED_DIM, LLM_MODEL, LLM_FALLBACK, |
| FRAME_EVERY_SEC, HF_TOKEN, HF_DATASET_REPO, AUTO_SEED, |
| DEFAULT_PROJECT, DATA_DIR |
| ) |
| from vector_store import get_store, list_projects |
| from ingest import ( |
| ingest_images, ingest_videos, |
| ingest_single_image, ingest_single_video, |
| HAS_FFMPEG, |
| ) |
| from search import search_images, search_videos |
| import seed_data |
|
|
| |
|
|
def get_system_info(project: str = DEFAULT_PROJECT) -> str:
    """Build the Markdown status report shown for *project*.

    The report contains two tables: one describing the configured hardware
    and models, and one with the item count and residency of the project's
    image and video-frame indexes.

    Args:
        project: Name of the project whose stores are inspected.

    Returns:
        The report as a single newline-joined Markdown string.
    """

    def _residency(store) -> str:
        # ``in_vram`` is read from the store object returned by get_store().
        return "VRAM (Hot)" if store.in_vram else "NVMe (Cold)"

    images = get_store(project, "image_index")
    frames = get_store(project, "video_index")

    gpu_status = "π Enabled" if USE_GPU else "π’ Disabled (CPU)"
    ffmpeg_status = "ffmpeg detected" if HAS_FFMPEG else "ffmpeg MISSING"
    # Only the last path segment of the model identifiers is displayed.
    vision_model = EMBED_MODEL.split("/")[-1]
    reasoning_model = LLM_MODEL.split("/")[-1]

    report = [
        f"### Project Context: `{project}`\n",
        "| Hardware & Models | Status |",
        "|:---|:---|",
        f"| **GPU Acceleration** | {gpu_status} |",
        f"| **Search Backend** | {images.mode} |",
        f"| **Vision Model** | `{vision_model}` ({EMBED_DIM}d) |",
        f"| **Reasoning LLM** | `{reasoning_model}` |",
        f"| **Media Engine** | {ffmpeg_status} |",
        "\n| Index Stats | Count | Location |",
        "|:---|:---|:---|",
        f"| Images | {images.count} | {_residency(images)} |",
        f"| Video Frames | {frames.count} | {_residency(frames)} |",
    ]
    return "\n".join(report)
|
|
|
|
def get_projects_list() -> list[str]:
    """Return the known project names, always including the default project.

    The names come from ``list_projects()``. When ``DEFAULT_PROJECT`` is not
    among them it is placed first, so the UI always has a valid selection.

    Returns:
        A new list of project names; the caller may modify it freely.
    """
    # Copy before inserting: the list handed back by list_projects() may be
    # shared or cached by that module (not visible from here), and mutating
    # it in place would leak the default entry into its state.
    projects = list(list_projects())
    if DEFAULT_PROJECT not in projects:
        projects.insert(0, DEFAULT_PROJECT)
    return projects
|
|
| |
|
|
def handle_image_upload(files, project, progress=gr.Progress()):
    """Index the uploaded images one at a time and report the outcome.

    Args:
        files: Uploaded file paths; may be empty or ``None``.
        project: Project the images are ingested into.
        progress: Gradio progress tracker, updated once per file.

    Returns:
        A ``(log, system_info)`` tuple: one status message per file joined by
        newlines, and the refreshed system-status Markdown for *project*.
    """
    if not files:
        return "No files uploaded.", get_system_info(project)

    total = len(files)
    messages = []
    for position, file_path in enumerate(files, start=1):
        progress(position / total, desc=f"Embedding {Path(file_path).name}...")
        # Only the message is shown; the success flag is not used here.
        _, message = ingest_single_image(file_path, project=project)
        messages.append(message)

    return "\n".join(messages), get_system_info(project)
|
|
|
|
def handle_video_upload(files, project, progress=gr.Progress()):
    """Index the uploaded videos and report the outcome.

    Args:
        files: Uploaded file paths; may be empty or ``None``.
        project: Project the videos are ingested into.
        progress: Gradio progress tracker, forwarded to the ingest routine
            as its ``progress_callback``.

    Returns:
        A ``(log, system_info)`` tuple: one status message per video joined
        by newlines, and the refreshed system-status Markdown for *project*.
    """
    if not files:
        return "No files uploaded.", get_system_info(project)

    messages = []
    for video_path in files:
        # Only the message is shown; the returned count is not used here.
        _, message = ingest_single_video(
            video_path,
            project=project,
            progress_callback=progress,
        )
        messages.append(message)

    return "\n".join(messages), get_system_info(project)
|
|
|
|
def handle_batch_ingest(project, progress=gr.Progress()):
    """Run the batch image and video ingest for *project* and summarise it.

    Args:
        project: Project to ingest into.
        progress: Gradio progress tracker, forwarded to both ingest routines
            as their ``progress_callback``.

    Returns:
        A ``(summary, system_info)`` tuple: a short text summary with the
        two counts, and the refreshed system-status Markdown for *project*.
    """
    # NOTE(review): both ingest calls also return a log, which is not
    # surfaced in the summary below - confirm that is intended.
    image_total, _image_log = ingest_images(project=project, progress_callback=progress)
    frame_total, _video_log = ingest_videos(project=project, progress_callback=progress)

    header = "=== Batch Ingest Results ===\n\n"
    outcome = (
        f"Successfully indexed {image_total} images and {frame_total} video frames "
        f"into project '{project}'."
    )
    return header + outcome, get_system_info(project)
|
|
|
|
def handle_seed(project, progress=gr.Progress()):
    """Run the demo-data seeding routine for *project*.

    Args:
        project: Project to seed.
        progress: Gradio progress tracker, forwarded to ``seed_data.run`` as
            its ``progress_callback``.

    Returns:
        A ``(log, system_info)`` tuple: the log returned by the seeding
        routine and the refreshed system-status Markdown for *project*.
    """
    # The first element of the result (a count) is not displayed.
    _, seed_log = seed_data.run(project=project, progress_callback=progress)
    refreshed_status = get_system_info(project)
    return seed_log, refreshed_status
|
|
|
|
def handle_clear(project):
    """Clear the image and video indexes of *project*.

    Args:
        project: Project whose stores are cleared.

    Returns:
        A ``(message, system_info)`` tuple: a confirmation message and the
        refreshed system-status Markdown for *project*.
    """
    # Image index first, then video index.
    for index_name in ("image_index", "video_index"):
        get_store(project, index_name).clear()

    confirmation = f"All indexes cleared for project '{project}'."
    return confirmation, get_system_info(project)
|
|
|
|
def _gallery_items(records, path_key, make_caption):
    """Return ``(path, caption)`` pairs for records whose file exists on disk."""
    items = []
    for record in records:
        path = record.get(path_key, "")
        # Entries without a path, or whose file is gone, are skipped rather
        # than handed to the gallery.
        if path and os.path.exists(path):
            items.append((path, make_caption(record)))
    return items


def _image_caption(record):
    """Caption for an image hit: file name plus similarity score."""
    name = record.get("file_name", "Unknown")
    score = record.get("score", 0)
    return f"{name} (Score: {score:.3f})"


def _video_caption(match):
    """Caption for a video hit: video name, time range and similarity score."""
    name = match.get("video_name", "Unknown")
    # Missing bounds are shown as "?" instead of raising KeyError, matching
    # the tolerant ``.get`` access used for every other field.
    time_range = f"{match.get('start', '?')} - {match.get('end', '?')}"
    score = match.get("score", 0)
    return f"{name} @ {time_range} (Score: {score:.3f})"


def handle_search(query, mode, top_k, project):
    """Run a semantic search and return summary, gallery items and store info.

    Args:
        query: Free-text query. ``None``, empty or whitespace-only input is
            answered with a prompt instead of running a search.
        mode: ``"Image Search"`` selects the image search; any other value
            selects the video search.
        top_k: Number of results requested; converted with ``int()``.
        project: Project to search in.

    Returns:
        A ``(summary, gallery_items, store_info)`` tuple, where
        ``gallery_items`` is a list of ``(file_path, caption)`` pairs for
        hits whose file exists on disk.
    """
    # ``not query`` also covers None, which would otherwise fail on .strip().
    if not query or not query.strip():
        return "Please enter a search query.", [], ""

    limit = int(top_k)
    if mode == "Image Search":
        result = search_images(query, project=project, top_k=limit)
        gallery = _gallery_items(result["results"], "file_path", _image_caption)
    else:
        result = search_videos(query, project=project, top_k=limit)
        gallery = _gallery_items(
            result["matches"], "representative_frame", _video_caption
        )

    return result["llm_summary"], gallery, result["store_info"]
|
|
|
|
def handle_create_project(name):
    """Create a new named project workspace from a user-typed name.

    The name is trimmed, lower-cased and its spaces are replaced with
    hyphens. Names that contain a path separator or NUL, or that are exactly
    ``.`` or ``..``, are rejected before reaching ``get_project_dir``.

    Args:
        name: Raw text from the "Project ID" textbox; may be ``None``.

    Returns:
        A ``(status, dropdown_update)`` tuple. On success the update is a
        ``gr.Dropdown`` with the refreshed choices and the new project
        selected; on rejected input it is ``gr.skip()`` so the dropdown is
        left untouched.
    """
    if not name or not name.strip():
        return "Enter a project name.", gr.skip()

    name = name.strip().lower().replace(" ", "-")

    # The name comes straight from the UI and is presumably used as a
    # directory name by get_project_dir (TODO confirm). Refuse anything that
    # could point outside the projects folder.
    forbidden_chars = ("/", "\\", "\x00")
    if name in {".", ".."} or any(ch in name for ch in forbidden_chars):
        return (
            "Invalid project name: path separators and '.'/'..' are not allowed.",
            gr.skip(),
        )

    from config import get_project_dir

    get_project_dir(name)
    return (
        f"Project '{name}' created.",
        gr.Dropdown(choices=get_projects_list(), value=name),
    )
|
|
|
|
def refresh_projects():
    """Return a dropdown update carrying the current list of projects."""
    current_choices = get_projects_list()
    return gr.Dropdown(choices=current_choices)
|
|
| |
|
|
# Custom stylesheet handed to gr.Blocks(css=...): dark page background,
# gradient header, "card" panels, the highlighted search button and the
# gallery container. The default Gradio footer is hidden.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&display=swap');

body { font-family: 'Inter', sans-serif !important; }

.gradio-container {
    max-width: 1300px !important;
    margin: 0 auto !important;
    background-color: #050505 !important;
}

.main-header {
    text-align: center;
    background: linear-gradient(135deg, #0f0f1b 0%, #1a1a2e 100%);
    padding: 3rem 2rem;
    border-radius: 24px;
    margin-bottom: 2rem;
    border: 1px solid rgba(255,255,255,0.05);
    box-shadow: 0 10px 30px rgba(0,0,0,0.5);
    display: flex;
    flex-direction: column;
    align-items: center;
}

.logo-container img {
    max-width: 120px;
    margin-bottom: 1.5rem;
    filter: drop-shadow(0 0 15px rgba(233, 69, 96, 0.4));
}

.main-header h1 {
    background: linear-gradient(90deg, #e94560, #a033ff, #4cc9f0);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 3.2rem !important;
    font-weight: 800 !important;
    margin: 0;
    letter-spacing: -1px;
}

.main-header p.subtitle {
    color: #94a3b8;
    font-size: 1.1rem;
    margin-top: 0.5rem;
}

.card {
    background: #11111b !important;
    border: 1px solid rgba(255,255,255,0.08) !important;
    border-radius: 16px !important;
    padding: 1rem !important;
}

#search-btn {
    background: linear-gradient(135deg, #e94560 0%, #533483 100%) !important;
    border: none !important;
    font-weight: 700 !important;
    color: white !important;
    transition: all 0.3s ease;
}

#search-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(233, 69, 96, 0.4);
}

.stat-box {
    background: rgba(255,255,255,0.03);
    border-radius: 12px;
    padding: 1rem;
    border: 1px solid rgba(255,255,255,0.05);
}

.gallery-container {
    background: #0a0a0f !important;
    border-radius: 12px !important;
}

footer { display: none !important; }
"""
|
|
| |
|
|
def build_ui():
    """Assemble the Gradio interface and wire up its event handlers.

    The page has a header banner, a left column (project selection and
    system status) and a right column with three tabs: search, media
    ingestion and an explanatory "How It Works" page.

    Returns:
        The constructed ``gr.Blocks`` application; it is not launched here.
    """
    # NOTE(review): container nesting below was reconstructed from statement
    # order in a copy of this file whose indentation had been lost - confirm
    # against the rendered layout. Component creation order, keyword
    # arguments and event wiring are unchanged.

    # Optional static assets; each is rendered only when the file exists
    # (paths are resolved against the current working directory).
    logo_path, arch_path, flow_path, gpu_path = (
        "assests/rockit_logo.png",
        "assests/Architecture.svg",
        "assests/data_flow.svg",
        "assests/gpu_compute_tiers.svg",
    )

    def _diagram_if_present(path):
        """Render an unlabeled image for *path* when the file exists."""
        if os.path.exists(path):
            gr.Image(path, show_label=False)

    theme = gr.themes.Soft(
        primary_hue="rose",
        secondary_hue="indigo",
        neutral_hue="slate",
    )

    with gr.Blocks(title="ROCKIT Vision Intelligence", theme=theme, css=CSS) as app:

        # ---- Header banner -------------------------------------------------
        with gr.Column(elem_classes="main-header"):
            if os.path.exists(logo_path):
                gr.Image(
                    logo_path,
                    show_label=False,
                    container=False,
                    width=100,
                    elem_classes="logo-container",
                )
            gr.HTML("<h1>ROCKIT Vision Intelligence</h1>")
            gr.Markdown(
                "GPU-Accelerated Multimodal Search Platform",
                elem_classes="subtitle",
            )

        # ---- Main layout ---------------------------------------------------
        with gr.Row():

            # Left column: project selection and system status.
            with gr.Column(scale=3):
                with gr.Group(elem_classes="card"):
                    gr.Markdown("### ποΈ Project Selection")
                    with gr.Row():
                        project_select = gr.Dropdown(
                            choices=get_projects_list(),
                            value=DEFAULT_PROJECT,
                            label="Active Workspace",
                            scale=4,
                            interactive=True,
                        )
                        refresh_btn = gr.Button("π", scale=1)

                    with gr.Accordion("Create New Project", open=False):
                        new_project_name = gr.Textbox(
                            label="Project ID",
                            placeholder="e.g. security-cam",
                        )
                        create_btn = gr.Button("Initialize Project", variant="secondary")
                        create_status = gr.Markdown()

                with gr.Group(elem_classes="card"):
                    gr.Markdown("### βοΈ System Status")
                    # Rendered once at build time for the default project.
                    system_info = gr.Markdown(value=get_system_info())

            # Right column: tabbed work area.
            with gr.Column(scale=7):
                with gr.Tabs():

                    # ---- Search tab ----------------------------------------
                    with gr.Tab("π Search"):
                        with gr.Group(elem_classes="card"):
                            with gr.Row():
                                with gr.Column(scale=4):
                                    query_input = gr.Textbox(
                                        label="Natural Language Query",
                                        placeholder=(
                                            'Try "a cat sitting on a laptop" '
                                            'or "someone running in a park"'
                                        ),
                                        lines=2,
                                    )
                                with gr.Column(scale=1):
                                    search_mode = gr.Radio(
                                        ["Image Search", "Video Intelligence"],
                                        value="Image Search",
                                        label="Search Mode",
                                    )
                                    top_k = gr.Slider(
                                        1, 50, value=12, step=1,
                                        label="Results Count",
                                    )

                            search_btn = gr.Button(
                                "Execute Semantic Search",
                                variant="primary",
                                elem_id="search-btn",
                                size="lg",
                            )

                        gr.Markdown("### π€ AI Interpretation")
                        search_summary = gr.Markdown(
                            "*Results will appear here...*",
                            elem_classes="card",
                        )

                        gr.Markdown("### πΌοΈ Visual Matches")
                        result_gallery = gr.Gallery(
                            label="Retrieved Media",
                            columns=4,
                            rows=2,
                            object_fit="contain",
                            height="auto",
                            elem_classes="gallery-container",
                        )

                        with gr.Accordion("Technical Details", open=False):
                            store_info = gr.Textbox(
                                label="Vector Store Engine",
                                interactive=False,
                            )

                    # ---- Ingest tab ----------------------------------------
                    with gr.Tab("π€ Ingest Media"):
                        with gr.Row():
                            with gr.Column():
                                with gr.Group(elem_classes="card"):
                                    gr.Markdown("#### πΌοΈ Image Ingestion")
                                    img_upload = gr.File(
                                        label="Select Images",
                                        file_types=["image"],
                                        file_count="multiple",
                                    )
                                    img_btn = gr.Button("Embed & Index Images")
                                    img_log = gr.Textbox(
                                        label="Status",
                                        lines=4,
                                        interactive=False,
                                    )

                            with gr.Column():
                                with gr.Group(elem_classes="card"):
                                    gr.Markdown("#### π₯ Video Intelligence")
                                    vid_upload = gr.File(
                                        label="Select Videos",
                                        file_types=["video"],
                                        file_count="multiple",
                                    )
                                    vid_btn = gr.Button("Extract & Index Frames")
                                    vid_log = gr.Textbox(
                                        label="Status",
                                        lines=4,
                                        interactive=False,
                                    )

                        with gr.Group(elem_classes="card"):
                            gr.Markdown("#### β‘ Batch Operations")
                            with gr.Row():
                                seed_btn = gr.Button("Seed Demo Data", variant="secondary")
                                batch_btn = gr.Button("Re-index Folder", variant="secondary")
                                clear_btn = gr.Button("Purge All Indexes", variant="stop")
                            action_log = gr.Markdown()

                    # ---- Explanation tab -----------------------------------
                    with gr.Tab("π§  How It Works"):
                        gr.Markdown("""
                        ### Direct Multimodal Embedding
                        ROCKIT doesn't use captioning models. It uses **Vision-Language Models (VLM)** to encode
                        visual features directly into the same vector space as text. This preserves subtle details
                        that text captions often lose.
                        """)
                        with gr.Row():
                            with gr.Column():
                                gr.Markdown("#### 1. System Architecture")
                                _diagram_if_present(arch_path)
                            with gr.Column():
                                gr.Markdown("#### 2. Query Flow")
                                _diagram_if_present(flow_path)

                        gr.Markdown("---")

                        with gr.Row():
                            with gr.Column():
                                gr.Markdown("#### 3. GPU Acceleration Tiers")
                                _diagram_if_present(gpu_path)
                            with gr.Column():
                                gr.Markdown("""
                                #### Hot/Cold Memory Management
                                To support dozens of projects on a single GPU, ROCKIT implements an **NVMe-to-VRAM Async Swap**.

                                - **Cold Store (NVMe):** Indexes are serialized as `.cagra` files.
                                - **Hot Cache (VRAM):** Active projects are copied into VRAM using pinned-memory DMA.
                                - **LRU Eviction:** Least recently used indexes are purged from VRAM to make room for new ones.
                                """)

        # ---- Event wiring --------------------------------------------------
        # Registered inside the Blocks context, in the same order as before.

        # Project management.
        project_select.change(
            fn=get_system_info,
            inputs=[project_select],
            outputs=[system_info],
        )
        refresh_btn.click(
            fn=refresh_projects,
            inputs=[],
            outputs=[project_select],
        )
        create_btn.click(
            fn=handle_create_project,
            inputs=[new_project_name],
            outputs=[create_status, project_select],
        )

        # Search: the button and pressing Enter in the query box share one
        # handler and the same inputs/outputs.
        search_wiring = {
            "fn": handle_search,
            "inputs": [query_input, search_mode, top_k, project_select],
            "outputs": [search_summary, result_gallery, store_info],
        }
        search_btn.click(**search_wiring)
        query_input.submit(**search_wiring)

        # Uploads: each writes its own log box and refreshes the status panel.
        img_btn.click(
            fn=handle_image_upload,
            inputs=[img_upload, project_select],
            outputs=[img_log, system_info],
        )
        vid_btn.click(
            fn=handle_video_upload,
            inputs=[vid_upload, project_select],
            outputs=[vid_log, system_info],
        )

        # Batch operations: same inputs/outputs, different handler per button.
        for button, handler in (
            (seed_btn, handle_seed),
            (batch_btn, handle_batch_ingest),
            (clear_btn, handle_clear),
        ):
            button.click(
                fn=handler,
                inputs=[project_select],
                outputs=[action_log, system_info],
            )

    return app
|
|
| |
|
|
if __name__ == "__main__":
    # Seeding is best-effort: a failure is logged and the UI still starts.
    if seed_data.is_needed():
        logger.info("Auto-seeding default project from HF Dataset...")
        try:
            seed_data.run()
        except Exception as exc:
            logger.error("Auto-seeding failed: %s", exc)

    app = build_ui()
    # Listen on all interfaces, port 7860, without a public share link.
    app.launch(
        share=False,
        server_port=7860,
        server_name="0.0.0.0",
    )