Billavenu's picture
Update app.py
d0f7240 verified
#!/usr/bin/env python3
"""
HF_Space_hipVS/app.py
=====================
ROCKIT Vision Intelligence — Hugging Face Space
GPU-accelerated multimodal search engine.
- Embedding: Qwen3-VL-Embedding (GPU) / CLIP (CPU)
- Search: CAGRA (hipVS) -> PyTorch -> NumPy
- UI: Premium Gradio Demo (Gradio >= 5.7)
"""
import logging
import sys
import os
from pathlib import Path
import gradio as gr
sys.path.insert(0, str(Path(__file__).parent))
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s")
logger = logging.getLogger("rockit-vision")
from config import (
USE_GPU, EMBED_MODEL, EMBED_DIM, LLM_MODEL, LLM_FALLBACK,
FRAME_EVERY_SEC, HF_TOKEN, HF_DATASET_REPO, AUTO_SEED,
DEFAULT_PROJECT, DATA_DIR
)
from vector_store import get_store, list_projects
from ingest import (
ingest_images, ingest_videos,
ingest_single_image, ingest_single_video,
HAS_FFMPEG,
)
from search import search_images, search_videos
import seed_data
# ── Helpers ───────────────────────────────────────────────────────────────────
def get_system_info(project: str = DEFAULT_PROJECT) -> str:
    """Build the Markdown status panel shown in the sidebar.

    Looks up the image and video-frame stores for ``project`` and renders
    two Markdown tables: one describing hardware/model configuration and
    one with per-index entry counts and where each index currently lives.

    Args:
        project: Project whose stores are inspected.

    Returns:
        A Markdown string (lines joined with newlines).
    """
    img_store = get_store(project, "image_index")
    vid_store = get_store(project, "video_index")

    # The status emoji were stored as mis-decoded UTF-8 ("πŸš€", "🐒");
    # they are restored to the intended rocket / turtle glyphs here.
    gpu_status = "🚀 Enabled" if USE_GPU else "🐢 Disabled (CPU)"
    media_status = "ffmpeg detected" if HAS_FFMPEG else "ffmpeg MISSING"

    def location(store) -> str:
        """Describe where a store's index currently resides."""
        return "VRAM (Hot)" if store.in_vram else "NVMe (Cold)"

    return "\n".join([
        # Trailing "\n" leaves a blank line between the heading and the table.
        f"### Project Context: `{project}`\n",
        "| Hardware & Models | Status |",
        "|:---|:---|",
        f"| **GPU Acceleration** | {gpu_status} |",
        # Only the image store's mode is reported as the search backend.
        f"| **Search Backend** | {img_store.mode} |",
        f"| **Vision Model** | `{EMBED_MODEL.split('/')[-1]}` ({EMBED_DIM}d) |",
        f"| **Reasoning LLM** | `{LLM_MODEL.split('/')[-1]}` |",
        f"| **Media Engine** | {media_status} |",
        "\n| Index Stats | Count | Location |",
        "|:---|:---|:---|",
        f"| Images | {img_store.count} | {location(img_store)} |",
        f"| Video Frames | {vid_store.count} | {location(vid_store)} |",
    ])
def get_projects_list() -> list[str]:
    """Return the known projects, guaranteeing the default one is listed.

    If ``DEFAULT_PROJECT`` is absent from what ``list_projects()`` returns,
    it is inserted at the front of that same list.
    """
    known = list_projects()
    if DEFAULT_PROJECT in known:
        return known
    known.insert(0, DEFAULT_PROJECT)
    return known
# ── Callbacks ─────────────────────────────────────────────────────────────────
def handle_image_upload(files, project, progress=gr.Progress()):
    """Index each uploaded image in turn and report per-file status.

    Args:
        files: Uploaded file paths (may be empty or ``None``).
        project: Project that receives the images.
        progress: Gradio progress tracker, advanced once per file.

    Returns:
        ``(log_text, system_info_markdown)`` where ``log_text`` holds one
        ingest message per file, newline-separated.
    """
    if not files:
        return "No files uploaded.", get_system_info(project)

    total = len(files)
    messages = []
    for position, upload in enumerate(files, start=1):
        progress(position / total, desc=f"Embedding {Path(upload).name}...")
        # The success flag is not used; only the message is surfaced.
        _ok, message = ingest_single_image(upload, project=project)
        messages.append(message)

    return "\n".join(messages), get_system_info(project)
def handle_video_upload(files, project, progress=gr.Progress()):
    """Index each uploaded video in turn and report per-file status.

    Args:
        files: Uploaded file paths (may be empty or ``None``).
        project: Project that receives the video frames.
        progress: Gradio progress tracker, forwarded to the ingest call.

    Returns:
        ``(log_text, system_info_markdown)`` where ``log_text`` holds one
        ingest message per video, newline-separated.
    """
    if not files:
        return "No files uploaded.", get_system_info(project)

    messages = []
    for video in files:
        # The count returned alongside the message is not used here.
        _frame_count, message = ingest_single_video(
            video,
            project=project,
            progress_callback=progress,
        )
        messages.append(message)

    return "\n".join(messages), get_system_info(project)
def handle_batch_ingest(project, progress=gr.Progress()):
    """Run the bulk image and video ingest for ``project``.

    Images are ingested first, then videos.  Only the two counts are
    reported; the detailed logs returned by the ingest calls are dropped.

    Returns:
        ``(summary_text, system_info_markdown)``.
    """
    n_images, _image_log = ingest_images(project=project, progress_callback=progress)
    n_frames, _video_log = ingest_videos(project=project, progress_callback=progress)

    header = "=== Batch Ingest Results ===\n\n"
    details = (
        f"Successfully indexed {n_images} images and {n_frames} video frames "
        f"into project '{project}'."
    )
    return header + details, get_system_info(project)
def handle_seed(project, progress=gr.Progress()):
    """Seed ``project`` via ``seed_data.run`` and return its log.

    Returns:
        ``(seed_log, system_info_markdown)``; the count returned by the
        seeding call is not used.
    """
    _seeded, seed_log = seed_data.run(project=project, progress_callback=progress)
    refreshed_status = get_system_info(project)
    return seed_log, refreshed_status
def handle_clear(project):
    """Clear both the image and the video index of ``project``.

    Returns:
        ``(confirmation_text, system_info_markdown)``.
    """
    for index_name in ("image_index", "video_index"):
        get_store(project, index_name).clear()

    confirmation = f"All indexes cleared for project '{project}'."
    return confirmation, get_system_info(project)
def handle_search(query, mode, top_k, project):
    """Run a semantic search and shape the result for the UI.

    Args:
        query: Free-text query; ``None`` or blank text yields a prompt
            instead of a search.
        mode: ``"Image Search"`` selects the image index; any other value
            is treated as video search.
        top_k: Maximum number of hits (coerced to ``int``).
        project: Project whose indexes are searched.

    Returns:
        ``(summary, gallery_items, store_info)`` where ``gallery_items`` is
        a list of ``(path, caption)`` tuples.  Hits whose file is missing
        on disk are left out of the gallery.
    """
    # Guard None as well as blank text so a cleared/absent textbox value
    # cannot raise AttributeError on .strip().
    if not query or not query.strip():
        return "Please enter a search query.", [], ""

    limit = int(top_k)
    gallery_items = []

    if mode == "Image Search":
        result = search_images(query, project=project, top_k=limit)
        summary = result["llm_summary"]
        for hit in result["results"]:
            path = hit.get("file_path", "")
            name = hit.get("file_name", "Unknown")
            score = hit.get("score", 0)
            if path and os.path.exists(path):
                gallery_items.append((path, f"{name} (Score: {score:.3f})"))
    else:  # Video Intelligence
        result = search_videos(query, project=project, top_k=limit)
        summary = result["llm_summary"]
        for match in result["matches"]:
            path = match.get("representative_frame", "")
            name = match.get("video_name", "Unknown")
            # start/end are required keys of a match (no default is applied).
            time_range = f"{match['start']} - {match['end']}"
            score = match.get("score", 0)
            if path and os.path.exists(path):
                gallery_items.append(
                    (path, f"{name} @ {time_range} (Score: {score:.3f})")
                )

    return summary, gallery_items, result["store_info"]
def handle_create_project(name):
    """Create a new named project workspace.

    The name is trimmed, lower-cased and has spaces replaced by hyphens
    before being passed to ``config.get_project_dir``.

    Args:
        name: Raw project ID typed by the user.

    Returns:
        ``(status_text, dropdown_update)``.  On rejection the dropdown is
        left untouched via ``gr.skip()``; on success it is refreshed and
        set to the new project.
    """
    if not name or not name.strip():
        return "Enter a project name.", gr.skip()

    name = name.strip().lower().replace(" ", "-")

    # The ID comes straight from a textbox and is handed to get_project_dir
    # (presumably mapped to a directory -- its implementation is not visible
    # here).  Refuse anything that could be read as a path rather than a
    # plain name, so "../x" or "a/b" cannot escape the projects area.
    if name in {".", ".."} or any(sep in name for sep in ("/", "\\")):
        return "Project name must not contain path separators.", gr.skip()

    from config import get_project_dir
    get_project_dir(name)
    return f"Project '{name}' created.", gr.Dropdown(choices=get_projects_list(), value=name)
def refresh_projects():
    """Build a dropdown update carrying the current project choices."""
    current_choices = get_projects_list()
    return gr.Dropdown(choices=current_choices)
# ── CSS ───────────────────────────────────────────────────────────────────────
# Custom stylesheet handed to gr.Blocks(css=...) in build_ui(): loads the Inter
# font, applies a dark background, styles the gradient header, the ".card"
# panels, the "#search-btn" button and the gallery, and hides the page footer.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&display=swap');
body { font-family: 'Inter', sans-serif !important; }
.gradio-container {
max-width: 1300px !important;
margin: 0 auto !important;
background-color: #050505 !important;
}
.main-header {
text-align: center;
background: linear-gradient(135deg, #0f0f1b 0%, #1a1a2e 100%);
padding: 3rem 2rem;
border-radius: 24px;
margin-bottom: 2rem;
border: 1px solid rgba(255,255,255,0.05);
box-shadow: 0 10px 30px rgba(0,0,0,0.5);
display: flex;
flex-direction: column;
align-items: center;
}
.logo-container img {
max-width: 120px;
margin-bottom: 1.5rem;
filter: drop-shadow(0 0 15px rgba(233, 69, 96, 0.4));
}
.main-header h1 {
background: linear-gradient(90deg, #e94560, #a033ff, #4cc9f0);
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
font-size: 3.2rem !important;
font-weight: 800 !important;
margin: 0;
letter-spacing: -1px;
}
.main-header p.subtitle {
color: #94a3b8;
font-size: 1.1rem;
margin-top: 0.5rem;
}
.card {
background: #11111b !important;
border: 1px solid rgba(255,255,255,0.08) !important;
border-radius: 16px !important;
padding: 1rem !important;
}
#search-btn {
background: linear-gradient(135deg, #e94560 0%, #533483 100%) !important;
border: none !important;
font-weight: 700 !important;
color: white !important;
transition: all 0.3s ease;
}
#search-btn:hover {
transform: translateY(-2px);
box-shadow: 0 5px 15px rgba(233, 69, 96, 0.4);
}
.stat-box {
background: rgba(255,255,255,0.03);
border-radius: 12px;
padding: 1rem;
border: 1px solid rgba(255,255,255,0.05);
}
.gallery-container {
background: #0a0a0f !important;
border-radius: 12px !important;
}
footer { display: none !important; }
"""
# ── Build UI ──────────────────────────────────────────────────────────────────
def build_ui():
    """Assemble the Gradio Blocks application and wire up its events.

    Layout: a header, a left sidebar (project selector, project creation,
    system status) and a right-hand tab set (Search, Ingest Media, How It
    Works).  Image assets are looked up by relative path and only added
    when the file exists, so a missing asset never breaks the page.

    Returns:
        The ``gr.Blocks`` app, built but not launched.
    """
    logo_path = "assests/rockit_logo.png"
    arch_path = "assests/Architecture.svg"
    flow_path = "assests/data_flow.svg"
    gpu_path = "assests/gpu_compute_tiers.svg"

    # Static copy for the "How It Works" tab, kept flush-left so list items
    # and headings are parsed as Markdown regardless of code indentation.
    embedding_blurb = (
        "\n"
        "### Direct Multimodal Embedding\n"
        "ROCKIT doesn't use captioning models. It uses **Vision-Language Models (VLM)** to encode\n"
        "visual features directly into the same vector space as text. This preserves subtle details\n"
        "that text captions often lose.\n"
    )
    memory_blurb = (
        "\n"
        "#### Hot/Cold Memory Management\n"
        "To support dozens of projects on a single GPU, ROCKIT implements an **NVMe-to-VRAM Async Swap**.\n"
        "- **Cold Store (NVMe):** Indexes are serialized as `.cagra` files.\n"
        "- **Hot Cache (VRAM):** Active projects are copied into VRAM using pinned-memory DMA.\n"
        "- **LRU Eviction:** Least recently used indexes are purged from VRAM to make room for new ones.\n"
    )

    with gr.Blocks(
        title="ROCKIT Vision Intelligence",
        theme=gr.themes.Soft(
            primary_hue="rose",
            secondary_hue="indigo",
            neutral_hue="slate",
        ),
        css=CSS,
    ) as app:
        # ── Header ────────────────────────────────────────────────────────
        with gr.Column(elem_classes="main-header"):
            if os.path.exists(logo_path):
                gr.Image(
                    logo_path,
                    show_label=False,
                    container=False,
                    width=100,
                    elem_classes="logo-container",
                )
            gr.HTML("<h1>ROCKIT Vision Intelligence</h1>")
            # Emitted as a real <p class="subtitle"> so the stylesheet's
            # ".main-header p.subtitle" rule applies; elem_classes on a
            # Markdown component lands on its wrapper, not on the <p>.
            gr.HTML('<p class="subtitle">GPU-Accelerated Multimodal Search Platform</p>')

        # ── Main layout ───────────────────────────────────────────────────
        with gr.Row():
            # Left sidebar
            with gr.Column(scale=3):
                with gr.Group(elem_classes="card"):
                    gr.Markdown("### 🗂️ Project Selection")
                    with gr.Row():
                        project_select = gr.Dropdown(
                            choices=get_projects_list(),
                            value=DEFAULT_PROJECT,
                            label="Active Workspace",
                            scale=4,
                            interactive=True,
                        )
                        refresh_btn = gr.Button("🔄", scale=1)
                    with gr.Accordion("Create New Project", open=False):
                        new_project_name = gr.Textbox(
                            label="Project ID",
                            placeholder="e.g. security-cam",
                        )
                        create_btn = gr.Button("Initialize Project", variant="secondary")
                        create_status = gr.Markdown()

                with gr.Group(elem_classes="card"):
                    gr.Markdown("### ⚙️ System Status")
                    # Rendered once at build time for the default project;
                    # refreshed afterwards by the event handlers below.
                    system_info = gr.Markdown(value=get_system_info())

            # Right content area
            with gr.Column(scale=7):
                with gr.Tabs():
                    # ── Tab 1: Search ─────────────────────────────────────
                    with gr.Tab("🔍 Search"):
                        with gr.Group(elem_classes="card"):
                            with gr.Row():
                                with gr.Column(scale=4):
                                    query_input = gr.Textbox(
                                        label="Natural Language Query",
                                        placeholder=(
                                            'Try "a cat sitting on a laptop" '
                                            'or "someone running in a park"'
                                        ),
                                        lines=2,
                                    )
                                with gr.Column(scale=1):
                                    search_mode = gr.Radio(
                                        ["Image Search", "Video Intelligence"],
                                        value="Image Search",
                                        label="Search Mode",
                                    )
                                    top_k = gr.Slider(
                                        1, 50, value=12, step=1,
                                        label="Results Count",
                                    )
                            search_btn = gr.Button(
                                "Execute Semantic Search",
                                variant="primary",
                                elem_id="search-btn",
                                size="lg",
                            )

                        gr.Markdown("### 🤖 AI Interpretation")
                        search_summary = gr.Markdown(
                            "*Results will appear here...*",
                            elem_classes="card",
                        )

                        gr.Markdown("### 🖼️ Visual Matches")
                        result_gallery = gr.Gallery(
                            label="Retrieved Media",
                            columns=4,
                            rows=2,
                            object_fit="contain",
                            height="auto",
                            elem_classes="gallery-container",
                        )

                        with gr.Accordion("Technical Details", open=False):
                            store_info = gr.Textbox(
                                label="Vector Store Engine",
                                interactive=False,
                            )

                    # ── Tab 2: Ingest Media ───────────────────────────────
                    with gr.Tab("📤 Ingest Media"):
                        with gr.Row():
                            with gr.Column():
                                with gr.Group(elem_classes="card"):
                                    gr.Markdown("#### 🖼️ Image Ingestion")
                                    img_upload = gr.File(
                                        label="Select Images",
                                        file_types=["image"],
                                        file_count="multiple",
                                    )
                                    img_btn = gr.Button("Embed & Index Images")
                                    img_log = gr.Textbox(
                                        label="Status",
                                        lines=4,
                                        interactive=False,
                                    )
                            with gr.Column():
                                with gr.Group(elem_classes="card"):
                                    gr.Markdown("#### 🎥 Video Intelligence")
                                    vid_upload = gr.File(
                                        label="Select Videos",
                                        file_types=["video"],
                                        file_count="multiple",
                                    )
                                    vid_btn = gr.Button("Extract & Index Frames")
                                    vid_log = gr.Textbox(
                                        label="Status",
                                        lines=4,
                                        interactive=False,
                                    )

                        with gr.Group(elem_classes="card"):
                            gr.Markdown("#### ⚡ Batch Operations")
                            with gr.Row():
                                seed_btn = gr.Button("Seed Demo Data", variant="secondary")
                                batch_btn = gr.Button("Re-index Folder", variant="secondary")
                                clear_btn = gr.Button("Purge All Indexes", variant="stop")
                            action_log = gr.Markdown()

                    # ── Tab 3: How It Works ───────────────────────────────
                    with gr.Tab("🧠 How It Works"):
                        gr.Markdown(embedding_blurb)
                        with gr.Row():
                            with gr.Column():
                                gr.Markdown("#### 1. System Architecture")
                                if os.path.exists(arch_path):
                                    gr.Image(arch_path, show_label=False)
                            with gr.Column():
                                gr.Markdown("#### 2. Query Flow")
                                if os.path.exists(flow_path):
                                    gr.Image(flow_path, show_label=False)
                        gr.Markdown("---")
                        with gr.Row():
                            with gr.Column():
                                gr.Markdown("#### 3. GPU Acceleration Tiers")
                                if os.path.exists(gpu_path):
                                    gr.Image(gpu_path, show_label=False)
                            with gr.Column():
                                gr.Markdown(memory_blurb)

        # ── Event bindings ────────────────────────────────────────────────
        # Sidebar controls
        project_select.change(
            fn=get_system_info,
            inputs=[project_select],
            outputs=[system_info],
        )
        refresh_btn.click(
            fn=refresh_projects,
            inputs=[],
            outputs=[project_select],
        )
        create_btn.click(
            fn=handle_create_project,
            inputs=[new_project_name],
            outputs=[create_status, project_select],
        )

        # Search: the button and pressing Enter in the query box share one handler.
        search_inputs = [query_input, search_mode, top_k, project_select]
        search_outputs = [search_summary, result_gallery, store_info]
        search_btn.click(
            fn=handle_search,
            inputs=search_inputs,
            outputs=search_outputs,
        )
        query_input.submit(
            fn=handle_search,
            inputs=search_inputs,
            outputs=search_outputs,
        )

        # Ingest: each handler also refreshes the sidebar status panel.
        img_btn.click(
            fn=handle_image_upload,
            inputs=[img_upload, project_select],
            outputs=[img_log, system_info],
        )
        vid_btn.click(
            fn=handle_video_upload,
            inputs=[vid_upload, project_select],
            outputs=[vid_log, system_info],
        )

        # Batch operations
        seed_btn.click(
            fn=handle_seed,
            inputs=[project_select],
            outputs=[action_log, system_info],
        )
        batch_btn.click(
            fn=handle_batch_ingest,
            inputs=[project_select],
            outputs=[action_log, system_info],
        )
        clear_btn.click(
            fn=handle_clear,
            inputs=[project_select],
            outputs=[action_log, system_info],
        )

    return app
# ── Entry point ───────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # Optional first-run seeding: attempted only when seed_data reports it is
    # needed, and never allowed to prevent the UI from starting.
    if seed_data.is_needed():
        logger.info("Auto-seeding default project from HF Dataset...")
        try:
            seed_data.run()
        except Exception as e:
            # Deliberately broad at this top-level boundary; log with the
            # traceback so the failure can actually be diagnosed.
            logger.exception("Auto-seeding failed: %s", e)

    app = build_ui()
    # Listen on all interfaces at port 7860; no public share link.
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)