File size: 21,180 Bytes
fb12ddc
 
 
 
 
 
 
 
 
d0f7240
fb12ddc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83dad6b
fb12ddc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83dad6b
fb12ddc
 
 
 
 
 
83dad6b
fb12ddc
 
83dad6b
fb12ddc
 
 
 
 
 
 
 
 
83dad6b
fb12ddc
83dad6b
fb12ddc
 
 
 
 
 
 
 
83dad6b
fb12ddc
83dad6b
fb12ddc
 
 
 
83dad6b
 
fb12ddc
 
 
83dad6b
fb12ddc
83dad6b
fb12ddc
 
 
83dad6b
fb12ddc
83dad6b
fb12ddc
 
 
 
83dad6b
fb12ddc
83dad6b
fb12ddc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83dad6b
fb12ddc
 
 
 
 
 
 
 
 
 
 
 
83dad6b
fb12ddc
83dad6b
fb12ddc
d0f7240
fb12ddc
 
 
d0f7240
fb12ddc
83dad6b
 
 
d0f7240
83dad6b
 
fb12ddc
 
 
 
 
 
83dad6b
 
 
fb12ddc
 
 
 
 
 
83dad6b
 
fb12ddc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83dad6b
fb12ddc
83dad6b
fb12ddc
 
 
 
 
83dad6b
 
fb12ddc
 
 
 
 
 
 
 
 
 
 
83dad6b
 
 
fb12ddc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83dad6b
fb12ddc
 
 
 
 
83dad6b
fb12ddc
 
 
83dad6b
 
 
 
 
 
fb12ddc
 
 
 
 
83dad6b
 
fb12ddc
83dad6b
 
 
 
 
 
 
fb12ddc
83dad6b
 
 
 
fb12ddc
83dad6b
fb12ddc
83dad6b
 
fb12ddc
 
 
 
 
 
 
 
 
 
 
83dad6b
fb12ddc
83dad6b
fb12ddc
83dad6b
 
 
 
fb12ddc
 
 
83dad6b
 
fb12ddc
 
 
83dad6b
fb12ddc
 
83dad6b
 
fb12ddc
 
 
 
 
 
83dad6b
 
 
 
fb12ddc
 
 
83dad6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb12ddc
83dad6b
 
 
 
 
fb12ddc
 
83dad6b
 
 
 
 
fb12ddc
83dad6b
fb12ddc
83dad6b
fb12ddc
83dad6b
 
 
 
fb12ddc
83dad6b
fb12ddc
 
 
 
 
83dad6b
 
1b0db98
83dad6b
 
fb12ddc
83dad6b
 
 
 
 
 
fb12ddc
 
 
83dad6b
 
1b0db98
83dad6b
 
fb12ddc
83dad6b
 
 
 
 
 
fb12ddc
 
 
83dad6b
 
 
fb12ddc
 
83dad6b
fb12ddc
 
83dad6b
 
 
 
 
fb12ddc
 
 
 
 
 
 
 
 
83dad6b
fb12ddc
83dad6b
fb12ddc
 
 
 
 
 
 
83dad6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb12ddc
 
 
 
 
 
83dad6b
 
 
 
fb12ddc
83dad6b
 
 
fb12ddc
 
83dad6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb12ddc
 
83dad6b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
fb12ddc
 
 
83dad6b
 
fb12ddc
 
 
 
 
 
 
 
 
83dad6b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
#!/usr/bin/env python3
"""
HF_Space_hipVS/app.py
=====================
ROCKIT Vision Intelligence — Hugging Face Space

GPU-accelerated multimodal search engine.
  - Embedding: Qwen3-VL-Embedding (GPU) / CLIP (CPU)
  - Search:    CAGRA (hipVS) -> PyTorch -> NumPy
  - UI:        Premium Gradio Demo (Gradio >= 5.7)
"""

import logging
import sys
import os
from pathlib import Path
import gradio as gr

# Put this file's directory first on the import path; presumably so that the
# sibling modules imported below (config, vector_store, ingest, search,
# seed_data) resolve regardless of the working directory.
sys.path.insert(0, str(Path(__file__).parent))

# Configure root logging once at import time and expose the app-wide logger.
logging.basicConfig(level=logging.INFO, format="%(asctime)s [%(name)s] %(message)s")
logger = logging.getLogger("rockit-vision")

from config import (
    USE_GPU, EMBED_MODEL, EMBED_DIM, LLM_MODEL, LLM_FALLBACK,
    FRAME_EVERY_SEC, HF_TOKEN, HF_DATASET_REPO, AUTO_SEED,
    DEFAULT_PROJECT, DATA_DIR
)
from vector_store import get_store, list_projects
from ingest import (
    ingest_images, ingest_videos,
    ingest_single_image, ingest_single_video,
    HAS_FFMPEG,
)
from search import search_images, search_videos
import seed_data

# ── Helpers ───────────────────────────────────────────────────────────────────

def get_system_info(project: str = DEFAULT_PROJECT) -> str:
    """Build the Markdown status panel for *project*.

    Fetches the project's ``image_index`` and ``video_index`` stores via
    ``get_store`` and renders two Markdown tables: the hardware/model
    configuration and the per-index entry counts with their residency.

    Args:
        project: Name of the project (workspace) whose stores are reported.

    Returns:
        A Markdown string intended for a ``gr.Markdown`` component.
    """
    img_store = get_store(project, "image_index")
    vid_store = get_store(project, "video_index")

    def residency(store) -> str:
        # Label derived solely from the store's ``in_vram`` flag.
        return "VRAM (Hot)" if store.in_vram else "NVMe (Cold)"

    # The emoji were previously mis-encoded (UTF-8 bytes shown as Greek text).
    gpu_status = "🚀 Enabled" if USE_GPU else "🐢 Disabled (CPU)"
    media_status = "ffmpeg detected" if HAS_FFMPEG else "ffmpeg MISSING"

    return "\n".join([
        f"### Project Context: `{project}`\n",
        "| Hardware & Models | Status |",
        "|:---|:---|",
        f"| **GPU Acceleration** | {gpu_status} |",
        # Only the image store's backend mode is reported here.
        f"| **Search Backend** | {img_store.mode} |",
        f"| **Vision Model** | `{EMBED_MODEL.split('/')[-1]}` ({EMBED_DIM}d) |",
        f"| **Reasoning LLM** | `{LLM_MODEL.split('/')[-1]}` |",
        f"| **Media Engine** | {media_status} |",
        "\n| Index Stats | Count | Location |",
        "|:---|:---|:---|",
        f"| Images | {img_store.count} | {residency(img_store)} |",
        f"| Video Frames | {vid_store.count} | {residency(vid_store)} |",
    ])


def get_projects_list() -> list[str]:
    """Return the known project names, always including ``DEFAULT_PROJECT``.

    When the default project is absent from ``list_projects()`` it is
    inserted at the front of that list.
    """
    names = list_projects()
    if DEFAULT_PROJECT in names:
        return names
    names.insert(0, DEFAULT_PROJECT)
    return names

# ── Callbacks ─────────────────────────────────────────────────────────────────

def handle_image_upload(files, project, progress=gr.Progress()):
    """Index each uploaded image in turn and report per-file status.

    Returns a ``(log_text, system_info_markdown)`` pair; the log holds one
    message per file, joined with newlines.
    """
    if not files:
        return "No files uploaded.", get_system_info(project)

    total = len(files)
    messages = []
    for done, file_path in enumerate(files, start=1):
        # Advance the progress bar before embedding the current file.
        progress(done / total, desc=f"Embedding {Path(file_path).name}...")
        _ok, message = ingest_single_image(file_path, project=project)
        messages.append(message)

    return "\n".join(messages), get_system_info(project)


def handle_video_upload(files, project, progress=gr.Progress()):
    """Index each uploaded video and report per-file status.

    Each file is passed to ``ingest_single_video`` along with the Gradio
    progress tracker. Returns ``(log_text, system_info_markdown)``.
    """
    if not files:
        return "No files uploaded.", get_system_info(project)

    # Lazy generator: videos are still processed strictly one after another.
    outcomes = (
        ingest_single_video(video_path, project=project, progress_callback=progress)
        for video_path in files
    )
    messages = [message for _count, message in outcomes]
    return "\n".join(messages), get_system_info(project)


def handle_batch_ingest(project, progress=gr.Progress()):
    """Run the bulk image and video ingestion for *project*.

    Images are ingested first, then videos. Only the two counts are
    reported; the detailed logs returned by the ingest calls are not shown.
    Returns ``(summary_text, system_info_markdown)``.
    """
    image_total, _image_log = ingest_images(project=project, progress_callback=progress)
    frame_total, _video_log = ingest_videos(project=project, progress_callback=progress)

    summary = "".join([
        "=== Batch Ingest Results ===\n\n",
        f"Successfully indexed {image_total} images and {frame_total} video frames ",
        f"into project '{project}'.",
    ])
    return summary, get_system_info(project)


def handle_seed(project, progress=gr.Progress()):
    """Run ``seed_data.run`` for *project* and return its log plus fresh stats."""
    _seeded, seed_log = seed_data.run(project=project, progress_callback=progress)
    status_markdown = get_system_info(project)
    return seed_log, status_markdown


def handle_clear(project):
    """Clear the image and video indexes of *project* and report the result."""
    for index_name in ("image_index", "video_index"):
        get_store(project, index_name).clear()
    message = f"All indexes cleared for project '{project}'."
    return message, get_system_info(project)


def handle_search(query, mode, top_k, project):
    """Run a semantic search and shape the result for the UI.

    Args:
        query: Natural-language query text. ``None``, empty and
            whitespace-only values are all treated as "no query".
        mode: ``"Image Search"`` selects image search; any other value
            selects video search.
        top_k: Maximum number of results; coerced with ``int()``.
        project: Project (workspace) to search in.

    Returns:
        A ``(summary, gallery_items, store_info)`` tuple, where
        ``gallery_items`` is a list of ``(file_path, caption)`` pairs.
        Hits whose file does not exist on disk are omitted.
    """
    # Guard against None as well as blank text: calling .strip() on None
    # would raise AttributeError instead of showing the prompt message.
    if not query or not query.strip():
        return "Please enter a search query.", [], ""

    limit = int(top_k)
    gallery_items = []

    if mode == "Image Search":
        result = search_images(query, project=project, top_k=limit)
        for hit in result["results"]:
            path = hit.get("file_path", "")
            name = hit.get("file_name", "Unknown")
            score = hit.get("score", 0)
            if path and os.path.exists(path):
                gallery_items.append((path, f"{name} (Score: {score:.3f})"))
    else:  # Video Intelligence
        result = search_videos(query, project=project, top_k=limit)
        for match in result["matches"]:
            path = match.get("representative_frame", "")
            name = match.get("video_name", "Unknown")
            # 'start' and 'end' are required keys of every match.
            time_range = f"{match['start']} - {match['end']}"
            score = match.get("score", 0)
            if path and os.path.exists(path):
                gallery_items.append(
                    (path, f"{name} @ {time_range} (Score: {score:.3f})")
                )

    return result["llm_summary"], gallery_items, result["store_info"]


def handle_create_project(name):
    """Create a new named project workspace from user-supplied text.

    The name is stripped, lower-cased and spaces become hyphens. Names that
    contain a path separator, a NUL byte, or that are ``.`` / ``..`` are
    rejected before reaching ``get_project_dir``.

    Args:
        name: Raw project identifier typed by the user.

    Returns:
        ``(status_message, dropdown_update)``. On rejection the dropdown is
        left untouched via ``gr.skip()``; on success it is refreshed and the
        new project is selected.
    """
    if not name or not name.strip():
        return "Enter a project name.", gr.skip()
    name = name.strip().lower().replace(" ", "-")

    # The name comes straight from a public text box. get_project_dir
    # presumably maps it to a directory, so refuse anything that could
    # point outside the intended location.
    if name in {".", ".."} or any(ch in name for ch in ("/", "\\", "\x00")):
        return (
            "Invalid project name: path separators and '.'/'..' are not allowed.",
            gr.skip(),
        )

    from config import get_project_dir
    get_project_dir(name)
    return f"Project '{name}' created.", gr.Dropdown(choices=get_projects_list(), value=name)


def refresh_projects():
    """Build a dropdown update carrying the current list of projects."""
    current_choices = get_projects_list()
    return gr.Dropdown(choices=current_choices)

# ── CSS ───────────────────────────────────────────────────────────────────────

# Custom stylesheet passed to gr.Blocks(css=...) in build_ui(). It loads the
# Inter web font, applies a dark background, and styles the elements tagged
# with the "main-header", "logo-container", "card" and "gallery-container"
# classes plus the "#search-btn" element id. The last rule hides the page
# footer.
CSS = """
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700;800&display=swap');

body { font-family: 'Inter', sans-serif !important; }

.gradio-container {
    max-width: 1300px !important;
    margin: 0 auto !important;
    background-color: #050505 !important;
}

.main-header {
    text-align: center;
    background: linear-gradient(135deg, #0f0f1b 0%, #1a1a2e 100%);
    padding: 3rem 2rem;
    border-radius: 24px;
    margin-bottom: 2rem;
    border: 1px solid rgba(255,255,255,0.05);
    box-shadow: 0 10px 30px rgba(0,0,0,0.5);
    display: flex;
    flex-direction: column;
    align-items: center;
}

.logo-container img {
    max-width: 120px;
    margin-bottom: 1.5rem;
    filter: drop-shadow(0 0 15px rgba(233, 69, 96, 0.4));
}

.main-header h1 {
    background: linear-gradient(90deg, #e94560, #a033ff, #4cc9f0);
    -webkit-background-clip: text;
    -webkit-text-fill-color: transparent;
    font-size: 3.2rem !important;
    font-weight: 800 !important;
    margin: 0;
    letter-spacing: -1px;
}

.main-header p.subtitle {
    color: #94a3b8;
    font-size: 1.1rem;
    margin-top: 0.5rem;
}

.card {
    background: #11111b !important;
    border: 1px solid rgba(255,255,255,0.08) !important;
    border-radius: 16px !important;
    padding: 1rem !important;
}

#search-btn {
    background: linear-gradient(135deg, #e94560 0%, #533483 100%) !important;
    border: none !important;
    font-weight: 700 !important;
    color: white !important;
    transition: all 0.3s ease;
}

#search-btn:hover {
    transform: translateY(-2px);
    box-shadow: 0 5px 15px rgba(233, 69, 96, 0.4);
}

.stat-box {
    background: rgba(255,255,255,0.03);
    border-radius: 12px;
    padding: 1rem;
    border: 1px solid rgba(255,255,255,0.05);
}

.gallery-container {
    background: #0a0a0f !important;
    border-radius: 12px !important;
}

footer { display: none !important; }
"""

# ── Build UI ──────────────────────────────────────────────────────────────────

def build_ui():
    """Assemble the Gradio Blocks application and wire up its callbacks.

    Layout: a header (optional logo, title, subtitle), a left sidebar
    (project selection/creation and system status) and a right area with
    three tabs: Search, Ingest Media and How It Works. Static artwork is
    shown only when the corresponding file exists on disk.

    Returns:
        The constructed ``gr.Blocks`` instance (not yet launched).
    """
    # Asset locations are relative to the working directory; each one is
    # checked with os.path.exists() before being rendered.
    logo_path = "assests/rockit_logo.png"
    arch_path = "assests/Architecture.svg"
    flow_path = "assests/data_flow.svg"
    gpu_path  = "assests/gpu_compute_tiers.svg"

    with gr.Blocks(
        title="ROCKIT Vision Intelligence",
        # Soft preset tinted with the app's accent hues.
        theme=gr.themes.Soft(
            primary_hue="rose",
            secondary_hue="indigo",
            neutral_hue="slate",
        ),
        css=CSS,
    ) as app:

        # ── Header ────────────────────────────────────────────────────────────
        with gr.Column(elem_classes="main-header"):
            if os.path.exists(logo_path):
                gr.Image(
                    logo_path,
                    show_label=False,
                    container=False,
                    width=100,
                    elem_classes="logo-container",
                )
            gr.HTML("<h1>ROCKIT Vision Intelligence</h1>")
            # Raw HTML so the class lands on the <p> itself: elem_classes on
            # a Markdown component is applied to its wrapper element, which
            # the ".main-header p.subtitle" CSS rule never matches.
            gr.HTML('<p class="subtitle">GPU-Accelerated Multimodal Search Platform</p>')

        # ── Main layout ───────────────────────────────────────────────────────
        with gr.Row():

            # Left sidebar
            with gr.Column(scale=3):
                with gr.Group(elem_classes="card"):
                    gr.Markdown("### 🗂️ Project Selection")
                    with gr.Row():
                        project_select = gr.Dropdown(
                            choices=get_projects_list(),
                            value=DEFAULT_PROJECT,
                            label="Active Workspace",
                            scale=4,
                            interactive=True,
                        )
                        refresh_btn = gr.Button("🔄", scale=1)

                    with gr.Accordion("Create New Project", open=False):
                        new_project_name = gr.Textbox(
                            label="Project ID",
                            placeholder="e.g. security-cam",
                        )
                        create_btn = gr.Button("Initialize Project", variant="secondary")
                        create_status = gr.Markdown()

                with gr.Group(elem_classes="card"):
                    gr.Markdown("### ⚙️ System Status")
                    # Initial render uses the default project.
                    system_info = gr.Markdown(value=get_system_info())

            # Right content area
            with gr.Column(scale=7):
                with gr.Tabs():

                    # ── Tab 1: Search ─────────────────────────────────────────
                    with gr.Tab("🔍 Search"):
                        with gr.Group(elem_classes="card"):
                            with gr.Row():
                                with gr.Column(scale=4):
                                    query_input = gr.Textbox(
                                        label="Natural Language Query",
                                        placeholder=(
                                            'Try "a cat sitting on a laptop" '
                                            'or "someone running in a park"'
                                        ),
                                        lines=2,
                                    )
                                with gr.Column(scale=1):
                                    search_mode = gr.Radio(
                                        ["Image Search", "Video Intelligence"],
                                        value="Image Search",
                                        label="Search Mode",
                                    )
                                    top_k = gr.Slider(
                                        1, 50, value=12, step=1,
                                        label="Results Count",
                                    )

                            search_btn = gr.Button(
                                "Execute Semantic Search",
                                variant="primary",
                                elem_id="search-btn",
                                size="lg",
                            )

                        gr.Markdown("### 🤖 AI Interpretation")
                        search_summary = gr.Markdown(
                            "*Results will appear here...*",
                            elem_classes="card",
                        )

                        gr.Markdown("### 🖼️ Visual Matches")
                        result_gallery = gr.Gallery(
                            label="Retrieved Media",
                            columns=4,
                            rows=2,
                            object_fit="contain",
                            height="auto",
                            elem_classes="gallery-container",
                        )

                        with gr.Accordion("Technical Details", open=False):
                            store_info = gr.Textbox(
                                label="Vector Store Engine",
                                interactive=False,
                            )

                    # ── Tab 2: Ingest Media ───────────────────────────────────
                    with gr.Tab("📤 Ingest Media"):
                        with gr.Row():
                            with gr.Column():
                                with gr.Group(elem_classes="card"):
                                    gr.Markdown("#### 🖼️ Image Ingestion")
                                    img_upload = gr.File(
                                        label="Select Images",
                                        file_types=["image"],
                                        file_count="multiple",
                                    )
                                    img_btn = gr.Button("Embed & Index Images")
                                    img_log = gr.Textbox(
                                        label="Status",
                                        lines=4,
                                        interactive=False,
                                    )

                            with gr.Column():
                                with gr.Group(elem_classes="card"):
                                    gr.Markdown("#### 🎥 Video Intelligence")
                                    vid_upload = gr.File(
                                        label="Select Videos",
                                        file_types=["video"],
                                        file_count="multiple",
                                    )
                                    vid_btn = gr.Button("Extract & Index Frames")
                                    vid_log = gr.Textbox(
                                        label="Status",
                                        lines=4,
                                        interactive=False,
                                    )

                        with gr.Group(elem_classes="card"):
                            gr.Markdown("#### ⚡ Batch Operations")
                            with gr.Row():
                                seed_btn  = gr.Button("Seed Demo Data",     variant="secondary")
                                batch_btn = gr.Button("Re-index Folder",    variant="secondary")
                                clear_btn = gr.Button("Purge All Indexes",  variant="stop")
                            action_log = gr.Markdown()

                    # ── Tab 3: How It Works ───────────────────────────────────
                    with gr.Tab("🧠 How It Works"):
                        gr.Markdown("""
### Direct Multimodal Embedding
ROCKIT doesn't use captioning models. It uses **Vision-Language Models (VLM)** to encode
visual features directly into the same vector space as text. This preserves subtle details
that text captions often lose.
""")
                        with gr.Row():
                            with gr.Column():
                                gr.Markdown("#### 1. System Architecture")
                                if os.path.exists(arch_path):
                                    gr.Image(arch_path, show_label=False)
                            with gr.Column():
                                gr.Markdown("#### 2. Query Flow")
                                if os.path.exists(flow_path):
                                    gr.Image(flow_path, show_label=False)

                        gr.Markdown("---")

                        with gr.Row():
                            with gr.Column():
                                gr.Markdown("#### 3. GPU Acceleration Tiers")
                                if os.path.exists(gpu_path):
                                    gr.Image(gpu_path, show_label=False)
                            with gr.Column():
                                gr.Markdown("""
#### Hot/Cold Memory Management
To support dozens of projects on a single GPU, ROCKIT implements an **NVMe-to-VRAM Async Swap**.

- **Cold Store (NVMe):** Indexes are serialized as `.cagra` files.
- **Hot Cache (VRAM):** Active projects are copied into VRAM using pinned-memory DMA.
- **LRU Eviction:** Least recently used indexes are purged from VRAM to make room for new ones.
""")

        # ── Event Bindings ────────────────────────────────────────────────────

        # Sidebar controls: switching project refreshes the status panel.
        project_select.change(
            fn=get_system_info,
            inputs=[project_select],
            outputs=[system_info],
        )
        refresh_btn.click(
            fn=refresh_projects,
            inputs=[],
            outputs=[project_select],
        )
        create_btn.click(
            fn=handle_create_project,
            inputs=[new_project_name],
            outputs=[create_status, project_select],
        )

        # Search: the button and pressing Enter in the query box share wiring.
        _search_inputs  = [query_input, search_mode, top_k, project_select]
        _search_outputs = [search_summary, result_gallery, store_info]

        search_btn.click(
            fn=handle_search,
            inputs=_search_inputs,
            outputs=_search_outputs,
        )
        query_input.submit(
            fn=handle_search,
            inputs=_search_inputs,
            outputs=_search_outputs,
        )

        # Ingest: each handler also refreshes the status panel.
        img_btn.click(
            fn=handle_image_upload,
            inputs=[img_upload, project_select],
            outputs=[img_log, system_info],
        )
        vid_btn.click(
            fn=handle_video_upload,
            inputs=[vid_upload, project_select],
            outputs=[vid_log, system_info],
        )

        # Batch operations share one log area and refresh the status panel.
        seed_btn.click(
            fn=handle_seed,
            inputs=[project_select],
            outputs=[action_log, system_info],
        )
        batch_btn.click(
            fn=handle_batch_ingest,
            inputs=[project_select],
            outputs=[action_log, system_info],
        )
        clear_btn.click(
            fn=handle_clear,
            inputs=[project_select],
            outputs=[action_log, system_info],
        )

    return app

# ── Entry point ───────────────────────────────────────────────────────────────

if __name__ == "__main__":
    # Seed the default project when seed_data reports that it is needed.
    # Seeding is best-effort: a failure is logged and the UI still starts.
    if seed_data.is_needed():
        logger.info("Auto-seeding default project from HF Dataset...")
        try:
            seed_data.run()
        except Exception as exc:
            # Top-level boundary: keep the traceback in the log so the
            # cause of a failed seed can be diagnosed.
            logger.exception("Auto-seeding failed: %s", exc)

    app = build_ui()
    # Listen on all interfaces on port 7860 without a public share link.
    app.launch(server_name="0.0.0.0", server_port=7860, share=False)