"""Gradio front end for Vision Base.

One tab is built per entry in ``APP_REGISTRY``. Each tab collects an image
(plus an optional textbox / dropdown, depending on the spec), sends it to
``vision_infer`` and renders the result through the spec's ``render_fn``.

NOTE(review): in the copy of this file that was reviewed, the markup inside
the ``gr.HTML`` string literals appears to have been stripped -- only the
visible text and the line breaks survived. The literals below reproduce
exactly what was visible (line breaks written as ``\\n``). Restore the
original HTML from version control before shipping.
"""
import spaces  # MUST be the first import for ZeroGPU

import os
from pathlib import Path

import gradio as gr
from PIL import Image

from core.apps import APP_REGISTRY, _error_html, _error_updates
from core.model import vision_infer
from core.parse import parse_json


# ── Image helpers ─────────────────────────────────────────────────────────────

def _pil(item) -> "Image.Image | None":
    """Coerce *item* into an RGB ``PIL.Image``, or return ``None``.

    Accepted inputs: a PIL image, a path string pointing at an existing file,
    a non-empty list/tuple (only its first element is used), or any object
    exposing ``__array__`` (e.g. a NumPy array).
    """
    if item is None:
        return None
    if isinstance(item, Image.Image):
        return item.convert("RGB")
    if isinstance(item, str) and Path(item).is_file():
        # Image.open is lazy and keeps the file handle open; convert() loads
        # the pixels into a new image, so the handle can be closed right away.
        with Image.open(item) as opened:
            return opened.convert("RGB")
    if isinstance(item, (list, tuple)) and len(item) >= 1:
        return _pil(item[0])
    if hasattr(item, "__array__"):
        import numpy as np  # local import: only needed for array inputs

        arr = item if isinstance(item, np.ndarray) else item.__array__()
        return Image.fromarray(arr).convert("RGB")
    return None


def _collect_images(raw) -> list:
    """Return the list of RGB images found in *raw*.

    *raw* may be ``None``, a single image-like value, or a list/tuple of
    image-like values. Entries that ``_pil`` cannot convert are dropped.
    """
    if raw is None:
        return []
    if not isinstance(raw, (list, tuple)):
        img = _pil(raw)
        return [img] if img is not None else []
    converted = (_pil(item) for item in raw)
    return [img for img in converted if img is not None]


# ── Status constants ──────────────────────────────────────────────────────────

_STATUS_WARM = "⏳ Warming up the model — first run takes a few seconds…"
_STATUS_CLEAR = ""


def _shimmer_html() -> str:
    """Return the placeholder HTML shown in the first output while running."""
    # NOTE(review): markup appears stripped from the reviewed source
    # (presumably skeleton rows using the `.shimmer-line` CSS class) -- restore.
    return (
        "\n"
        "\n"
        "\n"
        "\n"
        "\n"
        "\n\n✦ Analyzing…\n\n"
        "\n"
    )


# ── Per-spec inference handler factory ────────────────────────────────────────

def _make_run(spec):
    """Build the generator click-handler for *spec*.

    The returned handler yields lists of ``gr.update`` objects laid out as
    ``[status, *outputs]`` (one entry per ``spec.output_components``): first a
    "warming up" frame, then either the rendered result or an error frame.
    """

    def _run(image_input, *extra_inputs):
        n_out = len(spec.output_components)

        # Loading frame: shimmer goes into the first output, the rest are
        # left untouched. Guard n_out == 0 so the frame length always matches
        # the number of wired outputs.
        loading = [gr.update(value=_STATUS_WARM)]
        if n_out:
            loading.append(gr.update(value=_shimmer_html()))
            loading.extend(gr.update() for _ in range(n_out - 1))
        yield loading

        images = _collect_images(image_input)
        if not images:
            yield [gr.update(value=_STATUS_CLEAR)] + _error_updates(
                n_out, "Please upload at least one image."
            )
            return

        # UI boundary: any failure must replace the loading frame, otherwise
        # the panel stays stuck on the shimmer.
        try:
            instruction = spec.instruction_fn(*extra_inputs)
            raw = vision_infer(
                images=images,
                instruction=instruction,
                json_mode=(spec.output_mode == "json"),
                max_tokens=spec.max_tokens,
                do_sample=spec.do_sample,
                temperature=spec.temperature,
            )
        except Exception as exc:
            yield [gr.update(value=_STATUS_CLEAR)] + _error_updates(
                n_out, f"Inference error: {exc}"
            )
            return

        try:
            data = parse_json(raw) if spec.output_mode == "json" else raw
            updates = list(spec.render_fn(data))
        except Exception as exc:
            yield [gr.update(value=_STATUS_CLEAR)] + _error_updates(
                n_out, f"Could not render the result: {exc}"
            )
            return

        yield [gr.update(value=_STATUS_CLEAR)] + updates

    return _run


# ── Output component builder ──────────────────────────────────────────────────

def _build_output_components(component_types: list[str]) -> list:
    """Instantiate one Gradio output component per entry of *component_types*.

    ``"html"`` gets an HTML component pre-filled with the upload hint,
    ``"markdown"`` an empty Markdown component, anything else an empty HTML
    component.
    """
    comps = []
    for kind in component_types:
        if kind == "html":
            # NOTE(review): wrapper markup appears stripped -- restore.
            comps.append(
                gr.HTML(
                    value=(
                        "\n"
                        "Upload an image and click Analyze ✦"
                        "\n"
                    )
                )
            )
        elif kind == "markdown":
            comps.append(gr.Markdown(""))
        else:
            comps.append(gr.HTML(""))
    return comps


# ── Tab builder ───────────────────────────────────────────────────────────────

def _build_tab(spec):
    """Create the inputs, outputs, event wiring and example gallery for *spec*.

    Must be called inside an active ``gr.Blocks`` / ``gr.Tab`` context.
    """
    # NOTE(review): this was an f-string in the source, presumably
    # interpolating fields of `spec`; its markup appears stripped -- restore.
    gr.HTML("\n")
    status_md = gr.Markdown(_STATUS_CLEAR, visible=True)

    with gr.Row(equal_height=False):
        with gr.Column(scale=1):
            image_comp = gr.Image(
                label="Upload image",
                type="pil",
                sources=["upload", "webcam", "clipboard"],
                height=300,
            )
            extra_comps = []
            if spec.input_spec.text_label:
                extra_comps.append(
                    gr.Textbox(
                        label=spec.input_spec.text_label,
                        placeholder=spec.input_spec.text_placeholder,
                        lines=2,
                    )
                )
            if spec.input_spec.dropdown_choices:
                extra_comps.append(
                    gr.Dropdown(
                        choices=spec.input_spec.dropdown_choices,
                        value=spec.input_spec.dropdown_default,
                        label=spec.input_spec.dropdown_label,
                    )
                )
            submit = gr.Button(
                "Analyze ✦", variant="primary", elem_classes=["submit-btn"]
            )
        with gr.Column(scale=1):
            output_comps = _build_output_components(spec.output_components)

    all_inputs = [image_comp] + extra_comps
    all_outputs = [status_md] + output_comps

    submit.click(fn=_make_run(spec), inputs=all_inputs, outputs=all_outputs)

    # NOTE(review): wrapper markup appears stripped -- restore.
    placeholder = (
        "\n"
        "Click Analyze ✦ to process\n"
    )

    def _on_image_change(*_):
        # A new image invalidates the previous result: clear the status and
        # reset every output to the placeholder.
        return [gr.update(value=_STATUS_CLEAR)] + [
            gr.update(value=placeholder) for _ in spec.output_components
        ]

    image_comp.change(fn=_on_image_change, inputs=[image_comp], outputs=all_outputs)

    # Keep only example rows whose image (resolved relative to this file)
    # actually exists on disk.
    _app_root = Path(__file__).parent
    valid_examples = []
    for row in spec.examples:
        if not row:
            continue
        abs_img = _app_root / str(row[0])
        if abs_img.exists():
            valid_examples.append([str(abs_img)] + list(row[1:]))

    if valid_examples:
        gr.Markdown("**✦ Try an example** — click an image below")
        _gal = gr.Gallery(
            value=[r[0] for r in valid_examples],
            columns=len(valid_examples),
            height=140,
            allow_preview=False,
            show_label=False,
            object_fit="cover",
            elem_classes=["example-gallery"],
        )
        _n = len(all_inputs)

        # Defaults bind the per-tab values at definition time so each tab's
        # handler keeps its own examples.
        def _pick(evt: gr.SelectData, _ex=valid_examples, _n=_n):
            row = list(_ex[evt.index])
            # Pad short rows so every input component receives a value.
            while len(row) < _n:
                row.append("")
            updates = [gr.update(value=v) for v in row[:_n]]
            return updates[0] if _n == 1 else updates

        _gal.select(fn=_pick, outputs=all_inputs)


# ── Main demo ─────────────────────────────────────────────────────────────────

css = """
footer { display: none !important; }

/* ── Submit button ── */
.submit-btn {
    font-size: 17px !important;
    padding: 14px 0 !important;
    margin-top: 6px !important;
    background: linear-gradient(135deg, #1565c0 0%, #283593 100%) !important;
    color: white !important;
    border: none !important;
    border-radius: 10px !important;
    transition: opacity 0.15s !important;
}
.submit-btn:hover { opacity: 0.88 !important; }

/* ── Tab bar ── */
.tab-nav button {
    font-size: 14px !important;
    font-weight: 600 !important;
    padding: 10px 16px !important;
}

/* ── Result reveal animation ── */
@keyframes fadeSlideUp {
    from { opacity: 0; transform: translateY(10px); }
    to { opacity: 1; transform: translateY(0); }
}
.result-reveal { animation: fadeSlideUp 0.35s ease; }

/* ── Shimmer loading skeleton ── */
@keyframes shimmer {
    0% { background-position: -600px 0; }
    100% { background-position: 600px 0; }
}
.shimmer-line {
    height: 14px;
    border-radius: 4px;
    background: linear-gradient(90deg, #ececec 25%, #d8d8d8 50%, #ececec 75%);
    background-size: 1200px 100%;
    animation: shimmer 1.4s infinite;
    margin-bottom: 10px;
}
"""

with gr.Blocks(title="Vision Base", css=css, theme=gr.themes.Soft()) as demo:
    # Page header.
    # NOTE(review): markup appears stripped from the reviewed source; only
    # the visible text and line breaks are reproduced here -- restore.
    gr.HTML(
        "\n"
        "\n"
        "👁️"
        "\n"
        "\n\n"
        "Vision Base\n\n"
        "\n\n"
        "Four practical vision tools — scan labels, raid your fridge,\n"
        "decode error codes, reveal object mysteries.\n\n"
        "\n"
        ""
        "⚡ 1.3B params · Tiny Titan"
        "\n"
        "\n"
        "🔍 Allergen Lens"
        "🍽️ Fridge Dinner"
        "🔮 Object Oracle"
        "🛠️ What's That Error?"
        "MiniCPM-V 4.6 · ZeroGPU"
        "\n"
        "\n"
    )

    with gr.Tabs():
        for spec in APP_REGISTRY.values():
            with gr.Tab(spec.title):
                gr.Markdown(f"*{spec.tagline}*")
                _build_tab(spec)

    # Page footer.
    # NOTE(review): markup (presumably including a link around "OpenBMB")
    # appears stripped -- restore.
    gr.HTML(
        "\n"
        "Powered by MiniCPM-V 4.6 (1.3B) · ZeroGPU · "
        "OpenBMB"
        "\n"
    )


if __name__ == "__main__":
    demo.launch(show_error=True)