import spaces # MUST be the first import for ZeroGPU
import os
from pathlib import Path
import gradio as gr
from PIL import Image
from core.apps import APP_REGISTRY, _error_html, _error_updates
from core.model import vision_infer
from core.parse import parse_json
# ── Image helpers ─────────────────────────────────────────────────────────────
def _pil(item) -> "Image.Image | None":
if item is None:
return None
if isinstance(item, Image.Image):
return item.convert("RGB")
if isinstance(item, str) and Path(item).exists():
return Image.open(item).convert("RGB")
if isinstance(item, (list, tuple)) and len(item) >= 1:
return _pil(item[0])
if hasattr(item, "__array__"):
import numpy as np
arr = item if isinstance(item, np.ndarray) else item.__array__()
return Image.fromarray(arr).convert("RGB")
return None
def _collect_images(raw) -> list:
if raw is None:
return []
if not isinstance(raw, (list, tuple)):
img = _pil(raw)
return [img] if img else []
imgs = [_pil(item) for item in raw]
return [i for i in imgs if i is not None]
# ── Status constants ──────────────────────────────────────────────────────────
# Text shown in the status line as soon as a run starts.
_STATUS_WARM = "⏳ Warming up the model — first run takes a few seconds…"
# Empty string used to blank the status line again.
_STATUS_CLEAR = ""
def _shimmer_html() -> str:
# Return the HTML snippet used as the loading placeholder ("✦ Analyzing…").
# NOTE(review): the string fragments below look truncated — the HTML markup
# they once carried appears to be missing; confirm against the real template.
return (
'
'
'
'
'
'
'
'
'
'
'
✦ Analyzing…
'
'
'
)
# ── Per-spec inference handler factory ────────────────────────────────────────
def _make_run(spec):
def _run(image_input, *extra_inputs):
n_out = len(spec.output_components)
yield [gr.update(value=_STATUS_WARM)] + [gr.update(value=_shimmer_html())] + [gr.update()] * (n_out - 1)
images = _collect_images(image_input)
if not images:
yield [gr.update(value=_STATUS_CLEAR)] + _error_updates(
n_out, "Please upload at least one image."
)
return
instruction = spec.instruction_fn(*extra_inputs)
try:
raw = vision_infer(
images=images,
instruction=instruction,
json_mode=(spec.output_mode == "json"),
max_tokens=spec.max_tokens,
do_sample=spec.do_sample,
temperature=spec.temperature,
)
except Exception as exc:
yield [gr.update(value=_STATUS_CLEAR)] + _error_updates(
n_out, f"Inference error: {exc}"
)
return
data = parse_json(raw) if spec.output_mode == "json" else raw
updates = spec.render_fn(data)
yield [gr.update(value=_STATUS_CLEAR)] + updates
return _run
# ── Output component builder ──────────────────────────────────────────────────
def _build_output_components(component_types: list[str]) -> list:
# Create one Gradio output component per entry of component_types, in order:
#   "html"     -> gr.HTML pre-filled with an "Upload an image and click Analyze ✦" hint
#   "markdown" -> empty gr.Markdown
#   any other  -> empty gr.HTML (fallback)
# Returns the created components in the same order as component_types.
# NOTE(review): the HTML literal in the "html" branch looks truncated — its
# markup appears to be missing; confirm against the real template.
comps = []
for kind in component_types:
if kind == "html":
comps.append(
gr.HTML(
value=(
''
'Upload an image and click Analyze ✦'
'
'
)
)
)
elif kind == "markdown":
comps.append(gr.Markdown(""))
else:
comps.append(gr.HTML(""))
return comps
# ── Tab builder ───────────────────────────────────────────────────────────────
def _build_tab(spec):
# Build the widgets and event wiring for one app tab: a status line, the
# image input, optional text/dropdown inputs, the submit button, the output
# components and (when example files are present) a clickable example gallery.
# Must be called inside an active Gradio layout context; returns nothing.
# NOTE(review): several HTML string literals in this function look truncated
# (markup missing); confirm against the real templates.
gr.HTML(
f''
)
# Status line; it is the first entry of all_outputs below, so the first
# update in every handler result targets it.
status_md = gr.Markdown(_STATUS_CLEAR, visible=True)
with gr.Row(equal_height=False):
# Left column: inputs and the submit button.
with gr.Column(scale=1):
image_comp = gr.Image(
label="Upload image",
type="pil",
sources=["upload", "webcam", "clipboard"],
height=300,
)
# Optional extra inputs, created only when the spec defines them. Their
# order here (textbox, then dropdown) is the order in which their values
# follow the image in all_inputs.
extra_comps = []
if spec.input_spec.text_label:
extra_comps.append(gr.Textbox(
label=spec.input_spec.text_label,
placeholder=spec.input_spec.text_placeholder,
lines=2,
))
if spec.input_spec.dropdown_choices:
extra_comps.append(gr.Dropdown(
choices=spec.input_spec.dropdown_choices,
value=spec.input_spec.dropdown_default,
label=spec.input_spec.dropdown_label,
))
submit = gr.Button("Analyze ✦", variant="primary", elem_classes=["submit-btn"])
# Right column: result components.
with gr.Column(scale=1):
output_comps = _build_output_components(spec.output_components)
# Handler contract: inputs are image + extras; outputs are status + results.
all_inputs = [image_comp] + extra_comps
all_outputs = [status_md] + output_comps
submit.click(fn=_make_run(spec), inputs=all_inputs, outputs=all_outputs)
placeholder = (
''
'Click Analyze ✦ to process
'
)
# Whenever the image changes, clear the status line and reset every output
# to the placeholder so stale results are not shown next to a new image.
# NOTE(review): the same HTML placeholder is written to every output,
# whatever its component kind — confirm this is intended for markdown outputs.
def _on_image_change(*_):
return [gr.update(value=_STATUS_CLEAR)] + [
gr.update(value=placeholder) for _ in spec.output_components
]
image_comp.change(fn=_on_image_change, inputs=[image_comp], outputs=all_outputs)
# Example rows: the first element is an image path resolved relative to this
# file's directory; rows that are empty or whose image is missing are skipped.
_app_root = Path(__file__).parent
valid_examples = []
for row in spec.examples:
if not row:
continue
abs_img = _app_root / str(row[0])
if abs_img.exists():
valid_examples.append([str(abs_img)] + list(row[1:]))
if valid_examples:
gr.Markdown("**✦ Try an example** — click an image below")
_gal = gr.Gallery(
value=[r[0] for r in valid_examples],
columns=len(valid_examples),
height=140,
allow_preview=False,
show_label=False,
object_fit="cover",
elem_classes=["example-gallery"],
)
_n = len(all_inputs)
# Gallery click -> fill the inputs from the selected example row. The default
# arguments bind this tab's examples and input count at definition time.
def _pick(evt: gr.SelectData, _ex=valid_examples, _n=_n):
row = list(_ex[evt.index])
# Rows shorter than the number of inputs are padded with empty strings.
while len(row) < _n:
row.append("")
updates = [gr.update(value=v) for v in row[:_n]]
# With a single input a bare update is returned instead of a one-element
# list — presumably what Gradio expects for a lone output; confirm.
return updates[0] if _n == 1 else updates
_gal.select(fn=_pick, outputs=all_inputs)
# ── Main demo ─────────────────────────────────────────────────────────────────
# Custom stylesheet passed to gr.Blocks below: hides the footer, styles the
# submit button and tab bar, and defines the result-reveal and shimmer
# (loading skeleton) animations.
css = """
footer { display: none !important; }
/* ── Submit button ── */
.submit-btn {
font-size: 17px !important;
padding: 14px 0 !important;
margin-top: 6px !important;
background: linear-gradient(135deg, #1565c0 0%, #283593 100%) !important;
color: white !important;
border: none !important;
border-radius: 10px !important;
transition: opacity 0.15s !important;
}
.submit-btn:hover { opacity: 0.88 !important; }
/* ── Tab bar ── */
.tab-nav button {
font-size: 14px !important;
font-weight: 600 !important;
padding: 10px 16px !important;
}
/* ── Result reveal animation ── */
@keyframes fadeSlideUp {
from { opacity: 0; transform: translateY(10px); }
to { opacity: 1; transform: translateY(0); }
}
.result-reveal { animation: fadeSlideUp 0.35s ease; }
/* ── Shimmer loading skeleton ── */
@keyframes shimmer {
0% { background-position: -600px 0; }
100% { background-position: 600px 0; }
}
.shimmer-line {
height: 14px;
border-radius: 4px;
background: linear-gradient(90deg, #ececec 25%, #d8d8d8 50%, #ececec 75%);
background-size: 1200px 100%;
animation: shimmer 1.4s infinite;
margin-bottom: 10px;
}
"""
# Assemble the top-level UI: a header banner, one tab per entry in
# APP_REGISTRY (each built by _build_tab), and a footer.
# NOTE(review): the header and footer HTML literals below look truncated
# (markup missing); confirm against the real templates.
with gr.Blocks(title="Vision Base", css=css, theme=gr.themes.Soft()) as demo:
# Header banner.
gr.HTML(
''
'
'
'
👁️'
'
'
'
'
'Vision Base
'
'
'
'Four practical vision tools — scan labels, raid your fridge,
'
'decode error codes, reveal object mysteries.
'
'
'
'
'
'⚡ 1.3B params · Tiny Titan'
'
'
'
'
'🔍 Allergen Lens'
'🍽️ Fridge Dinner'
'🔮 Object Oracle'
'🛠️ What\'s That Error?'
'MiniCPM-V 4.6 · ZeroGPU'
'
'
'
'
)
# One tab per registered app, titled with the spec's title and opened with
# its tagline in italics.
with gr.Tabs():
for spec in APP_REGISTRY.values():
with gr.Tab(spec.title):
gr.Markdown(f"*{spec.tagline}*")
_build_tab(spec)
# Footer.
gr.HTML(
''
'Powered by
MiniCPM-V 4.6 (1.3B) · ZeroGPU · '
'
OpenBMB'
'
'
)
# Launch the app only when this file is executed as a script.
# NOTE(review): show_error=True presumably surfaces handler exceptions in the
# browser UI — confirm against Gradio's launch() documentation.
if __name__ == "__main__":
demo.launch(show_error=True)