File size: 11,289 Bytes
98ceb88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
099ea5d
98ceb88
 
3e70669
 
 
 
 
 
 
 
 
 
 
 
 
099ea5d
98ceb88
099ea5d
 
 
3e70669
98ceb88
099ea5d
 
 
 
 
 
98ceb88
099ea5d
98ceb88
099ea5d
 
 
 
 
 
3e70669
 
099ea5d
 
 
 
 
 
98ceb88
099ea5d
 
 
98ceb88
099ea5d
98ceb88
 
099ea5d
98ceb88
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
099ea5d
98ceb88
099ea5d
3e70669
 
 
 
099ea5d
98ceb88
099ea5d
 
 
 
 
 
 
 
98ceb88
099ea5d
98ceb88
099ea5d
 
 
 
 
 
98ceb88
099ea5d
 
 
 
 
 
98ceb88
099ea5d
98ceb88
099ea5d
 
98ceb88
099ea5d
 
98ceb88
099ea5d
98ceb88
099ea5d
 
 
 
 
98ceb88
099ea5d
 
 
98ceb88
 
099ea5d
 
38d0e2a
8c415e4
 
 
 
 
 
 
 
099ea5d
1552f10
 
 
 
 
 
 
 
 
98ceb88
1552f10
 
 
 
 
 
 
 
98ceb88
 
099ea5d
 
 
 
3e70669
38d0e2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3e70669
 
 
 
 
 
38d0e2a
3e70669
 
 
 
 
 
 
 
 
 
 
 
099ea5d
 
 
 
38d0e2a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
099ea5d
 
 
 
 
 
 
 
 
38d0e2a
 
 
 
 
099ea5d
 
98ceb88
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
import spaces  # MUST be the first import for ZeroGPU

import os
from pathlib import Path

import gradio as gr
from PIL import Image

from core.apps import APP_REGISTRY, _error_html, _error_updates
from core.model import vision_infer
from core.parse import parse_json

# ── Image helpers ─────────────────────────────────────────────────────────────

def _pil(item) -> "Image.Image | None":
    if item is None:
        return None
    if isinstance(item, Image.Image):
        return item.convert("RGB")
    if isinstance(item, str) and Path(item).exists():
        return Image.open(item).convert("RGB")
    if isinstance(item, (list, tuple)) and len(item) >= 1:
        return _pil(item[0])
    if hasattr(item, "__array__"):
        import numpy as np
        arr = item if isinstance(item, np.ndarray) else item.__array__()
        return Image.fromarray(arr).convert("RGB")
    return None


def _collect_images(raw) -> list:
    if raw is None:
        return []
    if not isinstance(raw, (list, tuple)):
        img = _pil(raw)
        return [img] if img else []
    imgs = [_pil(item) for item in raw]
    return [i for i in imgs if i is not None]


# ── Status constants ──────────────────────────────────────────────────────────

_STATUS_WARM  = "โณ Warming up the model โ€” first run takes a few secondsโ€ฆ"
_STATUS_CLEAR = ""


def _shimmer_html() -> str:
    return (
        '<div style="padding:24px 20px;border-radius:8px;background:#fafafa;border:1px solid #eee">'
        '<div class="shimmer-line" style="width:55%"></div>'
        '<div class="shimmer-line" style="width:88%"></div>'
        '<div class="shimmer-line" style="width:72%"></div>'
        '<div class="shimmer-line" style="width:80%"></div>'
        '<p style="text-align:center;color:#bbb;font-size:13px;margin-top:16px;'
        'font-family:system-ui">โœฆ Analyzingโ€ฆ</p>'
        '</div>'
    )

# ── Per-spec inference handler factory ────────────────────────────────────────

def _make_run(spec):
    """Build the click handler (a generator function) for one app spec.

    The returned ``_run(image_input, *extra_inputs)`` yields lists of updates
    laid out as ``[status, *outputs]``, one entry per output component of the
    spec:

    1. a loading frame: the warm-up status plus the shimmer skeleton in the
       first output, the remaining outputs untouched;
    2. exactly one final frame: the rendered result, or an error frame when no
       usable image was supplied or any later step raised.
    """
    def _run(image_input, *extra_inputs):
        n_out = len(spec.output_components)

        def _error_frame(message):
            # Every failure path clears the status line and overwrites the
            # shimmer, so the UI is never left stuck on the loading skeleton.
            return [gr.update(value=_STATUS_CLEAR)] + list(_error_updates(n_out, message))

        # Only emit the skeleton when there is an output to put it in;
        # otherwise the frame would carry one update too many.
        loading = []
        if n_out:
            loading = [gr.update(value=_shimmer_html())] + [gr.update()] * (n_out - 1)
        yield [gr.update(value=_STATUS_WARM)] + loading

        try:
            images = _collect_images(image_input)
        except Exception as exc:
            yield _error_frame(f"Could not read the image: {exc}")
            return
        if not images:
            yield _error_frame("Please upload at least one image.")
            return

        try:
            instruction = spec.instruction_fn(*extra_inputs)
            raw = vision_infer(
                images=images,
                instruction=instruction,
                json_mode=(spec.output_mode == "json"),
                max_tokens=spec.max_tokens,
                do_sample=spec.do_sample,
                temperature=spec.temperature,
            )
        except Exception as exc:
            yield _error_frame(f"Inference error: {exc}")
            return

        try:
            data = parse_json(raw) if spec.output_mode == "json" else raw
            # ``list(...)`` so a tuple returned by the renderer can still be
            # concatenated with the status update below.
            updates = list(spec.render_fn(data))
        except Exception as exc:
            yield _error_frame(f"Could not process the model output: {exc}")
            return

        yield [gr.update(value=_STATUS_CLEAR)] + updates

    return _run


# ── Output component builder ──────────────────────────────────────────────────

def _build_output_components(component_types: list[str]) -> list:
    comps = []
    for kind in component_types:
        if kind == "html":
            comps.append(
                gr.HTML(
                    value=(
                        '<div style="min-height:180px;display:flex;align-items:center;'
                        'justify-content:center;color:#aaa;font-family:system-ui;font-size:15px">'
                        'Upload an image and click Analyze โœฆ'
                        '</div>'
                    )
                )
            )
        elif kind == "markdown":
            comps.append(gr.Markdown(""))
        else:
            comps.append(gr.HTML(""))
    return comps


# ── Tab builder ───────────────────────────────────────────────────────────────

def _build_tab(spec):
    """Lay out the widgets of one app tab and wire up its events.

    Creates, in order: an accent bar in the spec's theme colour, a status
    line, an input column (image, optional textbox, optional dropdown and the
    submit button), an output column, and an example gallery when at least
    one example image of the spec exists on disk. Components are created in
    whatever Gradio context is active at the call site.

    Events:

    * submit click -> the handler from ``_make_run(spec)``, fed with
      ``[image, *extras]`` and writing to ``[status, *outputs]``;
    * image change -> status cleared and every output reset to a placeholder;
    * gallery select -> the chosen example row is copied into the inputs.
    """
    gr.HTML(
        f'<div style="height:4px;background:{spec.theme_color};'
        f'border-radius:2px;margin:0 0 6px"></div>'
    )
    status_md = gr.Markdown(_STATUS_CLEAR, visible=True)

    with gr.Row(equal_height=False):
        with gr.Column(scale=1):
            image_comp = gr.Image(
                label="Upload image",
                type="pil",
                sources=["upload", "webcam", "clipboard"],
                height=300,
            )

            # Optional inputs; their order here is the order in which the
            # click handler receives them as extra positional arguments.
            input_spec = spec.input_spec
            extra_comps = []

            if input_spec.text_label:
                extra_comps.append(gr.Textbox(
                    label=input_spec.text_label,
                    placeholder=input_spec.text_placeholder,
                    lines=2,
                ))

            if input_spec.dropdown_choices:
                extra_comps.append(gr.Dropdown(
                    choices=input_spec.dropdown_choices,
                    value=input_spec.dropdown_default,
                    label=input_spec.dropdown_label,
                ))

            submit = gr.Button("Analyze ✦", variant="primary", elem_classes=["submit-btn"])

        with gr.Column(scale=1):
            output_comps = _build_output_components(spec.output_components)

    all_inputs = [image_comp] + extra_comps
    all_outputs = [status_md] + output_comps

    submit.click(fn=_make_run(spec), inputs=all_inputs, outputs=all_outputs)

    placeholder = (
        '<div style="min-height:180px;display:flex;align-items:center;'
        'justify-content:center;color:#aaa;font-family:system-ui">'
        'Click Analyze ✦ to process</div>'
    )

    def _on_image_change(*_):
        # Presumably so that results of a previous image are not left on
        # screen next to a newly chosen one.
        return [gr.update(value=_STATUS_CLEAR)] + [
            gr.update(value=placeholder) for _ in spec.output_components
        ]

    image_comp.change(fn=_on_image_change, inputs=[image_comp], outputs=all_outputs)

    # Example rows are ``[image_path, *extra_values]`` with the image path
    # relative to this file; rows whose image is missing are skipped.
    app_root = Path(__file__).parent
    valid_examples = [
        [str(app_root / str(row[0]))] + list(row[1:])
        for row in spec.examples
        if row and (app_root / str(row[0])).exists()
    ]

    if valid_examples:
        gr.Markdown("**✦ Try an example** — click an image below")
        _gal = gr.Gallery(
            value=[r[0] for r in valid_examples],
            columns=len(valid_examples),
            height=140,
            allow_preview=False,
            show_label=False,
            object_fit="cover",
            elem_classes=["example-gallery"],
        )
        _n = len(all_inputs)

        def _pick(evt: gr.SelectData, _ex=valid_examples, _n=_n):
            # Pad short rows with "" and trim long ones so exactly one value
            # is produced per input component.
            row = list(_ex[evt.index])
            row += [""] * (_n - len(row))
            updates = [gr.update(value=v) for v in row[:_n]]
            # With a single output a bare update is returned, not a list.
            return updates[0] if _n == 1 else updates

        _gal.select(fn=_pick, outputs=all_inputs)


# ── Main demo ─────────────────────────────────────────────────────────────────

# Stylesheet handed to ``gr.Blocks``: hides the footer and styles the submit
# button, the tab bar, the result reveal animation and the shimmer skeleton.
css = """
footer { display: none !important; }

/* ── Submit button ── */
.submit-btn {
    font-size: 17px !important;
    padding: 14px 0 !important;
    margin-top: 6px !important;
    background: linear-gradient(135deg, #1565c0 0%, #283593 100%) !important;
    color: white !important;
    border: none !important;
    border-radius: 10px !important;
    transition: opacity 0.15s !important;
}
.submit-btn:hover { opacity: 0.88 !important; }

/* ── Tab bar ── */
.tab-nav button {
    font-size: 14px !important;
    font-weight: 600 !important;
    padding: 10px 16px !important;
}

/* ── Result reveal animation ── */
@keyframes fadeSlideUp {
    from { opacity: 0; transform: translateY(10px); }
    to   { opacity: 1; transform: translateY(0); }
}
.result-reveal { animation: fadeSlideUp 0.35s ease; }

/* ── Shimmer loading skeleton ── */
@keyframes shimmer {
    0%   { background-position: -600px 0; }
    100% { background-position:  600px 0; }
}
.shimmer-line {
    height: 14px;
    border-radius: 4px;
    background: linear-gradient(90deg, #ececec 25%, #d8d8d8 50%, #ececec 75%);
    background-size: 1200px 100%;
    animation: shimmer 1.4s infinite;
    margin-bottom: 10px;
}
"""

# Top-level layout: a header banner, one tab per registered app spec, and a
# footer with model attribution.
with gr.Blocks(title="Vision Base", css=css, theme=gr.themes.Soft()) as demo:
    # Header banner: logo, title, tagline, model badge and the list of tools.
    gr.HTML(
        '<div style="background:linear-gradient(135deg,#1a1a2e 0%,#16213e 55%,#0f3460 100%);'
        'border-radius:14px;padding:28px 32px;margin-bottom:8px;color:white">'
        '<div style="display:flex;align-items:center;gap:18px;flex-wrap:wrap">'
        '<span style="font-size:52px;line-height:1">👁️</span>'
        '<div style="flex:1;min-width:200px">'
        '<h1 style="margin:0;font-size:28px;font-weight:800;color:white;letter-spacing:-.3px">'
        'Vision Base</h1>'
        '<p style="margin:5px 0 0;color:#a0b8d8;font-size:14px;line-height:1.5">'
        'Four practical vision tools — scan labels, raid your fridge,<br>'
        'decode error codes, reveal object mysteries.</p>'
        '</div>'
        '<span style="background:#6a1b9a;color:white;padding:5px 14px;border-radius:20px;'
        'font-size:12px;font-weight:700;letter-spacing:.4px;white-space:nowrap">'
        '⚡ 1.3B params · Tiny Titan</span>'
        '</div>'
        '<div style="margin-top:16px;padding-top:14px;border-top:1px solid rgba(255,255,255,.1);'
        'display:flex;gap:24px;flex-wrap:wrap">'
        '<span style="color:#7090b8;font-size:12px">🔍 Allergen Lens</span>'
        '<span style="color:#7090b8;font-size:12px">🍽️ Fridge Dinner</span>'
        '<span style="color:#9070b8;font-size:12px">🔮 Object Oracle</span>'
        '<span style="color:#b07070;font-size:12px">🛠️ What\'s That Error?</span>'
        '<span style="margin-left:auto;color:#506080;font-size:11px">MiniCPM-V 4.6 · ZeroGPU</span>'
        '</div>'
        '</div>'
    )

    # One tab per spec in the registry, in the registry's iteration order.
    with gr.Tabs():
        for spec in APP_REGISTRY.values():
            with gr.Tab(spec.title):
                gr.Markdown(f"*{spec.tagline}*")
                _build_tab(spec)

    # Footer attribution.
    gr.HTML(
        '<div style="text-align:center;color:#bbb;font-size:11px;margin-top:12px;'
        'padding-top:8px;border-top:1px solid #eee">'
        'Powered by <b>MiniCPM-V 4.6 (1.3B)</b> · ZeroGPU · '
        '<a href="https://huggingface.co/openbmb/MiniCPM-V-4.6" '
        'style="color:#9090cc;text-decoration:none" target="_blank">OpenBMB</a>'
        '</div>'
    )

# Start the server only when this file is executed directly, not on import.
# NOTE(review): show_error=True presumably surfaces handler exceptions in the
# browser UI — confirm against the installed Gradio version.
if __name__ == "__main__":
    demo.launch(show_error=True)