File size: 3,939 Bytes
7f9dfed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from __future__ import annotations

import gradio as gr

from core.app_state import APP_STATE, emit_inference_response
from core.deployment import (
    DeploymentPolicy,
    current_policy,
    default_backend_for_policy,
    filter_backends_for_policy,
)
from core.events import Event, EventType
from core.spaces_runtime import spaces
from core.tab_feedback import emit_tab_error, status_ok
from models.model_catalog import ModelInfo, model_choices, model_summary
from models.service_factory import BACKENDS, create_vision_service
from ui.progress import CLICK_PROGRESS


def build_vision_tab(
    catalog: dict[str, ModelInfo],
    policy: DeploymentPolicy | None = None,
) -> None:
    """Create the vision tab's Gradio components and wire their event handlers.

    The model dropdown is filled from ``model_choices(catalog, "vision")``; when
    that is empty, catalog entries whose ``type`` is ``"omnimodal"`` are used
    instead. If neither yields a model, a short notice is rendered and no
    further components or handlers are created.

    NOTE(review): no ``gr.Blocks``/tab context is opened here, so this is
    presumably called from inside one owned by the caller -- confirm.

    Args:
        catalog: Mapping of model id to its ``ModelInfo``.
        policy: Deployment policy used to filter backends and passed to the
            vision service. Falls back to ``current_policy()`` when falsy.
    """
    active_policy = policy or current_policy()
    vision_models = model_choices(catalog, "vision")
    if not vision_models:
        vision_models = [mid for mid, model in catalog.items() if model.type == "omnimodal"]
    if not vision_models:
        # Nothing to select: indexing [0] below would raise IndexError and abort
        # the whole UI build, so show a notice and leave the tab otherwise empty.
        gr.Markdown("No vision-capable models are available in the catalog.")
        return
    default_model = vision_models[0]
    backend_choices = filter_backends_for_policy(BACKENDS, active_policy)
    default_backend = default_backend_for_policy(
        BACKENDS,
        "transformers" if active_policy.is_space else "placeholder",
        active_policy,
    )

    with gr.Row():
        model_id = gr.Dropdown(vision_models, value=default_model, label="Vision model")
        backend = gr.Dropdown(backend_choices, value=default_backend, label="Backend")
        thinking = gr.Checkbox(label="Thinking mode", value=False)

    image = gr.Image(type="pil", label="Image")
    prompt = gr.Textbox(label="Prompt", lines=4, placeholder="Describe or ask about the image...")
    model_meta = gr.JSON(model_summary(catalog[default_model]), label="Model card")
    run = gr.Button("Run vision", variant="primary")
    output = gr.Textbox(label="Response", lines=10)
    status = gr.Markdown(status_ok("Ready."))

    def select_model(selected: str) -> dict:
        """Return the model-card summary for the newly selected model id."""
        return model_summary(catalog[selected])

    # NOTE(review): `spaces` comes from core.spaces_runtime; presumably this
    # requests a GPU slot for up to 180 seconds per call -- confirm there.
    @spaces.GPU(duration=180)
    def respond(
        selected: str,
        selected_backend: str,
        _thinking: bool,
        img,
        text: str | None,
    ) -> tuple[str, str]:
        """Run one vision request and return ``(response_text, status_markdown)``.

        Emits an ``INFERENCE_REQUEST`` event before calling the service and an
        inference-response event on success. On validation failure or a
        ``RuntimeError``/``ValueError``/``OSError`` from the service, returns an
        empty response together with the value of ``emit_tab_error``.
        """
        # A None prompt would break .strip() and len() below; treat it as empty.
        # NOTE(review): recent Gradio types the Textbox payload as `str | None`.
        prompt_text = text or ""
        if img is None and not prompt_text.strip():
            return (
                "",
                emit_tab_error(
                    "Vision",
                    "Add an image or prompt before running vision.",
                    {"model_id": selected, "backend": selected_backend},
                ),
            )
        APP_STATE.emit(
            Event(
                EventType.INFERENCE_REQUEST,
                {
                    "mode": "vision",
                    "model_id": selected,
                    "backend": selected_backend,
                    "has_image": img is not None,
                    "thinking": _thinking,
                    "prompt_chars": len(prompt_text),
                },
            )
        )
        try:
            response = create_vision_service(
                catalog[selected],
                selected_backend,
                active_policy,
            ).vision_chat(
                img is not None,
                prompt_text,
                img,
            )
        except (RuntimeError, ValueError, OSError) as exc:
            return (
                "",
                emit_tab_error(
                    "Vision",
                    str(exc),
                    {"model_id": selected, "backend": selected_backend},
                ),
            )
        if _thinking:
            # The flag is not passed to the service; only this note is appended.
            response += (
                "\n\nThinking mode requested. Real backend will map this to the model template."
            )
        emit_inference_response("vision", selected, selected_backend, response)
        return response, status_ok("Vision response generated.")

    model_id.change(select_model, model_id, model_meta)
    run.click(
        respond,
        [model_id, backend, thinking, image, prompt],
        [output, status],
        show_progress=CLICK_PROGRESS,
    )