workbench / ui /vision_tab.py
GitHub Actions
Initial ZeroGPU deployment with spaces shim
7f9dfed
Raw
History Blame Contribute Delete
3.94 kB
from __future__ import annotations
import gradio as gr
from core.app_state import APP_STATE, emit_inference_response
from core.deployment import (
DeploymentPolicy,
current_policy,
default_backend_for_policy,
filter_backends_for_policy,
)
from core.events import Event, EventType
from core.spaces_runtime import spaces
from core.tab_feedback import emit_tab_error, status_ok
from models.model_catalog import ModelInfo, model_choices, model_summary
from models.service_factory import BACKENDS, create_vision_service
from ui.progress import CLICK_PROGRESS
def build_vision_tab(
    catalog: dict[str, ModelInfo],
    policy: DeploymentPolicy | None = None,
) -> None:
    """Create the vision tab's components and wire up their event handlers.

    Components are instantiated directly, so the caller is expected to have
    a Gradio layout context open when invoking this function.

    Args:
        catalog: Mapping of model id to ``ModelInfo``. Used to list the
            selectable models and to look up the model chosen in the UI.
        policy: Deployment policy used to filter backends and to build the
            vision service. Falls back to ``current_policy()`` when omitted.
    """
    active_policy = policy or current_policy()

    vision_models = model_choices(catalog, "vision")
    if not vision_models:
        # No dedicated vision models: fall back to omnimodal entries.
        vision_models = [
            mid for mid, model in catalog.items() if model.type == "omnimodal"
        ]
    if not vision_models:
        # Nothing selectable at all. Show a notice rather than failing on
        # ``vision_models[0]`` and taking the whole UI build down with it.
        gr.Markdown("No vision-capable models are available in the catalog.")
        return

    default_model = vision_models[0]
    backend_choices = filter_backends_for_policy(BACKENDS, active_policy)
    default_backend = default_backend_for_policy(
        BACKENDS,
        "transformers" if active_policy.is_space else "placeholder",
        active_policy,
    )

    # NOTE(review): the row is assumed to hold the three selector controls;
    # confirm against the intended layout.
    with gr.Row():
        model_id = gr.Dropdown(vision_models, value=default_model, label="Vision model")
        backend = gr.Dropdown(backend_choices, value=default_backend, label="Backend")
        thinking = gr.Checkbox(label="Thinking mode", value=False)
    image = gr.Image(type="pil", label="Image")
    prompt = gr.Textbox(
        label="Prompt",
        lines=4,
        placeholder="Describe or ask about the image...",
    )
    model_meta = gr.JSON(model_summary(catalog[default_model]), label="Model card")
    run = gr.Button("Run vision", variant="primary")
    output = gr.Textbox(label="Response", lines=10)
    status = gr.Markdown(status_ok("Ready."))

    def select_model(selected: str) -> dict:
        """Return the model-card summary for the newly selected model id."""
        return model_summary(catalog[selected])

    @spaces.GPU(duration=180)
    def respond(
        selected: str,
        selected_backend: str,
        use_thinking: bool,
        img,
        text: str,
    ) -> tuple[str, str]:
        """Run one vision request and return ``(response_text, status_markdown)``.

        On invalid input or a backend failure the response text is empty and
        the status carries the error produced by ``emit_tab_error``.
        """
        # Treat a missing prompt as empty so the checks below cannot fail
        # on ``None``.
        text = text or ""
        error_context = {"model_id": selected, "backend": selected_backend}

        if img is None and not text.strip():
            return (
                "",
                emit_tab_error(
                    "Vision",
                    "Add an image or prompt before running vision.",
                    error_context,
                ),
            )

        APP_STATE.emit(
            Event(
                EventType.INFERENCE_REQUEST,
                {
                    "mode": "vision",
                    "model_id": selected,
                    "backend": selected_backend,
                    "has_image": img is not None,
                    "thinking": use_thinking,
                    "prompt_chars": len(text),
                },
            )
        )

        try:
            service = create_vision_service(
                catalog[selected],
                selected_backend,
                active_policy,
            )
            response = service.vision_chat(img is not None, text, img)
        except (RuntimeError, ValueError, OSError) as exc:
            return "", emit_tab_error("Vision", str(exc), error_context)

        if use_thinking:
            response += (
                "\n\nThinking mode requested. Real backend will map this to the model template."
            )
        emit_inference_response("vision", selected, selected_backend, response)
        return response, status_ok("Vision response generated.")

    model_id.change(select_model, model_id, model_meta)
    run.click(
        respond,
        [model_id, backend, thinking, image, prompt],
        [output, status],
        show_progress=CLICK_PROGRESS,
    )