"""Gradio layout for the mock Objectverse archive UI.""" from __future__ import annotations from html import escape from pathlib import Path from typing import Any import gradio as gr from src.config import APP_TITLE, DEFAULT_MODE, PERSONALITY_MODES from src.example_cache import load_sample_generation from src.examples import EXAMPLE_OBJECTS, example_button_label from src.models.llama_cpp_runner import reply_as_object from src.models.schema import GenerationResult from src.models.vision_runner import probe_vision_runtime from src.pipeline import format_diary_markdown, generate_object_diary from src.renderer.share_card import render_share_card from src.ui import copy from src.utils.zero_gpu import zero_gpu CHAT_EMPTY_MESSAGE = "Wake an object first." ARCHIVE_STATUS_EMPTY = """
Archive Status 档案状态 Object asleep

Drop in a photo or use a sample object to open a case file.

上传照片或使用示例物品来开启档案。

Case pending Tiny models ready
""" OBJECT_FILE_EMPTY = """
Object File 物品档案

No case opened yet.

The shelf is quiet. Wake an everyday object to see its file.

档案架仍然安静。唤醒一个日常物品后查看档案。

""" DIARY_EMPTY = """ ### Secret Diary Wake an object to open its private field notes.
唤醒物品后阅读它的秘密观察记录。
""" SHARE_CARD_EMPTY = """
Share Card 分享卡片 Evidence slot empty.

A screenshot-friendly archive card appears after the object wakes.

物品醒来后,这里会出现可截图分享的档案卡片。

""" TRACE_EMPTY = """
Trace 模型轨迹

No trace saved yet.

尚未保存 trace。

""" UI_CONTROL_SCRIPT = r""" (() => { const root = document.documentElement; const INTERNAL_TEXT_REPLACEMENTS = new Map([ ["将图像文件拖放到此处以上传", "Drop image file here to upload"], ["将图像拖放到此处", "Drop image here"], ["- 或 -", "- or -"], ["点击上传", "Click to upload"], ["清空对话", "Clear chat"], ["通过 API 使用", "Use via API"], ["使用 Gradio 构建", "Built with Gradio"], ["设置", "Settings"], ["标志", "icon"], ]); const CJK_RE = /[\u3400-\u9fff]/; const CJK_WRAP_RE = /[\u3400-\u9fff,。!?、;::“”‘’()《》【】]+/g; const SKIP_TEXT_SELECTOR = "script, style, textarea, input, select, option, svg, .lang-zh, .auto-zh"; function readStoredTheme() { try { return localStorage.getItem("objectverse-theme") === "light" ? "light" : "dark"; } catch { return "dark"; } } root.dataset.ovTheme = readStoredTheme(); function syncLanguageButtons(value) { document.querySelectorAll("[data-lang-toggle]").forEach((button) => { const active = button.dataset.langToggle === value; button.classList.toggle("active", active); button.setAttribute("aria-pressed", String(active)); }); } function syncThemeButtons(value) { const isLight = value === "light"; document.querySelectorAll("[data-theme-toggle]").forEach((button) => { button.dataset.themeToggle = value; button.classList.toggle("active", isLight); button.setAttribute("aria-pressed", String(isLight)); button.setAttribute("aria-label", isLight ? "Switch to dark theme" : "Switch to light theme"); button.setAttribute("title", isLight ? "Switch to dark theme" : "Switch to light theme"); button.querySelectorAll("[data-theme-icon]").forEach((icon) => { const hidden = icon.dataset.themeIcon !== value; icon.hidden = hidden; icon.toggleAttribute("hidden", hidden); }); }); } function syncControls() { syncLanguageButtons(root.dataset.ovLang === "zh" ? "zh" : "en"); syncThemeButtons(root.dataset.ovTheme === "light" ? 
"light" : "dark"); } function scheduleControlSync() { syncControls(); window.setTimeout(syncControls, 50); window.setTimeout(syncControls, 250); window.setTimeout(syncControls, 1000); } function applyLanguage(value) { const language = value === "zh" ? "zh" : "en"; root.dataset.ovLang = language; if (document.body) { document.body.dataset.ovLang = language; } syncLanguageButtons(language); } function applyTheme(value) { const theme = value === "light" ? "light" : "dark"; root.dataset.ovTheme = theme; if (document.body) { document.body.dataset.ovTheme = theme; } try { localStorage.setItem("objectverse-theme", theme); } catch { // Local storage can be unavailable in embedded previews. } syncThemeButtons(theme); } function initControls() { root.lang = "en"; applyLanguage("en"); applyTheme(readStoredTheme()); normalizeGradioChrome(document.body); wrapChineseText(document.body); scheduleControlSync(); } function normalizeString(value) { let nextValue = value; INTERNAL_TEXT_REPLACEMENTS.forEach((replacement, source) => { nextValue = nextValue.split(source).join(replacement); }); return nextValue; } function normalizeGradioChrome(rootNode) { if (!rootNode) return; rootNode.querySelectorAll("[aria-label], [title], [alt]").forEach((element) => { ["aria-label", "title", "alt"].forEach((attribute) => { const value = element.getAttribute(attribute); if (value && CJK_RE.test(value)) { const normalizedValue = normalizeString(value); if (normalizedValue !== value) { element.setAttribute(attribute, normalizedValue); } } }); }); const walker = document.createTreeWalker(rootNode, NodeFilter.SHOW_TEXT); const nodes = []; let node = walker.nextNode(); while (node) { const parent = node.parentElement; const text = node.nodeValue || ""; if (parent && !parent.closest(SKIP_TEXT_SELECTOR) && CJK_RE.test(text)) { nodes.push(node); } node = walker.nextNode(); } nodes.forEach((textNode) => { const text = textNode.nodeValue || ""; const normalizedText = normalizeString(text); if (normalizedText 
!== text) { textNode.nodeValue = normalizedText; } }); } function wrapChineseText(rootNode) { if (!rootNode) return; const walker = document.createTreeWalker(rootNode, NodeFilter.SHOW_TEXT); const nodes = []; let node = walker.nextNode(); while (node) { const parent = node.parentElement; const text = node.nodeValue || ""; if (parent && !parent.closest(SKIP_TEXT_SELECTOR) && CJK_RE.test(text)) { nodes.push(node); } node = walker.nextNode(); } nodes.forEach((textNode) => { const text = textNode.nodeValue || ""; const fragment = document.createDocumentFragment(); let lastIndex = 0; text.replace(CJK_WRAP_RE, (match, index) => { if (index > lastIndex) { fragment.append(document.createTextNode(text.slice(lastIndex, index))); } const span = document.createElement("span"); span.className = "auto-zh"; span.textContent = match; fragment.append(span); lastIndex = index + match.length; return match; }); if (lastIndex < text.length) { fragment.append(document.createTextNode(text.slice(lastIndex))); } textNode.replaceWith(fragment); }); } document.addEventListener("click", (event) => { const langButton = event.target.closest("[data-lang-toggle]"); if (langButton) { applyLanguage(langButton.dataset.langToggle); return; } const themeButton = event.target.closest("[data-theme-toggle]"); if (themeButton) { const currentTheme = root.dataset.ovTheme === "light" ? "light" : "dark"; applyTheme(currentTheme === "light" ? 
"dark" : "light"); } }); if (document.readyState === "loading") { document.addEventListener("DOMContentLoaded", initControls); } else { initControls(); } const observer = new MutationObserver(() => { normalizeGradioChrome(document.body); wrapChineseText(document.body); syncControls(); }); if (document.body) { observer.observe(document.body, { childList: true, subtree: true }); } else { document.addEventListener("DOMContentLoaded", () => { observer.observe(document.body, { childList: true, subtree: true }); }); } })(); """ GenerationUiResult = tuple[ str, dict[str, Any], dict[str, Any], str, str, str, dict[str, Any], str, dict[str, Any] | None, list[dict[str, str]], str, ] def build_app() -> gr.Blocks: css = Path("src/ui/styles.css").read_text(encoding="utf-8") custom_theme = gr.themes.Monochrome( primary_hue="amber", secondary_hue="yellow", neutral_hue="stone", ).set( body_background_fill="#161513", body_background_fill_dark="#161513", background_fill_primary="#161513", background_fill_primary_dark="#161513", background_fill_secondary="rgba(30, 28, 25, 0.6)", background_fill_secondary_dark="rgba(30, 28, 25, 0.6)", border_color_primary="rgba(212, 175, 55, 0.15)", border_color_primary_dark="rgba(212, 175, 55, 0.15)", body_text_color="#E6E1D3", body_text_color_dark="#E6E1D3", body_text_color_subdued="#A89B84", body_text_color_subdued_dark="#A89B84", link_text_color="#D4AF37", link_text_color_dark="#D4AF37", link_text_color_hover="#F5D061", link_text_color_hover_dark="#F5D061", link_text_color_active="#F5D061", link_text_color_active_dark="#F5D061", link_text_color_visited="#D4AF37", link_text_color_visited_dark="#D4AF37", block_background_fill="rgba(30, 28, 25, 0.72)", block_background_fill_dark="rgba(30, 28, 25, 0.72)", block_border_width="1px", block_info_text_color="#A89B84", block_info_text_color_dark="#A89B84", block_label_text_color="#A89B84", block_label_text_color_dark="#A89B84", block_title_text_color="#E6E1D3", block_title_text_color_dark="#E6E1D3", 
panel_background_fill="rgba(30, 28, 25, 0.88)", panel_background_fill_dark="rgba(30, 28, 25, 0.88)", accordion_text_color="#E6E1D3", accordion_text_color_dark="#E6E1D3", table_text_color="#E6E1D3", table_text_color_dark="#E6E1D3", input_background_fill="#1b1a18", input_background_fill_dark="#1b1a18", input_background_fill_focus="#1b1a18", input_background_fill_focus_dark="#1b1a18", input_background_fill_hover="#1b1a18", input_background_fill_hover_dark="#1b1a18", input_border_color="rgba(212, 175, 55, 0.3)", input_border_color_dark="rgba(212, 175, 55, 0.3)", input_border_color_focus="#D4AF37", input_border_color_focus_dark="#D4AF37", input_placeholder_color="#8B8678", input_placeholder_color_dark="#8B8678", checkbox_label_text_color="#E6E1D3", checkbox_label_text_color_dark="#E6E1D3", checkbox_label_text_color_selected="#F5D061", checkbox_label_text_color_selected_dark="#F5D061", checkbox_label_background_fill="transparent", checkbox_label_background_fill_dark="transparent", checkbox_label_background_fill_selected="rgba(212, 175, 55, 0.05)", checkbox_label_background_fill_selected_dark="rgba(212, 175, 55, 0.05)", checkbox_label_border_color="rgba(212, 175, 55, 0.3)", checkbox_label_border_color_dark="rgba(212, 175, 55, 0.3)", checkbox_label_border_color_selected="#D4AF37", checkbox_label_border_color_selected_dark="#D4AF37", button_secondary_background_fill="rgba(22, 21, 19, 0.8)", button_secondary_background_fill_dark="rgba(22, 21, 19, 0.8)", button_secondary_background_fill_hover="rgba(38, 35, 29, 0.9)", button_secondary_background_fill_hover_dark="rgba(38, 35, 29, 0.9)", button_secondary_border_color="rgba(212, 175, 55, 0.15)", button_secondary_border_color_dark="rgba(212, 175, 55, 0.15)", button_secondary_border_color_hover="#D4AF37", button_secondary_border_color_hover_dark="#D4AF37", button_secondary_text_color="#E6E1D3", button_secondary_text_color_dark="#E6E1D3", button_secondary_text_color_hover="#F5D061", button_secondary_text_color_hover_dark="#F5D061", 
button_primary_text_color="#2a261f", button_primary_text_color_dark="#2a261f", ) with gr.Blocks(theme=custom_theme, head=f"", title=APP_TITLE, fill_width=True, elem_id="objectverse-app") as demo: with gr.Column(elem_id="app-container"): gr.HTML( f"""
Gradio Small Model Lab

{APP_TITLE}

Every object has a secret life.

Upload a photo, wake a tiny object persona, read its diary, chat, and export the evidence.

万物日记:每个物品都有秘密人生。

上传照片,唤醒小模型生成的物品人格,阅读日记、对话并保存证据。

Gradio Blocks Mock-safe MVP < 32B params
Language
""", padding=False, ) result_state = gr.State() zero_gpu_probe_button = gr.Button(visible=False) zero_gpu_probe_output = gr.JSON(visible=False) vision_runtime_probe_button = gr.Button(visible=False) vision_runtime_probe_output = gr.JSON(visible=False) archive_status = gr.HTML(value=ARCHIVE_STATUS_EMPTY, elem_id="archive-status", padding=False) with gr.Row(elem_id="intake", elem_classes=["content-section", "top-grid"]): with gr.Column(scale=7, elem_classes=["archive-panel", "intake-panel"]): gr.HTML(_panel_header("01", "Wake an Object", "Upload a photo or describe an everyday object.", "唤醒物品"), padding=False) image_input = gr.Image( label=copy.UPLOAD_LABEL, show_label=False, type="filepath", sources=["upload"], placeholder="Drop an object photo here or click to upload.", elem_id="object-upload", ) description_input = gr.Textbox( label=copy.DESCRIPTION_LABEL, placeholder=copy.DESCRIPTION_PLACEHOLDER, lines=2, max_lines=5, elem_id="object-description", ) gr.HTML("""
Personality mode 人格模式
""", padding=False) mode_input = gr.Radio( label=copy.MODE_LABEL, show_label=False, choices=PERSONALITY_MODES, value=DEFAULT_MODE, elem_id="personality-mode", elem_classes=["mode-switch"], ) generate_button = gr.Button(copy.GENERATE_LABEL, variant="primary", elem_id="wake-button") with gr.Column(scale=4, elem_classes=["archive-panel", "examples-panel"]): gr.HTML( """
Example Objects 示例物品
6 filed samples
""", padding=False, ) example_buttons: list[gr.Button] = [] for index in range(len(EXAMPLE_OBJECTS)): example_buttons.append( gr.Button( example_button_label(index), elem_classes=["example-card"], variant="secondary", ) ) with gr.Row(elem_id="results", elem_classes=["content-section", "results-grid"]): with gr.Column(scale=5, elem_classes=["archive-panel", "file-panel"]): gr.HTML(_panel_header("02", "Object File", "Structured understanding, persona, and evidence tags.", "物品档案"), padding=False) object_file_summary = gr.HTML(value=OBJECT_FILE_EMPTY, elem_id="object-file-summary", padding=False) with gr.Column(scale=6, elem_classes=["archive-panel", "diary-panel"]): gr.HTML(_panel_header("03", "Secret Diary", "A private note written by the object.", "秘密日记"), padding=False) diary_output = gr.Markdown( value=DIARY_EMPTY, label=copy.DIARY_LABEL, elem_id="diary-output", ) with gr.Row(elem_id="share-chat", elem_classes=["content-section", "split-section"]): with gr.Column(scale=5, elem_classes=["archive-panel", "share-panel"], elem_id="share-panel"): gr.HTML(_panel_header("04", "Share Card", "Screenshot-friendly field evidence.", "分享卡片"), padding=False) share_card = gr.HTML(value=SHARE_CARD_EMPTY, label=copy.SHARE_CARD_LABEL, padding=False) with gr.Column(scale=4, elem_classes=["archive-panel", "chat-panel"], elem_id="chat-panel"): gr.HTML(_panel_header("05", "Object Chat", "Ask after the object wakes up.", "物品对话"), padding=False) chatbot = gr.Chatbot( value=_empty_chat_history(), label=copy.CHAT_LABEL, type="messages", height=300, allow_tags=False, ) chat_input = gr.Textbox(placeholder=copy.CHAT_INPUT_PLACEHOLDER, show_label=False) chat_button = gr.Button(copy.CHAT_BUTTON_LABEL, elem_classes=["quiet-button"]) with gr.Accordion("Developer details", open=False, elem_classes=["developer-details"]): trace_summary = gr.HTML(value=TRACE_EMPTY, elem_id="trace-summary", padding=False) with gr.Row(elem_classes=["developer-json-grid"]): object_json = gr.JSON(value={}, 
label=copy.OBJECT_JSON_LABEL) persona_json = gr.JSON(value={}, label=copy.PERSONA_JSON_LABEL) trace_json = gr.JSON(value={}, label=copy.TRACE_JSON_LABEL) trace_path = gr.Textbox(label=copy.TRACE_PATH_LABEL, interactive=False) manual_outputs = [ object_file_summary, object_json, persona_json, diary_output, share_card, trace_summary, trace_json, trace_path, result_state, chatbot, archive_status, ] generate_button.click( fn=generate_object_file, inputs=[image_input, description_input, mode_input], outputs=manual_outputs, ) for index, button in enumerate(example_buttons): button.click( fn=_example_handler(index), inputs=[], outputs=[description_input, mode_input, *manual_outputs], ) chat_button.click( fn=chat_with_object, inputs=[chat_input, chatbot, result_state], outputs=[chatbot, chat_input], ) chat_input.submit( fn=chat_with_object, inputs=[chat_input, chatbot, result_state], outputs=[chatbot, chat_input], ) zero_gpu_probe_button.click( fn=zero_gpu_probe, inputs=[], outputs=[zero_gpu_probe_output], api_name="zero_gpu_probe", ) vision_runtime_probe_button.click( fn=vision_runtime_probe, inputs=[], outputs=[vision_runtime_probe_output], api_name="vision_runtime_probe", ) return demo


def _panel_header(index: str, title: str, note: str, chinese: str = "") -> str:
    """Return the header snippet shown at the top of a numbered panel.

    Args:
        index: Panel number as text (callers pass values such as ``"01"``).
        title: English panel title.
        note: One-line English description shown under the title.
        chinese: Optional Chinese title; omitted from the output when empty.

    Returns:
        The formatted header string. Every argument is passed through
        ``html.escape`` before interpolation.
    """
    # Only emit the Chinese label (with its leading separator) when one is given.
    chinese_label = f' {escape(chinese)}' if chinese else ""
    return f"""
{escape(index)}

{escape(title)}{chinese_label}

{escape(note)}

""" def _example_handler(index: int): def load_example() -> tuple[Any, ...]: item = EXAMPLE_OBJECTS[index] cached_result = load_sample_generation(index) if cached_result is not None: return item["description"], item["mode"], *_format_generation_result(cached_result) result = generate_object_file(None, item["description"], item["mode"]) return item["description"], item["mode"], *result return load_example @zero_gpu(duration=180) def generate_object_file( image_path: str | None, description: str, mode: str, ) -> GenerationUiResult: try: result = generate_object_diary(image_path, description, mode) except Exception as exc: # pragma: no cover - exercised manually by UI failure paths. return _generation_error(exc, description, mode) return _format_generation_result(result) def _format_generation_result(result: GenerationResult) -> GenerationUiResult: object_payload = result.object_understanding.model_dump(mode="json") persona_payload = result.persona.model_dump(mode="json") return ( _render_object_file(result), object_payload, persona_payload, format_diary_markdown(result.diary.title, result.diary.english, result.diary.chinese), render_share_card(result.persona, result.diary), _render_trace_summary(result), result.trace.model_dump(mode="json"), result.trace_path, result.model_dump(mode="json"), _awake_chat_history(result), _render_archive_status(result), ) def _render_object_file(result: GenerationResult) -> str: obj = result.object_understanding.object persona = result.persona.persona features = "".join(f"
  • {escape(feature)}
  • " for feature in obj.visible_features) tags = "".join(f"{escape(tag)}" for tag in persona.tags) confidence = f"{obj.confidence:.0%}" case_id = _case_id(result) runtime_badge = "mock-safe" if "mock" in result.trace.model_runtime["text"] else "llama.cpp" evidence_tag = f"{result.trace.mode.lower()} witness" return f"""
    Case ID {escape(case_id)} Awake {escape(runtime_badge)}
    Confidence {escape(confidence)} {escape(result.trace.mode)} Evidence: {escape(evidence_tag)}

    {escape(persona.character_name)}

    {escape(obj.name)} / {escape(persona.object_name)}

    Mood
    {escape(persona.mood)}
    Secret fear
    {escape(persona.secret_fear)}
    Core memory
    {escape(persona.core_memory)}
    Visible features 可见特征

    {escape(persona.complaint)}

    {tags}
    """ def _render_archive_status(result: GenerationResult) -> str: obj = result.object_understanding.object persona = result.persona.persona case_id = _case_id(result) return f"""
    Archive Status 档案状态 {escape(persona.character_name)} is awake

    Case {escape(case_id)} opened for {escape(obj.name)} with {obj.confidence:.0%} object confidence.

    档案 {escape(case_id)} 已开启,物品识别置信度 {obj.confidence:.0%}。

    Object awake Diary unlocked {escape(result.trace.mode)} mode
    """ def _render_trace_summary(result: GenerationResult) -> str: return f"""
    Trace saved Trace 已保存 {escape(result.trace.trace_id)}

    {escape(result.trace.model_runtime["vision"])} · {escape(result.trace.model_runtime["text"])}

    """ def _generation_error(exc: Exception, description: str, mode: str) -> GenerationUiResult: error_type = type(exc).__name__ error_message = str(exc) or "Unknown generation error" error_payload = { "error": error_type, "message": error_message, "input": {"description": description, "mode": mode}, } error_html = f"""
    Generation failed 生成失败 {escape(error_type)}

    {escape(error_message)}

    """ error_markdown = ( "### Generation failed\n\n" f"{error_type}: {error_message}\n\n" "Please try another description or sample object.\n\n" '
    请尝试其他描述或示例物品。
    ' ) return ( error_html, error_payload, error_payload, error_markdown, error_html, error_html, error_payload, "", None, [{"role": "assistant", "content": f"Generation failed: {error_type}"}], f"""
    Archive Status 档案状态 Case jammed

    {escape(error_type)}: {escape(error_message)}

    生成失败,请换一个描述或示例物品再试。

    Needs retry {escape(mode)}
    """, ) def _case_id(result: GenerationResult) -> str: return result.trace.trace_id.replace("_", "-").upper() def _empty_chat_history() -> list[dict[str, str]]: return [{"role": "assistant", "content": CHAT_EMPTY_MESSAGE}] def _awake_chat_history(result: GenerationResult) -> list[dict[str, str]]: name = result.persona.persona.character_name return [ { "role": "assistant", "content": f"{name} is awake. Ask what it remembers.", } ] def chat_with_object( message: str, history: list[dict[str, str]] | None, result_state: dict[str, Any] | None, ) -> tuple[list[dict[str, str]], str]: history = history or _empty_chat_history() clean_message = message.strip() if not clean_message: return history, "" if not result_state: reply = CHAT_EMPTY_MESSAGE else: reply = reply_as_object(result_state["persona"], clean_message) history.append({"role": "user", "content": clean_message}) history.append({"role": "assistant", "content": reply}) return history, "" @zero_gpu(duration=30) def zero_gpu_probe() -> dict[str, Any]: try: import torch except Exception as exc: return {"torch_import": False, "error": f"{type(exc).__name__}: {exc}"} cuda_available = torch.cuda.is_available() return { "torch_import": True, "cuda_available": cuda_available, "device_count": torch.cuda.device_count(), "device_name": torch.cuda.get_device_name(0) if cuda_available else "", } @zero_gpu(duration=180) def vision_runtime_probe() -> dict[str, Any]: return probe_vision_runtime(load_model=True)