Spaces:

fastino
/

GLiGuard

Running

App Files Community

urchade committed 24 days ago

Commit

abf90c4

verified ·

1 Parent(s): 86f265c

Update app.py

Browse files

Files changed (1) hide show

app.py +282 -74

app.py CHANGED Viewed

@@ -1,7 +1,8 @@
-"""GLiGuard prompt moderation demo built with Gradio."""
 import html
 import os
 import gradio as gr
 from gliner2 import GLiNER2
@@ -10,6 +11,7 @@ from huggingface_hub import login
 MODEL_ID = "fastino/gliguard-LLMGuardrails-300M"
 MODEL_NAME = "GLiGuard LLM Guardrails 300M"
 DEFAULT_THRESHOLD = 0.5
 SAFETY_LABELS = ["safe", "unsafe"]
 REFUSAL_LABELS = ["refusal", "compliance"]
@@ -50,18 +52,18 @@ TASKS = {
     "prompt_toxicity": {
         "labels": TOXICITY_LABELS,
         "multi_label": True,
-        "cls_threshold": 0.4,
     },
     "jailbreak_detection": {
         "labels": JAILBREAK_LABELS,
         "multi_label": True,
-        "cls_threshold": 0.4,
     },
     "response_safety": SAFETY_LABELS,
     "response_toxicity": {
         "labels": TOXICITY_LABELS,
         "multi_label": True,
-        "cls_threshold": 0.4,
     },
     "response_refusal": REFUSAL_LABELS,
 }
@@ -75,6 +77,9 @@ TASK_OPTIONS = [
     ("Response Refusal", "response_refusal"),
 ]
 DISPLAY_NAMES = {
     "safe": "Safe",
     "unsafe": "Unsafe",
@@ -124,19 +129,57 @@ EXAMPLES = [
 ]
 HF_TOKEN = os.environ.get("HF_TOKEN")
-if HF_TOKEN:
-    login(token=HF_TOKEN)
-    print("Logged in to Hugging Face Hub")
-print(f"Loading model: {MODEL_ID}")
-model = GLiNER2.from_pretrained(MODEL_ID)
-print("Model loaded")
 def _format_label(label: str) -> str:
     return DISPLAY_NAMES.get(label, label.replace("_", " ").title())
 def _extract_single_label(value):
     if isinstance(value, dict):
         return value.get("label", "unknown"), float(value.get("confidence", 0.0))
@@ -183,18 +226,36 @@ def _render_group(title: str, subtitle: str, items: list[tuple[str, float]], acc
     )
 def _empty_state_html() -> str:
     return """
     <div class="empty-state">
         <div class="empty-icon">🛡️</div>
-        <div class="empty-title">Run GLiGuard on prompts or responses</div>
         <div class="empty-copy">
-            Select prompt-side and/or response-side tasks, then analyze the text with the GLiGuard checkpoint.
         </div>
     </div>
     """
 def _build_overview_card(title: str, value: str, subtitle: str) -> str:
     return (
         "<div class='stat-card'>"
@@ -205,7 +266,22 @@ def _build_overview_card(title: str, value: str, subtitle: str) -> str:
     )
-def _build_result_html(result: dict, selected_tasks: list[str]) -> str:
     selected_task_set = set(selected_tasks)
     has_safety = "prompt_safety" in selected_task_set and "prompt_safety" in result
     has_toxicity = "prompt_toxicity" in selected_task_set and "prompt_toxicity" in result
@@ -244,26 +320,42 @@ def _build_result_html(result: dict, selected_tasks: list[str]) -> str:
     if has_response_refusal:
         response_refusal_label, response_refusal_conf = _extract_single_label(result.get("response_refusal"))
-    is_unsafe = (
-        (has_safety and safety_label == "unsafe")
-        or bool(toxicity_hits)
-        or bool(jailbreak_hits)
-        or (has_response_safety and response_safety_label == "unsafe" and response_refusal_label != "refusal")
-        or bool(response_toxicity_hits)
-    )
     status_key = "unsafe" if is_unsafe else "safe"
     status = STATUS_STYLES[status_key]
-    summary = "GLiGuard found one or more harmful signals in the selected prompt or response tasks."
-    if status_key == "safe":
-        summary = "No selected task produced a harmful signal above the chosen threshold."
     prompt_task_count = sum([has_safety, has_toxicity, has_jailbreak])
     response_task_count = sum([has_response_safety, has_response_toxicity, has_response_refusal])
     if prompt_task_count and not response_task_count:
-        summary = "This run evaluates prompt-side safety only."
     elif response_task_count and not prompt_task_count:
-        summary = "This run evaluates response-side safety only."
     top_risk = "None"
     if toxicity_hits:
@@ -277,11 +369,15 @@ def _build_result_html(result: dict, selected_tasks: list[str]) -> str:
     stats_cards = [
         _build_overview_card("Tasks Run", str(len(selected_tasks)), "Selected at inference time"),
-        _build_overview_card("Prompt Tasks", str(prompt_task_count), "Prompt-side analyses executed"),
-        _build_overview_card("Response Tasks", str(response_task_count), "Response-side analyses executed"),
     ]
     prompt_cards = []
     if has_safety:
         prompt_cards.append(
             _build_overview_card("Prompt Safety", _format_label(safety_label), f"Confidence {safety_confidence:.1%}")
@@ -296,6 +392,13 @@ def _build_result_html(result: dict, selected_tasks: list[str]) -> str:
         )
     response_cards = []
     if has_response_safety:
         response_cards.append(
             _build_overview_card("Response Safety", _format_label(response_safety_label), f"Confidence {response_safety_conf:.1%}")
@@ -309,6 +412,27 @@ def _build_result_html(result: dict, selected_tasks: list[str]) -> str:
             _build_overview_card("Response Refusal", _format_label(response_refusal_label), f"Confidence {response_refusal_conf:.1%}")
         )
     result_sections = []
     if prompt_cards:
         result_sections.append(
@@ -329,6 +453,9 @@ def _build_result_html(result: dict, selected_tasks: list[str]) -> str:
         result_sections.append(
             _render_group("Response Toxicity", "Multi-label response harm classification", response_toxicity_hits, "#2563eb")
         )
     return f"""
     <div class="results-shell">
@@ -349,69 +476,144 @@ def _build_result_html(result: dict, selected_tasks: list[str]) -> str:
     """
-def classify_prompt(prompt_text: str, response_text: str, threshold: float, selected_tasks: list[str]) -> str:
     prompt_text = (prompt_text or "").strip()
     response_text = (response_text or "").strip()
     if not prompt_text and not response_text:
-        return _empty_state_html()
     if not selected_tasks:
-        return """
-        <div class="empty-state">
-            <div class="empty-icon">🧭</div>
-            <div class="empty-title">Select at least one task</div>
-            <div class="empty-copy">
-                Choose one or more GLiGuard tasks before running inference.
             </div>
-        </div>
-        """
     tasks = {task_name: TASKS[task_name] for task_name in selected_tasks if task_name in TASKS}
     has_prompt_task = any(task.startswith("prompt_") or task == "jailbreak_detection" for task in selected_tasks)
     has_response_task = any(task.startswith("response_") for task in selected_tasks)
     if has_prompt_task and not prompt_text:
-        return """
-        <div class="empty-state">
-            <div class="empty-icon">✍️</div>
-            <div class="empty-title">Add a prompt</div>
-            <div class="empty-copy">
-                Prompt-side tasks require a prompt in the first text box.
             </div>
-        </div>
-        """
     if has_response_task and not response_text:
-        return """
-        <div class="empty-state">
-            <div class="empty-icon">💬</div>
-            <div class="empty-title">Add a response</div>
-            <div class="empty-copy">
-                Response-side tasks require a model response in the response text box.
             </div>
-        </div>
-        """
-    inference_parts = []
-    if has_prompt_task and prompt_text:
-        inference_parts.append(f"Prompt: {prompt_text}")
-    if has_response_task and response_text:
-        if prompt_text:
-            inference_parts.append(f"Response: {response_text}")
-        else:
-            inference_parts.append(f"Response: {response_text}")
-    inference_text = "\n".join(inference_parts)
-    if has_prompt_task and not has_response_task:
-        inference_text = prompt_text
-    result = model.classify_text(
-        text=inference_text,
-        tasks=tasks,
-        threshold=threshold,
-        include_confidence=True,
     )
-    return _build_result_html(result, selected_tasks)
 DESCRIPTION = f"""
@@ -610,9 +812,9 @@ with gr.Blocks(title="GLiGuard Demo") as demo:
                 )
                 task_selector = gr.CheckboxGroup(
                     choices=TASK_OPTIONS,
-                    value=[task_value for _, task_value in TASK_OPTIONS],
                     label="Tasks to run",
-                    info="Select any mix of prompt-side and response-side GLiGuard tasks.",
                 )
                 with gr.Row():
                     classify_btn = gr.Button("Analyze Content", variant="primary", size="lg")
@@ -628,6 +830,12 @@ with gr.Blocks(title="GLiGuard Demo") as demo:
             examples_per_page=8,
         )
     classify_btn.click(
         fn=classify_prompt,
         inputs=[prompt_input, response_input, threshold_slider, task_selector],
@@ -644,9 +852,9 @@ with gr.Blocks(title="GLiGuard Demo") as demo:
         outputs=[result_html],
     )
     clear_btn.click(
-        fn=lambda: ("", "", [task_value for _, task_value in TASK_OPTIONS], _empty_state_html()),
         outputs=[prompt_input, response_input, task_selector, result_html],
     )
 if __name__ == "__main__":
-    demo.launch(theme=THEME, css=CUSTOM_CSS)

+"""GLiGuard demo built with Gradio."""
 import html
 import os
+from functools import lru_cache
 import gradio as gr
 from gliner2 import GLiNER2
 MODEL_ID = "fastino/gliguard-LLMGuardrails-300M"
 MODEL_NAME = "GLiGuard LLM Guardrails 300M"
 DEFAULT_THRESHOLD = 0.5
+MULTI_LABEL_THRESHOLD = 0.4
 SAFETY_LABELS = ["safe", "unsafe"]
 REFUSAL_LABELS = ["refusal", "compliance"]
     "prompt_toxicity": {
         "labels": TOXICITY_LABELS,
         "multi_label": True,
+        "cls_threshold": MULTI_LABEL_THRESHOLD,
     },
     "jailbreak_detection": {
         "labels": JAILBREAK_LABELS,
         "multi_label": True,
+        "cls_threshold": MULTI_LABEL_THRESHOLD,
     },
     "response_safety": SAFETY_LABELS,
     "response_toxicity": {
         "labels": TOXICITY_LABELS,
         "multi_label": True,
+        "cls_threshold": MULTI_LABEL_THRESHOLD,
     },
     "response_refusal": REFUSAL_LABELS,
 }
     ("Response Refusal", "response_refusal"),
 ]
+PROMPT_TASK_VALUES = ["prompt_safety", "prompt_toxicity", "jailbreak_detection"]
+ALL_TASK_VALUES = [task_value for _, task_value in TASK_OPTIONS]
 DISPLAY_NAMES = {
     "safe": "Safe",
     "unsafe": "Unsafe",
 ]
 HF_TOKEN = os.environ.get("HF_TOKEN")
+@lru_cache(maxsize=1)
+def _load_model() -> GLiNER2:
+    if HF_TOKEN:
+        login(token=HF_TOKEN)
+    return GLiNER2.from_pretrained(MODEL_ID)
 def _format_label(label: str) -> str:
     return DISPLAY_NAMES.get(label, label.replace("_", " ").title())
+def _runtime_status_html(title: str, copy: str, tone: str = "info", details: str | None = None) -> str:
+    tones = {
+        "info": {"accent": "#2563eb", "bg": "#eff6ff", "badge": "Info"},
+        "ready": {"accent": "#16a34a", "bg": "#f0fdf4", "badge": "Ready"},
+        "warning": {"accent": "#d97706", "bg": "#fffbeb", "badge": "Check"},
+        "error": {"accent": "#dc2626", "bg": "#fef2f2", "badge": "Error"},
+    }
+    style = tones.get(tone, tones["info"])
+    detail_html = ""
+    if details:
+        detail_html = f"<div class='runtime-detail'>{html.escape(details)}</div>"
+    return (
+        "<div class='runtime-status' "
+        f"style='border-color:{style['accent']}33;background:{style['bg']};'>"
+        f"<div class='runtime-badge' style='background:{style['accent']};'>{style['badge']}</div>"
+        "<div class='runtime-copy'>"
+        f"<div class='runtime-title' style='color:{style['accent']};'>{html.escape(title)}</div>"
+        f"<div class='runtime-subtitle'>{html.escape(copy)}</div>"
+        f"{detail_html}"
+        "</div>"
+        "</div>"
+    )
+def _idle_status_html() -> str:
+    return _runtime_status_html(
+        "Model loads on first analysis",
+        "The first run may take longer while the GLiGuard checkpoint is initialized through the GLiNER2 interface.",
+        tone="info",
+    )
+def _format_exception(exc: Exception) -> str:
+    detail = str(exc).strip() or exc.__class__.__name__
+    return detail.splitlines()[0][:280]
 def _extract_single_label(value):
     if isinstance(value, dict):
         return value.get("label", "unknown"), float(value.get("confidence", 0.0))
     )
+def _render_notes(title: str, subtitle: str, items: list[str]) -> str:
+    body = "".join(f"<li>{html.escape(item)}</li>" for item in items)
+    return (
+        "<div class='result-card'>"
+        f"<div class='eyebrow'>{html.escape(title)}</div>"
+        f"<div class='subtle'>{html.escape(subtitle)}</div>"
+        f"<ul class='note-list'>{body}</ul>"
+        "</div>"
+    )
 def _empty_state_html() -> str:
     return """
     <div class="empty-state">
         <div class="empty-icon">🛡️</div>
+        <div class="empty-title">Run schema-driven GLiGuard moderation</div>
         <div class="empty-copy">
+            Choose any mix of prompt-side and response-side tasks, then run the GLiGuard checkpoint in one composed moderation pass.
         </div>
     </div>
     """
+def _auto_select_tasks(response_text: str):
+    response_text = (response_text or "").strip()
+    if response_text:
+        return gr.update(value=ALL_TASK_VALUES)
+    return gr.update(value=PROMPT_TASK_VALUES)
 def _build_overview_card(title: str, value: str, subtitle: str) -> str:
     return (
         "<div class='stat-card'>"
     )
+def _build_inference_text(
+    prompt_text: str,
+    response_text: str,
+    has_prompt_task: bool,
+    has_response_task: bool,
+) -> tuple[str, str]:
+    if has_prompt_task and not has_response_task:
+        return prompt_text, "Raw prompt"
+    if has_response_task and not has_prompt_task:
+        if prompt_text:
+            return f"Prompt: {prompt_text}\nResponse: {response_text}", "Prompt + Response pair"
+        return f"Response: {response_text}", "Response only"
+    return f"Prompt: {prompt_text}\nResponse: {response_text}", "Prompt + Response pair"
+def _build_result_html(result: dict, selected_tasks: list[str], threshold: float, input_format: str) -> str:
     selected_task_set = set(selected_tasks)
     has_safety = "prompt_safety" in selected_task_set and "prompt_safety" in result
     has_toxicity = "prompt_toxicity" in selected_task_set and "prompt_toxicity" in result
     if has_response_refusal:
         response_refusal_label, response_refusal_conf = _extract_single_label(result.get("response_refusal"))
+    prompt_flagged = (has_safety and safety_label == "unsafe") or bool(toxicity_hits) or bool(jailbreak_hits)
+    response_unsafe_signal = has_response_safety and response_safety_label == "unsafe"
+    refusal_override = response_unsafe_signal and response_refusal_label == "refusal"
+    response_flagged = (response_unsafe_signal and not refusal_override) or bool(response_toxicity_hits)
+    is_unsafe = prompt_flagged or response_flagged
     status_key = "unsafe" if is_unsafe else "safe"
     status = STATUS_STYLES[status_key]
+    signal_phrases = []
+    if has_safety and safety_label == "unsafe":
+        signal_phrases.append("prompt safety predicted unsafe")
+    if toxicity_hits:
+        signal_phrases.append(f"{len(toxicity_hits)} prompt toxicity signal(s)")
+    if jailbreak_hits:
+        signal_phrases.append(f"{len(jailbreak_hits)} jailbreak signal(s)")
+    if response_unsafe_signal and not refusal_override:
+        signal_phrases.append("response safety predicted unsafe without a refusal")
+    if response_toxicity_hits:
+        signal_phrases.append(f"{len(response_toxicity_hits)} response toxicity signal(s)")
+    if signal_phrases:
+        summary = "GLiGuard flagged this run because " + ", ".join(signal_phrases) + "."
+    elif refusal_override:
+        summary = (
+            "Response safety predicted unsafe, but response refusal predicted a refusal, "
+            "which overrides unsafe in the benchmark-style response verdict."
+        )
+    else:
+        summary = "No selected task produced a harmful signal above the configured cutoffs."
     prompt_task_count = sum([has_safety, has_toxicity, has_jailbreak])
     response_task_count = sum([has_response_safety, has_response_toxicity, has_response_refusal])
     if prompt_task_count and not response_task_count:
+        summary = summary + " This run only used prompt-side moderation tasks."
     elif response_task_count and not prompt_task_count:
+        summary = summary + " This run only used response-side moderation tasks."
     top_risk = "None"
     if toxicity_hits:
     stats_cards = [
         _build_overview_card("Tasks Run", str(len(selected_tasks)), "Selected at inference time"),
+        _build_overview_card("Input Format", input_format, "Formatting passed into GLiGuard"),
+        _build_overview_card("Global Threshold", f"{threshold:.2f}", "Forwarded to classify_text"),
     ]
     prompt_cards = []
+    if prompt_task_count:
+        prompt_cards.append(
+            _build_overview_card("Prompt Verdict", "Flagged" if prompt_flagged else "Clear", "Unsafe if any prompt-side harmful signal fires")
+        )
     if has_safety:
         prompt_cards.append(
             _build_overview_card("Prompt Safety", _format_label(safety_label), f"Confidence {safety_confidence:.1%}")
         )
     response_cards = []
+    if response_task_count:
+        verdict_subtitle = "Benchmark-style response verdict"
+        if refusal_override:
+            verdict_subtitle = "Refusal overrides the unsafe response-safety signal"
+        response_cards.append(
+            _build_overview_card("Response Verdict", "Flagged" if response_flagged else "Clear", verdict_subtitle)
+        )
     if has_response_safety:
         response_cards.append(
             _build_overview_card("Response Safety", _format_label(response_safety_label), f"Confidence {response_safety_conf:.1%}")
             _build_overview_card("Response Refusal", _format_label(response_refusal_label), f"Confidence {response_refusal_conf:.1%}")
         )
+    decision_notes = [
+        f"Prompt-only runs use the raw prompt, while response-side runs use {input_format.lower()} formatting.",
+        (
+            f"Multi-label tasks keep the README default cls_threshold={MULTI_LABEL_THRESHOLD:.1f}, "
+            f"and the global threshold for this run was {threshold:.2f}."
+        ),
+    ]
+    if prompt_task_count:
+        decision_notes.append(
+            "Prompt verdict becomes unsafe when prompt safety predicts unsafe or any non-benign prompt toxicity or jailbreak label appears."
+        )
+    if response_task_count:
+        if refusal_override:
+            decision_notes.append(
+                "Response safety fired, but refusal overrode that signal, so the response verdict stayed clear unless response toxicity also fired."
+            )
+        else:
+            decision_notes.append(
+                "Response verdict becomes unsafe when response safety predicts unsafe without a refusal, or when response toxicity returns non-benign labels."
+            )
     result_sections = []
     if prompt_cards:
         result_sections.append(
         result_sections.append(
             _render_group("Response Toxicity", "Multi-label response harm classification", response_toxicity_hits, "#2563eb")
         )
+    result_sections.append(
+        _render_notes("Decision Logic", "How the demo aggregated the selected GLiGuard tasks", decision_notes)
+    )
     return f"""
     <div class="results-shell">
     """
+def classify_prompt(
+    prompt_text: str,
+    response_text: str,
+    threshold: float,
+    selected_tasks: list[str],
+    progress=gr.Progress(track_tqdm=False),
+) -> tuple[str, str]:
     prompt_text = (prompt_text or "").strip()
     response_text = (response_text or "").strip()
     if not prompt_text and not response_text:
+        return _empty_state_html(), _idle_status_html()
     if not selected_tasks:
+        return (
+            """
+            <div class="empty-state">
+                <div class="empty-icon">🧭</div>
+                <div class="empty-title">Select at least one task</div>
+                <div class="empty-copy">
+                    Choose one or more GLiGuard tasks before running inference.
+                </div>
             </div>
+            """,
+            _runtime_status_html(
+                "Task selection needed",
+                "Pick at least one prompt-side or response-side GLiGuard task before analyzing text.",
+                tone="warning",
+            ),
+        )
     tasks = {task_name: TASKS[task_name] for task_name in selected_tasks if task_name in TASKS}
     has_prompt_task = any(task.startswith("prompt_") or task == "jailbreak_detection" for task in selected_tasks)
     has_response_task = any(task.startswith("response_") for task in selected_tasks)
     if has_prompt_task and not prompt_text:
+        return (
+            """
+            <div class="empty-state">
+                <div class="empty-icon">✍️</div>
+                <div class="empty-title">Add a prompt</div>
+                <div class="empty-copy">
+                    Prompt-side tasks require a prompt in the first text box.
+                </div>
             </div>
+            """,
+            _runtime_status_html(
+                "Prompt required",
+                "Prompt safety, prompt toxicity, and jailbreak detection all require prompt text.",
+                tone="warning",
+            ),
+        )
     if has_response_task and not response_text:
+        return (
+            """
+            <div class="empty-state">
+                <div class="empty-icon">💬</div>
+                <div class="empty-title">Add a response</div>
+                <div class="empty-copy">
+                    Response-side tasks require a model response in the response text box.
+                </div>
             </div>
+            """,
+            _runtime_status_html(
+                "Response required",
+                "Response safety, response toxicity, and response refusal need assistant output in the response box.",
+                tone="warning",
+            ),
+        )
+    inference_text, input_format = _build_inference_text(
+        prompt_text=prompt_text,
+        response_text=response_text,
+        has_prompt_task=has_prompt_task,
+        has_response_task=has_response_task,
+    )
+    progress(0.15, desc="Preparing GLiGuard schema")
+    try:
+        progress(0.4, desc="Loading GLiGuard model")
+        model = _load_model()
+    except Exception as exc:
+        error_detail = _format_exception(exc)
+        return (
+            """
+            <div class="empty-state">
+                <div class="empty-icon">⚠️</div>
+                <div class="empty-title">GLiGuard could not load</div>
+                <div class="empty-copy">
+                    The demo could not initialize the checkpoint. Check your Hugging Face access and local model setup, then try again.
+                </div>
+            </div>
+            """,
+            _runtime_status_html(
+                "Model load failed",
+                "The checkpoint did not initialize successfully.",
+                tone="error",
+                details=error_detail,
+            ),
+        )
+    try:
+        progress(0.8, desc="Running moderation")
+        result = model.classify_text(
+            text=inference_text,
+            tasks=tasks,
+            threshold=threshold,
+            include_confidence=True,
+        )
+    except Exception as exc:
+        error_detail = _format_exception(exc)
+        return (
+            """
+            <div class="empty-state">
+                <div class="empty-icon">⚠️</div>
+                <div class="empty-title">Inference did not complete</div>
+                <div class="empty-copy">
+                    GLiGuard loaded, but this moderation request failed before results could be rendered.
+                </div>
+            </div>
+            """,
+            _runtime_status_html(
+                "Inference failed",
+                "The model was available, but this specific request raised an error.",
+                tone="error",
+                details=error_detail,
+            ),
+        )
+    progress(1.0, desc="Rendering results")
+    return (
+        _build_result_html(result, selected_tasks, threshold, input_format),
+        _runtime_status_html(
+            "Model ready",
+            f"Ran {len(selected_tasks)} task(s) using {input_format.lower()} formatting.",
+            tone="ready",
+        ),
     )
 DESCRIPTION = f"""
                 )
                 task_selector = gr.CheckboxGroup(
                     choices=TASK_OPTIONS,
+                    value=PROMPT_TASK_VALUES,
                     label="Tasks to run",
+                    info="Tasks auto-switch based on whether a response is present. You can still adjust them manually.",
                 )
                 with gr.Row():
                     classify_btn = gr.Button("Analyze Content", variant="primary", size="lg")
             examples_per_page=8,
         )
+    response_input.change(
+        fn=_auto_select_tasks,
+        inputs=[response_input],
+        outputs=[task_selector],
+    )
     classify_btn.click(
         fn=classify_prompt,
         inputs=[prompt_input, response_input, threshold_slider, task_selector],
         outputs=[result_html],
     )
     clear_btn.click(
+        fn=lambda: ("", "", PROMPT_TASK_VALUES, _empty_state_html()),
         outputs=[prompt_input, response_input, task_selector, result_html],
     )
 if __name__ == "__main__":
+    demo.launch(theme=THEME, css=CUSTOM_CSS)