Spaces:

AlephBeth-AI
/

GuardLLM

Sleeping

G. Claude Opus 4.7 committed 18 days ago

Commit

dea9e25

1 Parent(s): 1569836

Apply Aleph Beth design system to GuardLLM UI

- Parchment surface + ink typography, Instrument Serif display, Geist body/mono.
- Restrained 13-category palette drawn from brand families (safe, threat, gilt, signal, ink) — no neon.
- Replace emoji (verdict, header) with geometric primitives and editorial labels.
- Plotly chart re-skinned: parchment paper, ink axes, soft grid, branded hover labels.
- Bilingual mark (א-ב · أب) in header and footer.
- Cards, buttons, inputs, filters all themed via gr.themes.Base override + custom CSS.
- Pass HF_TOKEN to from_pretrained so the gated model loads when the Space secret is set.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>

Files changed (1) hide show

app.py +761 -222

app.py CHANGED Viewed

@@ -1,19 +1,18 @@
 """
-GuardLLM - Interactive Prompt Security Visualizer
-Combines t-SNE embedding visualization with real-time prompt risk analysis.
 Powered by Llama Prompt Guard 2 (86M) and neuralchemy/Prompt-injection-dataset.
 """
 import logging
 import sys
 import json
-import traceback
 import gradio as gr
 import torch
 import numpy as np
 import plotly.graph_objects as go
-import plotly.io as pio
 from pathlib import Path
 # ---------------------------------------------------------------------------
@@ -27,22 +26,61 @@ logging.basicConfig(
 logger = logging.getLogger("GuardLLM")
 # ---------------------------------------------------------------------------
-# Color palette for categories
 # ---------------------------------------------------------------------------
 CATEGORY_COLORS = {
-    "benign": "#22c55e",
-    "direct_injection": "#ef4444",
-    "jailbreak": "#f97316",
-    "system_extraction": "#a855f7",
-    "encoding_obfuscation": "#ec4899",
-    "persona_replacement": "#f59e0b",
-    "indirect_injection": "#e11d48",
-    "token_smuggling": "#7c3aed",
-    "many_shot": "#06b6d4",
-    "crescendo": "#14b8a6",
-    "context_overflow": "#8b5cf6",
-    "prompt_leaking": "#d946ef",
-    "unknown": "#64748b",
 }
 CATEGORY_LABELS = {
@@ -66,6 +104,7 @@ CATEGORY_LABELS = {
 # ---------------------------------------------------------------------------
 MODEL_ID = "meta-llama/Llama-Prompt-Guard-2-86M"
 LABELS = ["Benign", "Malicious"]
 _classifier = {"tokenizer": None, "model": None, "device": None}
@@ -73,8 +112,9 @@ def get_classifier():
     if _classifier["model"] is None:
         logger.info("Lazy-loading Llama Prompt Guard 2...")
         from transformers import AutoTokenizer, AutoModelForSequenceClassification
-        tok = AutoTokenizer.from_pretrained(MODEL_ID)
-        mdl = AutoModelForSequenceClassification.from_pretrained(MODEL_ID)
         mdl.eval()
         dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         mdl.to(dev)
@@ -131,14 +171,13 @@ def analyze_prompt(text):
     with torch.no_grad():
         outputs = model(**inputs)
         probs = torch.softmax(outputs.logits, dim=-1)[0].cpu().numpy()
-    pred_idx = int(np.argmax(probs))
     prob_dict = {LABELS[i]: float(probs[i]) for i in range(len(LABELS))}
     safety = float(probs[0])
     return prob_dict, safety
 # ---------------------------------------------------------------------------
-# Build the t-SNE Plotly figure
 # ---------------------------------------------------------------------------
 def build_tsne_figure(selected_categories=None):
     fig = go.Figure()
@@ -159,8 +198,8 @@ def build_tsne_figure(selected_categories=None):
         severities = [ALL_SEVERITIES[i] or "benign" for i in indices]
         hover_texts = [
             f"<b>{CATEGORY_LABELS.get(cat, cat)}</b><br>"
-            f"Severity: {sev}<br>"
-            f"Index: {idx}<br>"
             f"<i>{txt}</i>"
             for idx, txt, sev in zip(indices, texts_preview, severities)
         ]
@@ -173,41 +212,55 @@ def build_tsne_figure(selected_categories=None):
             marker=dict(
                 size=5 if len(indices) > 500 else 7,
                 color=color,
-                opacity=0.7,
-                line=dict(width=0.5, color="rgba(255,255,255,0.2)"),
             ),
             text=hover_texts,
             hoverinfo="text",
             customdata=[str(i) for i in indices],
         ))
     fig.update_layout(
-        template="plotly_dark",
-        paper_bgcolor="#0f172a",
-        plot_bgcolor="#1e293b",
         title=dict(
-            text="t-SNE Embedding Space - Prompt Security Landscape",
-            font=dict(size=16, color="#e2e8f0"),
             x=0.5,
         ),
         legend=dict(
-            title=dict(text="Category", font=dict(color="#94a3b8")),
-            bgcolor="rgba(15,23,42,0.9)",
-            bordercolor="#334155",
             borderwidth=1,
-            font=dict(color="#cbd5e1", size=10),
             itemsizing="constant",
         ),
         xaxis=dict(
-            title="t-SNE 1", showgrid=True, gridcolor="#334155",
-            zeroline=False, color="#94a3b8",
         ),
         yaxis=dict(
-            title="t-SNE 2", showgrid=True, gridcolor="#334155",
-            zeroline=False, color="#94a3b8",
         ),
-        margin=dict(l=40, r=40, t=50, b=40),
-        height=600,
         dragmode="pan",
     )
     return fig
@@ -228,30 +281,34 @@ def deselect_all_categories():
     return gr.update(value=[]), build_tsne_figure([])
 def on_dropdown_select(choice):
     if not choice:
-        return empty_analysis_html(), "*Select a prompt.*", ""
     try:
         idx = int(choice.split(" | ")[0])
         text = ALL_TEXTS[idx]
         category = ALL_CATEGORIES[idx]
         severity = ALL_SEVERITIES[idx] or "N/A"
         ground_truth = "Malicious" if ALL_LABELS_DS[idx] == 1 else "Benign"
-        prob_dict, safety = analyze_prompt(text)
         pred_label = max(prob_dict, key=prob_dict.get)
         confidence = prob_dict[pred_label]
         result_html = build_result_html(pred_label, confidence, prob_dict, text)
         risk_text = build_risk_assessment(pred_label, confidence, prob_dict)
-        risk_text += (
-            f"\n\n---\n**Dataset metadata:**\n"
-            f"- Category: **{CATEGORY_LABELS.get(category, category)}**\n"
-            f"- Severity: **{severity}**\n"
-            f"- Ground truth: **{ground_truth}**\n"
-        )
         return result_html, risk_text, text
     except Exception as e:
         logger.error("Error: %s", e)
-        return empty_analysis_html(), f"Error: {e}", ""
 def on_index_input(idx_str):
@@ -260,32 +317,27 @@ def on_index_input(idx_str):
     try:
         idx = int(idx_str.strip())
         if idx < 0 or idx >= len(ALL_TEXTS):
-            return empty_analysis_html(), f"Invalid index: {idx}", ""
         text = ALL_TEXTS[idx]
         category = ALL_CATEGORIES[idx]
         severity = ALL_SEVERITIES[idx] or "N/A"
         ground_truth = "Malicious" if ALL_LABELS_DS[idx] == 1 else "Benign"
-        prob_dict, safety = analyze_prompt(text)
         pred_label = max(prob_dict, key=prob_dict.get)
         confidence = prob_dict[pred_label]
         result_html = build_result_html(pred_label, confidence, prob_dict, text)
         risk_text = build_risk_assessment(pred_label, confidence, prob_dict)
-        risk_text += (
-            f"\n\n---\n**Dataset metadata:**\n"
-            f"- Category: **{CATEGORY_LABELS.get(category, category)}**\n"
-            f"- Severity: **{severity}**\n"
-            f"- Ground truth: **{ground_truth}**\n"
-        )
         return result_html, risk_text, text
     except Exception as e:
         logger.error("Error: %s", e)
-        return empty_analysis_html(), f"Error: {e}", ""
 def on_manual_analyze(text):
     if not text or not text.strip():
         return empty_analysis_html(), ""
-    prob_dict, safety = analyze_prompt(text)
     pred_label = max(prob_dict, key=prob_dict.get)
     confidence = prob_dict[pred_label]
     result_html = build_result_html(pred_label, confidence, prob_dict, text)
@@ -294,69 +346,79 @@ def on_manual_analyze(text):
 # ---------------------------------------------------------------------------
-# UI builders
 # ---------------------------------------------------------------------------
 def empty_analysis_html():
-    return """
-    <div style="text-align:center; padding:30px; color:#94a3b8;">
-        <p style="font-size:1em;">Click a point on the chart,<br>
-        select a prompt from the list,<br>
-        or enter a custom prompt below.</p>
     </div>
     """
 def build_result_html(label, confidence, probs, text):
-    color = "#22c55e" if label == "Benign" else "#ef4444"
-    emoji = "\u2705" if label == "Benign" else "\u26a0\ufe0f"
     pct = confidence * 100
     safety_score = probs["Benign"] * 100
     safety_color = (
-        "#22c55e" if safety_score >= 70
-        else "#f59e0b" if safety_score >= 40
-        else "#ef4444"
     )
     bars_html = ""
     for lbl in LABELS:
         p = probs[lbl] * 100
-        c = "#22c55e" if lbl == "Benign" else "#ef4444"
         bars_html += f"""
-        <div style="margin-bottom:8px;">
-            <div style="display:flex; justify-content:space-between; margin-bottom:2px;">
-                <span style="font-weight:600; color:#e2e8f0;">{lbl}</span>
-                <span style="color:#cbd5e1; font-weight:600;">{p:.1f}%</span>
             </div>
-            <div style="background:#1e293b; border-radius:8px; height:18px; overflow:hidden;">
-                <div style="background:{c}; height:100%; width:{p}%; border-radius:8px;"></div>
             </div>
         </div>
         """
-    preview = text[:150].replace("<", "&lt;").replace(">", "&gt;")
-    if len(text) > 150:
-        preview += "..."
     return f"""
-    <div style="background:#0f172a; border-radius:12px; padding:18px; font-family:system-ui,sans-serif;">
-        <div style="text-align:center; margin-bottom:14px;">
-            <div style="font-size:2em;">{emoji}</div>
-            <div style="font-size:1.2em; font-weight:700; color:{color};">{label}</div>
-            <div style="color:#94a3b8; font-size:0.85em;">Confidence: {pct:.1f}%</div>
-        </div>
-        <div style="background:#1e293b; border-radius:10px; padding:12px; margin-bottom:10px;">
-            <div style="display:flex; justify-content:space-between; margin-bottom:4px;">
-                <span style="color:#e2e8f0; font-weight:600;">Safety Score</span>
-                <span style="color:{safety_color}; font-weight:700; font-size:1.1em;">{safety_score:.0f}/100</span>
-            </div>
-            <div style="background:#334155; border-radius:8px; height:12px; overflow:hidden;">
-                <div style="background:linear-gradient(90deg, #ef4444, #f59e0b, #22c55e);
-                            height:100%; width:{safety_score}%; border-radius:8px;"></div>
             </div>
         </div>
-        <div style="background:#1e293b; border-radius:10px; padding:12px; margin-bottom:10px;">
-            {bars_html}
         </div>
-        <div style="background:#1e293b; border-radius:10px; padding:12px;">
-            <div style="color:#94a3b8; font-size:0.8em; margin-bottom:3px;">Analyzed prompt:</div>
-            <div style="color:#cbd5e1; font-style:italic; word-break:break-word; font-size:0.85em;">"{preview}"</div>
         </div>
     </div>
     """
@@ -366,19 +428,22 @@ def build_risk_assessment(label, confidence, probs):
     safety_score = probs["Benign"] * 100
     malicious_score = probs["Malicious"] * 100
     if label == "Benign" and confidence > 0.85:
-        level, desc = "Low", "This prompt appears **safe**. No injection or jailbreak patterns detected."
     elif label == "Benign":
-        level, desc = "Moderate", "Likely benign, but moderate confidence. Potentially ambiguous wording."
     elif confidence > 0.85:
-        level, desc = "Critical", "**Malicious prompt detected** with high confidence. Likely injection or jailbreak attempt."
     else:
-        level, desc = "High", "**Malicious prompt detected.** Possible injection or jailbreak. Review recommended."
     return (
-        f"### Risk Level: {level}\n\n{desc}\n\n"
-        f"**Details:**\n"
-        f"- Safety score: **{safety_score:.0f}/100**\n"
-        f"- Predicted class: **{label}** ({confidence*100:.1f}%)\n"
-        f"- P(Benign) = {probs['Benign']*100:.1f}% | P(Malicious) = {malicious_score:.1f}%\n"
     )
@@ -396,37 +461,37 @@ def build_stats_html():
         pct = count / total * 100
         label = CATEGORY_LABELS.get(cat, cat)
         cats_html += (
-            f'<div style="display:flex; justify-content:space-between; padding:2px 0;">'
-            f'<span style="color:{color}; font-weight:500; font-size:0.85em;">{label}</span>'
-            f'<span style="color:#94a3b8; font-size:0.85em;">{count} ({pct:.1f}%)</span>'
             f'</div>'
         )
     return f"""
-    <div style="background:#0f172a; border-radius:12px; padding:14px; font-family:system-ui,sans-serif;">
-        <div style="color:#e2e8f0; font-weight:700; margin-bottom:8px;">Dataset Statistics</div>
-        <div style="display:flex; gap:10px; margin-bottom:10px;">
-            <div style="flex:1; background:#1e293b; border-radius:8px; padding:8px; text-align:center;">
-                <div style="color:#94a3b8; font-size:0.75em;">Total</div>
-                <div style="color:#e2e8f0; font-weight:700; font-size:1.2em;">{total:,}</div>
             </div>
-            <div style="flex:1; background:#1e293b; border-radius:8px; padding:8px; text-align:center;">
-                <div style="color:#22c55e; font-size:0.75em;">Benign</div>
-                <div style="color:#22c55e; font-weight:700; font-size:1.2em;">{n_benign:,}</div>
             </div>
-            <div style="flex:1; background:#1e293b; border-radius:8px; padding:8px; text-align:center;">
-                <div style="color:#ef4444; font-size:0.75em;">Malicious</div>
-                <div style="color:#ef4444; font-weight:700; font-size:1.2em;">{n_malicious:,}</div>
             </div>
         </div>
-        <div style="background:#1e293b; border-radius:8px; padding:8px;">
-            {cats_html}
-        </div>
     </div>
     """
 # ---------------------------------------------------------------------------
-# JavaScript to bridge Plotly clicks -> Gradio
 # ---------------------------------------------------------------------------
 PLOTLY_CLICK_JS = """
 () => {
@@ -440,7 +505,8 @@ PLOTLY_CLICK_JS = """
             if (data && data.points && data.points.length > 0) {
                 const idx = data.points[0].customdata;
                 if (idx !== undefined && idx !== null) {
-                    const inputEl = document.querySelector('#click-index-input textarea') || document.querySelector('#click-index-input input');
                     if (inputEl) {
                         const proto = inputEl.tagName === 'TEXTAREA'
                             ? window.HTMLTextAreaElement.prototype
@@ -473,60 +539,560 @@ PLOTLY_CLICK_JS = """
 # ---------------------------------------------------------------------------
-# Gradio Interface
 # ---------------------------------------------------------------------------
-TITLE_HTML = """
-<div style="text-align:center; padding:10px 0 4px 0;">
-    <h1 style="font-size:1.8em; margin:0;">GuardLLM - Prompt Security Visualizer</h1>
-    <p style="color:#94a3b8; font-size:0.95em; margin-top:4px;">
-        Interactive t-SNE embedding space &bull;
-        <a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M" target="_blank" style="color:#60a5fa;">
-        Llama Prompt Guard 2</a> &bull;
-        <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset" target="_blank" style="color:#60a5fa;">
-        neuralchemy dataset</a>
     </p>
-</div>
 """
 HOW_TO_HTML = """
-<div style="background:linear-gradient(135deg, #0f172a 0%, #1e293b 100%); border:1px solid #334155; border-radius:12px; padding:16px 20px; margin:0 0 8px 0; font-family:system-ui,sans-serif;">
-    <div style="color:#e2e8f0; font-weight:700; font-size:1em; margin-bottom:8px;">How to use this tool</div>
-    <div style="display:flex; flex-wrap:wrap; gap:12px;">
-        <div style="flex:1; min-width:180px; background:#1e293b; border-radius:8px; padding:10px 12px;">
-            <div style="color:#60a5fa; font-weight:600; font-size:0.85em; margin-bottom:4px;">1. Explore the map</div>
-            <div style="color:#94a3b8; font-size:0.8em; line-height:1.4;">Each dot represents a prompt from the dataset, positioned by semantic similarity. Colors indicate attack categories. Hover to preview, scroll to zoom, drag to pan.</div>
         </div>
-        <div style="flex:1; min-width:180px; background:#1e293b; border-radius:8px; padding:10px 12px;">
-            <div style="color:#f59e0b; font-weight:600; font-size:0.85em; margin-bottom:4px;">2. Click to analyze</div>
-            <div style="color:#94a3b8; font-size:0.8em; line-height:1.4;">Click any point to run it through <strong style="color:#cbd5e1;">Llama Prompt Guard 2</strong>. The right panel will show the risk classification, safety score, and confidence breakdown.</div>
         </div>
-        <div style="flex:1; min-width:180px; background:#1e293b; border-radius:8px; padding:10px 12px;">
-            <div style="color:#22c55e; font-weight:600; font-size:0.85em; margin-bottom:4px;">3. Test your own prompts</div>
-            <div style="color:#94a3b8; font-size:0.8em; line-height:1.4;">Type or paste any prompt in the <strong style="color:#cbd5e1;">Custom prompt</strong> field and hit Analyze to check if it would be flagged as an injection attempt.</div>
         </div>
     </div>
 </div>
 """
 with gr.Blocks(
-    title="GuardLLM - Prompt Security Visualizer",
 ) as demo:
-    gr.HTML(TITLE_HTML)
     gr.HTML(HOW_TO_HTML)
-    click_index = gr.Textbox(
-        value="",
-        visible=True,
-        elem_id="click-index-input",
-    )
     with gr.Row():
-        # ---- Left: t-SNE chart + filters ----
         with gr.Column(scale=3):
             with gr.Row():
-                select_all_btn = gr.Button("Select All", size="sm", scale=1)
-                deselect_all_btn = gr.Button("Deselect All", size="sm", scale=1)
             category_filter = gr.CheckboxGroup(
                 choices=UNIQUE_CATEGORIES,
@@ -536,96 +1102,69 @@ with gr.Blocks(
             )
             tsne_plot = gr.Plot(
                 value=build_tsne_figure(),
-                label="t-SNE Space",
                 elem_id="tsne-chart",
             )
             gr.Markdown(
-                "*Click a point to analyze it. "
-                "Hover to preview text. Use scroll wheel to zoom.*"
             )
-        # ---- Right: Analysis first, then stats (swapped) ----
         with gr.Column(scale=2):
-            gr.Markdown("### Analysis Result")
             result_html = gr.HTML(value=empty_analysis_html())
             risk_md = gr.Markdown(value="")
-            full_prompt = gr.Textbox(label="Full prompt", lines=3, interactive=False, visible=True)
             gr.Markdown("---")
-            gr.Markdown("### Select a prompt")
             prompt_dropdown = gr.Dropdown(
                 choices=DROPDOWN_CHOICES,
-                label="Search dataset",
                 filterable=True,
                 interactive=True,
             )
-            gr.Markdown("### Or analyze a custom prompt")
             manual_input = gr.Textbox(
-                label="Custom prompt",
-                placeholder="Type or paste a prompt...",
                 lines=2,
             )
-            analyze_btn = gr.Button("Analyze", variant="primary")
             gr.Markdown("---")
             gr.HTML(build_stats_html())
     # ---- Events ----
-    category_filter.change(
-        fn=on_filter_change,
-        inputs=[category_filter],
-        outputs=[tsne_plot],
-    )
-    select_all_btn.click(
-        fn=select_all_categories,
-        inputs=[],
-        outputs=[category_filter, tsne_plot],
-    )
-    deselect_all_btn.click(
-        fn=deselect_all_categories,
-        inputs=[],
-        outputs=[category_filter, tsne_plot],
-    )
-    click_index.change(
-        fn=on_index_input,
-        inputs=[click_index],
-        outputs=[result_html, risk_md, full_prompt],
-    )
-    prompt_dropdown.change(
-        fn=on_dropdown_select,
-        inputs=[prompt_dropdown],
-        outputs=[result_html, risk_md, full_prompt],
-    )
-    analyze_btn.click(
-        fn=on_manual_analyze,
-        inputs=[manual_input],
-        outputs=[result_html, risk_md],
-    )
-    manual_input.submit(
-        fn=on_manual_analyze,
-        inputs=[manual_input],
-        outputs=[result_html, risk_md],
-    )
     demo.load(fn=None, inputs=None, outputs=None, js=PLOTLY_CLICK_JS)
-    gr.Markdown(
-        """
-        ---
-        <div style="text-align:center; color:#64748b; font-size:0.8em;">
-            <strong>GuardLLM</strong> - Prompt Security Visualizer<br>
-            Model: <a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M">
-            Llama Prompt Guard 2 (86M)</a> by Meta |
-            Dataset: <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset">
-            neuralchemy/Prompt-injection-dataset</a>
-        </div>
-        """
-    )
 logger.info("Gradio app built. Ready to launch.")
 if __name__ == "__main__":
-    demo.launch(css="#click-index-input { position:absolute !important; width:1px !important; height:1px !important; overflow:hidden !important; opacity:0 !important; pointer-events:none !important; }")

 """
+GuardLLM — Prompt Security Visualizer
+Aleph Beth design system applied. Editorial calm, bilingual FR/EN posture.
 Powered by Llama Prompt Guard 2 (86M) and neuralchemy/Prompt-injection-dataset.
 """
 import logging
+import os
 import sys
 import json
 import gradio as gr
 import torch
 import numpy as np
 import plotly.graph_objects as go
 from pathlib import Path
 # ---------------------------------------------------------------------------
 logger = logging.getLogger("GuardLLM")
 # ---------------------------------------------------------------------------
+# Aleph Beth — palette tokens (mirrored from colors_and_type.css)
 # ---------------------------------------------------------------------------
+AB = {
+    "ink_950": "#0B1626",
+    "ink_900": "#11203A",
+    "ink_800": "#1B2F4E",
+    "ink_700": "#2A4566",
+    "ink_600": "#44607F",
+    "ink_500": "#6B829D",
+    "ink_400": "#95A6BB",
+    "ink_300": "#BCC8D6",
+    "ink_200": "#DAE1EA",
+    "ink_100": "#ECF0F5",
+    "ink_50":  "#F6F8FB",
+    "parchment_50":  "#FCFAF2",
+    "parchment_100": "#F8F3E6",
+    "parchment_200": "#ECE5D2",
+    "parchment_300": "#DDD3B9",
+    "parchment_400": "#C2B695",
+    "gilt_50":  "#FCEEDA",
+    "gilt_100": "#F8D9A4",
+    "gilt_200": "#F2BD72",
+    "gilt_300": "#EAA046",
+    "gilt_400": "#DC8B2A",
+    "gilt_500": "#A66718",
+    "gilt_600": "#7A4912",
+    "signal_100": "#C9DDEB",
+    "signal_200": "#9BBFD9",
+    "signal_300": "#6FA0C2",
+    "signal_400": "#4A82AA",
+    "signal_500": "#36678C",
+    "signal_600": "#244D6B",
+    "threat_400": "#D44A3E",
+    "threat_300": "#E07065",
+    "threat_100": "#F8DAD5",
+    "safe_400":   "#3F8F6E",
+    "safe_300":   "#66AB8C",
+    "safe_100":   "#D4E8DD",
+}
+# Category colors stay within the brand families — no neon, no inventions.
 CATEGORY_COLORS = {
+    "benign":                AB["safe_400"],
+    "direct_injection":      AB["threat_400"],
+    "jailbreak":             AB["gilt_400"],
+    "system_extraction":     AB["gilt_600"],
+    "encoding_obfuscation":  AB["signal_500"],
+    "persona_replacement":   AB["gilt_300"],
+    "indirect_injection":    AB["threat_300"],
+    "token_smuggling":       AB["signal_600"],
+    "many_shot":             AB["signal_400"],
+    "crescendo":             AB["signal_200"],
+    "context_overflow":      AB["ink_600"],
+    "prompt_leaking":        AB["gilt_500"],
+    "unknown":               AB["ink_400"],
 }
 CATEGORY_LABELS = {
 # ---------------------------------------------------------------------------
 MODEL_ID = "meta-llama/Llama-Prompt-Guard-2-86M"
 LABELS = ["Benign", "Malicious"]
+HF_TOKEN = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
 _classifier = {"tokenizer": None, "model": None, "device": None}
     if _classifier["model"] is None:
         logger.info("Lazy-loading Llama Prompt Guard 2...")
         from transformers import AutoTokenizer, AutoModelForSequenceClassification
+        kwargs = {"token": HF_TOKEN} if HF_TOKEN else {}
+        tok = AutoTokenizer.from_pretrained(MODEL_ID, **kwargs)
+        mdl = AutoModelForSequenceClassification.from_pretrained(MODEL_ID, **kwargs)
         mdl.eval()
         dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")
         mdl.to(dev)
     with torch.no_grad():
         outputs = model(**inputs)
         probs = torch.softmax(outputs.logits, dim=-1)[0].cpu().numpy()
     prob_dict = {LABELS[i]: float(probs[i]) for i in range(len(LABELS))}
     safety = float(probs[0])
     return prob_dict, safety
 # ---------------------------------------------------------------------------
+# Plotly figure — parchment surface, ink axes, restrained palette
 # ---------------------------------------------------------------------------
 def build_tsne_figure(selected_categories=None):
     fig = go.Figure()
         severities = [ALL_SEVERITIES[i] or "benign" for i in indices]
         hover_texts = [
             f"<b>{CATEGORY_LABELS.get(cat, cat)}</b><br>"
+            f"Severity — {sev}<br>"
+            f"Index — {idx}<br>"
             f"<i>{txt}</i>"
             for idx, txt, sev in zip(indices, texts_preview, severities)
         ]
             marker=dict(
                 size=5 if len(indices) > 500 else 7,
                 color=color,
+                opacity=0.78,
+                line=dict(width=0.5, color="rgba(17,32,58,0.20)"),
             ),
             text=hover_texts,
             hoverinfo="text",
             customdata=[str(i) for i in indices],
         ))
     fig.update_layout(
+        template="plotly_white",
+        paper_bgcolor=AB["parchment_100"],
+        plot_bgcolor=AB["parchment_50"],
+        font=dict(family="Geist, Inter, system-ui, sans-serif", color=AB["ink_700"]),
         title=dict(
+            text="<span style='font-family: Instrument Serif, serif; font-size:18px;'>"
+                 "t-SNE — Prompt Security Landscape</span>",
+            font=dict(color=AB["ink_900"]),
             x=0.5,
+            xanchor="center",
         ),
         legend=dict(
+            title=dict(text="Category", font=dict(color=AB["ink_700"], size=11)),
+            bgcolor="rgba(252,250,242,0.88)",
+            bordercolor="rgba(17,32,58,0.12)",
             borderwidth=1,
+            font=dict(color=AB["ink_800"], size=10),
             itemsizing="constant",
         ),
         xaxis=dict(
+            title=dict(text="t-SNE 1", font=dict(color=AB["ink_500"], size=11)),
+            showgrid=True,
+            gridcolor="rgba(17,32,58,0.06)",
+            zeroline=False,
+            color=AB["ink_500"],
         ),
         yaxis=dict(
+            title=dict(text="t-SNE 2", font=dict(color=AB["ink_500"], size=11)),
+            showgrid=True,
+            gridcolor="rgba(17,32,58,0.06)",
+            zeroline=False,
+            color=AB["ink_500"],
         ),
+        margin=dict(l=44, r=44, t=56, b=44),
+        height=620,
         dragmode="pan",
+        hoverlabel=dict(
+            bgcolor=AB["parchment_50"],
+            bordercolor="rgba(17,32,58,0.12)",
+            font=dict(family="Geist, sans-serif", color=AB["ink_900"], size=12),
+        ),
     )
     return fig
     return gr.update(value=[]), build_tsne_figure([])
+def _dataset_meta_block(category, severity, ground_truth):
+    return (
+        f"\n\n<span class='ab-eyebrow'>Dataset metadata</span>\n"
+        f"- Category — **{CATEGORY_LABELS.get(category, category)}**\n"
+        f"- Severity — **{severity}**\n"
+        f"- Ground truth — **{ground_truth}**\n"
+    )
 def on_dropdown_select(choice):
     if not choice:
+        return empty_analysis_html(), "*Select a prompt to begin.*", ""
     try:
         idx = int(choice.split(" | ")[0])
         text = ALL_TEXTS[idx]
         category = ALL_CATEGORIES[idx]
         severity = ALL_SEVERITIES[idx] or "N/A"
         ground_truth = "Malicious" if ALL_LABELS_DS[idx] == 1 else "Benign"
+        prob_dict, _ = analyze_prompt(text)
         pred_label = max(prob_dict, key=prob_dict.get)
         confidence = prob_dict[pred_label]
         result_html = build_result_html(pred_label, confidence, prob_dict, text)
         risk_text = build_risk_assessment(pred_label, confidence, prob_dict)
+        risk_text += _dataset_meta_block(category, severity, ground_truth)
         return result_html, risk_text, text
     except Exception as e:
         logger.error("Error: %s", e)
+        return empty_analysis_html(), f"Error — {e}", ""
 def on_index_input(idx_str):
     try:
         idx = int(idx_str.strip())
         if idx < 0 or idx >= len(ALL_TEXTS):
+            return empty_analysis_html(), f"Invalid index — {idx}", ""
         text = ALL_TEXTS[idx]
         category = ALL_CATEGORIES[idx]
         severity = ALL_SEVERITIES[idx] or "N/A"
         ground_truth = "Malicious" if ALL_LABELS_DS[idx] == 1 else "Benign"
+        prob_dict, _ = analyze_prompt(text)
         pred_label = max(prob_dict, key=prob_dict.get)
         confidence = prob_dict[pred_label]
         result_html = build_result_html(pred_label, confidence, prob_dict, text)
         risk_text = build_risk_assessment(pred_label, confidence, prob_dict)
+        risk_text += _dataset_meta_block(category, severity, ground_truth)
         return result_html, risk_text, text
     except Exception as e:
         logger.error("Error: %s", e)
+        return empty_analysis_html(), f"Error — {e}", ""
 def on_manual_analyze(text):
     if not text or not text.strip():
         return empty_analysis_html(), ""
+    prob_dict, _ = analyze_prompt(text)
     pred_label = max(prob_dict, key=prob_dict.get)
     confidence = prob_dict[pred_label]
     result_html = build_result_html(pred_label, confidence, prob_dict, text)
 # ---------------------------------------------------------------------------
+# UI builders — editorial, parchment surface, ink type, no emoji
 # ---------------------------------------------------------------------------
 def empty_analysis_html():
+    return f"""
+    <div class="ab-card ab-card--quiet">
+        <div class="ab-eyebrow">Idle</div>
+        <p class="ab-prose">
+            Click a point on the chart, pick a prompt from the list,
+            or paste your own below. The classifier runs on demand.
+        </p>
     </div>
     """
 def build_result_html(label, confidence, probs, text):
     """Render the verdict card for one classified prompt.

     Args:
         label: Predicted class name. ``"Benign"`` selects the safe accent
             colour; any other value selects the threat accent.
         confidence: Probability of ``label``; multiplied by 100 for display.
         probs: Mapping from class name to probability. Must contain
             ``"Benign"`` and every entry of ``LABELS``.
         text: The analyzed prompt. Only its first 180 characters are shown.

     Returns:
         An HTML fragment (str) with the verdict, the safety score, one
         probability bar per class and an escaped preview of the prompt.
     """
     # Function-scope import so the block does not depend on a file-level one.
     from html import escape

     is_safe = label == "Benign"
     accent = AB["safe_400"] if is_safe else AB["threat_400"]
     marker = "●"  # geometric primitive instead of emoji
     pct = confidence * 100

     # The safety score is simply P(Benign) on a 0-100 scale.
     safety_score = probs["Benign"] * 100
     safety_color = (
         AB["safe_400"] if safety_score >= 70
         else AB["gilt_400"] if safety_score >= 40
         else AB["threat_400"]
     )

     bar_rows = []
     for lbl in LABELS:
         p = probs[lbl] * 100
         c = AB["safe_400"] if lbl == "Benign" else AB["threat_400"]
         bar_rows.append(f"""
         <div class="ab-bar">
             <div class="ab-bar__row">
                 <span class="ab-bar__label">{escape(lbl)}</span>
                 <span class="ab-bar__value">{p:.1f}%</span>
             </div>
             <div class="ab-bar__track">
                 <div class="ab-bar__fill" style="width:{p}%; background:{c};"></div>
             </div>
         </div>
         """)
     bars_html = "".join(bar_rows)

     # Truncate first, then escape, so an entity is never cut in half.
     # html.escape also converts "&" and quotes; escaping only "<" and ">"
     # let input such as "&lt;b&gt;" render as "<b>", i.e. not the text
     # that was actually analyzed.
     preview = escape(text[:180])
     if len(text) > 180:
         preview += "…"

     return f"""
     <div class="ab-card">
         <div class="ab-result__head">
             <span class="ab-result__marker" style="color:{accent};">{marker}</span>
             <div>
                 <div class="ab-eyebrow">Verdict</div>
                 <div class="ab-result__label" style="color:{accent};">{escape(label)}</div>
                 <div class="ab-caption">Confidence — {pct:.1f}%</div>
             </div>
         </div>
         <div class="ab-divider"></div>
         <div class="ab-eyebrow">Safety score</div>
         <div class="ab-score">
             <div class="ab-score__value" style="color:{safety_color};">{safety_score:.0f}<span>/100</span></div>
             <div class="ab-score__track">
                 <div class="ab-score__fill" style="width:{safety_score}%;"></div>
             </div>
         </div>
         <div class="ab-eyebrow" style="margin-top:18px;">Class probabilities</div>
         <div class="ab-bars">{bars_html}</div>
         <div class="ab-quote">
             <div class="ab-eyebrow">Analyzed prompt</div>
             <blockquote>“{preview}”</blockquote>
         </div>
     </div>
     """
     safety_score = probs["Benign"] * 100
     malicious_score = probs["Malicious"] * 100
     if label == "Benign" and confidence > 0.85:
+        level = "Low"
+        desc = "The request appears **safe**. No injection or jailbreak patterns were detected."
     elif label == "Benign":
+        level = "Moderate"
+        desc = "Likely benign, with moderate confidence. The wording may be ambiguous."
     elif confidence > 0.85:
+        level = "Critical"
+        desc = "**Malicious request detected** with high confidence. Likely injection or jailbreak."
     else:
+        level = "High"
+        desc = "**Malicious request detected.** Possible injection or jailbreak — review recommended."
     return (
+        f"<span class='ab-eyebrow'>Risk level — {level}</span>\n\n{desc}\n\n"
+        f"- Safety score — **{safety_score:.0f}/100**\n"
+        f"- Predicted class — **{label}** ({confidence*100:.1f}%)\n"
+        f"- P(Benign) — {probs['Benign']*100:.1f}% &nbsp;·&nbsp; P(Malicious) — {malicious_score:.1f}%\n"
     )
         pct = count / total * 100
         label = CATEGORY_LABELS.get(cat, cat)
         cats_html += (
+            f'<div class="ab-stats__row">'
+            f'<span class="ab-stats__dot" style="background:{color};"></span>'
+            f'<span class="ab-stats__name">{label}</span>'
+            f'<span class="ab-stats__count">{count:,} <em>({pct:.1f}%)</em></span>'
             f'</div>'
         )
     return f"""
+    <div class="ab-card">
+        <div class="ab-eyebrow">Dataset</div>
+        <h3 class="ab-h3">Composition</h3>
+        <div class="ab-kpi-row">
+            <div class="ab-kpi">
+                <div class="ab-kpi__label">Total</div>
+                <div class="ab-kpi__value">{total:,}</div>
             </div>
+            <div class="ab-kpi">
+                <div class="ab-kpi__label" style="color:{AB['safe_400']};">Benign</div>
+                <div class="ab-kpi__value" style="color:{AB['safe_400']};">{n_benign:,}</div>
             </div>
+            <div class="ab-kpi">
+                <div class="ab-kpi__label" style="color:{AB['threat_400']};">Malicious</div>
+                <div class="ab-kpi__value" style="color:{AB['threat_400']};">{n_malicious:,}</div>
             </div>
         </div>
+        <div class="ab-stats">{cats_html}</div>
     </div>
     """
 # ---------------------------------------------------------------------------
+# JavaScript bridge: Plotly clicks → Gradio hidden input
 # ---------------------------------------------------------------------------
 PLOTLY_CLICK_JS = """
 () => {
             if (data && data.points && data.points.length > 0) {
                 const idx = data.points[0].customdata;
                 if (idx !== undefined && idx !== null) {
+                    const inputEl = document.querySelector('#click-index-input textarea')
+                        || document.querySelector('#click-index-input input');
                     if (inputEl) {
                         const proto = inputEl.tagName === 'TEXTAREA'
                             ? window.HTMLTextAreaElement.prototype
 # ---------------------------------------------------------------------------
+# Aleph Beth — global CSS
+# ---------------------------------------------------------------------------
+ALEPH_BETH_CSS = """
+@import url('https://fonts.googleapis.com/css2?family=Instrument+Serif:ital@0;1&family=Geist:wght@300;400;500;600;700&family=Geist+Mono:wght@400;500;600&family=Frank+Ruhl+Libre:wght@400;500&family=Amiri:wght@400;700&display=swap');
+:root, .gradio-container {
+    --ab-ink-950:#0B1626; --ab-ink-900:#11203A; --ab-ink-800:#1B2F4E;
+    --ab-ink-700:#2A4566; --ab-ink-600:#44607F; --ab-ink-500:#6B829D;
+    --ab-ink-400:#95A6BB; --ab-ink-300:#BCC8D6; --ab-ink-200:#DAE1EA;
+    --ab-ink-100:#ECF0F5; --ab-ink-50:#F6F8FB;
+    --ab-parchment-50:#FCFAF2; --ab-parchment-100:#F8F3E6;
+    --ab-parchment-200:#ECE5D2; --ab-parchment-300:#DDD3B9;
+    --ab-gilt-300:#EAA046; --ab-gilt-400:#DC8B2A; --ab-gilt-500:#A66718; --ab-gilt-600:#7A4912;
+    --ab-signal-300:#6FA0C2; --ab-signal-400:#4A82AA; --ab-signal-500:#36678C;
+    --ab-threat-400:#D44A3E; --ab-safe-400:#3F8F6E;
+    --ab-border: rgba(17,32,58,0.12);
+    --ab-border-subtle: rgba(17,32,58,0.06);
+    --ab-shadow-sm: 0 2px 6px rgba(17,32,58,0.07), 0 1px 2px rgba(17,32,58,0.04);
+    --ab-shadow-md: 0 8px 20px rgba(17,32,58,0.08), 0 2px 4px rgba(17,32,58,0.05);
+    --ab-ease: cubic-bezier(0.16, 1, 0.3, 1);
+    --font-display: 'Instrument Serif', 'Cormorant Garamond', serif;
+    --font-body: 'Geist', 'Inter', system-ui, sans-serif;
+    --font-mono: 'Geist Mono', 'JetBrains Mono', ui-monospace, monospace;
+}
+/* ---------- Base canvas ---------- */
+.gradio-container, body, html {
+    background: var(--ab-parchment-100) !important;
+    color: var(--ab-ink-900) !important;
+    font-family: var(--font-body) !important;
+    font-feature-settings: 'ss01', 'cv01';
+}
+.gradio-container { max-width: 1440px !important; margin: 0 auto !important; padding: 24px 32px !important; }
+/* Remove Gradio gradient backgrounds */
+.gradio-container *::before, .gradio-container *::after { background-image: none !important; }
+/* ---------- Header / brand ---------- */
+.ab-header {
+    padding: 18px 4px 22px;
+    border-bottom: 1px solid var(--ab-border);
+    margin-bottom: 24px;
+    display: flex; align-items: baseline; justify-content: space-between; gap: 24px;
+    flex-wrap: wrap;
+}
+.ab-header__brand {
+    display: flex; align-items: baseline; gap: 14px;
+}
+.ab-header__mark {
+    font-family: var(--font-display);
+    font-size: 32px; line-height: 1;
+    color: var(--ab-gilt-500);
+    letter-spacing: -0.01em;
+}
+.ab-header__mark .heb { font-family: 'Frank Ruhl Libre', serif; }
+.ab-header__mark .ar  { font-family: 'Amiri', serif; }
+.ab-header__title {
+    font-family: var(--font-display);
+    font-size: 38px; line-height: 1.05;
+    color: var(--ab-ink-900);
+    letter-spacing: -0.01em;
+    margin: 0;
+}
+.ab-header__title em { font-style: italic; color: var(--ab-gilt-600); }
+.ab-header__sub {
+    font-family: var(--font-body);
+    color: var(--ab-ink-700);
+    font-size: 14px; line-height: 1.5;
+    max-width: 460px;
+}
+.ab-header__sub a { color: var(--ab-signal-500); text-decoration: underline; text-underline-offset: 3px; }
+/* ---------- Eyebrow / labels / type ---------- */
+.ab-eyebrow {
+    display: inline-block;
+    font-family: var(--font-body);
+    font-size: 11px; font-weight: 500;
+    text-transform: uppercase;
+    letter-spacing: 0.16em;
+    color: var(--ab-gilt-600);
+    margin-bottom: 6px;
+}
+.ab-h3 {
+    font-family: var(--font-display);
+    font-size: 22px; line-height: 1.2;
+    color: var(--ab-ink-900);
+    margin: 0 0 12px 0;
+    letter-spacing: -0.005em;
+}
+.ab-prose {
+    font-family: var(--font-body);
+    font-size: 14px; line-height: 1.55;
+    color: var(--ab-ink-700);
+}
+.ab-caption {
+    font-family: var(--font-body);
+    font-size: 12px;
+    color: var(--ab-ink-500);
+    letter-spacing: 0.02em;
+}
+.ab-divider {
+    height: 1px; background: var(--ab-border);
+    margin: 16px 0;
+}
+/* ---------- Cards ---------- */
+.ab-card {
+    background: var(--ab-parchment-50);
+    border: 1px solid var(--ab-border);
+    border-radius: 12px;
+    padding: 20px 22px;
+    box-shadow: var(--ab-shadow-sm);
+    font-family: var(--font-body);
+}
+.ab-card--quiet {
+    background: transparent;
+    border-style: dashed;
+    box-shadow: none;
+}
+/* ---------- How-to (3-up) ---------- */
+.ab-howto {
+    display: grid;
+    grid-template-columns: repeat(3, 1fr);
+    gap: 12px;
+    margin: 8px 0 20px;
+}
+@media (max-width: 900px) { .ab-howto { grid-template-columns: 1fr; } }
+.ab-howto__step {
+    background: var(--ab-parchment-50);
+    border: 1px solid var(--ab-border);
+    border-radius: 12px;
+    padding: 16px 18px;
+    transition: transform var(--ab-ease) 220ms, box-shadow var(--ab-ease) 220ms;
+}
+.ab-howto__step:hover { transform: translateY(-1px); box-shadow: var(--ab-shadow-md); }
+.ab-howto__num {
+    font-family: var(--font-display);
+    font-size: 28px;
+    color: var(--ab-gilt-500);
+    line-height: 1;
+}
+.ab-howto__title {
+    font-family: var(--font-body);
+    font-size: 14px; font-weight: 600;
+    color: var(--ab-ink-900);
+    margin: 8px 0 6px;
+}
+.ab-howto__body {
+    font-family: var(--font-body);
+    font-size: 13px; line-height: 1.5;
+    color: var(--ab-ink-700);
+}
+/* ---------- Result card ---------- */
+.ab-result__head {
+    display: flex; align-items: center; gap: 14px;
+}
+.ab-result__marker {
+    font-size: 28px; line-height: 1;
+}
+.ab-result__label {
+    font-family: var(--font-display);
+    font-size: 28px;
+    line-height: 1.1;
+    letter-spacing: -0.01em;
+    margin-top: 2px;
+}
+.ab-score {
+    display: flex; align-items: center; gap: 14px;
+    margin: 6px 0 4px;
+}
+.ab-score__value {
+    font-family: var(--font-display);
+    font-size: 44px; line-height: 1;
+    letter-spacing: -0.02em;
+}
+.ab-score__value span { font-size: 16px; color: var(--ab-ink-500); margin-left: 2px; }
+.ab-score__track {
+    flex: 1; height: 8px;
+    background: var(--ab-parchment-200);
+    border-radius: 999px; overflow: hidden;
+}
+.ab-score__fill {
+    height: 100%;
+    background: linear-gradient(90deg, var(--ab-threat-400), var(--ab-gilt-400) 50%, var(--ab-safe-400));
+    border-radius: 999px;
+    transition: width 380ms var(--ab-ease);
+}
+.ab-bars { display: flex; flex-direction: column; gap: 10px; margin-top: 4px; }
+.ab-bar__row {
+    display: flex; justify-content: space-between;
+    font-size: 13px; margin-bottom: 4px;
+}
+.ab-bar__label { color: var(--ab-ink-800); font-weight: 500; }
+.ab-bar__value { color: var(--ab-ink-700); font-family: var(--font-mono); font-size: 12px; }
+.ab-bar__track {
+    height: 8px; background: var(--ab-parchment-200);
+    border-radius: 999px; overflow: hidden;
+}
+.ab-bar__fill { height: 100%; border-radius: 999px; transition: width 380ms var(--ab-ease); }
+.ab-quote {
+    margin-top: 18px;
+    padding: 14px 16px;
+    background: var(--ab-parchment-100);
+    border-left: 2px solid var(--ab-gilt-400);
+    border-radius: 4px;
+}
+.ab-quote blockquote {
+    font-family: var(--font-display);
+    font-style: italic;
+    font-size: 16px;
+    color: var(--ab-ink-800);
+    margin: 6px 0 0; padding: 0;
+    line-height: 1.45;
+}
+/* ---------- Stats ---------- */
+.ab-kpi-row {
+    display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px;
+    margin: 4px 0 16px;
+}
+.ab-kpi {
+    background: var(--ab-parchment-100);
+    border: 1px solid var(--ab-border-subtle);
+    border-radius: 8px;
+    padding: 10px 12px;
+    text-align: center;
+}
+.ab-kpi__label {
+    font-family: var(--font-body);
+    font-size: 11px; text-transform: uppercase; letter-spacing: 0.12em;
+    color: var(--ab-ink-500);
+    margin-bottom: 4px;
+}
+.ab-kpi__value {
+    font-family: var(--font-display);
+    font-size: 26px; line-height: 1;
+    color: var(--ab-ink-900);
+    letter-spacing: -0.01em;
+}
+.ab-stats { display: flex; flex-direction: column; }
+.ab-stats__row {
+    display: flex; align-items: center; gap: 10px;
+    padding: 6px 0;
+    border-bottom: 1px solid var(--ab-border-subtle);
+    font-size: 13px;
+}
+.ab-stats__row:last-child { border-bottom: 0; }
+.ab-stats__dot { width: 8px; height: 8px; border-radius: 999px; flex-shrink: 0; }
+.ab-stats__name { color: var(--ab-ink-800); flex: 1; }
+.ab-stats__count { color: var(--ab-ink-600); font-family: var(--font-mono); font-size: 12px; }
+.ab-stats__count em { color: var(--ab-ink-500); font-style: normal; }
+/* ---------- Gradio component overrides ---------- */
+.gradio-container .block, .gradio-container .form, .gradio-container .panel {
+    background: transparent !important;
+    border: none !important;
+}
+.gradio-container .gr-box, .gradio-container .gr-panel,
+.gradio-container .gr-form, .gradio-container [data-testid="block"] {
+    background: transparent !important;
+    border: none !important;
+    box-shadow: none !important;
+}
+/* Plot wrapper — paper card */
+#tsne-chart {
+    background: var(--ab-parchment-50) !important;
+    border: 1px solid var(--ab-border) !important;
+    border-radius: 12px !important;
+    padding: 8px !important;
+    box-shadow: var(--ab-shadow-sm) !important;
+}
+/* Buttons */
+.gradio-container button {
+    font-family: var(--font-body) !important;
+    font-weight: 500 !important;
+    letter-spacing: 0 !important;
+    border-radius: 8px !important;
+    transition: transform 80ms var(--ab-ease), background-color 220ms var(--ab-ease) !important;
+}
+.gradio-container button:active { transform: scale(0.98) !important; }
+.gradio-container button.primary, .gradio-container button[variant="primary"] {
+    background: var(--ab-ink-900) !important;
+    color: var(--ab-parchment-50) !important;
+    border: 1px solid var(--ab-ink-900) !important;
+}
+.gradio-container button.primary:hover {
+    background: var(--ab-ink-800) !important;
+}
+.gradio-container button.secondary {
+    background: var(--ab-parchment-50) !important;
+    color: var(--ab-ink-900) !important;
+    border: 1px solid var(--ab-border) !important;
+}
+.gradio-container button.secondary:hover {
+    background: var(--ab-parchment-200) !important;
+}
+/* Text inputs / textareas */
+.gradio-container input[type="text"],
+.gradio-container textarea,
+.gradio-container .gr-input,
+.gradio-container .gr-textbox textarea {
+    background: var(--ab-parchment-50) !important;
+    color: var(--ab-ink-900) !important;
+    border: 1px solid var(--ab-border) !important;
+    border-radius: 8px !important;
+    font-family: var(--font-body) !important;
+    font-size: 14px !important;
+    box-shadow: inset 0 1px 2px rgba(17,32,58,0.04);
+}
+.gradio-container input[type="text"]:focus,
+.gradio-container textarea:focus,
+.gradio-container .gr-textbox textarea:focus {
+    outline: none !important;
+    border-color: var(--ab-gilt-400) !important;
+    box-shadow: 0 0 0 3px rgba(220,139,42,0.18) !important;
+}
+/* Labels */
+.gradio-container label, .gradio-container .label-wrap {
+    color: var(--ab-ink-700) !important;
+    font-family: var(--font-body) !important;
+    font-size: 13px !important;
+    font-weight: 500 !important;
+    letter-spacing: 0.01em !important;
+}
+/* Dropdowns */
+.gradio-container .gr-dropdown, .gradio-container [data-testid="dropdown"] select,
+.gradio-container .wrap.svelte-1cl284s {
+    background: var(--ab-parchment-50) !important;
+    border: 1px solid var(--ab-border) !important;
+    border-radius: 8px !important;
+    color: var(--ab-ink-900) !important;
+}
+/* Checkbox group filter */
+.gradio-container .gr-check-radio,
+.gradio-container fieldset[data-testid="checkbox-group"] {
+    background: var(--ab-parchment-50) !important;
+    border: 1px solid var(--ab-border) !important;
+    border-radius: 12px !important;
+    padding: 12px 14px !important;
+}
+.gradio-container fieldset[data-testid="checkbox-group"] label {
+    background: var(--ab-parchment-100) !important;
+    border: 1px solid var(--ab-border-subtle) !important;
+    border-radius: 999px !important;
+    padding: 4px 10px !important;
+    margin: 3px !important;
+    font-size: 12px !important;
+}
+.gradio-container fieldset[data-testid="checkbox-group"] label:hover {
+    background: var(--ab-parchment-200) !important;
+}
+.gradio-container input[type="checkbox"]:checked + * {
+    color: var(--ab-ink-900) !important;
+}
+.gradio-container input[type="checkbox"] {
+    accent-color: var(--ab-gilt-400) !important;
+}
+/* Markdown */
+.gradio-container .markdown, .gradio-container .prose {
+    color: var(--ab-ink-800) !important;
+    font-family: var(--font-body) !important;
+}
+.gradio-container .markdown h1, .gradio-container .markdown h2,
+.gradio-container .prose h1, .gradio-container .prose h2 {
+    font-family: var(--font-display) !important;
+    color: var(--ab-ink-900) !important;
+    font-weight: 400 !important;
+    letter-spacing: -0.01em !important;
+}
+.gradio-container .markdown h3, .gradio-container .prose h3 {
+    font-family: var(--font-body) !important;
+    font-weight: 600 !important;
+    color: var(--ab-ink-900) !important;
+    font-size: 16px !important;
+    margin-bottom: 8px !important;
+}
+.gradio-container .markdown strong { color: var(--ab-ink-900) !important; font-weight: 600 !important; }
+.gradio-container .markdown a { color: var(--ab-signal-500) !important; }
+.gradio-container .markdown hr {
+    border: none !important;
+    border-top: 1px solid var(--ab-border) !important;
+    margin: 18px 0 !important;
+}
+/* Hidden index input (kept invisible) */
+#click-index-input {
+    position: absolute !important;
+    width: 1px !important;
+    height: 1px !important;
+    overflow: hidden !important;
+    opacity: 0 !important;
+    pointer-events: none !important;
+}
+/* Footer */
+.ab-footer {
+    border-top: 1px solid var(--ab-border);
+    margin-top: 36px;
+    padding-top: 18px;
+    text-align: center;
+}
+.ab-footer__line {
+    font-family: var(--font-body);
+    color: var(--ab-ink-500);
+    font-size: 12px;
+    letter-spacing: 0.02em;
+}
+.ab-footer__line a { color: var(--ab-signal-500); }
+.ab-footer__mark {
+    font-family: var(--font-display);
+    color: var(--ab-gilt-500);
+    font-size: 14px;
+    letter-spacing: 0.04em;
+    margin-bottom: 6px;
+}
+.ab-footer__mark .heb { font-family: 'Frank Ruhl Libre', serif; }
+.ab-footer__mark .ar { font-family: 'Amiri', serif; }
+"""
+# ---------------------------------------------------------------------------
+# Header / How-to / Footer markup
 # ---------------------------------------------------------------------------
+HEADER_HTML = """
+<header class="ab-header">
+    <div class="ab-header__brand">
+        <div class="ab-header__mark">
+            <span class="heb">א-ב</span>&nbsp;·&nbsp;<span class="ar">أب</span>
+        </div>
+        <div>
+            <h1 class="ab-header__title">GuardLLM <em>—</em> Prompt Security Visualizer</h1>
+        </div>
+    </div>
+    <p class="ab-header__sub">
+        Editorial inspection of the prompt attack surface. Powered by
+        <a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M" target="_blank">Llama Prompt Guard 2 (86M)</a>
+        on the <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset" target="_blank">neuralchemy</a> corpus.
     </p>
+</header>
 """
 HOW_TO_HTML = """
+<div class="ab-howto">
+    <div class="ab-howto__step">
+        <div class="ab-howto__num">01</div>
+        <div class="ab-eyebrow">Map</div>
+        <div class="ab-howto__title">Explore the landscape</div>
+        <div class="ab-howto__body">
+            Each point is a prompt placed by semantic similarity. Color encodes the attack class.
+            Hover to preview, scroll to zoom, drag to pan.
         </div>
+    </div>
+    <div class="ab-howto__step">
+        <div class="ab-howto__num">02</div>
+        <div class="ab-eyebrow">Inspect</div>
+        <div class="ab-howto__title">Click to analyze</div>
+        <div class="ab-howto__body">
+            Selecting a point runs the classifier and returns a verdict, a safety score,
+            and the full class probability breakdown.
         </div>
+    </div>
+    <div class="ab-howto__step">
+        <div class="ab-howto__num">03</div>
+        <div class="ab-eyebrow">Probe</div>
+        <div class="ab-howto__title">Try your own prompt</div>
+        <div class="ab-howto__body">
+            Paste any text into the custom field below to see whether the model would flag
+            it as injection or jailbreak.
         </div>
     </div>
 </div>
 """
+FOOTER_HTML = """
+<footer class="ab-footer">
+    <div class="ab-footer__mark"><span class="heb">א-ב</span> · ALEPH BETH · <span class="ar">أب</span></div>
+    <div class="ab-footer__line">
+        GuardLLM — Prompt Security Visualizer.
+        Model: <a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M">Llama Prompt Guard 2 (86M)</a>.
+        Dataset: <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset">neuralchemy / Prompt-injection-dataset</a>.
+    </div>
+</footer>
+"""
+# ---------------------------------------------------------------------------
+# Gradio theme (parchment / ink)
+# ---------------------------------------------------------------------------
+ab_theme = gr.themes.Base(
     # Custom Gradio theme built on gr.themes.Base; every colour comes from
     # the AB palette mapping.
     # Primary hue: parchment for the light steps (c50-c300), gilt for the
     # mid steps (c400-c700), ink for the dark steps (c800-c950).
+    primary_hue=gr.themes.Color(
+        c50=AB["parchment_50"], c100=AB["parchment_100"], c200=AB["parchment_200"],
+        c300=AB["parchment_300"], c400=AB["gilt_300"], c500=AB["gilt_400"],
+        c600=AB["gilt_500"], c700=AB["gilt_600"], c800=AB["ink_800"],
+        c900=AB["ink_900"], c950=AB["ink_950"],
+    ),
     # Neutral hue: parchment for c50-c200, then the ink ramp from light to dark.
+    neutral_hue=gr.themes.Color(
+        c50=AB["parchment_50"], c100=AB["parchment_100"], c200=AB["parchment_200"],
+        c300=AB["ink_200"], c400=AB["ink_300"], c500=AB["ink_500"],
+        c600=AB["ink_600"], c700=AB["ink_700"], c800=AB["ink_800"],
+        c900=AB["ink_900"], c950=AB["ink_950"],
+    ),
     # Font stacks: a Google-hosted face first, then local fallbacks.
+    font=[gr.themes.GoogleFont("Geist"), "Inter", "system-ui", "sans-serif"],
+    font_mono=[gr.themes.GoogleFont("Geist Mono"), "JetBrains Mono", "monospace"],
+).set(
     # Component-level overrides. The literal "rgba(17,32,58,0.12)" is the
     # same value the stylesheet declares as --ab-border.
+    body_background_fill=AB["parchment_100"],
+    body_text_color=AB["ink_900"],
+    background_fill_primary=AB["parchment_50"],
+    background_fill_secondary=AB["parchment_100"],
+    border_color_primary="rgba(17,32,58,0.12)",
+    block_background_fill=AB["parchment_50"],
+    block_border_color="rgba(17,32,58,0.12)",
+    block_label_text_color=AB["ink_700"],
+    block_title_text_color=AB["ink_900"],
+    input_background_fill=AB["parchment_50"],
+    input_border_color="rgba(17,32,58,0.12)",
+    input_border_color_focus=AB["gilt_400"],
+    button_primary_background_fill=AB["ink_900"],
+    button_primary_background_fill_hover=AB["ink_800"],
+    button_primary_text_color=AB["parchment_50"],
+    button_secondary_background_fill=AB["parchment_50"],
+    button_secondary_background_fill_hover=AB["parchment_200"],
+    button_secondary_text_color=AB["ink_900"],
+)
+# ---------------------------------------------------------------------------
+# Gradio Interface
+# ---------------------------------------------------------------------------
 with gr.Blocks(
+    title="GuardLLM — Prompt Security Visualizer",
+    theme=ab_theme,
+    css=ALEPH_BETH_CSS,
 ) as demo:
     # Page layout: header, how-to strip, a two-column working area, then
     # the event wiring and the footer.
+    gr.HTML(HEADER_HTML)
     gr.HTML(HOW_TO_HTML)
     # Bridge textbox for chart clicks. PLOTLY_CLICK_JS looks this element up
     # by the id "click-index-input", and the stylesheet collapses that id to
     # an invisible 1px box, so the component is declared visible but hidden
     # from the user.
     # NOTE(review): presumably visible=True is needed so the element exists
     # in the page for the script to find; confirm against the Gradio
     # version in use.
+    click_index = gr.Textbox(value="", visible=True, elem_id="click-index-input")
     with gr.Row():
+        # ---- Left — t-SNE chart + filters ----
         with gr.Column(scale=3):
             with gr.Row():
+                select_all_btn = gr.Button("Select all", size="sm", scale=1)
+                deselect_all_btn = gr.Button("Deselect all", size="sm", scale=1)
             category_filter = gr.CheckboxGroup(
                 choices=UNIQUE_CATEGORIES,
             )
             # The chart carries elem_id "tsne-chart", which the stylesheet
             # styles as a card.
             tsne_plot = gr.Plot(
                 value=build_tsne_figure(),
+                label="t-SNE space",
                 elem_id="tsne-chart",
             )
             gr.Markdown(
+                "<span class='ab-caption'>Click a point to inspect it. "
+                "Hover to preview. Scroll to zoom, drag to pan.</span>"
             )
+        # ---- Right — Analysis + controls + stats ----
         with gr.Column(scale=2):
+            gr.HTML("<div class='ab-eyebrow'>Analysis</div>"
+                    "<h3 class='ab-h3'>Verdict & confidence</h3>")
             # Starts with the idle placeholder; the handlers below replace it.
             result_html = gr.HTML(value=empty_analysis_html())
             risk_md = gr.Markdown(value="")
             # Read-only box; filled only by the chart-click and dropdown handlers.
+            full_prompt = gr.Textbox(
+                label="Full prompt",
+                lines=3,
+                interactive=False,
+                visible=True,
+            )
             gr.Markdown("---")
+            gr.HTML("<div class='ab-eyebrow'>Library</div>"
+                    "<h3 class='ab-h3'>Pick a prompt</h3>")
             prompt_dropdown = gr.Dropdown(
                 choices=DROPDOWN_CHOICES,
+                label="Search the dataset",
                 filterable=True,
                 interactive=True,
             )
+            gr.HTML("<div class='ab-eyebrow' style='margin-top:14px;'>Custom</div>"
+                    "<h3 class='ab-h3'>Analyze your own</h3>")
             manual_input = gr.Textbox(
+                label="Prompt",
+                placeholder="Type or paste a request to evaluate…",
                 lines=2,
             )
+            analyze_btn = gr.Button("Inspect", variant="primary")
             gr.Markdown("---")
             gr.HTML(build_stats_html())
     # ---- Events ----
     # Filter changes redraw the chart; the two buttons update both the
     # checkbox group and the chart.
+    category_filter.change(fn=on_filter_change, inputs=[category_filter], outputs=[tsne_plot])
+    select_all_btn.click(fn=select_all_categories, inputs=[], outputs=[category_filter, tsne_plot])
+    deselect_all_btn.click(fn=deselect_all_categories, inputs=[], outputs=[category_filter, tsne_plot])
     # A change of the hidden bridge textbox or of the dropdown fills all
     # three analysis outputs.
+    click_index.change(fn=on_index_input, inputs=[click_index],
+                       outputs=[result_html, risk_md, full_prompt])
+    prompt_dropdown.change(fn=on_dropdown_select, inputs=[prompt_dropdown],
+                           outputs=[result_html, risk_md, full_prompt])
     # Manual analysis (button click or submit in the textbox) updates only
     # the verdict card and the risk text; full_prompt is not an output here.
+    analyze_btn.click(fn=on_manual_analyze, inputs=[manual_input],
+                      outputs=[result_html, risk_md])
+    manual_input.submit(fn=on_manual_analyze, inputs=[manual_input],
+                        outputs=[result_html, risk_md])
     # No Python callback (fn=None): the load event only runs PLOTLY_CLICK_JS
     # in the browser.
     demo.load(fn=None, inputs=None, outputs=None, js=PLOTLY_CLICK_JS)
+    gr.HTML(FOOTER_HTML)
 logger.info("Gradio app built. Ready to launch.")
 # Start the server only when this file is executed as a script.
 if __name__ == "__main__":
+    demo.launch()