AlephBeth-AI committed on
Commit
b1516cb
·
verified ·
1 Parent(s): 2718a1f

Upload app.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. app.py +105 -92
app.py CHANGED
@@ -45,20 +45,20 @@ CATEGORY_COLORS = {
45
  "unknown": "#64748b",
46
  }
47
 
48
- CATEGORY_LABELS_FR = {
49
- "benign": "Benin",
50
- "direct_injection": "Injection directe",
51
  "jailbreak": "Jailbreak",
52
- "system_extraction": "Extraction systeme",
53
- "encoding_obfuscation": "Obfuscation/Encodage",
54
- "persona_replacement": "Remplacement persona",
55
- "indirect_injection": "Injection indirecte",
56
- "token_smuggling": "Token smuggling",
57
- "many_shot": "Many-shot",
58
  "crescendo": "Crescendo",
59
- "context_overflow": "Overflow contexte",
60
- "prompt_leaking": "Fuite de prompt",
61
- "unknown": "Inconnu",
62
  }
63
 
64
  # ---------------------------------------------------------------------------
@@ -132,8 +132,6 @@ def analyze_prompt(text):
132
  outputs = model(**inputs)
133
  probs = torch.softmax(outputs.logits, dim=-1)[0].cpu().numpy()
134
  pred_idx = int(np.argmax(probs))
135
- pred_label = LABELS[pred_idx]
136
- confidence = float(probs[pred_idx])
137
  prob_dict = {LABELS[i]: float(probs[i]) for i in range(len(LABELS))}
138
  safety = float(probs[0])
139
  return prob_dict, safety
@@ -160,18 +158,18 @@ def build_tsne_figure(selected_categories=None):
160
  ]
161
  severities = [ALL_SEVERITIES[i] or "benign" for i in indices]
162
  hover_texts = [
163
- f"<b>{CATEGORY_LABELS_FR.get(cat, cat)}</b><br>"
164
- f"Severite: {sev}<br>"
165
  f"Index: {idx}<br>"
166
  f"<i>{txt}</i>"
167
  for idx, txt, sev in zip(indices, texts_preview, severities)
168
  ]
169
  color = CATEGORY_COLORS.get(cat, CATEGORY_COLORS["unknown"])
170
- label_fr = CATEGORY_LABELS_FR.get(cat, cat)
171
  fig.add_trace(go.Scatter(
172
  x=x, y=y,
173
  mode="markers",
174
- name=label_fr,
175
  marker=dict(
176
  size=5 if len(indices) > 500 else 7,
177
  color=color,
@@ -187,12 +185,12 @@ def build_tsne_figure(selected_categories=None):
187
  paper_bgcolor="#0f172a",
188
  plot_bgcolor="#1e293b",
189
  title=dict(
190
- text="Espace d'Embedding t-SNE - Paysage de Securite des Prompts",
191
  font=dict(size=16, color="#e2e8f0"),
192
  x=0.5,
193
  ),
194
  legend=dict(
195
- title=dict(text="Categorie", font=dict(color="#94a3b8")),
196
  bgcolor="rgba(15,23,42,0.9)",
197
  bordercolor="#334155",
198
  borderwidth=1,
@@ -222,9 +220,17 @@ def on_filter_change(categories):
222
  return build_tsne_figure(sel)
223
 
224
 
 
 
 
 
 
 
 
 
225
  def on_dropdown_select(choice):
226
  if not choice:
227
- return empty_analysis_html(), "*Selectionnez un prompt.*", ""
228
  try:
229
  idx = int(choice.split(" | ")[0])
230
  text = ALL_TEXTS[idx]
@@ -237,24 +243,24 @@ def on_dropdown_select(choice):
237
  result_html = build_result_html(pred_label, confidence, prob_dict, text)
238
  risk_text = build_risk_assessment(pred_label, confidence, prob_dict)
239
  risk_text += (
240
- f"\n\n---\n**Metadonnees du dataset :**\n"
241
- f"- Categorie : **{CATEGORY_LABELS_FR.get(category, category)}**\n"
242
- f"- Severite : **{severity}**\n"
243
- f"- Verite terrain : **{ground_truth}**\n"
244
  )
245
  return result_html, risk_text, text
246
  except Exception as e:
247
  logger.error("Error: %s", e)
248
- return empty_analysis_html(), f"Erreur : {e}", ""
249
 
250
 
251
  def on_index_input(idx_str):
252
  if not idx_str or not idx_str.strip():
253
- return empty_analysis_html(), "*Cliquez sur un point du graphique.*", ""
254
  try:
255
  idx = int(idx_str.strip())
256
  if idx < 0 or idx >= len(ALL_TEXTS):
257
- return empty_analysis_html(), f"Index invalide : {idx}", ""
258
  text = ALL_TEXTS[idx]
259
  category = ALL_CATEGORIES[idx]
260
  severity = ALL_SEVERITIES[idx] or "N/A"
@@ -265,15 +271,15 @@ def on_index_input(idx_str):
265
  result_html = build_result_html(pred_label, confidence, prob_dict, text)
266
  risk_text = build_risk_assessment(pred_label, confidence, prob_dict)
267
  risk_text += (
268
- f"\n\n---\n**Metadonnees du dataset :**\n"
269
- f"- Categorie : **{CATEGORY_LABELS_FR.get(category, category)}**\n"
270
- f"- Severite : **{severity}**\n"
271
- f"- Verite terrain : **{ground_truth}**\n"
272
  )
273
  return result_html, risk_text, text
274
  except Exception as e:
275
  logger.error("Error: %s", e)
276
- return empty_analysis_html(), f"Erreur : {e}", ""
277
 
278
 
279
  def on_manual_analyze(text):
@@ -293,9 +299,9 @@ def on_manual_analyze(text):
293
  def empty_analysis_html():
294
  return """
295
  <div style="text-align:center; padding:30px; color:#94a3b8;">
296
- <p style="font-size:1em;">Cliquez sur un point du graphique,<br>
297
- selectionnez un prompt dans la liste,<br>
298
- ou entrez un prompt manuellement.</p>
299
  </div>
300
  """
301
 
@@ -333,11 +339,11 @@ def build_result_html(label, confidence, probs, text):
333
  <div style="text-align:center; margin-bottom:14px;">
334
  <div style="font-size:2em;">{emoji}</div>
335
  <div style="font-size:1.2em; font-weight:700; color:{color};">{label}</div>
336
- <div style="color:#94a3b8; font-size:0.85em;">Confiance : {pct:.1f}%</div>
337
  </div>
338
  <div style="background:#1e293b; border-radius:10px; padding:12px; margin-bottom:10px;">
339
  <div style="display:flex; justify-content:space-between; margin-bottom:4px;">
340
- <span style="color:#e2e8f0; font-weight:600;">Score de securite</span>
341
  <span style="color:{safety_color}; font-weight:700; font-size:1.1em;">{safety_score:.0f}/100</span>
342
  </div>
343
  <div style="background:#334155; border-radius:8px; height:12px; overflow:hidden;">
@@ -349,7 +355,7 @@ def build_result_html(label, confidence, probs, text):
349
  {bars_html}
350
  </div>
351
  <div style="background:#1e293b; border-radius:10px; padding:12px;">
352
- <div style="color:#94a3b8; font-size:0.8em; margin-bottom:3px;">Prompt analyse :</div>
353
  <div style="color:#cbd5e1; font-style:italic; word-break:break-word; font-size:0.85em;">"{preview}"</div>
354
  </div>
355
  </div>
@@ -360,18 +366,18 @@ def build_risk_assessment(label, confidence, probs):
360
  safety_score = probs["Benign"] * 100
361
  malicious_score = probs["Malicious"] * 100
362
  if label == "Benign" and confidence > 0.85:
363
- level, desc = "Faible", "Ce prompt semble **sur**. Aucun pattern d'injection ou de jailbreak detecte."
364
  elif label == "Benign":
365
- level, desc = "Modere", "Probablement benin, mais confiance moderee. Formulation potentiellement ambigue."
366
  elif confidence > 0.85:
367
- level, desc = "Critique", "**Prompt malveillant detecte** avec haute confiance. Probable tentative d'injection ou de jailbreak."
368
  else:
369
- level, desc = "Eleve", "**Prompt malveillant detecte.** Possible injection ou jailbreak. Revue recommandee."
370
  return (
371
- f"### Niveau de risque : {level}\n\n{desc}\n\n"
372
- f"**Details :**\n"
373
- f"- Score de securite : **{safety_score:.0f}/100**\n"
374
- f"- Classe predite : **{label}** ({confidence*100:.1f}%)\n"
375
  f"- P(Benign) = {probs['Benign']*100:.1f}% | P(Malicious) = {malicious_score:.1f}%\n"
376
  )
377
 
@@ -388,27 +394,27 @@ def build_stats_html():
388
  count = cat_counts[cat]
389
  color = CATEGORY_COLORS.get(cat, CATEGORY_COLORS["unknown"])
390
  pct = count / total * 100
391
- label_fr = CATEGORY_LABELS_FR.get(cat, cat)
392
  cats_html += (
393
  f'<div style="display:flex; justify-content:space-between; padding:2px 0;">'
394
- f'<span style="color:{color}; font-weight:500; font-size:0.85em;">{label_fr}</span>'
395
  f'<span style="color:#94a3b8; font-size:0.85em;">{count} ({pct:.1f}%)</span>'
396
  f'</div>'
397
  )
398
  return f"""
399
  <div style="background:#0f172a; border-radius:12px; padding:14px; font-family:system-ui,sans-serif;">
400
- <div style="color:#e2e8f0; font-weight:700; margin-bottom:8px;">Statistiques du dataset</div>
401
  <div style="display:flex; gap:10px; margin-bottom:10px;">
402
  <div style="flex:1; background:#1e293b; border-radius:8px; padding:8px; text-align:center;">
403
  <div style="color:#94a3b8; font-size:0.75em;">Total</div>
404
  <div style="color:#e2e8f0; font-weight:700; font-size:1.2em;">{total:,}</div>
405
  </div>
406
  <div style="flex:1; background:#1e293b; border-radius:8px; padding:8px; text-align:center;">
407
- <div style="color:#22c55e; font-size:0.75em;">Benin</div>
408
  <div style="color:#22c55e; font-weight:700; font-size:1.2em;">{n_benign:,}</div>
409
  </div>
410
  <div style="flex:1; background:#1e293b; border-radius:8px; padding:8px; text-align:center;">
411
- <div style="color:#ef4444; font-size:0.75em;">Malveillant</div>
412
  <div style="color:#ef4444; font-weight:700; font-size:1.2em;">{n_malicious:,}</div>
413
  </div>
414
  </div>
@@ -430,7 +436,7 @@ PLOTLY_CLICK_JS = """
430
  setTimeout(setupClickHandler, 500);
431
  return;
432
  }
433
- plotEl.on('plotly_click', function(data) {
434
  if (data && data.points && data.points.length > 0) {
435
  const idx = data.points[0].customdata;
436
  if (idx !== undefined && idx !== null) {
@@ -447,29 +453,13 @@ PLOTLY_CLICK_JS = """
447
  }
448
  }
449
  }
450
- });
 
451
  const observer = new MutationObserver(() => {
452
  const newPlot = document.querySelector('#tsne-chart .js-plotly-plot');
453
  if (newPlot && !newPlot._hasClickHandler) {
454
  newPlot._hasClickHandler = true;
455
- newPlot.on('plotly_click', function(data) {
456
- if (data && data.points && data.points.length > 0) {
457
- const idx = data.points[0].customdata;
458
- if (idx !== undefined && idx !== null) {
459
- const inputEl = document.querySelector('#click-index-input textarea');
460
- if (inputEl) {
461
- const nativeSetter = Object.getOwnPropertyDescriptor(
462
- window.HTMLTextAreaElement.prototype, 'value'
463
- ).set;
464
- nativeSetter.call(inputEl, String(idx));
465
- inputEl.dispatchEvent(new Event('input', { bubbles: true }));
466
- setTimeout(() => {
467
- inputEl.dispatchEvent(new Event('change', { bubbles: true }));
468
- }, 50);
469
- }
470
- }
471
- }
472
- });
473
  }
474
  });
475
  observer.observe(document.querySelector('#tsne-chart') || document.body, {
@@ -488,7 +478,7 @@ TITLE_HTML = """
488
  <div style="text-align:center; padding:10px 0;">
489
  <h1 style="font-size:1.8em; margin:0;">GuardLLM - Prompt Security Visualizer</h1>
490
  <p style="color:#94a3b8; font-size:0.95em; margin-top:4px;">
491
- Espace d'embedding t-SNE interactif &bull;
492
  <a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M" target="_blank" style="color:#60a5fa;">
493
  Llama Prompt Guard 2</a> &bull;
494
  <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset" target="_blank" style="color:#60a5fa;">
@@ -510,50 +500,73 @@ with gr.Blocks(
510
  )
511
 
512
  with gr.Row():
 
513
  with gr.Column(scale=3):
 
 
 
 
514
  category_filter = gr.CheckboxGroup(
515
  choices=UNIQUE_CATEGORIES,
516
  value=UNIQUE_CATEGORIES,
517
- label="Filtrer par categorie",
518
  interactive=True,
519
  )
520
  tsne_plot = gr.Plot(
521
  value=build_tsne_figure(),
522
- label="Espace t-SNE",
523
  elem_id="tsne-chart",
524
  )
525
  gr.Markdown(
526
- "*Cliquez sur un point pour l'analyser. "
527
- "Survolez pour voir le texte. Utilisez la molette pour zoomer.*"
528
  )
529
 
 
530
  with gr.Column(scale=2):
531
- gr.HTML(build_stats_html())
532
- gr.Markdown("### Selectionner un prompt")
 
 
 
 
 
 
533
  prompt_dropdown = gr.Dropdown(
534
  choices=DROPDOWN_CHOICES,
535
- label="Rechercher dans le dataset",
536
  filterable=True,
537
  interactive=True,
538
  )
539
- gr.Markdown("### Ou analyser un prompt libre")
 
540
  manual_input = gr.Textbox(
541
- label="Prompt personnalise",
542
- placeholder="Tapez ou collez un prompt...",
543
  lines=2,
544
  )
545
- analyze_btn = gr.Button("Analyser", variant="primary")
 
546
  gr.Markdown("---")
547
- gr.Markdown("### Resultat de l'analyse")
548
- result_html = gr.HTML(value=empty_analysis_html())
549
- risk_md = gr.Markdown(value="")
550
- full_prompt = gr.Textbox(label="Prompt complet", lines=3, interactive=False, visible=True)
551
 
 
 
 
552
  category_filter.change(
553
  fn=on_filter_change,
554
  inputs=[category_filter],
555
  outputs=[tsne_plot],
556
  )
 
 
 
 
 
 
 
 
 
 
557
  click_index.change(
558
  fn=on_index_input,
559
  inputs=[click_index],
@@ -580,10 +593,10 @@ with gr.Blocks(
580
  """
581
  ---
582
  <div style="text-align:center; color:#64748b; font-size:0.8em;">
583
- <strong>GuardLLM</strong> - Visualiseur de securite des prompts<br>
584
- Modele : <a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M">
585
- Llama Prompt Guard 2 (86M)</a> par Meta |
586
- Dataset : <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset">
587
  neuralchemy/Prompt-injection-dataset</a>
588
  </div>
589
  """
 
45
  "unknown": "#64748b",
46
  }
47
 
48
+ CATEGORY_LABELS = {
49
+ "benign": "Benign",
50
+ "direct_injection": "Direct Injection",
51
  "jailbreak": "Jailbreak",
52
+ "system_extraction": "System Extraction",
53
+ "encoding_obfuscation": "Encoding / Obfuscation",
54
+ "persona_replacement": "Persona Replacement",
55
+ "indirect_injection": "Indirect Injection",
56
+ "token_smuggling": "Token Smuggling",
57
+ "many_shot": "Many-Shot",
58
  "crescendo": "Crescendo",
59
+ "context_overflow": "Context Overflow",
60
+ "prompt_leaking": "Prompt Leaking",
61
+ "unknown": "Unknown",
62
  }
63
 
64
  # ---------------------------------------------------------------------------
 
132
  outputs = model(**inputs)
133
  probs = torch.softmax(outputs.logits, dim=-1)[0].cpu().numpy()
134
  pred_idx = int(np.argmax(probs))
 
 
135
  prob_dict = {LABELS[i]: float(probs[i]) for i in range(len(LABELS))}
136
  safety = float(probs[0])
137
  return prob_dict, safety
 
158
  ]
159
  severities = [ALL_SEVERITIES[i] or "benign" for i in indices]
160
  hover_texts = [
161
+ f"<b>{CATEGORY_LABELS.get(cat, cat)}</b><br>"
162
+ f"Severity: {sev}<br>"
163
  f"Index: {idx}<br>"
164
  f"<i>{txt}</i>"
165
  for idx, txt, sev in zip(indices, texts_preview, severities)
166
  ]
167
  color = CATEGORY_COLORS.get(cat, CATEGORY_COLORS["unknown"])
168
+ label = CATEGORY_LABELS.get(cat, cat)
169
  fig.add_trace(go.Scatter(
170
  x=x, y=y,
171
  mode="markers",
172
+ name=label,
173
  marker=dict(
174
  size=5 if len(indices) > 500 else 7,
175
  color=color,
 
185
  paper_bgcolor="#0f172a",
186
  plot_bgcolor="#1e293b",
187
  title=dict(
188
+ text="t-SNE Embedding Space - Prompt Security Landscape",
189
  font=dict(size=16, color="#e2e8f0"),
190
  x=0.5,
191
  ),
192
  legend=dict(
193
+ title=dict(text="Category", font=dict(color="#94a3b8")),
194
  bgcolor="rgba(15,23,42,0.9)",
195
  bordercolor="#334155",
196
  borderwidth=1,
 
220
  return build_tsne_figure(sel)
221
 
222
 
223
+ def select_all_categories():
224
+ return gr.update(value=UNIQUE_CATEGORIES), build_tsne_figure(UNIQUE_CATEGORIES)
225
+
226
+
227
+ def deselect_all_categories():
228
+ return gr.update(value=[]), build_tsne_figure([])
229
+
230
+
231
  def on_dropdown_select(choice):
232
  if not choice:
233
+ return empty_analysis_html(), "*Select a prompt.*", ""
234
  try:
235
  idx = int(choice.split(" | ")[0])
236
  text = ALL_TEXTS[idx]
 
243
  result_html = build_result_html(pred_label, confidence, prob_dict, text)
244
  risk_text = build_risk_assessment(pred_label, confidence, prob_dict)
245
  risk_text += (
246
+ f"\n\n---\n**Dataset metadata:**\n"
247
+ f"- Category: **{CATEGORY_LABELS.get(category, category)}**\n"
248
+ f"- Severity: **{severity}**\n"
249
+ f"- Ground truth: **{ground_truth}**\n"
250
  )
251
  return result_html, risk_text, text
252
  except Exception as e:
253
  logger.error("Error: %s", e)
254
+ return empty_analysis_html(), f"Error: {e}", ""
255
 
256
 
257
  def on_index_input(idx_str):
258
  if not idx_str or not idx_str.strip():
259
+ return empty_analysis_html(), "*Click a point on the chart.*", ""
260
  try:
261
  idx = int(idx_str.strip())
262
  if idx < 0 or idx >= len(ALL_TEXTS):
263
+ return empty_analysis_html(), f"Invalid index: {idx}", ""
264
  text = ALL_TEXTS[idx]
265
  category = ALL_CATEGORIES[idx]
266
  severity = ALL_SEVERITIES[idx] or "N/A"
 
271
  result_html = build_result_html(pred_label, confidence, prob_dict, text)
272
  risk_text = build_risk_assessment(pred_label, confidence, prob_dict)
273
  risk_text += (
274
+ f"\n\n---\n**Dataset metadata:**\n"
275
+ f"- Category: **{CATEGORY_LABELS.get(category, category)}**\n"
276
+ f"- Severity: **{severity}**\n"
277
+ f"- Ground truth: **{ground_truth}**\n"
278
  )
279
  return result_html, risk_text, text
280
  except Exception as e:
281
  logger.error("Error: %s", e)
282
+ return empty_analysis_html(), f"Error: {e}", ""
283
 
284
 
285
  def on_manual_analyze(text):
 
299
  def empty_analysis_html():
300
  return """
301
  <div style="text-align:center; padding:30px; color:#94a3b8;">
302
+ <p style="font-size:1em;">Click a point on the chart,<br>
303
+ select a prompt from the list,<br>
304
+ or enter a custom prompt below.</p>
305
  </div>
306
  """
307
 
 
339
  <div style="text-align:center; margin-bottom:14px;">
340
  <div style="font-size:2em;">{emoji}</div>
341
  <div style="font-size:1.2em; font-weight:700; color:{color};">{label}</div>
342
+ <div style="color:#94a3b8; font-size:0.85em;">Confidence: {pct:.1f}%</div>
343
  </div>
344
  <div style="background:#1e293b; border-radius:10px; padding:12px; margin-bottom:10px;">
345
  <div style="display:flex; justify-content:space-between; margin-bottom:4px;">
346
+ <span style="color:#e2e8f0; font-weight:600;">Safety Score</span>
347
  <span style="color:{safety_color}; font-weight:700; font-size:1.1em;">{safety_score:.0f}/100</span>
348
  </div>
349
  <div style="background:#334155; border-radius:8px; height:12px; overflow:hidden;">
 
355
  {bars_html}
356
  </div>
357
  <div style="background:#1e293b; border-radius:10px; padding:12px;">
358
+ <div style="color:#94a3b8; font-size:0.8em; margin-bottom:3px;">Analyzed prompt:</div>
359
  <div style="color:#cbd5e1; font-style:italic; word-break:break-word; font-size:0.85em;">"{preview}"</div>
360
  </div>
361
  </div>
 
366
  safety_score = probs["Benign"] * 100
367
  malicious_score = probs["Malicious"] * 100
368
  if label == "Benign" and confidence > 0.85:
369
+ level, desc = "Low", "This prompt appears **safe**. No injection or jailbreak patterns detected."
370
  elif label == "Benign":
371
+ level, desc = "Moderate", "Likely benign, but moderate confidence. Potentially ambiguous wording."
372
  elif confidence > 0.85:
373
+ level, desc = "Critical", "**Malicious prompt detected** with high confidence. Likely injection or jailbreak attempt."
374
  else:
375
+ level, desc = "High", "**Malicious prompt detected.** Possible injection or jailbreak. Review recommended."
376
  return (
377
+ f"### Risk Level: {level}\n\n{desc}\n\n"
378
+ f"**Details:**\n"
379
+ f"- Safety score: **{safety_score:.0f}/100**\n"
380
+ f"- Predicted class: **{label}** ({confidence*100:.1f}%)\n"
381
  f"- P(Benign) = {probs['Benign']*100:.1f}% | P(Malicious) = {malicious_score:.1f}%\n"
382
  )
383
 
 
394
  count = cat_counts[cat]
395
  color = CATEGORY_COLORS.get(cat, CATEGORY_COLORS["unknown"])
396
  pct = count / total * 100
397
+ label = CATEGORY_LABELS.get(cat, cat)
398
  cats_html += (
399
  f'<div style="display:flex; justify-content:space-between; padding:2px 0;">'
400
+ f'<span style="color:{color}; font-weight:500; font-size:0.85em;">{label}</span>'
401
  f'<span style="color:#94a3b8; font-size:0.85em;">{count} ({pct:.1f}%)</span>'
402
  f'</div>'
403
  )
404
  return f"""
405
  <div style="background:#0f172a; border-radius:12px; padding:14px; font-family:system-ui,sans-serif;">
406
+ <div style="color:#e2e8f0; font-weight:700; margin-bottom:8px;">Dataset Statistics</div>
407
  <div style="display:flex; gap:10px; margin-bottom:10px;">
408
  <div style="flex:1; background:#1e293b; border-radius:8px; padding:8px; text-align:center;">
409
  <div style="color:#94a3b8; font-size:0.75em;">Total</div>
410
  <div style="color:#e2e8f0; font-weight:700; font-size:1.2em;">{total:,}</div>
411
  </div>
412
  <div style="flex:1; background:#1e293b; border-radius:8px; padding:8px; text-align:center;">
413
+ <div style="color:#22c55e; font-size:0.75em;">Benign</div>
414
  <div style="color:#22c55e; font-weight:700; font-size:1.2em;">{n_benign:,}</div>
415
  </div>
416
  <div style="flex:1; background:#1e293b; border-radius:8px; padding:8px; text-align:center;">
417
+ <div style="color:#ef4444; font-size:0.75em;">Malicious</div>
418
  <div style="color:#ef4444; font-weight:700; font-size:1.2em;">{n_malicious:,}</div>
419
  </div>
420
  </div>
 
436
  setTimeout(setupClickHandler, 500);
437
  return;
438
  }
439
+ function handleClick(data) {
440
  if (data && data.points && data.points.length > 0) {
441
  const idx = data.points[0].customdata;
442
  if (idx !== undefined && idx !== null) {
 
453
  }
454
  }
455
  }
456
+ }
457
+ plotEl.on('plotly_click', handleClick);
458
  const observer = new MutationObserver(() => {
459
  const newPlot = document.querySelector('#tsne-chart .js-plotly-plot');
460
  if (newPlot && !newPlot._hasClickHandler) {
461
  newPlot._hasClickHandler = true;
462
+ newPlot.on('plotly_click', handleClick);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
463
  }
464
  });
465
  observer.observe(document.querySelector('#tsne-chart') || document.body, {
 
478
  <div style="text-align:center; padding:10px 0;">
479
  <h1 style="font-size:1.8em; margin:0;">GuardLLM - Prompt Security Visualizer</h1>
480
  <p style="color:#94a3b8; font-size:0.95em; margin-top:4px;">
481
+ Interactive t-SNE embedding space &bull;
482
  <a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M" target="_blank" style="color:#60a5fa;">
483
  Llama Prompt Guard 2</a> &bull;
484
  <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset" target="_blank" style="color:#60a5fa;">
 
500
  )
501
 
502
  with gr.Row():
503
+ # ---- Left: t-SNE chart + filters ----
504
  with gr.Column(scale=3):
505
+ with gr.Row():
506
+ select_all_btn = gr.Button("Select All", size="sm", scale=1)
507
+ deselect_all_btn = gr.Button("Deselect All", size="sm", scale=1)
508
+
509
  category_filter = gr.CheckboxGroup(
510
  choices=UNIQUE_CATEGORIES,
511
  value=UNIQUE_CATEGORIES,
512
+ label="Filter by category",
513
  interactive=True,
514
  )
515
  tsne_plot = gr.Plot(
516
  value=build_tsne_figure(),
517
+ label="t-SNE Space",
518
  elem_id="tsne-chart",
519
  )
520
  gr.Markdown(
521
+ "*Click a point to analyze it. "
522
+ "Hover to preview text. Use scroll wheel to zoom.*"
523
  )
524
 
525
+ # ---- Right: Analysis first, then stats (swapped) ----
526
  with gr.Column(scale=2):
527
+ gr.Markdown("### Analysis Result")
528
+ result_html = gr.HTML(value=empty_analysis_html())
529
+ risk_md = gr.Markdown(value="")
530
+ full_prompt = gr.Textbox(label="Full prompt", lines=3, interactive=False, visible=True)
531
+
532
+ gr.Markdown("---")
533
+
534
+ gr.Markdown("### Select a prompt")
535
  prompt_dropdown = gr.Dropdown(
536
  choices=DROPDOWN_CHOICES,
537
+ label="Search dataset",
538
  filterable=True,
539
  interactive=True,
540
  )
541
+
542
+ gr.Markdown("### Or analyze a custom prompt")
543
  manual_input = gr.Textbox(
544
+ label="Custom prompt",
545
+ placeholder="Type or paste a prompt...",
546
  lines=2,
547
  )
548
+ analyze_btn = gr.Button("Analyze", variant="primary")
549
+
550
  gr.Markdown("---")
 
 
 
 
551
 
552
+ gr.HTML(build_stats_html())
553
+
554
+ # ---- Events ----
555
  category_filter.change(
556
  fn=on_filter_change,
557
  inputs=[category_filter],
558
  outputs=[tsne_plot],
559
  )
560
+ select_all_btn.click(
561
+ fn=select_all_categories,
562
+ inputs=[],
563
+ outputs=[category_filter, tsne_plot],
564
+ )
565
+ deselect_all_btn.click(
566
+ fn=deselect_all_categories,
567
+ inputs=[],
568
+ outputs=[category_filter, tsne_plot],
569
+ )
570
  click_index.change(
571
  fn=on_index_input,
572
  inputs=[click_index],
 
593
  """
594
  ---
595
  <div style="text-align:center; color:#64748b; font-size:0.8em;">
596
+ <strong>GuardLLM</strong> - Prompt Security Visualizer<br>
597
+ Model: <a href="https://huggingface.co/meta-llama/Llama-Prompt-Guard-2-86M">
598
+ Llama Prompt Guard 2 (86M)</a> by Meta |
599
+ Dataset: <a href="https://huggingface.co/datasets/neuralchemy/Prompt-injection-dataset">
600
  neuralchemy/Prompt-injection-dataset</a>
601
  </div>
602
  """