PhantomOps / app_hf.py
shanko585's picture
Upload 4 files
1a8bf2d verified
Raw
History Blame Contribute Delete
12.3 kB
# app_hf.py
# PhantomOps — HuggingFace Space Demo
# Displays real results from AMD MI300X run
import html
import json
import os

import streamlit as st
# Page-level Streamlit settings: browser-tab title, wide layout, and the
# sidebar expanded on first load.
_page_settings = {
    "page_title": "PhantomOps Platform",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_page_settings)
# Global stylesheet for the dashboard. It defines the dark background, the
# animated title, and the card classes (agent-card, result-fail, result-pass,
# result-patch, metric-card) referenced by the HTML snippets rendered below.
# The last rule hides Streamlit's default menu, footer and header.
_PAGE_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
html, body, [class*="css"] { font-family: 'Inter', sans-serif; }
.stApp {
background-color: #0b0c10;
background-image:
radial-gradient(circle at 15% 50%, rgba(79,70,229,0.12), transparent 25%),
radial-gradient(circle at 85% 30%, rgba(16,185,129,0.12), transparent 25%);
color: #e2e8f0;
}
@keyframes textShine {
0% { background-position: 0% 50%; }
100% { background-position: 200% 50%; }
}
.phantom-title {
font-size: 2.5rem;
font-weight: 800;
letter-spacing: -0.025em;
background: linear-gradient(90deg, #38bdf8, #818cf8, #c084fc, #38bdf8);
background-size: 200% auto;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
animation: textShine 4s linear infinite;
margin-bottom: 4px;
}
.phantom-tagline {
font-size: 0.95rem;
color: #94a3b8;
margin-top: 0;
margin-bottom: 16px;
}
.amd-banner {
background: linear-gradient(135deg, rgba(237,28,36,0.15), rgba(99,102,241,0.15));
border: 1px solid rgba(237,28,36,0.3);
border-radius: 10px;
padding: 12px 20px;
margin-bottom: 24px;
font-size: 0.85rem;
color: #94a3b8;
}
.amd-banner b { color: #f1f5f9; }
.agent-card {
background: rgba(30,33,43,0.65);
border: 1px solid rgba(255,255,255,0.05);
border-top: 2px solid #6366f1;
border-radius: 12px;
padding: 20px;
margin: 8px 0;
box-shadow: 0 10px 30px rgba(0,0,0,0.3);
transition: transform 0.3s, box-shadow 0.3s;
}
.agent-card:hover {
transform: translateY(-3px);
box-shadow: 0 15px 35px rgba(99,102,241,0.15);
}
.agent-card b { color:#f8fafc; font-size:1.05rem; margin-bottom:6px; display:block; }
.result-fail {
background: rgba(30,33,43,0.65);
border-left: 4px solid #f43f5e;
border-radius: 6px;
padding: 18px;
margin: 10px 0;
font-size: 0.9rem;
color: #cbd5e1;
}
.result-pass {
background: rgba(30,33,43,0.65);
border-left: 4px solid #10b981;
border-radius: 6px;
padding: 18px;
margin: 10px 0;
font-size: 0.9rem;
color: #cbd5e1;
}
.result-patch {
background: rgba(30,33,43,0.65);
border-left: 4px solid #38bdf8;
border-radius: 6px;
padding: 18px;
margin: 10px 0;
font-size: 0.9rem;
color: #cbd5e1;
}
.metric-card {
background: rgba(30,33,43,0.65);
border: 1px solid rgba(255,255,255,0.05);
border-top: 2px solid #c084fc;
border-radius: 12px;
padding: 24px;
text-align: left;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
}
.metric-number { font-size:2.8rem; font-weight:800; line-height:1.1; margin-bottom:8px; }
.metric-label { font-size:0.85rem; font-weight:600; color:#94a3b8; text-transform:uppercase; letter-spacing:0.05em; }
hr { border-color: #27272a; }
#MainMenu {visibility:hidden;} footer {visibility:hidden;} header {visibility:hidden;}
</style>
"""

# unsafe_allow_html is required so the <style> element is emitted as markup.
st.markdown(_PAGE_CSS, unsafe_allow_html=True)
# ── Header ─────────────────────────────────────────────────────
# Renders the gradient title, the tagline, and the hardware banner. All three
# are static HTML literals (no runtime data), styled by the phantom-title,
# phantom-tagline and amd-banner classes.
st.markdown('<div class="phantom-title">PhantomOps Platform</div>', unsafe_allow_html=True)
st.markdown(
    '<div class="phantom-tagline">Automated Adversarial Testing & Hardening for LLM Agents</div>',
    unsafe_allow_html=True,
)
# The banner text previously contained mis-encoded characters where the
# lightning-bolt emoji and the em dash belong; they are restored here.
# NOTE(review): the leading glyph is reconstructed as a lightning bolt — confirm.
st.markdown('''<div class="amd-banner">
⚡ <b>Live Results from AMD Instinct MI300X</b> — This demo displays real output generated
on AMD Developer Cloud hardware using ROCm 7.2 and Qwen 2.5 from HuggingFace Hub.
The full pipeline runs locally on MI300X for data privacy and inference speed.
</div>''', unsafe_allow_html=True)
# ── Sidebar ─────────────────────────────────────────────────────
# Static "about" panel: product blurb, infrastructure list and version footer.
# The emoji and the em dash in these literals had been mis-encoded into
# multi-character garbage; they are restored to the intended characters.
# NOTE(review): the ROCm glyph is reconstructed as a lightning bolt — confirm.
with st.sidebar:
    st.markdown("### About PhantomOps")
    st.markdown("""
<div style="color:#a1a1aa;font-size:0.85rem;line-height:1.6;">
PhantomOps is the crash test lab for AI agents.<br><br>
It finds failures before your users do — then fixes them automatically.<br><br>
<b style="color:#f1f5f9;">Three unique weapons:</b><br>
🎯 Personalized Chaos<br>
🔬 Reasoning Autopsy<br>
🔧 Auto-Patching
</div>
""", unsafe_allow_html=True)
    st.divider()
    st.markdown("#### Infrastructure")
    st.markdown("""
<div style="color:#a1a1aa;font-size:0.85rem;">
🔴 AMD Instinct MI300X<br>
⚡ ROCm 7.2<br>
🤗 Qwen 2.5 (HuggingFace Hub)<br>
🐍 Python + Transformers
</div>
""", unsafe_allow_html=True)
    st.divider()
    st.markdown(
        "<div style='text-align:center;color:#52525b;font-size:0.75rem;'>"
        "PhantomOps Core v1.0.0<br>AMD Developer Hackathon 2026</div>",
        unsafe_allow_html=True,
    )
# ── Load results ────────────────────────────────────────────────
# Reads the results payload from disk and splits it into the sections the rest
# of the page renders: fingerprint, autopsies, patches and drift_report.
# Any load problem is shown as an in-page error and halts the script via
# st.stop() instead of surfacing a raw traceback.
RESULTS_FILE = "demo_results.json"
if not os.path.exists(RESULTS_FILE):
    st.error("demo_results.json not found. Please upload your AMD results file.")
    st.stop()

try:
    # Explicit UTF-8: JSON text is UTF-8 by specification, and the platform
    # default encoding is not guaranteed to match.
    with open(RESULTS_FILE, 'r', encoding='utf-8') as f:
        output = json.load(f)
except (OSError, ValueError) as exc:
    # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
    st.error(f"demo_results.json could not be read as JSON: {exc}")
    st.stop()

if not isinstance(output, dict):
    # Every lookup below uses dict access on the top-level payload.
    st.error("demo_results.json must contain a JSON object at the top level.")
    st.stop()

fingerprint = output.get('fingerprint', {})
autopsies = output.get('autopsies', [])
patches = output.get('patches', [])
drift = output.get('drift_report', {})

# Entries whose autopsy marks them as failed; records lacking an 'autopsy'
# section are treated as not failed rather than raising KeyError.
failures = [a for a in autopsies if a.get('autopsy', {}).get('did_fail', False)]
# ── Executive Summary ───────────────────────────────────────────
# Four headline metric cards: simulations run, failures, patches, and whether
# behavioral drift was flagged. Each tuple is (value, label, number colour).
st.markdown("### Executive Summary")
m1, m2, m3, m4 = st.columns(4)
drift_flagged = bool(drift.get('drift_detected'))
metrics = [
    (len(autopsies), "Simulations Run", "#f4f4f5"),
    (len(failures), "Failures Isolated", "#ef4444" if failures else "#22c55e"),
    (len(patches), "Patches Synthesized", "#22c55e" if patches else "#f4f4f5"),
    (
        "Detected" if drift_flagged else "None",
        "Behavioral Drift",
        "#ef4444" if drift_flagged else "#22c55e",
    ),
]
for col, (val, label, color) in zip([m1, m2, m3, m4], metrics):
    with col:
        # Values are counts or fixed literals, so no HTML escaping is needed.
        # The label bullet was previously a mis-encoded character sequence.
        st.markdown(f"""<div class="metric-card">
<div class="metric-number" style="color:{color};">{val}</div>
<div class="metric-label">● {label}</div>
</div>""", unsafe_allow_html=True)
# ── Agent fingerprint ───────────────────────────────────────────
# Two-column profile of the tested agent: domain and detected assumptions on
# the left, predicted weak points on the right. The assumption and weak-point
# cards are only drawn when their list is non-empty.
#
# All values come from the results file and are rendered with
# unsafe_allow_html=True, so each one is passed through html.escape() to keep
# stray markup in the data from being interpreted by the browser.
st.markdown("---")
st.markdown("### Target Agent Profile")
col1, col2 = st.columns(2)
with col1:
    domain_html = html.escape(str(fingerprint.get('domain', 'N/A')))
    st.markdown(f"""<div class="agent-card">
<b>Domain</b>
<span style="color:#a1a1aa;font-size:0.85rem;">{domain_html}</span>
</div>""", unsafe_allow_html=True)
    assumptions = fingerprint.get('assumptions', [])
    if assumptions:
        # Bullet character restored from a mis-encoded sequence.
        assumptions_html = "<br>".join(f"• {html.escape(str(a))}" for a in assumptions)
        st.markdown(f"""<div class="agent-card">
<b>Assumptions Detected</b>
<span style="color:#a1a1aa;font-size:0.85rem;">{assumptions_html}</span>
</div>""", unsafe_allow_html=True)
with col2:
    weak_points = fingerprint.get('weak_points', [])
    if weak_points:
        weak_points_html = "<br>".join(f"• {html.escape(str(w))}" for w in weak_points)
        st.markdown(f"""<div class="agent-card">
<b>Predicted Weak Points</b>
<span style="color:#f43f5e;font-size:0.85rem;">{weak_points_html}</span>
</div>""", unsafe_allow_html=True)
# ── Vulnerability report ────────────────────────────────────────
# One expander per autopsy entry. Left column: the adversarial input, the
# agent's baseline output and, for failures, the reasoning autopsy. Right
# column: the patch matched by scenario type (directive, confidence, verified
# output), or a status note when no patch exists.
def _clip_html(text, limit=500):
    """Return *text* truncated to *limit* characters and HTML-escaped.

    "..." is appended when truncation occurred. Escaping happens after
    slicing so an entity is never cut in half. None is rendered as an
    empty string.
    """
    raw = "" if text is None else str(text)
    suffix = "..." if len(raw) > limit else ""
    return html.escape(raw[:limit]) + suffix


st.markdown("---")
st.markdown("### Vulnerability Report & Remediation")

# Index patches by the scenario type of the failure they address, built once
# instead of scanning the patch list for every autopsy. setdefault keeps the
# FIRST patch per type, matching the previous first-match lookup.
_patch_by_type = {}
for _candidate in patches:
    _ptype = (
        _candidate.get('original_failure', {})
        .get('scenario', {})
        .get('scenario_type')
    )
    if _ptype is not None:
        _patch_by_type.setdefault(_ptype, _candidate)

_SEVERITY_COLORS = {"CRITICAL": "#ef4444", "HIGH": "#f97316", "MEDIUM": "#eab308", "LOW": "#22c55e"}
_CONFIDENCE_COLORS = {"HIGH": "#22c55e", "MEDIUM": "#eab308", "LOW": "#ef4444"}

for i, item in enumerate(autopsies):
    # .get() throughout: an incomplete record degrades to placeholder text
    # instead of aborting the whole page with KeyError.
    autopsy = item.get('autopsy', {})
    scenario = item.get('scenario', {})
    failed = bool(autopsy.get('did_fail', False))
    scenario_type = scenario.get('scenario_type')
    stype = str(scenario_type or 'unknown').replace('_', ' ').title()
    # "or 'unknown'" also covers an explicit null in the JSON.
    severity = str(autopsy.get('severity') or 'unknown').upper()
    sev_color = _SEVERITY_COLORS.get(severity, "#a1a1aa")
    status = '🔴 Issue Detected' if failed else '🟢 Secure'

    # Only the first entry is opened by default, and only when it failed.
    with st.expander(
        f"{status} — Vector {i+1}: {stype} | Severity: {severity}",
        expanded=(i == 0 and failed),
    ):
        left, right = st.columns(2)

        with left:
            st.markdown("#### Baseline Behavior")
            st.markdown("**Adversarial Input:**")
            # st.code renders its argument as literal text, so no escaping here.
            st.code(str(scenario.get('input', '')), language=None)
            st.markdown("**Agent Output:**")
            # Style the baseline output by outcome; it was previously always
            # drawn with the failure style, even for passing scenarios.
            baseline_css = "result-fail" if failed else "result-pass"
            baseline_html = _clip_html(scenario.get('response'))
            st.markdown(
                f'<div class="{baseline_css}">{baseline_html}</div>',
                unsafe_allow_html=True,
            )
            if failed:
                st.markdown("**Reasoning Autopsy:**")
                failure_type_html = html.escape(str(autopsy.get('failure_type', 'N/A')))
                severity_html = html.escape(severity)
                breakdown_html = html.escape(str(autopsy.get('reasoning_breakdown', 'N/A')))
                root_cause_html = html.escape(str(autopsy.get('root_cause', 'N/A')))
                st.markdown(f"""<div class="result-fail">
<b>Failure Class:</b> {failure_type_html}<br>
<b>Severity:</b> <span style="color:{sev_color}">{severity_html}</span><br><br>
<b>Logic Deterioration:</b><br>{breakdown_html}<br><br>
<b>Root Cause:</b><br>{root_cause_html}
</div>""", unsafe_allow_html=True)

        with right:
            st.markdown("#### Remediated Behavior")
            matching = _patch_by_type.get(scenario_type) if scenario_type is not None else None
            if matching:
                patch = matching.get('patch', {})
                confidence = str(patch.get('confidence') or 'unknown').upper()
                conf_color = _CONFIDENCE_COLORS.get(confidence, "#a1a1aa")
                what_changed_html = html.escape(str(patch.get('what_changed', 'N/A')))
                confidence_html = html.escape(confidence)
                verified_html = _clip_html(matching.get('verified_response'))
                st.markdown("**Synthesized Directive:**")
                st.markdown(
                    f'<div class="result-patch">{what_changed_html}</div>',
                    unsafe_allow_html=True,
                )
                st.markdown(
                    f"**Verification Confidence:** "
                    f"<span style='color:{conf_color};font-weight:600;'>{confidence_html}</span>",
                    unsafe_allow_html=True,
                )
                st.markdown("**Verified Agent Output:**")
                st.markdown(
                    f'<div class="result-pass">{verified_html}</div>',
                    unsafe_allow_html=True,
                )
            elif failed:
                # A failure without a patch must not be reported as a pass.
                st.markdown(
                    '<div class="result-fail">Failure detected, but no patch was synthesized for this scenario.</div>',
                    unsafe_allow_html=True,
                )
            else:
                st.markdown(
                    '<div class="result-pass">Simulation passed baseline checks. No remediation required.</div>',
                    unsafe_allow_html=True,
                )
# ── Drift ───────────────────────────────────────────────────────
# Long-term stability section. When the drift report flags drift, show its
# severity, each changed behaviour and the recommendation; otherwise show a
# stable-baseline message plus the recommendation (or a default note).
#
# The icon arguments were previously mis-encoded multi-character strings.
# Streamlit validates `icon` as a single emoji, so they are restored to real
# emoji here. The success icon uses the check-mark-button emoji because the
# plain check mark (U+2713) is not an emoji.
st.markdown("---")
st.markdown("### Long-term Stability Analysis")
if drift.get('drift_detected'):
    # "or 'unknown'" also covers an explicit null in the JSON.
    drift_level = str(drift.get('drift_severity') or 'unknown').upper()
    st.error(f"Drift Detected — Severity: {drift_level}", icon="🚨")
    for change in drift.get('changed_behaviors', []):
        st.write(f"— {change}")
    st.warning(f"Recommendation: {drift.get('recommendation','N/A')}", icon="⚙️")
else:
    st.success("Stable Baseline — No behavioral degradation detected.", icon="✅")
    st.info(drift.get('recommendation', 'Continuous monitoring active.'), icon="ℹ️")
# ── Raw JSON ────────────────────────────────────────────────────
# Collapsible panel exposing the complete loaded payload for inspection.
st.markdown("---")
raw_payload_panel = st.expander("Raw Diagnostic Payload (JSON)")
raw_payload_panel.json(output)