| |
| |
| |
|
|
import html
import json
import os

import streamlit as st
|
|
# Browser-tab title and overall page chrome for the dashboard.
_PAGE_SETTINGS = {
    "page_title": "PhantomOps Platform",
    "layout": "wide",
    "initial_sidebar_state": "expanded",
}
st.set_page_config(**_PAGE_SETTINGS)
|
|
# Global stylesheet for the dashboard: dark theme, animated title, and the
# card/result classes that the HTML snippets further down refer to by name.
# Kept flush-left so Markdown treats the <style> element as raw HTML.
_PAGE_CSS = """
<style>
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
html, body, [class*="css"] { font-family: 'Inter', sans-serif; }

.stApp {
background-color: #0b0c10;
background-image:
radial-gradient(circle at 15% 50%, rgba(79,70,229,0.12), transparent 25%),
radial-gradient(circle at 85% 30%, rgba(16,185,129,0.12), transparent 25%);
color: #e2e8f0;
}
@keyframes textShine {
0% { background-position: 0% 50%; }
100% { background-position: 200% 50%; }
}
.phantom-title {
font-size: 2.5rem;
font-weight: 800;
letter-spacing: -0.025em;
background: linear-gradient(90deg, #38bdf8, #818cf8, #c084fc, #38bdf8);
background-size: 200% auto;
-webkit-background-clip: text;
-webkit-text-fill-color: transparent;
animation: textShine 4s linear infinite;
margin-bottom: 4px;
}
.phantom-tagline {
font-size: 0.95rem;
color: #94a3b8;
margin-top: 0;
margin-bottom: 16px;
}
.amd-banner {
background: linear-gradient(135deg, rgba(237,28,36,0.15), rgba(99,102,241,0.15));
border: 1px solid rgba(237,28,36,0.3);
border-radius: 10px;
padding: 12px 20px;
margin-bottom: 24px;
font-size: 0.85rem;
color: #94a3b8;
}
.amd-banner b { color: #f1f5f9; }
.agent-card {
background: rgba(30,33,43,0.65);
border: 1px solid rgba(255,255,255,0.05);
border-top: 2px solid #6366f1;
border-radius: 12px;
padding: 20px;
margin: 8px 0;
box-shadow: 0 10px 30px rgba(0,0,0,0.3);
transition: transform 0.3s, box-shadow 0.3s;
}
.agent-card:hover {
transform: translateY(-3px);
box-shadow: 0 15px 35px rgba(99,102,241,0.15);
}
.agent-card b { color:#f8fafc; font-size:1.05rem; margin-bottom:6px; display:block; }
.result-fail {
background: rgba(30,33,43,0.65);
border-left: 4px solid #f43f5e;
border-radius: 6px;
padding: 18px;
margin: 10px 0;
font-size: 0.9rem;
color: #cbd5e1;
}
.result-pass {
background: rgba(30,33,43,0.65);
border-left: 4px solid #10b981;
border-radius: 6px;
padding: 18px;
margin: 10px 0;
font-size: 0.9rem;
color: #cbd5e1;
}
.result-patch {
background: rgba(30,33,43,0.65);
border-left: 4px solid #38bdf8;
border-radius: 6px;
padding: 18px;
margin: 10px 0;
font-size: 0.9rem;
color: #cbd5e1;
}
.metric-card {
background: rgba(30,33,43,0.65);
border: 1px solid rgba(255,255,255,0.05);
border-top: 2px solid #c084fc;
border-radius: 12px;
padding: 24px;
text-align: left;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
}
.metric-number { font-size:2.8rem; font-weight:800; line-height:1.1; margin-bottom:8px; }
.metric-label { font-size:0.85rem; font-weight:600; color:#94a3b8; text-transform:uppercase; letter-spacing:0.05em; }
hr { border-color: #27272a; }
#MainMenu {visibility:hidden;} footer {visibility:hidden;} header {visibility:hidden;}
</style>
"""

st.markdown(_PAGE_CSS, unsafe_allow_html=True)
|
|
| |
# Page header: gradient title, tagline, and the hardware-provenance banner.
# The banner's leading emoji and dash were mis-decoded (mojibake) in the
# source text; they are restored here as real Unicode characters.
st.markdown('<div class="phantom-title">PhantomOps Platform</div>', unsafe_allow_html=True)
st.markdown(
    '<div class="phantom-tagline">Automated Adversarial Testing & Hardening for LLM Agents</div>',
    unsafe_allow_html=True,
)
st.markdown('''<div class="amd-banner">
⚡ <b>Live Results from AMD Instinct MI300X</b> — This demo displays real output generated
on AMD Developer Cloud hardware using ROCm 7.2 and Qwen 2.5 from HuggingFace Hub.
The full pipeline runs locally on MI300X for data privacy and inference speed.
</div>''', unsafe_allow_html=True)
|
|
| |
# Sidebar: static product blurb, infrastructure list, and version footer.
# Emoji and dashes were mis-decoded (mojibake) in the source text and are
# restored here. Each HTML snippet starts directly with its <div> so that
# Markdown treats it as raw HTML rather than an indented code block.
with st.sidebar:
    st.markdown("### About PhantomOps")
    st.markdown(
        '<div style="color:#a1a1aa;font-size:0.85rem;line-height:1.6;">'
        'PhantomOps is the crash test lab for AI agents.<br><br>'
        'It finds failures before your users do — then fixes them automatically.<br><br>'
        '<b style="color:#f1f5f9;">Three unique weapons:</b><br>'
        '🎯 Personalized Chaos<br>'
        '🔬 Reasoning Autopsy<br>'
        '🔧 Auto-Patching'
        '</div>',
        unsafe_allow_html=True,
    )
    st.divider()
    st.markdown("#### Infrastructure")
    st.markdown(
        '<div style="color:#a1a1aa;font-size:0.85rem;">'
        '🔴 AMD Instinct MI300X<br>'
        '⚡ ROCm 7.2<br>'
        '🤗 Qwen 2.5 (HuggingFace Hub)<br>'
        '🐍 Python + Transformers'
        '</div>',
        unsafe_allow_html=True,
    )
    st.divider()
    st.markdown(
        "<div style='text-align:center;color:#52525b;font-size:0.75rem;'>"
        "PhantomOps Core v1.0.0<br>AMD Developer Hackathon 2026</div>",
        unsafe_allow_html=True,
    )
|
|
| |
# Load the pre-computed pipeline results that every section below renders.
RESULTS_FILE = "demo_results.json"

if not os.path.exists(RESULTS_FILE):
    st.error("demo_results.json not found. Please upload your AMD results file.")
    st.stop()

# Read as UTF-8 explicitly (the platform default encoding may differ) and
# surface unreadable or malformed files as an in-app error instead of a
# raw traceback. JSONDecodeError and UnicodeDecodeError are both ValueErrors.
try:
    with open(RESULTS_FILE, "r", encoding="utf-8") as f:
        output = json.load(f)
except (OSError, ValueError) as exc:
    st.error(f"Could not read {RESULTS_FILE}: {exc}")
    st.stop()

# Every lookup below uses dict access, so reject any other top-level JSON type.
if not isinstance(output, dict):
    st.error(f"{RESULTS_FILE} must contain a JSON object at the top level.")
    st.stop()

# `or` fallbacks also cover keys that are present but set to null.
fingerprint = output.get('fingerprint') or {}
autopsies = output.get('autopsies') or []
patches = output.get('patches') or []
drift = output.get('drift_report') or {}

# Scenarios whose autopsy is flagged as failed; records with no autopsy
# section are treated as not failed rather than raising KeyError.
failures = [a for a in autopsies if (a.get('autopsy') or {}).get('did_fail', False)]
|
|
| |
# Executive summary: four headline metric cards laid out side by side.
st.markdown("### Executive Summary")

m1, m2, m3, m4 = st.columns(4)
drift_flagged = bool(drift.get('drift_detected'))

# Each entry is (displayed value, label, CSS colour of the value).
metrics = [
    (len(autopsies), "Simulations Run", "#f4f4f5"),
    (len(failures), "Failures Isolated", "#ef4444" if failures else "#22c55e"),
    (len(patches), "Patches Synthesized", "#22c55e" if patches else "#f4f4f5"),
    (
        "Detected" if drift_flagged else "None",
        "Behavioral Drift",
        "#ef4444" if drift_flagged else "#22c55e",
    ),
]

for col, (val, label, color) in zip((m1, m2, m3, m4), metrics):
    with col:
        # NOTE(review): the source had a garbled (mis-decoded) decorative
        # glyph in front of the label whose original character cannot be
        # recovered; it is dropped here instead of rendering a stray symbol.
        st.markdown(
            '<div class="metric-card">'
            f'<div class="metric-number" style="color:{color};">{val}</div>'
            f'<div class="metric-label">{label}</div>'
            '</div>',
            unsafe_allow_html=True,
        )
|
|
| |
# Target agent profile: domain and detected assumptions on the left,
# predicted weak points on the right.
st.markdown("---")
st.markdown("### Target Agent Profile")


def _bullet_lines(entries):
    """Return entries as HTML-escaped bullet lines joined with <br>."""
    return "<br>".join(f"• {html.escape(str(entry))}" for entry in entries)


col1, col2 = st.columns(2)
with col1:
    # Values come from the results file and are inserted into raw HTML, so
    # they are escaped to keep stray markup from breaking or hijacking the page.
    domain_html = html.escape(str(fingerprint.get('domain', 'N/A')))
    st.markdown(
        '<div class="agent-card">'
        '<b>Domain</b>'
        f'<span style="color:#a1a1aa;font-size:0.85rem;">{domain_html}</span>'
        '</div>',
        unsafe_allow_html=True,
    )
    assumptions = fingerprint.get('assumptions') or []
    if assumptions:
        st.markdown(
            '<div class="agent-card">'
            '<b>Assumptions Detected</b>'
            f'<span style="color:#a1a1aa;font-size:0.85rem;">{_bullet_lines(assumptions)}</span>'
            '</div>',
            unsafe_allow_html=True,
        )
with col2:
    weak_points = fingerprint.get('weak_points') or []
    if weak_points:
        st.markdown(
            '<div class="agent-card">'
            '<b>Predicted Weak Points</b>'
            f'<span style="color:#f43f5e;font-size:0.85rem;">{_bullet_lines(weak_points)}</span>'
            '</div>',
            unsafe_allow_html=True,
        )
|
|
| |
# Vulnerability report: one expander per simulated scenario, showing the
# baseline behaviour (left) next to the remediated behaviour (right).
st.markdown("---")
st.markdown("### Vulnerability Report & Remediation")

_SEVERITY_COLORS = {"CRITICAL": "#ef4444", "HIGH": "#f97316", "MEDIUM": "#eab308", "LOW": "#22c55e"}
_CONFIDENCE_COLORS = {"HIGH": "#22c55e", "MEDIUM": "#eab308", "LOW": "#ef4444"}
_NEUTRAL_COLOR = "#a1a1aa"
_PREVIEW_CHARS = 500


def _safe(value):
    """Return value as text that is safe to embed in raw HTML."""
    return html.escape(str(value))


def _preview(text, limit=_PREVIEW_CHARS):
    """Return text cut to limit characters ('...' appended when cut), HTML-escaped.

    Escaping happens after truncation so an entity is never split in half.
    """
    text = str(text)
    clipped = text[:limit] + ("..." if len(text) > limit else "")
    return html.escape(clipped)


def _find_patch(patch_list, scenario_type):
    """Return the first patch recorded for scenario_type, or None if there is none."""
    if scenario_type is None:
        return None
    for candidate in patch_list:
        origin = (candidate.get('original_failure') or {}).get('scenario') or {}
        if origin.get('scenario_type') == scenario_type:
            return candidate
    return None


for i, item in enumerate(autopsies):
    # Tolerate incomplete records instead of aborting the whole page.
    autopsy = item.get('autopsy') or {}
    scenario = item.get('scenario') or {}
    failed = autopsy.get('did_fail', False)
    scenario_type = scenario.get('scenario_type')
    stype = str(scenario_type if scenario_type is not None else 'unknown').replace('_', ' ').title()
    severity = str(autopsy.get('severity') or 'unknown').upper()
    sev_color = _SEVERITY_COLORS.get(severity, _NEUTRAL_COLOR)

    status = '🔴 Issue Detected' if failed else '🟢 Secure'
    with st.expander(
        f"{status} — Vector {i + 1}: {stype} | Severity: {severity}",
        expanded=(i == 0 and failed),
    ):
        left, right = st.columns(2)

        with left:
            st.markdown("#### Baseline Behavior")
            st.markdown("**Adversarial Input:**")
            st.code(str(scenario.get('input', '')), language=None)
            st.markdown("**Agent Output:**")
            # Agent output is untrusted model text; escape it before it is
            # placed inside raw HTML.
            st.markdown(
                f'<div class="result-fail">{_preview(scenario.get("response", ""))}</div>',
                unsafe_allow_html=True,
            )
            if failed:
                failure_class = _safe(autopsy.get('failure_type', 'N/A'))
                breakdown = _safe(autopsy.get('reasoning_breakdown', 'N/A'))
                root_cause = _safe(autopsy.get('root_cause', 'N/A'))
                st.markdown("**Reasoning Autopsy:**")
                st.markdown(
                    '<div class="result-fail">'
                    f'<b>Failure Class:</b> {failure_class}<br>'
                    f'<b>Severity:</b> <span style="color:{sev_color}">{_safe(severity)}</span><br><br>'
                    f'<b>Logic Deterioration:</b><br>{breakdown}<br><br>'
                    f'<b>Root Cause:</b><br>{root_cause}'
                    '</div>',
                    unsafe_allow_html=True,
                )

        with right:
            st.markdown("#### Remediated Behavior")
            matching = _find_patch(patches, scenario_type)
            if matching:
                patch = matching.get('patch') or {}
                confidence = str(patch.get('confidence') or 'unknown').upper()
                conf_color = _CONFIDENCE_COLORS.get(confidence, _NEUTRAL_COLOR)
                st.markdown("**Synthesized Directive:**")
                st.markdown(
                    f'<div class="result-patch">{_safe(patch.get("what_changed", "N/A"))}</div>',
                    unsafe_allow_html=True,
                )
                st.markdown(
                    "**Verification Confidence:** "
                    f"<span style='color:{conf_color};font-weight:600;'>{_safe(confidence)}</span>",
                    unsafe_allow_html=True,
                )
                st.markdown("**Verified Agent Output:**")
                st.markdown(
                    f'<div class="result-pass">{_preview(matching.get("verified_response", ""))}</div>',
                    unsafe_allow_html=True,
                )
            elif failed:
                # A failure without a patch must not be reported as a pass.
                st.markdown(
                    '<div class="result-fail">No verified patch is available for this failure.</div>',
                    unsafe_allow_html=True,
                )
            else:
                st.markdown(
                    '<div class="result-pass">Simulation passed baseline checks. No remediation required.</div>',
                    unsafe_allow_html=True,
                )
|
|
| |
# Long-term stability: report behavioural drift, or confirm a stable baseline.
# The icons and dashes were mis-decoded (mojibake) in the source text; the
# `icon=` values must be real emoji characters, so they are restored here.
st.markdown("---")
st.markdown("### Long-term Stability Analysis")
if drift.get('drift_detected'):
    # str() guards against a null or non-string severity in the results file.
    drift_severity = str(drift.get('drift_severity') or 'unknown').upper()
    st.error(f"Drift Detected — Severity: {drift_severity}", icon="🚨")
    for change in drift.get('changed_behaviors') or []:
        # The original list marker was garbled beyond recovery; a Markdown
        # bullet is used instead.
        st.markdown(f"- {change}")
    # NOTE(review): the original icon bytes are ambiguous; the standard
    # warning sign is assumed here.
    st.warning(f"Recommendation: {drift.get('recommendation', 'N/A')}", icon="⚠️")
else:
    st.success("Stable Baseline — No behavioral degradation detected.", icon="✅")
    st.info(drift.get('recommendation', 'Continuous monitoring active.'), icon="ℹ️")
|
|
| |
# Collapsible dump of the complete results payload, for debugging.
st.markdown("---")
raw_panel = st.expander("Raw Diagnostic Payload (JSON)")
raw_panel.json(output)
|
|