# app_hf.py
# PhantomOps — HuggingFace Space Demo
# Displays real results from AMD MI300X run
#
# NOTE(review): the HTML/CSS that originally lived inside the markdown strings
# was not recoverable from the source this file was rebuilt from. Static text
# is reproduced as found; minimal inline markup is used only where the code
# itself computes a colour or joins list items. Confirm against the intended
# design before shipping.

import html
import json
import os

import streamlit as st

st.set_page_config(
    page_title="PhantomOps Platform",
    layout="wide",
    initial_sidebar_state="expanded",
)

# NOTE(review): presumably carried the page's custom <style> block — contents
# not recoverable, left as the whitespace-only string that was found.
st.markdown(""" """, unsafe_allow_html=True)

# Longest prefix of an agent response shown before it is cut with "...".
PREVIEW_CHARS = 500

# Fallback colour for severity / confidence values outside the known sets.
NEUTRAL_COLOR = "#a1a1aa"
SEVERITY_COLORS = {
    "CRITICAL": "#ef4444",
    "HIGH": "#f97316",
    "MEDIUM": "#eab308",
    "LOW": "#22c55e",
}
CONFIDENCE_COLORS = {
    "HIGH": "#22c55e",
    "MEDIUM": "#eab308",
    "LOW": "#ef4444",
}


def _esc(value):
    """Return ``value`` as text that is safe to embed in HTML-enabled markdown."""
    return html.escape(str(value))


def _preview(text, limit=PREVIEW_CHARS):
    """Return the first ``limit`` characters of ``text``, escaped, plus "..." if cut."""
    text = str(text)
    suffix = "..." if len(text) > limit else ""
    # Truncate first, then escape, so an HTML entity is never split in half.
    return html.escape(text[:limit]) + suffix


def _dig(data, *keys, default=None):
    """Follow ``keys`` through nested dicts; return ``default`` if any level is missing."""
    for key in keys:
        if not isinstance(data, dict) or key not in data:
            return default
        data = data[key]
    return data


def _bullet_list(items):
    """Render ``items`` as escaped "• item" entries separated by HTML line breaks."""
    # NOTE(review): the separator was presumably "<br>" before the markup was
    # stripped (a bare newline would collapse to a space in markdown) — confirm.
    return "<br>".join(f"• {_esc(entry)}" for entry in items)


def _upper_label(value, default="unknown"):
    """Return ``value`` upper-cased as text, using ``default`` when it is empty or None."""
    return str(value or default).upper()


# ── Header ─────────────────────────────────────────────────────
st.markdown("\n\nPhantomOps Platform\n\n", unsafe_allow_html=True)
st.markdown(
    "\n\nAutomated Adversarial Testing & Hardening for LLM Agents\n\n",
    unsafe_allow_html=True,
)
# NOTE(review): empty in the recovered source; presumably a styled divider.
st.markdown("", unsafe_allow_html=True)

# ── Sidebar ─────────────────────────────────────────────────────
with st.sidebar:
    st.markdown("### About PhantomOps")
    st.markdown("""

PhantomOps is the crash test lab for AI agents.

It finds failures before your users do — then fixes them automatically.

Three unique weapons:
🎯 Personalized Chaos
🔬 Reasoning Autopsy
🔧 Auto-Patching

""", unsafe_allow_html=True)
    st.divider()
    st.markdown("#### Infrastructure")
    st.markdown("""

🔴 AMD Instinct MI300X
⚡ ROCm 7.2
🤗 Qwen 2.5 (HuggingFace Hub)
🐍 Python + Transformers

""", unsafe_allow_html=True)
    st.divider()
    st.markdown("""

PhantomOps Core v1.0.0
AMD Developer Hackathon 2026

""", unsafe_allow_html=True)

# ── Load results ────────────────────────────────────────────────
RESULTS_FILE = "demo_results.json"

if not os.path.exists(RESULTS_FILE):
    st.error("demo_results.json not found. Please upload your AMD results file.")
    st.stop()

try:
    # Explicit UTF-8: the payload holds model output and may contain non-ASCII
    # text, which would fail to decode under a non-UTF-8 default locale.
    with open(RESULTS_FILE, "r", encoding="utf-8") as f:
        output = json.load(f)
except (OSError, json.JSONDecodeError) as exc:
    # Report unreadable / malformed files the same way as a missing file
    # instead of surfacing a raw traceback.
    st.error(f"Could not read {RESULTS_FILE}: {exc}")
    st.stop()

if not isinstance(output, dict):
    st.error(f"{RESULTS_FILE} must contain a JSON object at the top level.")
    st.stop()

# ``or`` also covers keys that are present but null in the JSON.
fingerprint = output.get('fingerprint') or {}
autopsies = output.get('autopsies') or []
patches = output.get('patches') or []
drift = output.get('drift_report') or {}
failures = [a for a in autopsies if _dig(a, 'autopsy', 'did_fail', default=False)]

# ── Executive Summary ───────────────────────────────────────────
st.markdown("### Executive Summary")
m1, m2, m3, m4 = st.columns(4)
drift_detected = bool(drift.get('drift_detected'))
metrics = [
    (len(autopsies), "Simulations Run", "#f4f4f5"),
    (len(failures), "Failures Isolated", "#ef4444" if failures else "#22c55e"),
    (len(patches), "Patches Synthesized", "#22c55e" if patches else "#f4f4f5"),
    (
        "Detected" if drift_detected else "None",
        "Behavioral Drift",
        "#ef4444" if drift_detected else "#22c55e",
    ),
]
for col, (val, label, color) in zip([m1, m2, m3, m4], metrics):
    with col:
        # NOTE(review): ``color`` is applied to the status dot; the original
        # placement was lost with the markup — confirm.
        st.markdown(f"""

{_esc(val)}

<span style="color:{color}">●</span> {_esc(label)}

""", unsafe_allow_html=True)

# ── Agent fingerprint ───────────────────────────────────────────
st.markdown("---")
st.markdown("### Target Agent Profile")
col1, col2 = st.columns(2)

with col1:
    domain = _esc(fingerprint.get('domain', 'N/A'))
    st.markdown(f"""

Domain {domain}

""", unsafe_allow_html=True)
    assumptions = fingerprint.get('assumptions') or []
    if assumptions:
        st.markdown(f"""

Assumptions Detected {_bullet_list(assumptions)}

""", unsafe_allow_html=True)

with col2:
    weak_points = fingerprint.get('weak_points') or []
    if weak_points:
        st.markdown(f"""

Predicted Weak Points {_bullet_list(weak_points)}

""", unsafe_allow_html=True)

# ── Vulnerability report ────────────────────────────────────────
st.markdown("---")
st.markdown("### Vulnerability Report & Remediation")

for i, item in enumerate(autopsies):
    # Tolerate partially-populated records rather than aborting the whole page.
    autopsy = _dig(item, 'autopsy') or {}
    scenario = _dig(item, 'scenario') or {}
    failed = bool(autopsy.get('did_fail', False))
    scenario_type = scenario.get('scenario_type')
    stype = str(scenario_type or 'unknown').replace('_', ' ').title()
    severity = _upper_label(autopsy.get('severity'))
    sev_color = SEVERITY_COLORS.get(severity, NEUTRAL_COLOR)
    status = '🔴 Issue Detected' if failed else '🟢 Secure'

    with st.expander(
        f"{status} — Vector {i+1}: {stype} | Severity: {severity}",
        expanded=(i == 0 and failed),
    ):
        left, right = st.columns(2)

        with left:
            st.markdown("#### Baseline Behavior")
            st.markdown("**Adversarial Input:**")
            st.code(str(scenario.get('input', 'N/A')), language=None)
            st.markdown("**Agent Output:**")
            # Agent output is untrusted text rendered with HTML enabled, so it
            # is escaped inside _preview.
            st.markdown(f"""

{_preview(scenario.get('response', 'N/A'))}

""", unsafe_allow_html=True)

            if failed:
                failure_type = _esc(autopsy.get('failure_type', 'N/A'))
                breakdown = _esc(autopsy.get('reasoning_breakdown', 'N/A'))
                root_cause = _esc(autopsy.get('root_cause', 'N/A'))
                st.markdown("**Reasoning Autopsy:**")
                st.markdown(f"""

Failure Class: {failure_type}
Severity: <span style="color:{sev_color}">{_esc(severity)}</span>

Logic Deterioration:
{breakdown}

Root Cause:
{root_cause}

""", unsafe_allow_html=True)

        with right:
            st.markdown("#### Remediated Behavior")
            # Patches are paired with scenarios by scenario type; the first
            # patch of the same type wins. A scenario without a type never
            # matches, so two "missing" values are not treated as equal.
            matching = None
            if scenario_type is not None:
                matching = next(
                    (
                        p for p in patches
                        if _dig(p, 'original_failure', 'scenario', 'scenario_type')
                        == scenario_type
                    ),
                    None,
                )

            if matching:
                patch = matching.get('patch') or {}
                confidence = _upper_label(patch.get('confidence'))
                conf_color = CONFIDENCE_COLORS.get(confidence, NEUTRAL_COLOR)
                vr = matching.get('verified_response', 'N/A')
                what_changed = _esc(patch.get("what_changed", "N/A"))

                st.markdown("**Synthesized Directive:**")
                st.markdown(f"""

{what_changed}

""", unsafe_allow_html=True)
                st.markdown(
                    "**Verification Confidence:** "
                    f'<span style="color:{conf_color}">{_esc(confidence)}</span>',
                    unsafe_allow_html=True,
                )
                st.markdown("**Verified Agent Output:**")
                st.markdown(f"""

{_preview(vr)}

""", unsafe_allow_html=True)
            elif failed:
                # A failure without a patch must not be reported as a pass.
                st.warning("No patch was synthesized for this failure.")
            else:
                st.markdown(
                    "\n\nSimulation passed baseline checks. "
                    "No remediation required.\n\n",
                    unsafe_allow_html=True,
                )

# ── Drift ───────────────────────────────────────────────────────
st.markdown("---")
st.markdown("### Long-term Stability Analysis")

if drift_detected:
    drift_severity = _upper_label(drift.get('drift_severity'))
    st.error(f"Drift Detected — Severity: {drift_severity}", icon="🚨")
    for change in drift.get('changed_behaviors') or []:
        st.write(f"— {change}")
    st.warning(f"Recommendation: {drift.get('recommendation', 'N/A')}", icon="⚙️")
else:
    # Streamlit validates alert icons as emoji; "✓" (U+2713) is a plain
    # dingbat and is rejected, so the emoji check mark is used instead.
    st.success("Stable Baseline — No behavioral degradation detected.", icon="✅")
    st.info(
        drift.get('recommendation') or 'Continuous monitoring active.',
        icon="ℹ️",
    )

# ── Raw JSON ────────────────────────────────────────────────────
st.markdown("---")
with st.expander("Raw Diagnostic Payload (JSON)"):
    st.json(output)