Spaces:

lablab-ai-amd-developer-hackathon
/

PhantomOps

Sleeping

App Files Files Community

PhantomOps / app_hf.py

shanko585

Upload 4 files

1a8bf2d verified about 2 months ago

Raw

History Blame Contribute Delete

12.3 kB

	# app_hf.py
	# PhantomOps — HuggingFace Space Demo
	# Displays real results from AMD MI300X run

	import html
	import json
	import os

	import streamlit as st

	# Page-level settings: browser tab title, wide layout, sidebar open on load.
	page_settings = {
	    "page_title": "PhantomOps Platform",
	    "layout": "wide",
	    "initial_sidebar_state": "expanded",
	}
	st.set_page_config(**page_settings)

	# Global stylesheet injected once as raw HTML. It loads the Inter font,
	# paints the dark gradient background, defines the card/result/metric
	# classes used by the sections below, and hides Streamlit's main menu,
	# footer and header.
	THEME_CSS = """
	<style>
	@import url('https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700;800&display=swap');
	html, body, [class*="css"] { font-family: 'Inter', sans-serif; }

	.stApp {
	background-color: #0b0c10;
	background-image:
	radial-gradient(circle at 15% 50%, rgba(79,70,229,0.12), transparent 25%),
	radial-gradient(circle at 85% 30%, rgba(16,185,129,0.12), transparent 25%);
	color: #e2e8f0;
	}
	@keyframes textShine {
	0% { background-position: 0% 50%; }
	100% { background-position: 200% 50%; }
	}
	.phantom-title {
	font-size: 2.5rem;
	font-weight: 800;
	letter-spacing: -0.025em;
	background: linear-gradient(90deg, #38bdf8, #818cf8, #c084fc, #38bdf8);
	background-size: 200% auto;
	-webkit-background-clip: text;
	-webkit-text-fill-color: transparent;
	animation: textShine 4s linear infinite;
	margin-bottom: 4px;
	}
	.phantom-tagline {
	font-size: 0.95rem;
	color: #94a3b8;
	margin-top: 0;
	margin-bottom: 16px;
	}
	.amd-banner {
	background: linear-gradient(135deg, rgba(237,28,36,0.15), rgba(99,102,241,0.15));
	border: 1px solid rgba(237,28,36,0.3);
	border-radius: 10px;
	padding: 12px 20px;
	margin-bottom: 24px;
	font-size: 0.85rem;
	color: #94a3b8;
	}
	.amd-banner b { color: #f1f5f9; }
	.agent-card {
	background: rgba(30,33,43,0.65);
	border: 1px solid rgba(255,255,255,0.05);
	border-top: 2px solid #6366f1;
	border-radius: 12px;
	padding: 20px;
	margin: 8px 0;
	box-shadow: 0 10px 30px rgba(0,0,0,0.3);
	transition: transform 0.3s, box-shadow 0.3s;
	}
	.agent-card:hover {
	transform: translateY(-3px);
	box-shadow: 0 15px 35px rgba(99,102,241,0.15);
	}
	.agent-card b { color:#f8fafc; font-size:1.05rem; margin-bottom:6px; display:block; }
	.result-fail {
	background: rgba(30,33,43,0.65);
	border-left: 4px solid #f43f5e;
	border-radius: 6px;
	padding: 18px;
	margin: 10px 0;
	font-size: 0.9rem;
	color: #cbd5e1;
	}
	.result-pass {
	background: rgba(30,33,43,0.65);
	border-left: 4px solid #10b981;
	border-radius: 6px;
	padding: 18px;
	margin: 10px 0;
	font-size: 0.9rem;
	color: #cbd5e1;
	}
	.result-patch {
	background: rgba(30,33,43,0.65);
	border-left: 4px solid #38bdf8;
	border-radius: 6px;
	padding: 18px;
	margin: 10px 0;
	font-size: 0.9rem;
	color: #cbd5e1;
	}
	.metric-card {
	background: rgba(30,33,43,0.65);
	border: 1px solid rgba(255,255,255,0.05);
	border-top: 2px solid #c084fc;
	border-radius: 12px;
	padding: 24px;
	text-align: left;
	box-shadow: 0 10px 30px rgba(0,0,0,0.2);
	}
	.metric-number { font-size:2.8rem; font-weight:800; line-height:1.1; margin-bottom:8px; }
	.metric-label { font-size:0.85rem; font-weight:600; color:#94a3b8; text-transform:uppercase; letter-spacing:0.05em; }
	hr { border-color: #27272a; }
	#MainMenu {visibility:hidden;} footer {visibility:hidden;} header {visibility:hidden;}
	</style>
	"""
	st.markdown(THEME_CSS, unsafe_allow_html=True)

	# ── Header ─────────────────────────────────────────────────────
	# Title, tagline and the AMD hardware banner, emitted top to bottom as raw
	# HTML so they pick up the phantom-title / phantom-tagline / amd-banner classes.
	header_snippets = (
	    '<div class="phantom-title">PhantomOps Platform</div>',
	    '<div class="phantom-tagline">Automated Adversarial Testing & Hardening for LLM Agents</div>',
	    '''<div class="amd-banner">
	⚡ <b>Live Results from AMD Instinct MI300X</b> — This demo displays real output generated
	on AMD Developer Cloud hardware using ROCm 7.2 and Qwen 2.5 from HuggingFace Hub.
	The full pipeline runs locally on MI300X for data privacy and inference speed.
	</div>''',
	)
	for snippet in header_snippets:
	    st.markdown(snippet, unsafe_allow_html=True)

	# ── Sidebar ─────────────────────────────────────────────────────
	# Static "about" copy, infrastructure list and version footer. Nothing in
	# the sidebar depends on the results file.
	about_html = """
	<div style="color:#a1a1aa;font-size:0.85rem;line-height:1.6;">
	PhantomOps is the crash test lab for AI agents.<br><br>
	It finds failures before your users do — then fixes them automatically.<br><br>
	<b style="color:#f1f5f9;">Three unique weapons:</b><br>
	🎯 Personalized Chaos<br>
	🔬 Reasoning Autopsy<br>
	🔧 Auto-Patching
	</div>
	"""
	infrastructure_html = """
	<div style="color:#a1a1aa;font-size:0.85rem;">
	🔴 AMD Instinct MI300X<br>
	⚡ ROCm 7.2<br>
	🤗 Qwen 2.5 (HuggingFace Hub)<br>
	🐍 Python + Transformers
	</div>
	"""
	version_html = "<div style='text-align:center;color:#52525b;font-size:0.75rem;'>PhantomOps Core v1.0.0<br>AMD Developer Hackathon 2026</div>"

	# Object notation on st.sidebar places each element in the sidebar, in order.
	side = st.sidebar
	side.markdown("### About PhantomOps")
	side.markdown(about_html, unsafe_allow_html=True)
	side.divider()
	side.markdown("#### Infrastructure")
	side.markdown(infrastructure_html, unsafe_allow_html=True)
	side.divider()
	side.markdown(version_html, unsafe_allow_html=True)

	# ── Load results ────────────────────────────────────────────────
	# Read the results payload that every section below renders.
	#
	# Failure handling: a missing, unreadable, non-UTF-8 or malformed file, or a
	# payload that is not a JSON object, shows an error message and halts the
	# script via st.stop() instead of surfacing a raw traceback.
	RESULTS_FILE = "demo_results.json"

	if not os.path.exists(RESULTS_FILE):
	    st.error("demo_results.json not found. Please upload your AMD results file.")
	    st.stop()

	try:
	    # Explicit encoding: the default depends on the host locale, and the
	    # payload may contain non-ASCII text.
	    with open(RESULTS_FILE, 'r', encoding='utf-8') as f:
	        output = json.load(f)
	except (OSError, ValueError) as exc:
	    # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
	    st.error(f"Could not read {RESULTS_FILE}: {exc}")
	    st.stop()

	if not isinstance(output, dict):
	    st.error(f"{RESULTS_FILE} must contain a JSON object at the top level.")
	    st.stop()

	# `or` also replaces explicit JSON nulls, so later .get()/len() calls are safe.
	fingerprint = output.get('fingerprint') or {}
	autopsies = output.get('autopsies') or []
	patches = output.get('patches') or []
	drift = output.get('drift_report') or {}
	# A record without an 'autopsy' section is counted as not failed rather
	# than raising KeyError.
	failures = [a for a in autopsies if (a.get('autopsy') or {}).get('did_fail', False)]

	# ── Executive Summary ───────────────────────────────────────────
	st.markdown("### Executive Summary")

	# Four tiles, one per column: (headline value, caption, headline colour).
	slots = st.columns(4)
	drift_flag = drift.get('drift_detected')
	failure_tint = "#ef4444" if failures else "#22c55e"
	patch_tint = "#22c55e" if patches else "#f4f4f5"
	if drift_flag:
	    drift_text, drift_tint = "Detected", "#ef4444"
	else:
	    drift_text, drift_tint = "None", "#22c55e"

	tiles = (
	    (len(autopsies), "Simulations Run", "#f4f4f5"),
	    (len(failures), "Failures Isolated", failure_tint),
	    (len(patches), "Patches Synthesized", patch_tint),
	    (drift_text, "Behavioral Drift", drift_tint),
	)
	for slot, (figure, caption, tint) in zip(slots, tiles):
	    slot.markdown(f"""<div class="metric-card">
	<div class="metric-number" style="color:{tint};">{figure}</div>
	<div class="metric-label">● {caption}</div>
	</div>""", unsafe_allow_html=True)

	# ── Agent fingerprint ───────────────────────────────────────────
	# Shows the target agent's domain, detected assumptions and predicted weak
	# points. All three come from the results file and are interpolated into
	# markup rendered with unsafe_allow_html=True, so each value is HTML-escaped
	# first; otherwise a stray "<" or "&" in the text would break or inject markup.
	st.markdown("---")
	st.markdown("### Target Agent Profile")


	def _bullets(entries):
	    """Return *entries* as HTML-escaped '• item' lines joined with <br>."""
	    return "<br>".join(f"• {html.escape(str(entry))}" for entry in entries)


	col1, col2 = st.columns(2)
	with col1:
	    domain = html.escape(str(fingerprint.get('domain', 'N/A')))
	    st.markdown(f"""<div class="agent-card">
	<b>Domain</b>
	<span style="color:#a1a1aa;font-size:0.85rem;">{domain}</span>
	</div>""", unsafe_allow_html=True)
	    # `or []` also covers an explicit JSON null.
	    assumptions = fingerprint.get('assumptions') or []
	    if assumptions:
	        st.markdown(f"""<div class="agent-card">
	<b>Assumptions Detected</b>
	<span style="color:#a1a1aa;font-size:0.85rem;">{_bullets(assumptions)}</span>
	</div>""", unsafe_allow_html=True)
	with col2:
	    weak_points = fingerprint.get('weak_points') or []
	    if weak_points:
	        st.markdown(f"""<div class="agent-card">
	<b>Predicted Weak Points</b>
	<span style="color:#f43f5e;font-size:0.85rem;">{_bullets(weak_points)}</span>
	</div>""", unsafe_allow_html=True)

	# ── Vulnerability report ────────────────────────────────────────
	# One expander per simulated attack vector. The left column shows the
	# adversarial input, the agent's baseline output and (on failure) the
	# autopsy; the right column shows the matching patch and the verified
	# output, or a "passed" note when no patch exists for that scenario type.
	#
	# Agent responses and autopsy text come from the results file and are
	# rendered with unsafe_allow_html=True, so they are HTML-escaped before
	# interpolation. Missing keys fall back to placeholders instead of raising.
	st.markdown("---")
	st.markdown("### Vulnerability Report & Remediation")

	SEVERITY_COLORS = {"CRITICAL": "#ef4444", "HIGH": "#f97316", "MEDIUM": "#eab308", "LOW": "#22c55e"}
	CONFIDENCE_COLORS = {"HIGH": "#22c55e", "MEDIUM": "#eab308", "LOW": "#ef4444"}
	FALLBACK_COLOR = "#a1a1aa"


	def _safe_html(value, limit=None):
	    """Return *value* as HTML-escaped text.

	    When *limit* is given and the text is longer, it is cut to *limit*
	    characters and '...' is appended. Truncation happens before escaping so
	    an entity such as '&amp;' is never cut in half.
	    """
	    text = str(value)
	    if limit is not None and len(text) > limit:
	        text = text[:limit] + "..."
	    return html.escape(text)


	# Index the first patch for each scenario type once, instead of rescanning
	# the whole patch list for every vector.
	patch_by_type = {}
	for candidate in patches:
	    failure_scenario = (candidate.get('original_failure') or {}).get('scenario') or {}
	    patch_by_type.setdefault(failure_scenario.get('scenario_type'), candidate)

	for i, item in enumerate(autopsies):
	    autopsy = item.get('autopsy') or {}
	    scenario = item.get('scenario') or {}
	    failed = bool(autopsy.get('did_fail', False))
	    scenario_type = scenario.get('scenario_type', 'unknown')
	    stype = str(scenario_type).replace('_', ' ').title()
	    severity = str(autopsy.get('severity') or 'unknown').upper()
	    sev_color = SEVERITY_COLORS.get(severity, FALLBACK_COLOR)
	    status = '🔴 Issue Detected' if failed else '🟢 Secure'

	    # Plain "|" here: "\|" in a non-raw string is an invalid escape sequence.
	    with st.expander(
	        f"{status} — Vector {i+1}: {stype} | Severity: {severity}",
	        expanded=(i == 0 and failed)
	    ):
	        left, right = st.columns(2)

	        with left:
	            st.markdown("#### Baseline Behavior")
	            st.markdown("Adversarial Input:")
	            st.code(str(scenario.get('input', '')), language=None)
	            st.markdown("Agent Output:")
	            baseline_html = _safe_html(scenario.get('response', ''), 500)
	            st.markdown(f'<div class="result-fail">{baseline_html}</div>',
	                        unsafe_allow_html=True)
	            if failed:
	                st.markdown("Reasoning Autopsy:")
	                st.markdown(f"""<div class="result-fail">
	<b>Failure Class:</b> {_safe_html(autopsy.get('failure_type','N/A'))}<br>
	<b>Severity:</b> <span style="color:{sev_color}">{_safe_html(severity)}</span><br><br>
	<b>Logic Deterioration:</b><br>{_safe_html(autopsy.get('reasoning_breakdown','N/A'))}<br><br>
	<b>Root Cause:</b><br>{_safe_html(autopsy.get('root_cause','N/A'))}
	</div>""", unsafe_allow_html=True)

	        with right:
	            st.markdown("#### Remediated Behavior")
	            matching = patch_by_type.get(scenario_type)
	            if matching:
	                patch = matching.get('patch') or {}
	                confidence = str(patch.get('confidence') or 'unknown').upper()
	                conf_color = CONFIDENCE_COLORS.get(confidence, FALLBACK_COLOR)
	                directive_html = _safe_html(patch.get('what_changed', 'N/A'))
	                verified_html = _safe_html(matching.get('verified_response', ''), 500)
	                st.markdown("Synthesized Directive:")
	                st.markdown(f'<div class="result-patch">{directive_html}</div>',
	                            unsafe_allow_html=True)
	                st.markdown(f"Verification Confidence: <span style='color:{conf_color};font-weight:600;'>{_safe_html(confidence)}</span>",
	                            unsafe_allow_html=True)
	                st.markdown("Verified Agent Output:")
	                st.markdown(f'<div class="result-pass">{verified_html}</div>',
	                            unsafe_allow_html=True)
	            else:
	                st.markdown('<div class="result-pass">Simulation passed baseline checks. No remediation required.</div>',
	                            unsafe_allow_html=True)

	# ── Drift ───────────────────────────────────────────────────────
	# Summarise the drift report: an error banner, the list of changed behaviours
	# and a recommendation when drift was detected; otherwise a success banner
	# plus the recommendation (or a default monitoring note).
	st.markdown("---")
	st.markdown("### Long-term Stability Analysis")
	if drift.get('drift_detected'):
	    # `or` guards against explicit JSON nulls, which would break .upper()
	    # and the loop below.
	    drift_severity = str(drift.get('drift_severity') or 'unknown').upper()
	    st.error(f"Drift Detected — Severity: {drift_severity}", icon="🚨")
	    for change in drift.get('changed_behaviors') or []:
	        st.write(f"— {change}")
	    st.warning(f"Recommendation: {drift.get('recommendation') or 'N/A'}", icon="⚙️")
	else:
	    # The icon argument must be an emoji (or a Material icon shortcode).
	    # "✓" (U+2713) is a plain text symbol, which Streamlit is expected to
	    # reject, so the emoji "✅" is used instead.
	    st.success("Stable Baseline — No behavioral degradation detected.", icon="✅")
	    st.info(drift.get('recommendation') or 'Continuous monitoring active.', icon="ℹ️")

	# ── Raw JSON ────────────────────────────────────────────────────
	# Collapsible dump of the full loaded payload, for inspection.
	st.markdown("---")
	raw_panel = st.expander("Raw Diagnostic Payload (JSON)")
	raw_panel.json(output)