| from __future__ import annotations |
| from fastapi import FastAPI, HTTPException, Request, WebSocket, WebSocketDisconnect |
| from fastapi.responses import HTMLResponse |
| from fastapi.middleware.cors import CORSMiddleware |
| from pydantic import BaseModel |
| from typing import Optional |
| from models import Action, ActionType, Observation, StepResult, State |
| from server.devops_environment import DevOpsEnvironment |
| from collections import deque |
| from datetime import datetime |
| import uuid |
| import statistics |
| from generator.incident_factory import IncidentFactory |
| from curriculum import CurriculumEngine |
| from multi_agent import DualAgentSession |
|
|
# Process-wide singletons, created once at import time.
_factory = IncidentFactory()
curriculum_engine = CurriculumEngine()

# Dual-agent sessions; starts empty.
# NOTE(review): key/value types are not visible in this part of the file.
multi_agent_sessions: dict = {}

# Rolling window of per-episode summary records appended by track_episode();
# once 1000 records are stored the oldest one is dropped automatically.
episode_history: deque = deque(maxlen=1000)

# Replay bookkeeping; all three start empty / at zero.
replay_store: dict = {}
replay_counter: int = 0
current_episode_steps: list = []
|
|
# Action types that only gather information (no diagnosis, no remediation).
_INFO_ACTION_TYPES = frozenset(
    {"read_logs", "read_metrics", "read_runbook", "search_logs"}
)


def _summarize_actions(action_history):
    """Return ``(info_ratio, first_diagnose_step)`` for an action history.

    ``info_ratio`` is the fraction of entries whose action type is one of
    ``_INFO_ACTION_TYPES`` (``0.0`` for an empty history).
    ``first_diagnose_step`` is the ``"step"`` value of the first entry whose
    action type is ``"diagnose"``, or ``None`` when there is none.
    """
    entries = action_history or []
    info_count = 0
    diag_step = None

    for entry in entries:
        # Tolerate malformed entries instead of raising KeyError mid-scan.
        action_type = (entry.get("action") or {}).get("action_type")
        if action_type in _INFO_ACTION_TYPES:
            info_count += 1
        if diag_step is None and action_type == "diagnose":
            diag_step = entry.get("step")

    info_ratio = info_count / len(entries) if entries else 0.0
    return info_ratio, diag_step


def track_episode(state_obj: State):
    """Grade a finished episode and append its summary to ``episode_history``.

    The episode is scored with ``graders.grader.grade_episode`` and a record
    with the episode id, task id, seed, step count, resolution flag, final
    score, first-diagnosis step, information-gathering ratio, and a UTC
    timestamp is appended to the module-level ``episode_history`` deque.

    Args:
        state_obj: Final environment state of the episode.

    Returns:
        None. The only effect is the appended record.
    """
    # Function-scope imports: ``timezone`` is not imported at file level, and
    # the grader was already imported lazily here
    # (presumably to avoid an import cycle -- TODO confirm).
    from datetime import timezone
    from graders.grader import grade_episode

    score = grade_episode(
        task_id=state_obj.task_id,
        action_history=state_obj.action_history,
        ground_truth_root_cause=state_obj.ground_truth_root_cause,
        ground_truth_fix=state_obj.ground_truth_fix,
        incident_resolved=state_obj.incident_resolved,
        total_reward=state_obj.total_reward,
    )

    info_ratio, diag_step = _summarize_actions(state_obj.action_history)

    # ``info`` may be missing or None (the dashboard JS also treats it as
    # optional); fall back to the default seed of 42 in that case.
    info = getattr(state_obj, "info", None) or {}
    seed = info.get("seed", 42)

    # datetime.utcnow() is deprecated (3.12+) and naive. Use an aware UTC
    # clock, but keep the original "...Z" wire format.
    timestamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat() + "Z"

    record = {
        "episode_id": state_obj.episode_id or str(uuid.uuid4()),
        "task_id": state_obj.task_id,
        "seed": seed,
        "steps_taken": state_obj.step,
        "incident_resolved": state_obj.incident_resolved,
        "final_score": float(score),
        "steps_to_diagnosis": diag_step,
        "info_gathering_ratio": float(info_ratio),
        "timestamp": timestamp,
    }
    episode_history.append(record)
|
|
|
|
| |
# Optional dependency: the OpenEnv web-interface factory. The flag records
# whether the import succeeded so later code can check it before use.
try:
    from openenv.core.env_server import create_web_interface_app
except ImportError:
    HAS_WEB_INTERFACE = False
else:
    HAS_WEB_INTERFACE = True
|
|
# Built-in task identifiers.
VALID_TASKS = (
    "easy",
    "medium",
    "hard",
    "bonus",
    "security",
    "database",
    "failover",
)

# One environment instance, created at import time.
_env = DevOpsEnvironment()

# ASGI application; title/description/version feed the generated API docs.
app = FastAPI(
    version="1.0.0",
    title="DevOps Incident Response — OpenEnv",
    description="An OpenEnv-compliant RL environment",
)

# CORS is fully open: any origin, any method, any header.
app.add_middleware(
    CORSMiddleware,
    allow_headers=["*"],
    allow_methods=["*"],
    allow_origins=["*"],
)
|
|
|
|
class ResetRequest(BaseModel):
    """Request body for resetting the environment."""

    # Task to load; "easy" when the client does not send one.
    task_id: str = "easy"
    # Optional seed; None when the client does not send one.
    seed: Optional[int] = None
|
|
|
|
class CurriculumRecordRequest(BaseModel):
    """Request body carrying one (task, score) result for the curriculum."""

    # Task the score belongs to (required).
    task_id: str
    # Score achieved on that task (required).
    score: float
|
|
|
|
class MultiAgentResetRequest(BaseModel):
    """Request body for starting a multi-agent session."""

    # Task to load (required).
    task_id: str
    # Seed for the session; defaults to 42.
    seed: int = 42
|
|
|
|
class AgentAStepRequest(BaseModel):
    """Request body for an Agent A step: a single free-text finding."""

    # The finding text (required).
    finding: str
|
|
|
|
@app.get("/about")
async def about():
    """Return static, hard-coded metadata describing the environment.

    The payload is assembled from named sub-sections and returned with the
    keys in this order: name, version, description, tasks, action_types,
    themes, features, training, reward_design, links.
    """
    summary = (
        "OpenEnv-compliant RL environment for production incident "
        "response. AI agents diagnose and remediate software incidents "
        "across 7 task types using 14 actions with dense reward shaping."
    )

    theme_list = [
        "World Modeling: Professional Tasks",
        "Self-Improvement: Curriculum Engine",
        "Multi-Agent Interactions: Dual-Agent Mode",
    ]

    feature_notes = {
        "curriculum_engine": "Adaptive difficulty based on agent performance. Promotes when avg > 0.75, scaffolds when avg < 0.30.",
        "incident_generator": "Procedural incidents from seeds 0-99999. 6 failure modes x 8 services x 3 severities.",
        "dual_agent_mode": "Split observability — Observer sees logs/alerts, Responder sees metrics/deps.",
        "reward_shaping": "Dense rewards with collateral damage penalties (-0.15), blind remediation penalties (-0.10), semantic diagnosis matching.",
    }

    training_setup = {
        "model": "Llama-3.1-8B-Instruct",
        "algorithm": "GRPO (Group Relative Policy Optimization)",
        "framework": "HuggingFace TRL + Unsloth",
        "lora_rank": 32,
        "episodes": 160,
        "adapter_3b": "https://huggingface.co/Arijit-07/aria-devops-llama3b",
        "adapter_8b": "https://huggingface.co/Arijit-07/aria-devops-llama8b",
    }

    reward_gates = {
        "read_logs_correct": 0.15,
        "read_metrics": 0.10,
        "diagnose_full": 0.35,
        "correct_fix": 0.45,
        "alert_oncall": 0.15,
    }
    reward_penalties = {
        "collateral_damage": -0.15,
        "blind_remediation": -0.10,
        "wrong_failover": -0.25,
        "excessive_noop": -0.04,
    }
    reward_design = {
        "type": "dense",
        "range": [0.001, 0.999],
        "gates": reward_gates,
        "penalties": reward_penalties,
    }

    reference_links = {
        "space": "https://arijit-07-devops-incident-response.hf.space",
        "docs": "https://arijit-07-devops-incident-response.hf.space/docs",
        "validate": "https://arijit-07-devops-incident-response.hf.space/validate",
        "github": "https://github.com/Twilight-13/devops-incident-response",
        "model_3b": "https://huggingface.co/Arijit-07/aria-devops-llama3b",
        "model_8b": "https://huggingface.co/Arijit-07/aria-devops-llama8b",
        "blog": "https://huggingface.co/blog/Arijit-07/aria-devops-incident-response",
    }

    return {
        "name": "ARIA — DevOps Incident Response",
        "version": "2.0.0",
        "description": summary,
        "tasks": 8,
        "action_types": 14,
        "themes": theme_list,
        "features": feature_notes,
        "training": training_setup,
        "reward_design": reward_design,
        "links": reference_links,
    }
|
|
|
|
|
|
@app.get("/live", response_class=HTMLResponse)
async def live_dashboard():
    """Serve the self-contained live "NOC" dashboard page.

    The page polls ``GET /state`` once per second and renders service status,
    active alerts, score, and the agent's action log. Its "DEPLOY INCIDENT"
    button posts ``{task_id, seed}`` to ``POST /reset``.

    Changes relative to the previous template (all in the embedded JS):
      * Every API-supplied string (diagnosis text, service names, alert
        fields, task ids) is HTML-escaped before being written to
        ``innerHTML``; previously this was a stored-XSS vector.
      * Reward lines are red for negative rewards and dim for zero
        (the two were swapped).
      * Seed ``0`` is a valid seed: it is no longer replaced by 42 on deploy
        or shown as ``--``.
      * Alert strips use ``color-mix()`` for their tint; the old
        ``var(--x)20`` value was invalid CSS and was dropped by browsers.
      * The episode-end line reports the real step limit instead of ``15``.
      * Missing ``severity`` / ``error_rate`` no longer throw or render NaN.
      * A non-2xx ``/reset`` response is reported in the agent log.

    Returns:
        HTMLResponse: the rendered page.
    """
    # The template is an f-string with no placeholders, so every literal
    # CSS/JS brace below must stay doubled ("{{" / "}}").
    html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>ARIA NOC LIVE</title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Share+Tech+Mono&display=swap" rel="stylesheet">
<style>
:root {{
    --void: #000000;
    --bg: #060914;
    --surface: #0a0f1e;
    --surface2: #0d1628;
    --border: #1a2744;
    --border-bright: #2a4080;
    --blue: #4d9fff;
    --blue-dim: #1a3a6e;
    --cyan: #00d4ff;
    --green: #00ff88;
    --green-dim: #003a1e;
    --yellow: #ffaa00;
    --yellow-dim: #3a2800;
    --red: #ff3355;
    --red-dim: #3a0011;
    --purple: #9d4edd;
    --text: #c8d8f0;
    --text-dim: #4a6080;
    --text-mono: #8ab4d4;
}}

* {{ box-sizing: border-box; margin: 0; padding: 0; }}

body {{
    background-color: var(--bg);
    color: var(--text);
    font-family: 'Inter', sans-serif;
    overflow: hidden;
    height: 100vh;
    display: grid;
    grid-template-rows: 48px 1fr 56px;
    grid-template-columns: 28% 44% 28%;
    grid-template-areas:
        "top top top"
        "left center right"
        "bottom bottom bottom";
}}

.scanlines {{
    position: fixed;
    top: 0; left: 0; width: 100%; height: 100%;
    pointer-events: none;
    z-index: 9999;
    background: repeating-linear-gradient(
        0deg,
        transparent,
        transparent 2px,
        rgba(0,0,0,0.03) 2px,
        rgba(0,0,0,0.03) 4px
    );
}}

.mono {{ font-family: 'Share Tech Mono', monospace; }}
.uppercase {{ text-transform: uppercase; }}

#top-bar {{
    grid-area: top;
    background: var(--void);
    border-bottom: 1px solid var(--border);
    display: flex;
    justify-content: space-between;
    align-items: center;
    padding: 0 16px;
}}

.top-left, .top-center, .top-right {{ display: flex; align-items: center; gap: 12px; }}

.logo {{ font-size: 18px; color: var(--blue); font-weight: bold; }}
.logo-sub {{ font-size: 10px; color: var(--text-dim); }}
.separator {{ width: 1px; height: 24px; background: var(--border); }}

.status-dot {{ width: 8px; height: 8px; border-radius: 50%; }}
.dot-green {{ background: var(--red); animation: livePulse 1.5s infinite; }}
.dot-grey {{ background: var(--text-dim); }}

@keyframes livePulse {{
    0% {{ opacity: 0; }}
    50% {{ opacity: 1; }}
    100% {{ opacity: 0; }}
}}

.control-label {{ font-size: 9px; color: var(--text-dim); }}
.terminal-input {{
    background: var(--surface);
    border: 1px solid var(--border-bright);
    color: var(--blue);
    font-family: 'Share Tech Mono', monospace;
    padding: 4px 8px;
    outline: none;
}}
.btn-deploy {{
    background: var(--blue-dim);
    border: 1px solid var(--blue);
    color: var(--blue);
    font-family: 'Share Tech Mono', monospace;
    font-size: 11px;
    padding: 6px 16px;
    cursor: pointer;
    transition: 0.2s;
}}
.btn-deploy:hover {{ background: var(--blue); color: var(--void); }}

.step-counter {{ font-size: 16px; color: var(--cyan); }}
.score-display-small {{ font-size: 20px; font-weight: bold; }}
.clock {{ font-size: 11px; color: var(--text-dim); }}

.panel {{
    padding: 16px;
    display: flex;
    flex-direction: column;
    gap: 12px;
    overflow: hidden;
}}
#left-panel {{ grid-area: left; border-right: 1px solid var(--border); }}
#center-panel {{ grid-area: center; border-right: 1px solid var(--border); }}
#right-panel {{ grid-area: right; border-color: var(--purple); }}

.panel-header {{
    display: flex; align-items: center; gap: 8px; font-size: 9px; color: var(--text-dim); margin-bottom: 8px;
}}
.pill {{ background: var(--surface2); padding: 2px 6px; border-radius: 10px; color: var(--text); }}

#service-list {{ display: flex; flex-direction: column; gap: 8px; overflow-y: auto; flex: 1; }}
.service-item {{
    height: 52px; padding: 0 12px; display: flex; justify-content: space-between; align-items: center; flex-shrink: 0; transition: border-color 0.3s, background 0.3s;
}}
.svc-name {{ font-size: 12px; color: var(--text); }}
.svc-status {{ font-size: 9px; margin-top: 4px; }}

.svc-stats {{ text-align: right; }}
.svc-stat-line {{ font-size: 11px; }}

@keyframes statusFlash {{
    0% {{ border-color: var(--text); }}
    100% {{ border-color: inherit; }}
}}
@keyframes criticalFlash {{
    0%, 50%, 100% {{ border-color: var(--border); }}
    25%, 75% {{ border-color: var(--red); }}
}}
.flash-critical {{ animation: criticalFlash 0.5s ease-in-out; border-color: var(--red) !important; }}

@keyframes resolveFlash {{
    0%, 50%, 100% {{ border-color: var(--border); }}
    25%, 75% {{ border-color: var(--green); }}
}}
.flash-resolve {{ animation: resolveFlash 2s ease-in-out; border-color: var(--green) !important; }}

@keyframes pulseScore {{
    0% {{ transform: scale(1); }}
    50% {{ transform: scale(1.1); }}
    100% {{ transform: scale(1); }}
}}
.pulse-score {{ animation: pulseScore 2s ease-in-out; }}

@keyframes slideInRight {{
    from {{ transform: translateX(20px); opacity: 0; }}
    to {{ transform: translateX(0); opacity: 1; }}
}}
@keyframes fadeIn {{
    from {{ opacity: 0; }}
    to {{ opacity: 1; }}
}}

.center-top {{ flex: 1; display: flex; flex-direction: column; overflow: hidden; }}
.center-bottom {{ height: 200px; display: flex; flex-direction: column; justify-content: flex-end; }}

#alerts-list {{ display: flex; flex-direction: column; gap: 8px; flex: 1; }}
.alert-strip {{
    height: 36px; display: flex; align-items: center; gap: 8px; padding-right: 12px; animation: slideInRight 0.3s ease-out;
}}
.alert-badge {{
    height: 100%; padding: 0 8px; display: flex; align-items: center; font-size: 9px; font-weight: bold; color: #000;
}}
.alert-text {{ font-size: 11px; color: var(--text); white-space: nowrap; overflow: hidden; text-overflow: ellipsis; }}
.no-alerts {{ text-align: center; color: var(--text-dim); margin-top: 40px; animation: livePulse 3s infinite; }}

.giant-score {{ font-size: 48px; font-weight: bold; text-align: center; margin-bottom: 12px; text-shadow: 0 0 20px currentColor; }}
.progress-container {{ width: 100%; height: 8px; background: var(--surface); margin-bottom: 8px; }}
.progress-fill {{ height: 100%; background: linear-gradient(90deg, var(--blue), var(--green)); transition: width 0.5s ease; width: 0%; }}
.score-stats {{ display: flex; justify-content: space-between; font-size: 10px; color: var(--text-dim); margin-bottom: 16px; }}

.sparkline {{ display: flex; align-items: flex-end; gap: 4px; height: 40px; margin-top: auto; }}
.spark-bar {{ width: 16px; background: var(--green); animation: slideInRight 0.2s ease-out; position: relative; }}
.spark-label {{ position: absolute; bottom: -14px; left: 50%; transform: translateX(-50%); font-size: 8px; color: var(--text-dim); }}

#agent-log {{
    flex: 1; overflow-y: auto; display: flex; flex-direction: column; gap: 4px;
}}
.log-entry {{ animation: fadeIn 0.2s ease-out; font-size: 11px; line-height: 1.4; }}
.log-time {{ color: var(--text-dim); margin-right: 8px; }}
.log-action {{ color: var(--purple); }}
.log-reward {{ padding-left: 48px; }}
.log-evidence {{ color: var(--text-dim); font-style: italic; padding-left: 48px; }}
.log-diagnose {{ color: var(--yellow); }}
.log-fix {{ color: var(--cyan); }}
.log-episode-start {{ color: var(--cyan); text-align: center; margin: 8px 0; }}
.log-episode-end-ok {{ color: var(--green); text-align: center; margin: 8px 0; }}
.log-episode-end-fail {{ color: var(--red); text-align: center; margin: 8px 0; }}

#bottom-bar {{
    grid-area: bottom; background: var(--void); border-top: 1px solid var(--border); display: flex; justify-content: space-between; align-items: center; padding: 0 16px;
}}
.ws-status {{ display: flex; align-items: center; gap: 8px; font-size: 11px; }}
.tip-text {{ font-size: 11px; color: var(--text-dim); font-style: italic; transition: opacity 0.5s; }}
.footer-right {{ font-size: 10px; color: var(--text-dim); }}

::-webkit-scrollbar {{ width: 4px; }}
::-webkit-scrollbar-track {{ background: transparent; }}
::-webkit-scrollbar-thumb {{ background: var(--border-bright); }}
</style>
</head>
<body>
<div class="scanlines"></div>

<div id="top-bar">
    <div class="top-left">
        <div class="logo mono">▣ ARIA</div>
        <div class="logo-sub uppercase">Incident Response System</div>
        <div class="separator"></div>
        <div class="status-dot dot-grey" id="live-dot"></div>
        <div class="logo-sub mono" id="live-text" style="color: var(--text)">OFFLINE</div>
    </div>

    <div class="top-center">
        <div class="control-label uppercase">Active Scenario</div>
        <select class="terminal-input" id="task-select">
            <option value="easy">EASY</option>
            <option value="medium">MEDIUM</option>
            <option value="hard">HARD</option>
            <option value="bonus">BONUS</option>
            <option value="security">SECURITY</option>
            <option value="database">DATABASE</option>
            <option value="failover">FAILOVER</option>
            <option value="generated">GENERATED</option>
        </select>
        <div class="control-label uppercase">Seed:</div>
        <input type="number" class="terminal-input" id="seed-input" value="42" style="width: 70px;">
        <button class="btn-deploy" onclick="deployIncident()">▶ DEPLOY INCIDENT</button>
    </div>

    <div class="top-right">
        <div class="step-counter mono" id="top-step">00 / 15</div>
        <div class="separator"></div>
        <div class="score-display-small mono" id="top-score">0.000</div>
        <div class="separator"></div>
        <div class="clock mono" id="clock">00:00:00</div>
    </div>
</div>

<div id="left-panel" class="panel">
    <div class="panel-header uppercase">
        ◈ Infrastructure Status <span class="pill mono" id="svc-count">0</span>
    </div>
    <div id="service-list"></div>
</div>

<div id="center-panel" class="panel">
    <div class="center-top">
        <div class="panel-header uppercase">
            ◈ Active Alerts <span class="pill mono" id="alert-count" style="background:var(--surface2)">0</span>
        </div>
        <div id="alerts-list">
            <div class="no-alerts mono">◎ ALL SYSTEMS NOMINAL</div>
        </div>
    </div>

    <div class="center-bottom">
        <div class="panel-header uppercase">◈ Episode Metrics</div>
        <div class="giant-score mono" id="giant-score" style="color: var(--text-dim)">0.000</div>
        <div class="progress-container"><div class="progress-fill" id="score-bar"></div></div>
        <div class="score-stats mono uppercase">
            <span id="stat-step">STEP 0/15</span>
            <span id="stat-task">TASK: --</span>
            <span id="stat-seed">SEED: --</span>
        </div>
        <div class="sparkline" id="sparkline"></div>
    </div>
</div>

<div id="right-panel" class="panel">
    <div class="panel-header uppercase" style="color: var(--purple)">◈ Agent Reasoning</div>
    <div id="agent-log" class="mono"></div>
</div>

<div id="bottom-bar">
    <div class="ws-status mono">
        <div class="status-dot dot-grey" id="btm-dot"></div>
        <span id="btm-text" style="color: var(--text-dim)">○ API DISCONNECTED</span>
    </div>
    <div class="tip-text" id="tip-text">ⓘ Agents must read_logs before acting — blind remediation triggers -0.10 penalty</div>
    <div class="footer-right mono">ARIA v2.0 · OpenEnv Compliant 🤗 Arijit-07</div>
</div>

<script>
const TIPS = [
    "ⓘ Agents must read_logs before acting — blind remediation triggers -0.10 penalty",
    "ⓘ Collateral damage: restarting healthy services costs -0.15",
    "ⓘ 7 tasks · 14 actions · Dense reward shaping · Semantic diagnosis matching",
    "ⓘ Curriculum Engine adapts difficulty to agent performance",
    "ⓘ Dual-Agent Mode: Observer sees logs, Responder sees metrics",
    "ⓘ Grader clamped to (0.001, 0.999) for GRPO advantage stability",
    "ⓘ Hard task: all services green — signal buried in business metrics"
];
let tipIdx = 0;
setInterval(() => {{
    const el = document.getElementById('tip-text');
    el.style.opacity = 0;
    setTimeout(() => {{
        tipIdx = (tipIdx + 1) % TIPS.length;
        el.textContent = TIPS[tipIdx];
        el.style.opacity = 1;
    }}, 500);
}}, 15000);

setInterval(() => {{
    const now = new Date();
    document.getElementById('clock').textContent = now.toTimeString().split(' ')[0];
}}, 1000);

let currentEpisodeId = null;
let lastStep = -1;
let rewardHistory = [];
let totalScore = 0;

// Escape a value for safe interpolation into innerHTML. Everything that
// comes from the API (agent text, service names, alert messages) goes
// through this before it is rendered.
function esc(value) {{
    return String(value ?? '').replace(/[&<>"']/g, (ch) => ({{
        '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;'
    }})[ch]);
}}

function getScoreColor(sc) {{
    if(sc < 0.3) return 'var(--red)';
    if(sc < 0.6) return 'var(--yellow)';
    return 'var(--green)';
}}

function updateScoreDisplay() {{
    const sc = Math.max(0, totalScore);
    const col = getScoreColor(sc);

    const ts = document.getElementById('top-score');
    ts.textContent = sc.toFixed(3);
    ts.style.color = col;

    const gs = document.getElementById('giant-score');
    gs.textContent = sc.toFixed(3);
    gs.style.color = col;

    document.getElementById('score-bar').style.width = Math.min(100, sc * 100) + '%';
}}

// arg3 is only used by EPISODE_END (the episode's step limit).
function addLog(type, arg1, arg2, arg3) {{
    const logEl = document.getElementById('agent-log');
    const div = document.createElement('div');
    div.className = 'log-entry';

    const timeStr = new Date().toTimeString().split(' ')[0];
    const timeSpan = `<span class="log-time">[${{timeStr}}]</span>`;

    if (type === 'SYSTEM') {{
        div.innerHTML = `${{timeSpan}} <span style="color:var(--text-dim)">${{esc(arg1)}}</span>`;
    }} else if (type === 'EPISODE_START') {{
        div.innerHTML = `<div class="log-episode-start">━━━ NEW INCIDENT DEPLOYED ━━━<br>Task: ${{esc(String(arg1 ?? '--').toUpperCase())}} | Seed: ${{esc(arg2)}}</div>`;
    }} else if (type === 'ACTION') {{
        div.innerHTML = `${{timeSpan}} <span class="log-action">→ ${{esc(arg1.action_type)}} ${{esc(arg1.service || '')}}</span>`;
    }} else if (type === 'REWARD') {{
        // Positive = green, negative = red, exactly zero = dim.
        const col = arg1 > 0 ? 'var(--green)' : (arg1 < 0 ? 'var(--red)' : 'var(--text-dim)');
        div.innerHTML = `<div class="log-reward" style="color:${{col}}">✦ ${{arg1 > 0 ? '+' : ''}}${{arg1.toFixed(3)}} reward</div>`;
    }} else if (type === 'EVIDENCE') {{
        let txt = (arg1 || '').substring(0, 60);
        if(arg1 && arg1.length > 60) txt += '...';
        div.innerHTML = `<div class="log-evidence">↳ ${{esc(txt)}}</div>`;
    }} else if (type === 'DIAGNOSE') {{
        div.innerHTML = `${{timeSpan}} <span class="log-diagnose">⊕ DIAGNOSIS: ${{esc(arg1)}}</span>`;
    }} else if (type === 'FIX') {{
        div.innerHTML = `${{timeSpan}} <span class="log-fix">⚡ FIX APPLIED: ${{esc(arg1)}} → ${{esc(arg2)}}</span>`;
    }} else if (type === 'EPISODE_END') {{
        const limit = arg3 ?? 15;
        if (arg1 >= 0.7) {{
            div.innerHTML = `<div class="log-episode-end-ok">━━━ ✓ INCIDENT RESOLVED ━━━<br>Score: ${{arg1.toFixed(3)}} | Steps: ${{esc(arg2)}}/${{esc(limit)}}<br>━━━━━━━━━━━━━━━━━━━━━━━━━━━</div>`;
            document.getElementById('center-panel').classList.add('flash-resolve');
            document.getElementById('giant-score').classList.add('pulse-score');
            setTimeout(()=>{{
                document.getElementById('center-panel').classList.remove('flash-resolve');
                document.getElementById('giant-score').classList.remove('pulse-score');
            }}, 2000);
        }} else {{
            div.innerHTML = `<div class="log-episode-end-fail">━━━ ✗ INCIDENT ESCALATED ━━━<br>Score: ${{arg1.toFixed(3)}} | Steps: ${{esc(arg2)}}/${{esc(limit)}}<br>━━━━━━━━━━━━━━━━━━━━━━━━━━━</div>`;
        }}
    }}

    logEl.appendChild(div);
    if(logEl.children.length > 200) logEl.removeChild(logEl.firstChild);
    logEl.scrollTop = logEl.scrollHeight;
}}

function updateSparkline() {{
    const sp = document.getElementById('sparkline');
    sp.innerHTML = '';
    const start = Math.max(0, rewardHistory.length - 12);
    const recent = rewardHistory.slice(start);

    recent.forEach((r, i) => {{
        const h = Math.max(2, Math.min(40, (r / 0.5) * 40));
        const col = r > 0 ? 'var(--green)' : 'var(--red)';
        sp.innerHTML += `<div class="spark-bar" style="height:${{h}}px; background:${{col}}"><div class="spark-label">${{start + i + 1}}</div></div>`;
    }});
}}

async function pollState() {{
    try {{
        const res = await fetch('/state');
        if (!res.ok) throw new Error('Not OK');
        const data = await res.json();

        document.getElementById('live-dot').className = 'status-dot dot-green';
        document.getElementById('live-text').textContent = 'LIVE';
        document.getElementById('live-text').style.color = 'var(--red)';
        document.getElementById('btm-dot').className = 'status-dot dot-green';
        document.getElementById('btm-text').textContent = '◉ API SYNC';
        document.getElementById('btm-text').style.color = 'var(--green)';

        handleState(data);
    }} catch(e) {{
        document.getElementById('live-dot').className = 'status-dot dot-grey';
        document.getElementById('live-text').textContent = 'OFFLINE';
        document.getElementById('live-text').style.color = 'var(--text)';
        document.getElementById('btm-dot').className = 'status-dot dot-grey';
        document.getElementById('btm-text').textContent = '○ API DISCONNECTED';
        document.getElementById('btm-text').style.color = 'var(--text-dim)';
    }}
}}

function handleState(state) {{
    if (!state.episode_id) return;

    // API returns current_observation not observation
    const obs = state.current_observation || state.observation;
    const maxSteps = (obs && obs.max_steps) || 15;

    if (state.episode_id !== currentEpisodeId) {{
        currentEpisodeId = state.episode_id;
        lastStep = -1;
        rewardHistory = [];
        totalScore = 0;
        // "??" (not "||") so that seed 0 is displayed as 0, not "--".
        const seedLabel = state.info?.seed ?? '--';
        const taskLabel = String(state.task_id ?? '--');
        document.getElementById('agent-log').innerHTML = '';
        addLog('EPISODE_START', taskLabel, seedLabel);
        document.getElementById('stat-task').textContent = `TASK: ${{taskLabel.toUpperCase()}}`;
        document.getElementById('stat-seed').textContent = `SEED: ${{seedLabel}}`;
    }}

    if (state.step > lastStep) {{
        for (let i = Math.max(0, lastStep); i < state.action_history.length; i++) {{
            const hist = state.action_history[i];
            const act = hist.action;

            if(act.action_type === 'diagnose') addLog('DIAGNOSE', act.root_cause);
            else if(act.action_type === 'restart_service' || act.action_type === 'rollback' || act.action_type === 'block_ip_range')
                addLog('FIX', act.action_type, act.service || act.ip_range);
            else addLog('ACTION', act);

            if(hist.reward !== undefined) {{
                rewardHistory.push(hist.reward);
                addLog('REWARD', hist.reward);
            }}
        }}

        lastStep = state.step;
        totalScore = state.total_reward;

        document.getElementById('top-step').textContent = `${{state.step.toString().padStart(2,'0')}} / ${{maxSteps}}`;
        document.getElementById('stat-step').textContent = `STEP ${{state.step}}/${{maxSteps}}`;
        updateScoreDisplay();
        updateSparkline();

        if (state.incident_resolved) {{
            addLog('EPISODE_END', state.total_reward, state.step, maxSteps);
            // Park lastStep far ahead so the end banner is logged only once.
            lastStep = 99999;
        }}
    }}

    if (obs) {{
        if (obs.services && obs.services.length > 0) {{
            // services is a JSON array — sort a copy, worst status first
            const svcs = obs.services.slice().sort((a, b) => {{
                const val = (st) => st === 'down' ? 0 : (st === 'degraded' ? 1 : 2);
                return val(a.status) - val(b.status);
            }});

            const list = document.getElementById('service-list');
            list.innerHTML = '';
            document.getElementById('svc-count').textContent = svcs.length;

            svcs.forEach(s => {{
                let bcol = 'var(--border)', bgcol = 'var(--surface)', tcol = 'var(--text-dim)', stxt = '○ UNKNOWN';
                if(s.status === 'down') {{ bcol = 'var(--red)'; bgcol = 'var(--red-dim)'; tcol = 'var(--red)'; stxt = '● DOWN'; }}
                else if(s.status === 'degraded') {{ bcol = 'var(--yellow)'; bgcol = 'var(--yellow-dim)'; tcol = 'var(--yellow)'; stxt = '◐ DEGRADED'; }}
                else if(s.status === 'healthy') {{ bcol = 'var(--green)'; bgcol = 'var(--green-dim)'; tcol = 'var(--green)'; stxt = '○ HEALTHY'; }}

                // Default missing metrics to 0 so the UI never shows "NaN%".
                const err = s.error_rate || 0;
                const mem = s.memory_percent || 0;
                let errRate = (err * 100).toFixed(1);
                let memPct = mem.toFixed(1);
                let errCol = err > 0.3 ? 'var(--red)' : (err > 0.1 ? 'var(--yellow)' : 'var(--green)');
                let memCol = mem > 90 ? 'var(--red)' : (mem > 70 ? 'var(--yellow)' : 'var(--green)');

                list.innerHTML += `
                    <div class="service-item mono" style="border-left: 3px solid ${{bcol}}; background: ${{bgcol}}">
                        <div>
                            <div class="svc-name">${{esc(s.name)}}</div>
                            <div class="svc-status" style="color:${{tcol}}">${{stxt}}</div>
                        </div>
                        <div class="svc-stats">
                            <div class="svc-stat-line" style="color:${{errCol}}">ERR ${{errRate}}%</div>
                            <div class="svc-stat-line" style="color:${{memCol}}">MEM ${{memPct}}%</div>
                        </div>
                    </div>
                `;
            }});
        }}

        if (obs.active_alerts) {{
            const alist = document.getElementById('alerts-list');
            alist.innerHTML = '';
            document.getElementById('alert-count').textContent = obs.active_alerts.length;
            document.getElementById('alert-count').style.background = obs.active_alerts.length > 0 ? 'var(--red)' : 'var(--surface2)';

            if(obs.active_alerts.length === 0) {{
                alist.innerHTML = '<div class="no-alerts mono">◎ ALL SYSTEMS NOMINAL</div>';
            }} else {{
                let critFound = false;
                obs.active_alerts.slice(0, 5).forEach(a => {{
                    let bg = 'var(--surface)', border = 'var(--border)', txtCol = '#000';
                    // Alert severity is lowercase: "critical", "warning", "info"
                    const sev = String(a.severity ?? 'unknown');
                    if(sev === 'critical') {{ border = 'var(--red)'; bg = 'var(--red)'; critFound = true; }}
                    else if(sev === 'high') {{ border = '#ff6600'; bg = '#ff6600'; }}
                    else if(sev === 'warning') {{ border = 'var(--yellow)'; bg = 'var(--yellow)'; }}
                    else {{ border = 'var(--blue)'; bg = 'var(--blue)'; txtCol = '#fff'; }}

                    // color-mix gives the ~12% tint; appending a hex alpha
                    // ("20") to a var() expression is not valid CSS.
                    alist.innerHTML += `
                        <div class="alert-strip mono" style="border-left: 3px solid ${{border}}; background: color-mix(in srgb, ${{bg}} 12%, transparent)">
                            <div class="alert-badge" style="background:${{bg}}; color:${{txtCol}}">${{esc(sev.toUpperCase())}}</div>
                            <div class="alert-text">[${{esc(a.service)}}] ${{esc(a.message)}}</div>
                        </div>
                    `;
                }});
                if(obs.active_alerts.length > 5) {{
                    alist.innerHTML += `<div class="mono" style="font-size:9px; color:var(--text-dim); text-align:center">+${{obs.active_alerts.length - 5}} more</div>`;
                }}
                if(critFound) {{
                    const lp = document.getElementById('left-panel');
                    lp.classList.remove('flash-critical');
                    // Force a reflow so re-adding the class restarts the animation.
                    void lp.offsetWidth;
                    lp.classList.add('flash-critical');
                }}
            }}
        }}
    }}
}}

async function deployIncident() {{
    const task = document.getElementById('task-select').value;
    // Only fall back to 42 when the field is empty/invalid; 0 is a valid seed.
    const parsedSeed = parseInt(document.getElementById('seed-input').value, 10);
    const seed = Number.isNaN(parsedSeed) ? 42 : parsedSeed;

    // Call REST API
    try {{
        const res = await fetch('/reset', {{
            method: 'POST',
            headers: {{'Content-Type': 'application/json'}},
            body: JSON.stringify({{task_id: task, seed: seed}})
        }});
        if (!res.ok) {{
            addLog('SYSTEM', `Deploy failed: HTTP ${{res.status}}`);
        }}
        // The poller will pick up the state change
    }} catch (e) {{
        console.error(e);
    }}
}}

setInterval(pollState, 1000);
pollState();
</script>
</body>
</html>
"""
    return HTMLResponse(html)
|
|
|
|
| @app.get("/", response_class=HTMLResponse) |
| def dashboard(): |
| html = f"""<!DOCTYPE html> |
| <html lang="en"> |
| <head> |
| <meta charset="UTF-8"> |
| <meta name="viewport" content="width=device-width, initial-scale=1.0"> |
| <title>ARIA - DevOps Incident Response</title> |
| <link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;500;600;700&family=JetBrains+Mono:wght@400;700&display=swap" rel="stylesheet"> |
| <style> |
| :root {{ |
| --bg: #060914; |
| --bg-card: #111827; |
| --border: #1f2937; |
| --blue: #3b82f6; |
| --cyan: #06b6d4; |
| --green: #10b981; |
| --red: #ef4444; |
| --yellow: #f59e0b; |
| --purple: #8b5cf6; |
| --text: #f9fafb; |
| --muted: #9ca3af; |
| }} |
| * {{ margin: 0; padding: 0; box-sizing: border-box; }} |
| body {{ |
| background: var(--bg); |
| color: var(--text); |
| font-family: 'Inter', sans-serif; |
| min-height: 100vh; |
| overflow-x: hidden; |
| }} |
| html {{ scroll-behavior: smooth; }} |
| a {{ text-decoration: none; color: inherit; }} |
| |
| /* Animation */ |
| @keyframes fadeInUp {{ |
| from {{ opacity: 0; transform: translateY(20px); }} |
| to {{ opacity: 1; transform: translateY(0); }} |
| }} |
| .fade-in {{ |
| opacity: 0; |
| transform: translateY(20px); |
| transition: opacity 0.6s ease-out, transform 0.6s ease-out; |
| }} |
| .fade-in.visible {{ opacity: 1; transform: translateY(0); }} |
| |
| /* Canvas Background */ |
| #bg-canvas {{ |
| position: fixed; |
| top: 0; |
| left: 0; |
| width: 100vw; |
| height: 100vh; |
| z-index: 0; |
| pointer-events: none; |
| }} |
| |
| .container {{ |
| max-width: 1280px; |
| margin: 0 auto; |
| padding: 0 24px; |
| position: relative; |
| z-index: 1; |
| }} |
| section {{ padding: 80px 0; }} |
| |
| /* Navbar */ |
| nav {{ |
| position: fixed; |
| top: 0; |
| width: 100%; |
| height: 64px; |
| background: rgba(6, 9, 20, 0.8); |
| backdrop-filter: blur(20px); |
| border-bottom: 1px solid var(--border); |
| z-index: 100; |
| display: flex; |
| align-items: center; |
| }} |
| .nav-inner {{ |
| display: flex; |
| justify-content: space-between; |
| align-items: center; |
| width: 100%; |
| max-width: 1280px; |
| margin: 0 auto; |
| padding: 0 24px; |
| }} |
| .nav-left {{ display: flex; align-items: center; gap: 8px; }} |
| .nav-logo {{ font-size: 20px; font-weight: 700; color: var(--blue); }} |
| .nav-desc {{ font-size: 13px; color: var(--muted); display: none; }} |
| @media (min-width: 768px) {{ .nav-desc {{ display: block; }} }} |
| |
| .nav-center {{ display: flex; justify-content: center; flex: 1; }} |
| .status-pill {{ |
| display: flex; |
| align-items: center; |
| gap: 6px; |
| background: rgba(16, 185, 129, 0.2); |
| border: 1px solid var(--green); |
| color: var(--green); |
| padding: 4px 12px; |
| border-radius: 999px; |
| font-size: 12px; |
| font-weight: 600; |
| }} |
| .status-dot {{ |
| width: 6px; |
| height: 6px; |
| background: var(--green); |
| border-radius: 50%; |
| animation: pulse 2s infinite; |
| }} |
| @keyframes pulse {{ 0% {{ transform: scale(1); opacity: 1; }} 50% {{ transform: scale(1.5); opacity: 0.5; }} 100% {{ transform: scale(1); opacity: 1; }} }} |
| |
| .nav-right {{ display: flex; gap: 24px; }} |
| .nav-link {{ font-size: 13px; color: var(--muted); transition: color 0.2s; }} |
| .nav-link:hover {{ color: var(--text); }} |
| |
| /* Hero */ |
| .hero {{ padding: 120px 0 80px; text-align: center; }} |
| .hero-badge {{ |
| background: rgba(59, 130, 246, 0.1); |
| border: 1px solid rgba(59, 130, 246, 0.3); |
| border-radius: 999px; |
| padding: 6px 16px; |
| font-size: 12px; |
| color: var(--blue); |
| display: inline-block; |
| margin-bottom: 24px; |
| }} |
| .hero-title {{ |
| font-size: clamp(72px, 12vw, 140px); |
| font-weight: 700; |
| background: linear-gradient(135deg, var(--blue) 0%, var(--cyan) 50%, var(--purple) 100%); |
| -webkit-background-clip: text; |
| -webkit-text-fill-color: transparent; |
| line-height: 1; |
| letter-spacing: -4px; |
| }} |
| .hero-subtitle {{ font-size: 20px; color: var(--muted); margin-top: 16px; font-weight: 400; }} |
| .hero-desc {{ font-size: 15px; color: #4b5563; margin-top: 12px; line-height: 1.6; max-width: 600px; margin-inline: auto; }} |
| |
| .hero-buttons {{ margin-top: 40px; display: flex; justify-content: center; gap: 16px; flex-wrap: wrap; }} |
| .btn-primary, .btn-secondary {{ |
| padding: 14px 28px; border-radius: 8px; font-weight: 600; font-size: 15px; transition: all 0.2s; cursor: pointer; display: inline-block; |
| }} |
| .btn-primary {{ background: var(--blue); color: white; border: none; }} |
| .btn-primary:hover {{ background: #2563eb; transform: translateY(-2px); }} |
| .btn-secondary {{ background: transparent; border: 1px solid var(--border); color: var(--muted); }} |
| .btn-secondary:hover {{ border-color: var(--blue); color: white; transform: translateY(-2px); }} |
| |
| .hero-stats {{ margin-top: 64px; display: grid; grid-template-columns: repeat(auto-fit, minmax(200px, 1fr)); gap: 16px; }} |
| .stat-card {{ background: var(--bg-card); border: 1px solid var(--border); border-radius: 12px; padding: 20px 32px; text-align: center; }} |
| .stat-val {{ font-family: 'JetBrains Mono', monospace; font-size: 32px; font-weight: 700; color: var(--blue); }} |
| .stat-label {{ font-size: 13px; color: var(--muted); margin-top: 4px; }} |
| |
| .section-title {{ font-size: 24px; font-weight: 600; margin-bottom: 8px; }} |
| .section-subtitle {{ font-size: 15px; color: var(--muted); margin-bottom: 32px; }} |
| |
| /* Tasks Grid */ |
| .task-grid {{ display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px; }} |
| @media (max-width: 1024px) {{ .task-grid {{ grid-template-columns: repeat(2, 1fr); }} }} |
| @media (max-width: 640px) {{ .task-grid {{ grid-template-columns: 1fr; }} }} |
| |
| .task-card {{ |
| background: var(--bg-card); border: 1px solid var(--border); border-radius: 16px; padding: 24px; |
| transition: all 0.3s; cursor: pointer; position: relative; overflow: hidden; display: flex; flex-direction: column; |
| }} |
| .task-card::before {{ content: ''; position: absolute; top: 0; left: 0; right: 0; height: 2px; background: transparent; transition: all 0.3s; }} |
| .task-card:hover {{ transform: translateY(-4px); box-shadow: 0 20px 40px rgba(0,0,0,0.4); }} |
| .task-card:hover::before {{ background: var(--card-color, var(--border)); }} |
| |
| .task-header {{ display: flex; justify-content: space-between; align-items: flex-start; }} |
| .task-icon {{ font-size: 32px; }} |
| .task-badge {{ font-size: 11px; font-weight: 700; padding: 4px 8px; border-radius: 6px; background: var(--card-bg); color: var(--card-color); letter-spacing: 0.5px; }} |
| .task-name {{ font-size: 16px; font-weight: 600; margin-top: 16px; }} |
| .task-desc {{ font-size: 13px; color: var(--muted); margin-top: 8px; line-height: 1.5; flex-grow: 1; }} |
| .task-footer {{ display: flex; justify-content: space-between; align-items: center; margin-top: 20px; }} |
| .task-steps {{ font-family: 'JetBrains Mono', monospace; font-size: 12px; color: #4b5563; }} |
| .task-status {{ display: flex; align-items: center; gap: 6px; font-size: 12px; color: var(--card-color); font-weight: 500; }} |
| .task-status::before {{ content: ''; width: 6px; height: 6px; border-radius: 50%; background: var(--card-color); }} |
| |
| /* Features */ |
| .features-grid {{ display: grid; grid-template-columns: repeat(3, 1fr); gap: 24px; }} |
| @media (max-width: 900px) {{ .features-grid {{ grid-template-columns: 1fr; }} }} |
| .feature-card {{ background: var(--bg-card); border: 1px solid var(--border); border-radius: 16px; padding: 32px; display: flex; flex-direction: column; }} |
| .feature-icon {{ font-size: 48px; margin-bottom: 24px; }} |
| .feature-title {{ font-size: 20px; font-weight: 600; margin-bottom: 12px; color: var(--text); }} |
| .feature-desc {{ font-size: 14px; color: var(--muted); line-height: 1.6; margin-bottom: 24px; flex-grow: 1; }} |
| |
| .c-bar-row {{ display: flex; align-items: center; justify-content: space-between; margin-bottom: 8px; font-size: 12px; font-family: 'JetBrains Mono', monospace; }} |
| .c-bar-name {{ color: var(--muted); width: 80px; overflow: hidden; text-overflow: ellipsis; }} |
| .c-bar-track {{ flex-grow: 1; margin: 0 12px; letter-spacing: -2px; color: #4b5563; }} |
| .c-bar-score {{ width: 30px; text-align: right; }} |
| |
| .generator-input {{ display: flex; gap: 8px; margin-bottom: 16px; }} |
| .gen-seed {{ background: #0d1117; border: 1px solid var(--border); color: white; padding: 8px 12px; border-radius: 6px; width: 80px; font-family: 'JetBrains Mono', monospace; }} |
| .btn-gen {{ background: var(--purple); color: white; border: none; padding: 8px 16px; border-radius: 6px; cursor: pointer; font-weight: 600; }} |
| .gen-result {{ background: #0d1117; border: 1px solid var(--border); border-radius: 8px; padding: 16px; display: none; }} |
| .gen-badges {{ display: flex; gap: 8px; margin-bottom: 12px; }} |
| .gen-badge {{ font-size: 10px; padding: 2px 6px; border-radius: 4px; font-weight: 600; text-transform: uppercase; }} |
| .gen-diff-bar {{ height: 4px; background: var(--border); border-radius: 2px; margin: 12px 0; overflow: hidden; }} |
| |
| .dual-diagram {{ background: #0d1117; border: 1px solid var(--border); border-radius: 8px; padding: 16px; font-family: 'JetBrains Mono', monospace; font-size: 11px; margin-bottom: 24px; color: var(--muted); display: flex; justify-content: space-between; align-items: center; }} |
| .agent-box {{ border: 1px solid var(--border); padding: 8px; border-radius: 4px; background: rgba(0,0,0,0.2); width: 42%; }} |
| .agent-arrow {{ flex-grow: 1; text-align: center; color: var(--green); position: relative; }} |
| .agent-arrow::after {{ content: '→'; position: absolute; top: -10px; left: 50%; transform: translateX(-50%); animation: flowRight 1.5s infinite linear; }} |
| @keyframes flowRight {{ 0% {{ left: 20%; opacity: 0; }} 50% {{ opacity: 1; }} 100% {{ left: 80%; opacity: 0; }} }} |
| .btn-green {{ background: var(--green); color: white; border: none; padding: 8px 16px; border-radius: 6px; cursor: pointer; font-weight: 600; }} |
| |
| .feature-link {{ color: var(--blue); font-size: 14px; font-weight: 500; margin-top: 16px; display: inline-block; }} |
| |
| /* Live Metrics */ |
| .metrics-bar {{ background: #0d1117; border-top: 1px solid var(--border); border-bottom: 1px solid var(--border); padding: 24px 0; }} |
| .metrics-grid {{ display: flex; justify-content: space-between; }} |
| .metric-item {{ text-align: center; flex: 1; border-right: 1px solid var(--border); }} |
| .metric-item:last-child {{ border-right: none; }} |
| .metric-val {{ font-family: 'JetBrains Mono', monospace; font-size: 28px; font-weight: 700; color: var(--blue); }} |
| .metric-label {{ font-size: 12px; color: var(--muted); margin-top: 4px; }} |
| @media (max-width: 640px) {{ .metrics-grid {{ flex-wrap: wrap; gap: 24px; }} .metric-item {{ min-width: 40%; border: none; }} }} |
| |
| /* Leaderboard */ |
| .leaderboard-card {{ background: var(--bg-card); border: 1px solid var(--border); border-radius: 16px; overflow-x: auto; }} |
| table {{ width: 100%; border-collapse: collapse; text-align: left; }} |
| th {{ background: rgba(255,255,255,0.03); font-size: 11px; text-transform: uppercase; letter-spacing: 1px; color: #4b5563; padding: 12px 24px; border-bottom: 1px solid var(--border); }} |
| td {{ padding: 16px 24px; border-bottom: 1px solid var(--border); font-size: 14px; }} |
| tr:last-child td {{ border-bottom: none; }} |
| .lb-score {{ font-family: 'JetBrains Mono', monospace; font-weight: 600; }} |
| |
| /* Quick Start */ |
| .tabs {{ display: flex; gap: 8px; margin-bottom: 16px; }} |
| .tab {{ background: transparent; border: none; color: var(--muted); padding: 8px 16px; border-radius: 6px; cursor: pointer; font-size: 14px; font-weight: 500; font-family: 'Inter', sans-serif;}} |
| .tab.active {{ background: var(--blue); color: white; }} |
| .code-block {{ background: #020408; border: 1px solid var(--border); border-radius: 12px; padding: 24px; position: relative; display: none; overflow-x: auto; }} |
| .code-block.active {{ display: block; }} |
| .code-text {{ font-family: 'JetBrains Mono', monospace; font-size: 13px; line-height: 1.8; color: var(--text); white-space: pre; }} |
| .btn-copy {{ position: absolute; top: 12px; right: 12px; background: rgba(255,255,255,0.1); border: 1px solid var(--border); color: var(--muted); padding: 4px 10px; border-radius: 4px; font-size: 12px; cursor: pointer; }} |
| |
| .c-com {{ color: #4b5563; }} .c-str {{ color: var(--green); }} .c-cmd {{ color: var(--blue); }} .c-url {{ color: var(--cyan); }} .c-key {{ color: var(--yellow); }} |
| |
| /* Training Evidence */ |
| .training-grid {{ display: grid; grid-template-columns: 1fr 1fr; gap: 24px; }} |
| @media (max-width: 900px) {{ .training-grid {{ grid-template-columns: 1fr; }} }} |
| .train-card {{ background: var(--bg-card); border: 1px solid var(--border); border-radius: 16px; padding: 32px; display: flex; flex-direction: column; }} |
| .train-title {{ font-size: 18px; font-weight: 600; margin-bottom: 24px; }} |
| .train-row {{ margin-bottom: 24px; }} |
| .train-label {{ font-size: 12px; color: var(--muted); margin-bottom: 8px; display: flex; justify-content: space-between; align-items: center; }} |
| .train-badge {{ padding: 4px 8px; border-radius: 4px; font-family: 'JetBrains Mono', monospace; font-weight: 600; }} |
| .train-desc {{ font-size: 14px; color: var(--muted); line-height: 1.5; margin-left: 28px; }} |
| .train-vis {{ float: left; font-size: 18px; margin-top: 2px; }} |
| .tt-row {{ display: flex; justify-content: space-between; padding: 12px 0; border-bottom: 1px solid var(--border); }} |
| .tt-row:last-child {{ border-bottom: none; }} |
| .tt-key {{ font-size: 13px; color: var(--muted); }} |
| .tt-val {{ font-size: 13px; font-family: 'JetBrains Mono', monospace; color: var(--text); }} |
| |
| /* Footer */ |
| footer {{ background: #0d1117; border-top: 1px solid var(--border); padding: 48px 0 32px; margin-top: 80px; }} |
| .footer-grid {{ display: grid; grid-template-columns: 2fr 1fr 1fr; gap: 32px; }} |
| @media (max-width: 768px) {{ .footer-grid {{ grid-template-columns: 1fr; }} }} |
| .f-title {{ font-size: 14px; font-weight: 600; margin-bottom: 16px; }} |
| .f-text {{ font-size: 13px; color: #4b5563; line-height: 1.6; }} |
| .f-links {{ display: flex; flex-direction: column; gap: 12px; }} |
| .f-link {{ font-size: 13px; color: var(--muted); transition: color 0.2s; }} |
| .f-link:hover {{ color: var(--text); }} |
| .f-bottom {{ border-top: 1px solid var(--border); margin-top: 32px; padding-top: 24px; display: flex; justify-content: space-between; font-size: 12px; color: #4b5563; }} |
| </style> |
| </head> |
| <body> |
| <canvas id="bg-canvas"></canvas> |
| |
| <nav> |
| <div class="nav-inner"> |
| <div class="nav-left"> |
| <div class="nav-logo">🚨 ARIA</div> |
| <div class="nav-desc">DevOps Incident Response</div> |
| </div> |
| <div class="nav-center"> |
| <div class="status-pill"> |
| <div class="status-dot"></div> |
| <span id="nav-status-text">CONNECTING</span> |
| </div> |
| </div> |
| <div class="nav-right"> |
| <a href="/docs" class="nav-link">API Docs</a> |
| <a href="/validate" class="nav-link">Validate</a> |
| <a href="/metrics" class="nav-link">Metrics</a> |
| <a href="/leaderboard" class="nav-link">Leaderboard</a> |
| </div> |
| </div> |
| </nav> |
| |
| <main class="container"> |
| <section class="hero fade-in"> |
| <div class="hero-badge">⚡ OpenEnv Compliant · Meta × PyTorch × HuggingFace</div> |
| <h1 class="hero-title">ARIA</h1> |
| <div class="hero-subtitle">Adaptive Reward & Incident Architecture</div> |
| <p class="hero-desc">The first OpenEnv RL environment for production incident response.<br>7 tasks · 14 actions · Curriculum · Dual-agent · Trained Llama-3.1-8B</p> |
| |
| <div class="hero-buttons"> |
| <a href="/docs" class="btn-primary">Try Live API →</a> |
| <a href="https://github.com/Twilight-13/devops-incident-response" target="_blank" class="btn-secondary">View GitHub →</a> |
| </div> |
| |
| <div class="hero-stats"> |
| <div class="stat-card"><div class="stat-val">7</div><div class="stat-label">Tasks</div></div> |
| <div class="stat-card"><div class="stat-val">14</div><div class="stat-label">Actions</div></div> |
| <div class="stat-card"><div class="stat-val">∞</div><div class="stat-label">Scenarios</div></div> |
| <div class="stat-card"><div class="stat-val">0.99</div><div class="stat-label">Max Score</div></div> |
| </div> |
| </section> |
| |
| <section class="fade-in"> |
| <h2 class="section-title">Environment Tasks</h2> |
| <p class="section-subtitle">Eight scenarios of escalating operational complexity</p> |
| <div class="task-grid" id="task-grid"><div style="grid-column: 1/-1; text-align: center; color: var(--muted);">Loading tasks...</div></div> |
| </section> |
| |
| <section class="fade-in"> |
| <h2 class="section-title">ARIA Features</h2> |
| <p class="section-subtitle">What makes this environment unique</p> |
| <div class="features-grid"> |
| <div class="feature-card"> |
| <div class="feature-icon">🎓</div> |
| <h3 class="feature-title">Curriculum Engine</h3> |
| <p class="feature-desc">Tracks agent performance per task with rolling averages. Promotes when mastered (avg > 0.75). Scaffolds with hints when struggling (avg < 0.30). Agents always train at the edge of their capability.</p> |
| <div id="curriculum-container" style="margin-bottom: 24px;"></div> |
| <a href="/curriculum/status" class="feature-link" style="color: var(--blue);">View Status →</a> |
| </div> |
| |
| <div class="feature-card"> |
| <div class="feature-icon">⚡</div> |
| <h3 class="feature-title">Incident Generator</h3> |
| <p class="feature-desc">Procedural incidents from seeds 0–99,999. Six failure modes × eight services × variable noise = infinite unique training scenarios. Same seed always produces the same incident.</p> |
| <div class="generator-input"> |
| <input type="number" id="gen-seed" class="gen-seed" value="42" min="0" max="99999"> |
| <button class="btn-gen" onclick="generateIncident()">Generate</button> |
| </div> |
| <div class="gen-result" id="gen-result"> |
| <div class="gen-badges" id="gen-badges"></div> |
| <div style="font-size: 13px; font-weight: 600; margin-bottom: 8px;" id="gen-affected"></div> |
| <div style="font-size: 12px; color: var(--muted); line-height: 1.5;" id="gen-desc"></div> |
| <div class="gen-diff-bar"><div id="gen-diff-fill" style="height: 100%; transition: width 0.3s;"></div></div> |
| </div> |
| <a href="/generate/preview?seed=42" class="feature-link" style="color: var(--purple);">Try Generator →</a> |
| </div> |
| |
| <div class="feature-card"> |
| <div class="feature-icon">🤝</div> |
| <h3 class="feature-title">Dual-Agent Mode</h3> |
| <p class="feature-desc">Split observability between two agents. Observer sees logs and alerts. Responder sees metrics and dependencies. Neither can solve the incident alone — they must coordinate via share_finding.</p> |
| <div class="dual-diagram"> |
| <div class="agent-box"><div style="font-weight:700; margin-bottom:4px;">AGENT A: Observer</div><div>• alerts, logs</div></div> |
| <div class="agent-arrow">share_finding</div> |
| <div class="agent-box"><div style="font-weight:700; margin-bottom:4px;">AGENT B: Responder</div><div>• metrics, deps</div></div> |
| </div> |
| <button class="btn-green" onclick="startDualSession()">Start Session</button> |
| <div id="dual-session-info" style="margin-top: 16px; font-family: 'JetBrains Mono', monospace; font-size: 11px; color: var(--green); display: none; word-break: break-all;"></div> |
| <a href="/multi-agent/sessions" class="feature-link" style="color: var(--green); margin-top: auto;">View Sessions →</a> |
| </div> |
| </div> |
| </section> |
| </main> |
| |
| <div class="metrics-bar fade-in"> |
| <div class="container metrics-grid"> |
| <div class="metric-item"><div class="metric-val" id="m-episodes">--</div><div class="metric-label">Total Episodes</div></div> |
| <div class="metric-item"><div class="metric-val" id="m-avg">--</div><div class="metric-label">Avg Score</div></div> |
| <div class="metric-item"><div class="metric-val" id="m-res">--</div><div class="metric-label">Resolution Rate</div></div> |
| <div class="metric-item"><div class="metric-val" id="m-best">--</div><div class="metric-label">Best Score</div></div> |
| </div> |
| </div> |
| |
| <main class="container"> |
| <section class="fade-in"> |
| <h2 class="section-title">🏆 Leaderboard</h2> |
| <div class="leaderboard-card"> |
| <table> |
| <thead><tr><th>Rank</th><th>Task</th><th>Score</th><th>Steps</th><th>Status</th></tr></thead> |
| <tbody id="lb-body"><tr><td colspan="5" style="text-align: center; color: var(--muted);">Loading leaderboard...</td></tr></tbody> |
| </table> |
| </div> |
| </section> |
| |
| <section class="fade-in"> |
| <h2 class="section-title">Quick Start</h2> |
| <div class="tabs"> |
| <button class="tab active" onclick="switchTab('curl')">curl</button> |
| <button class="tab" onclick="switchTab('python')">Python</button> |
| </div> |
| |
| <div id="code-curl" class="code-block active"> |
| <button class="btn-copy" onclick="copyCode('code-curl-text', this)">Copy</button> |
| <div class="code-text" id="code-curl-text"><span class="c-com"># 1. Start an incident</span> |
| <span class="c-cmd">curl</span> -X POST https://arijit-07-devops-incident-response.hf.space/reset \ |
| -H <span class="c-str">"Content-Type: application/json"</span> \ |
| -d <span class="c-str">'{{<span class="c-key">"task_id"</span>: <span class="c-str">"easy"</span>, <span class="c-key">"seed"</span>: 42}}'</span> |
| |
| <span class="c-com"># 2. Read logs (reward: +0.15)</span> |
| <span class="c-cmd">curl</span> -X POST https://arijit-07-devops-incident-response.hf.space/step \ |
| -H <span class="c-str">"Content-Type: application/json"</span> \ |
| -d <span class="c-str">'{{<span class="c-key">"action_type"</span>: <span class="c-str">"read_logs"</span>, <span class="c-key">"service"</span>: <span class="c-str">"payment-service"</span>}}'</span> |
| |
| <span class="c-com"># 3. Diagnose (reward: +0.30)</span> |
| <span class="c-cmd">curl</span> -X POST https://arijit-07-devops-incident-response.hf.space/step \ |
| -H <span class="c-str">"Content-Type: application/json"</span> \ |
| -d <span class="c-str">'{{<span class="c-key">"action_type"</span>: <span class="c-str">"diagnose"</span>, <span class="c-key">"root_cause"</span>: <span class="c-str">"memory leak in payment-service"</span>}}'</span> |
| |
| <span class="c-com"># 4. Fix it (reward: +0.40)</span> |
| <span class="c-cmd">curl</span> -X POST https://arijit-07-devops-incident-response.hf.space/step \ |
| -H <span class="c-str">"Content-Type: application/json"</span> \ |
| -d <span class="c-str">'{{<span class="c-key">"action_type"</span>: <span class="c-str">"restart_service"</span>, <span class="c-key">"service"</span>: <span class="c-str">"payment-service"</span>}}'</span> |
| |
| <span class="c-com"># Score: ~0.94 ✅</span></div> |
| </div> |
| |
| <div id="code-python" class="code-block"> |
| <button class="btn-copy" onclick="copyCode('code-py-text', this)">Copy</button> |
| <div class="code-text" id="code-py-text"><span class="c-cmd">import</span> requests |
| BASE = <span class="c-str">"https://arijit-07-devops-incident-response.hf.space"</span> |
| |
| <span class="c-com"># Start episode</span> |
| obs = requests.post(<span class="c-url">f"{{BASE}}/reset"</span>, json={{<span class="c-key">"task_id"</span>: <span class="c-str">"easy"</span>, <span class="c-key">"seed"</span>: 42}}).json() |
| |
| <span class="c-com"># Take action</span> |
| result = requests.post(<span class="c-url">f"{{BASE}}/step"</span>, |
| json={{<span class="c-key">"action_type"</span>: <span class="c-str">"read_logs"</span>, <span class="c-key">"service"</span>: <span class="c-str">"payment-service"</span>}}).json() |
| |
| print(<span class="c-url">f"Reward: {{result['reward']}}"</span>) <span class="c-com"># 0.15</span></div> |
| </div> |
| </section> |
| |
| <section class="fade-in"> |
| <h2 class="section-title">🧠 Training Evidence</h2> |
| <div class="training-grid"> |
| <div class="train-card"> |
| <h3 class="train-title">Before vs After</h3> |
| <div class="train-row"> |
| <div class="train-label"><span>Base Llama-3.1-8B</span><span class="train-badge" style="background: rgba(239, 68, 68, 0.2); color: var(--red);">0.000</span></div> |
| <div class="train-vis" style="color: var(--red);">❌</div> |
| <div class="train-desc">jumps to diagnose, gets penalized</div> |
| </div> |
| <div class="train-row"> |
| <div class="train-label"><span>ARIA Fine-tuned</span><span class="train-badge" style="background: rgba(16, 185, 129, 0.2); color: var(--green);">0.150</span></div> |
| <div class="train-vis" style="color: var(--green);">✅</div> |
| <div class="train-desc">reads logs first, every time</div> |
| </div> |
| <a href="https://huggingface.co/Arijit-07/aria-devops-llama8b" target="_blank" class="feature-link">Model weights →</a> |
| </div> |
| <div class="train-card"> |
| <h3 class="train-title">Training Details</h3> |
| <div class="tt-row"><div class="tt-key">Algorithm</div><div class="tt-val">GRPO</div></div> |
| <div class="tt-row"><div class="tt-key">Base Model</div><div class="tt-val">Llama-3.1-8B-Instruct</div></div> |
| <div class="tt-row"><div class="tt-key">Framework</div><div class="tt-val">Unsloth + HuggingFace TRL</div></div> |
| <div class="tt-row"><div class="tt-key">LoRA Rank</div><div class="tt-val">32 (alpha 64)</div></div> |
| <div class="tt-row"><div class="tt-key">Episodes</div><div class="tt-val">160</div></div> |
| <div class="tt-row"><div class="tt-key">GPU</div><div class="tt-val">NVIDIA L4</div></div> |
| </div> |
| </div> |
| </section> |
| </main> |
| |
| <footer> |
| <div class="container"> |
| <div class="footer-grid"> |
| <div> |
| <div style="font-size: 20px; font-weight: 700; color: var(--blue); margin-bottom: 8px;">🚨 ARIA</div> |
| <div class="f-text">DevOps Incident Response<br>OpenEnv-compliant RL environment</div> |
| <div style="display: flex; gap: 16px; margin-top: 16px;"> |
| <a href="https://github.com/Twilight-13/devops-incident-response" target="_blank" class="f-link">GitHub</a> |
| <a href="https://huggingface.co/Arijit-07/aria-devops-llama8b" target="_blank" class="f-link">Model</a> |
| </div> |
| </div> |
| <div> |
| <div class="f-title">Resources</div> |
| <div class="f-links"> |
| <a href="/docs" class="f-link">Live API Docs</a> |
| <a href="/validate" class="f-link">Validate</a> |
| <a href="/metrics" class="f-link">Metrics</a> |
| <a href="/leaderboard" class="f-link">Leaderboard</a> |
| </div> |
| </div> |
| <div> |
| <div class="f-title">Built for</div> |
| <div class="f-text">Meta × PyTorch × HuggingFace<br>OpenEnv Hackathon Finals<br>Bangalore, April 2026</div> |
| </div> |
| </div> |
| <div class="f-bottom"> |
| <div>© 2026 ARIA — Apache 2.0 License</div> |
| <div>Can your agent handle a SEV-1 at 3am?</div> |
| </div> |
| </div> |
| </footer> |
| |
| <script> |
| const canvas = document.getElementById('bg-canvas'); |
| const ctx = canvas.getContext('2d'); |
| let width, height, particles = []; |
| |
| function resize() {{ width = canvas.width = window.innerWidth; height = canvas.height = window.innerHeight; }} |
| window.addEventListener('resize', resize); resize(); |
| |
| for(let i=0; i<50; i++) {{ |
| particles.push({{ x: Math.random() * width, y: Math.random() * height, vx: (Math.random()-0.5)*0.5, vy: (Math.random()-0.5)*0.5 }}); |
| }} |
| |
| function draw() {{ |
| ctx.clearRect(0, 0, width, height); |
| ctx.fillStyle = 'rgba(59, 130, 246, 0.2)'; |
| ctx.strokeStyle = 'rgba(59, 130, 246, 0.1)'; |
| for(let i=0; i<particles.length; i++) {{ |
| let p = particles[i]; |
| p.x += p.vx; p.y += p.vy; |
| if(p.x < 0 || p.x > width) p.vx *= -1; |
| if(p.y < 0 || p.y > height) p.vy *= -1; |
| ctx.beginPath(); ctx.arc(p.x, p.y, 2, 0, Math.PI*2); ctx.fill(); |
| for(let j=i+1; j<particles.length; j++) {{ |
| let p2 = particles[j], dist = Math.hypot(p.x-p2.x, p.y-p2.y); |
| if(dist < 150) {{ ctx.beginPath(); ctx.moveTo(p.x, p.y); ctx.lineTo(p2.x, p2.y); ctx.stroke(); }} |
| }} |
| }} |
| requestAnimationFrame(draw); |
| }} |
| draw(); |
| |
| const observer = new IntersectionObserver(e => e.forEach(en => {{ if(en.isIntersecting) en.target.classList.add('visible'); }}), {{threshold: 0.1}}); |
| document.querySelectorAll('.fade-in').forEach(el => observer.observe(el)); |
| |
| fetch('/health').then(r => r.json()).then(d => {{ |
| if(d.status === 'ok') document.getElementById('nav-status-text').innerText = 'LIVE'; |
| }}).catch(e => console.error(e)); |
| |
| const tMap = {{ |
| 'easy': {{icon: '💻', color: '#10b981', badge: 'EASY'}}, 'medium': {{icon: '⚡', color: '#f59e0b', badge: 'MEDIUM'}}, |
| 'hard': {{icon: '🔥', color: '#ef4444', badge: 'HARD'}}, 'bonus': {{icon: '💥', color: '#8b5cf6', badge: 'EXPERT'}}, |
| 'security': {{icon: '🛡️', color: '#06b6d4', badge: 'SECURITY'}}, 'database': {{icon: '🗄️', color: '#f97316', badge: 'DATABASE'}}, |
| 'failover': {{icon: '🌐', color: '#6366f1', badge: 'FAILOVER'}}, 'generated': {{icon: '✨', color: '#ec4899', badge: 'DYNAMIC'}} |
| }}; |
| fetch('/tasks').then(r => r.json()).then(d => {{ |
| document.getElementById('task-grid').innerHTML = d.tasks.map(t => {{ |
| let c = tMap[t.id] || tMap['easy']; |
| return `<div class="task-card" style="--card-color:${{c.color}};--card-bg:${{c.color}}20;"> |
| <div class="task-header"><div class="task-icon">${{c.icon}}</div><div class="task-badge">${{c.badge}}</div></div> |
| <div class="task-name">${{t.name}}</div><div class="task-desc">${{t.description}}</div> |
| <div class="task-footer"><div class="task-steps">Max steps: ${{t.max_steps}}</div><div class="task-status">Ready</div></div> |
| </div>`; |
| }}).join(''); |
| }}).catch(e => console.error(e)); |
| |
| fetch('/curriculum/status').then(r => r.json()).then(d => {{ |
| const el = document.getElementById('curriculum-container'); |
| if(!d.total_episodes_recorded) el.innerHTML = '<div style="color:var(--muted); font-size:13px; text-align:center;">No episodes yet — run POST /reset to begin</div>'; |
| else {{ |
| el.innerHTML = Object.keys(d.tasks).slice(0, 4).map(k => {{ |
| let avg = d.tasks[k].rolling_avg, col = avg < 0.3 ? 'var(--red)' : (avg < 0.6 ? 'var(--yellow)' : 'var(--green)'); |
| let bl = Math.round(avg * 10); |
| return `<div class="c-bar-row"><div class="c-bar-name">${{k}}</div> |
| <div class="c-bar-track" style="color:${{col}}"><span>${{'█'.repeat(bl)}}</span><span style="opacity:0.3">${{'░'.repeat(10-bl)}}</span></div> |
| <div class="c-bar-score">${{avg.toFixed(2)}}</div></div>`; |
| }}).join(''); |
| }} |
| }}).catch(e => console.error(e)); |
| |
| window.generateIncident = () => {{ |
| const seed = document.getElementById('gen-seed').value || 42; |
| fetch(`/generate/preview?seed=${{seed}}`).then(r => r.json()).then(d => {{ |
| const colors = {{oom: '#ef4444', cascade: '#f59e0b', corruption: '#8b5cf6', security: '#06b6d4', database: '#f97316', network_partition: '#6366f1'}}; |
| const sc = {{sev1: '#ef4444', sev2: '#f59e0b', sev3: '#10b981'}}; |
| let fcol = colors[d.failure_mode] || 'var(--blue)'; |
| document.getElementById('gen-badges').innerHTML = `<span class="gen-badge" style="background:${{fcol}}20;color:${{fcol}}">${{d.failure_mode}}</span><span class="gen-badge" style="background:${{sc[d.severity]||fcol}}20;color:${{sc[d.severity]||fcol}}">${{d.severity}}</span><span class="gen-badge" style="background:rgba(255,255,255,0.1);color:var(--muted)">${{d.incident_id}}</span>`; |
| document.getElementById('gen-affected').innerText = `Affected: ${{d.affected_service}}`; |
| document.getElementById('gen-desc').innerText = d.description; |
| let dc = d.difficulty_score < 0.4 ? 'var(--green)' : (d.difficulty_score < 0.7 ? 'var(--yellow)' : 'var(--red)'); |
| let fill = document.getElementById('gen-diff-fill'); |
| fill.style.width = `${{d.difficulty_score*100}}%`; fill.style.background = dc; |
| document.getElementById('gen-result').style.display = 'block'; |
| }}).catch(e => console.error(e)); |
| }}; |
| |
| window.startDualSession = () => {{ |
| fetch('/multi-agent/reset', {{ method: 'POST', headers: {{'Content-Type': 'application/json'}}, body: JSON.stringify({{task_id: "easy", seed: 42}}) }}) |
| .then(r => r.json()).then(d => {{ |
| let info = document.getElementById('dual-session-info'); |
| info.innerHTML = `Session: ${{d.session_id}}<br><br>Agent A (POST): /multi-agent/step/a/${{d.session_id}}<br>Agent B (POST): /multi-agent/step/b/${{d.session_id}}`; |
| info.style.display = 'block'; |
| }}).catch(e => console.error(e)); |
| }}; |
| |
| const loadMetrics = () => {{ |
| fetch('/metrics').then(r => r.json()).then(d => {{ |
| document.getElementById('m-episodes').innerText = d.total_episodes || 0; |
| document.getElementById('m-avg').innerText = (d.overall_avg_score || 0).toFixed(3); |
| if(d.by_task) {{ |
| let tRes = 0, tCnt = 0, best = 0; |
| Object.values(d.by_task).forEach(t => {{ tRes += t.resolution_rate*t.count; tCnt += t.count; if(t.max_score > best) best = t.max_score; }}); |
| document.getElementById('m-res').innerText = (tCnt ? (tRes/tCnt)*100 : 0).toFixed(1) + '%'; |
| document.getElementById('m-best').innerText = best.toFixed(3); |
| }} |
| }}).catch(e => console.error(e)); |
| }}; |
| loadMetrics(); setInterval(loadMetrics, 30000); |
| |
| fetch('/leaderboard').then(r => r.json()).then(d => {{ |
| const body = document.getElementById('lb-body'); |
| if(!d.leaderboard || !d.leaderboard.length) {{ body.innerHTML = '<tr><td colspan="5" style="text-align: center; color: var(--muted);">No episodes yet. Try POST /reset to start.</td></tr>'; return; }} |
| body.innerHTML = d.leaderboard.map(r => {{ |
| let rank = r.rank === 1 ? 'color:#fbbf24;font-weight:bold' : (r.rank === 2 ? 'color:#9ca3af;font-weight:bold' : (r.rank === 3 ? 'color:#cd7f32;font-weight:bold' : '')); |
| let sCol = r.score >= 0.8 ? 'var(--green)' : (r.score >= 0.5 ? 'var(--yellow)' : 'var(--red)'); |
| let status = r.score > 0.5 ? '<span style="color:var(--green)">✅ Resolved</span>' : '<span style="color:var(--red)">❌ Failed</span>'; |
| return `<tr><td style="${{rank}}">#${{r.rank}}</td><td>${{r.task_id}}</td><td class="lb-score" style="color:${{sCol}}">${{r.score.toFixed(4)}}</td><td>${{r.steps}}</td><td>${{status}}</td></tr>`; |
| }}).join(''); |
| }}).catch(e => console.error(e)); |
| |
| window.switchTab = t => {{ |
| document.querySelectorAll('.tab').forEach(el => el.classList.remove('active')); |
| document.querySelectorAll('.code-block').forEach(el => el.classList.remove('active')); |
| document.querySelectorAll('.tab')[t === 'curl' ? 0 : 1].classList.add('active'); |
| document.getElementById('code-'+t).classList.add('active'); |
| }}; |
| |
| window.copyCode = (id, btn) => {{ |
| navigator.clipboard.writeText(document.getElementById(id).innerText).then(() => {{ |
| let old = btn.innerText; btn.innerText = 'Copied ✓'; setTimeout(() => btn.innerText = old, 2000); |
| }}); |
| }}; |
| </script> |
| </body> |
| </html>""" |
| return html |
|
|
|
|
@app.get("/health")
def health():
    """
    Liveness probe.

    Takes no input and always answers with the same static payload, so a
    successful response only shows that the process is up and serving.

    Returns:
        {"status": "ok", "env": "devops-incident-response", "version": "2.0.0"}
    """
    payload = dict(status="ok", env="devops-incident-response", version="2.0.0")
    return payload
|
|
|
|
|
|
@app.get("/generate/preview")
def preview_incident(seed: int = 42):
    """
    Preview a procedurally generated incident without starting an episode.

    Delegates to the module-level IncidentFactory; no episode state is touched.
    The factory is documented as deterministic: the same seed is expected to
    yield the same incident.

    Args:
        seed: Integer seed (default: 42). The documented range is 0–99999;
            this handler itself does not enforce it.

    Returns:
        Whatever the factory produces for the seed — documented as an incident
        object with: failure_mode, severity, affected_service, description,
        noise_alerts, difficulty_score
    """
    incident = _factory.generate(seed)
    return incident
|
|
|
|
@app.post("/reset", response_model=Observation)
async def reset(req: Optional[ResetRequest] = None):
    """
    Start a new episode on the shared environment.

    A missing request body is treated as a default ResetRequest. The task id
    is checked before anything is mutated; on success the in-progress replay
    buffer is emptied and the environment is reset.

    Args:
        task_id: One of easy/medium/hard/bonus/security/database/failover/generated
        seed: Integer seed for reproducibility (optional; passed through to
            the environment as-is)

    Returns:
        Observation with: services, active_alerts, recent_logs,
        service_dependencies, evidence_log, sla_status, available_runbooks

    Raises:
        HTTPException: 400 when task_id is neither in VALID_TASKS nor "generated".
    """
    request = ResetRequest() if req is None else req

    task_is_known = request.task_id in VALID_TASKS or request.task_id == "generated"
    if not task_is_known:
        raise HTTPException(
            status_code=400,
            detail=f"task_id must be one of {VALID_TASKS} or 'generated'. Got: {request.task_id}",
        )

    # A new episode starts a fresh replay recording.
    current_episode_steps.clear()
    observation = await _env.reset(seed=request.seed, task_id=request.task_id)
    return observation
|
|
|
|
@app.post("/step", response_model=StepResult)
async def step(action: Action):
    """
    Take one action in the current episode.

    Must call /reset first. Accepts any of the 14 action types with their
    corresponding parameters. The action is forwarded to the shared
    environment, a compact summary of the step is appended to the in-progress
    replay buffer, and the environment's step result is returned.

    Args:
        action_type: One of diagnose/read_logs/read_metrics/read_runbook/
                     search_logs/restart_service/rollback/scale_up/
                     alert_oncall/acknowledge/noop/block_ip_range/
                     create_index/failover
        service: Target service name (required for most actions)
        root_cause: Diagnosis string (required for diagnose action)
        runbook: Runbook filename (required for read_runbook)
        version: Target version (required for rollback)
        reason: Reason string (required for alert_oncall)
        ip_range: CIDR range (required for block_ip_range)
        table: Table name (required for create_index)
        column: Column name (required for create_index)
        target_region: Target region (required for failover)

    Returns:
        StepResult with: observation (new state), reward (float), done (bool), info (dict)

    Raises:
        HTTPException: 400 if no episode has been started with /reset.

    Side effects:
        On done=True, hands the final state to track_episode() and stores a
        replay under the next numeric id. At most 20 replays are retained;
        the one with the smallest id is evicted first.
    """
    global replay_counter
    if _env._logic is None:
        raise HTTPException(status_code=400, detail="Call /reset before /step")
    res = await _env.step(action)

    # Same serialization rule as the websocket handler: prefer Pydantic v2's
    # model_dump() and fall back to .dict() when running on Pydantic v1.
    action_payload = action.model_dump() if hasattr(action, "model_dump") else action.dict()

    observation = res.observation
    current_episode_steps.append({
        # Zero-based index of this step within the current episode.
        "step": len(current_episode_steps),
        "action": action_payload,
        "reward": res.reward,
        "observation_summary": {
            "failing_services": [
                s.name for s in observation.services if s.status in ("down", "degraded")
            ],
            "alert_count": len(observation.active_alerts),
            "evidence_count": len(observation.evidence_log),
        },
    })

    if res.done:
        final_state = _env.state
        track_episode(final_state)

        replay_id = str(replay_counter)
        replay_store[replay_id] = {
            "episode_id": replay_id,
            "task_id": final_state.task_id,
            "seed": final_state.info.get("seed", 0),
            "final_score": round(final_state.total_reward, 3),
            "resolved": final_state.incident_resolved,
            "total_steps": final_state.step,
            "timestamp": datetime.utcnow().isoformat(),
            # Copy the buffer: the shared list is cleared on the next /reset.
            "steps": list(current_episode_steps),
        }
        replay_counter += 1

        # Ids are increasing integers stored as strings, so the numerically
        # smallest key is the oldest replay.
        if len(replay_store) > 20:
            oldest = min(replay_store.keys(), key=int)
            del replay_store[oldest]

    return res
|
|
|
|
@app.get("/state", response_model=State)
def state():
    """
    Return the full current environment state including ground truth.

    /step only hands back partial observations; this endpoint exposes the
    complete State of the shared environment, which makes it suitable for
    evaluation and debugging rather than for the agent itself.

    Returns:
        State with: all Observation fields plus ground_truth_root_cause,
        ground_truth_fix, incident_resolved, total_reward, action_history,
        episode_id, task_id, step count

    Raises:
        HTTPException: 400 if no episode has been started with /reset.
    """
    if _env._logic is not None:
        return _env.state
    raise HTTPException(status_code=400, detail="Call /reset before /state")
|
|
|
|
@app.get("/tasks")
def list_tasks():
    """
    List all 8 tasks with metadata.

    The catalogue is static: each entry carries id, name, difficulty,
    max_steps and description. The id values are what POST /reset accepts
    as task_id.

    Returns:
        {"tasks": [...]} — list of 8 task objects (7 curated + 1 procedural)
    """
    # Column order here is the key order of every emitted task object.
    fields = ("id", "name", "difficulty", "max_steps", "description")
    rows = [
        (
            "easy", "Single Service OOM", "easy", 15,
            "One service crash-loops from a memory leak. Which service varies by seed.",
        ),
        (
            "medium", "Cascading Multi-Service Failure", "medium", 20,
            "Bad deployment causes connection pool exhaustion cascading through 3 services. "
            "One red-herring alert included.",
        ),
        (
            "hard", "Silent Data Corruption", "hard", 25,
            "No error-rate alerts fire. Signals are WARN-level logs and a business metric anomaly. "
            "Requires rollback + on-call alert for full credit.",
        ),
        (
            "bonus", "Simultaneous Dual Failure", "hard", 25,
            "Two independent failures at once: disk full on log aggregator + "
            "model reload CPU loop on ml-inference. Both must be fixed for full credit.",
        ),
        (
            "security", "Security Incident (DDoS)", "hard", 20,
            "A botnet is performing a DDoS and credential stuffing attack against the login endpoint. "
            "The agent must read access logs, diagnose the attack IP range, block the CIDR, and alert the security team.",
        ),
        (
            "database", "Database Performance Degradation", "hard", 20,
            "A recent migration added a user_segment column to the orders table without an index. "
            "Sequential table scans are spiking DB CPU. Discovered via read_metrics and the slow query log.",
        ),
        (
            "failover", "Multi-Region Failover", "hard", 25,
            "A primary datacenter region (us-east-1) is degraded due to a network partition. "
            "The agent must correctly identify which services support automatic multi-region failover "
            "and which do not. Failing over the wrong services causes severe data inconsistency penalties.",
        ),
        (
            "generated", "Procedural Incident", "variable", 20,
            "A seed-based procedural incident generated by ARIA. Deterministic and reproducible.",
        ),
    ]
    return {"tasks": [dict(zip(fields, row)) for row in rows]}
|
|
|
|
@app.get("/validate")
def validate():
    """
    Self-validation endpoint — smoke-tests every task in VALID_TASKS.

    For each task a fresh environment is built with seed=42, reset, and driven
    with randomly chosen action types (no action parameters) for at most 30
    steps, then graded. A task passes when it runs without raising and its
    graded score lies within [0.0, 1.0]. The action choice is not seeded, so
    scores may differ between calls.

    Validation uses its own environment instances; the shared environment's
    active logic object is saved up front and re-assigned afterwards.

    Returns:
        {
          "validation": "passed" | "failed",
          "summary": "X/Y tasks passed validation",
          "tasks_checked": N,
          "tasks_passed": N,
          "details": {
            "<task_id>": {
              "status": "passed" | "failed" | "error",
              "score": float or null,
              "resolved": bool or null,
              "error": "message"   (present only when status is "error")
            }, ...
          },
          "environment": "devops-incident-response",
          "version": "2.0.0",
          "note": "..."
        }
    """
    import random
    from graders.grader import grade_episode

    results = []
    old_logic = _env._logic
    for task_id in VALID_TASKS:
        try:
            # Imported inside the try so an import failure is reported as a
            # per-task error instead of failing the whole request.
            from env import DevOpsIncidentEnv as LogicClass

            env_logic = LogicClass(task_id=task_id, seed=42)
            env_logic.reset()

            action_types = list(ActionType)
            done = False
            steps = 0
            while not done and steps < 30:
                action = Action(action_type=random.choice(action_types))
                result = env_logic.step(action)
                done = result.done
                steps += 1

            s = env_logic.state()
            score = grade_episode(
                task_id, s.action_history, s.ground_truth_root_cause,
                s.ground_truth_fix, s.incident_resolved, s.total_reward,
            )
            results.append({
                "task_id": task_id,
                "score": round(float(score), 4),
                "in_range": 0.0 <= score <= 1.0,
                "resolved": s.incident_resolved,
                "steps": steps,
                "status": "ok",
            })
        except Exception as e:
            # Deliberately broad: any failure in a task is a validation result,
            # not a server error.
            results.append({"task_id": task_id, "status": "error", "error": str(e)})

    _env._logic = old_logic

    def passed(entry) -> bool:
        return entry.get("status") == "ok" and bool(entry.get("in_range"))

    passed_count = sum(1 for r in results if passed(r))
    total_count = len(results)
    all_ok = passed_count == total_count

    details = {}
    for r in results:
        if passed(r):
            status = "passed"
        elif r.get("status") == "ok":
            # Ran to completion but the score fell outside [0.0, 1.0]; this is
            # counted as not passed above, so it must not be labelled "ok".
            status = "failed"
        else:
            status = r.get("status", "failed")
        entry = {
            "status": status,
            "score": r.get("score"),
            "resolved": r.get("resolved"),
        }
        if "error" in r:
            entry["error"] = r["error"]
        details[r["task_id"]] = entry

    return {
        "validation": "passed" if all_ok else "failed",
        "summary": f"{passed_count}/{total_count} tasks passed validation",
        "tasks_checked": total_count,
        "tasks_passed": passed_count,
        "details": details,
        "environment": "devops-incident-response",
        "version": "2.0.0",
        "note": "Generated task excluded — procedural tasks require fixed parameters"
    }
|
|
|
|
@app.get("/metrics")
def get_metrics():
    """
    Aggregate episode statistics across all completed episodes.

    Everything is derived from the in-memory episode_history on each call, so
    the numbers reset when the server restarts.

    Returns:
        {
          "total_episodes": N,
          "overall_avg_score": 0.XX,
          "by_task": {
            "easy": {"count", "avg_score", "max_score", "min_score",
                     "resolution_rate", "avg_steps_to_diagnosis",
                     "avg_info_gathering_ratio"},
            ...
          },
          "last_updated": "ISO timestamp"
        }
    """
    def stamp():
        return datetime.utcnow().isoformat() + "Z"

    def mean(values, digits):
        return round(sum(values) / len(values), digits)

    episode_total = len(episode_history)
    if episode_total == 0:
        return {
            "total_episodes": 0,
            "by_task": {},
            "overall_avg_score": 0.0,
            "last_updated": stamp()
        }

    # First pass: bucket the raw per-episode numbers by task.
    buckets = {}
    running_total = 0.0
    for episode in episode_history:
        bucket = buckets.setdefault(
            episode["task_id"],
            {"scores": [], "resolved": 0, "steps_to_diag": [], "info_ratios": []},
        )
        bucket["scores"].append(episode["final_score"])
        if episode["incident_resolved"]:
            bucket["resolved"] += 1
        # Episodes that never issued a diagnosis carry None and are left out
        # of the steps-to-diagnosis average.
        if episode["steps_to_diagnosis"] is not None:
            bucket["steps_to_diag"].append(episode["steps_to_diagnosis"])
        bucket["info_ratios"].append(episode["info_gathering_ratio"])
        running_total += episode["final_score"]

    # Second pass: collapse each bucket into its summary row.
    summary = {}
    for task_id, bucket in buckets.items():
        scores = bucket["scores"]
        diag_steps = bucket["steps_to_diag"]
        ratios = bucket["info_ratios"]
        episode_count = len(scores)
        summary[task_id] = {
            "count": episode_count,
            "avg_score": mean(scores, 3),
            "max_score": round(max(scores), 3),
            "min_score": round(min(scores), 3),
            "resolution_rate": round(bucket["resolved"] / episode_count, 3),
            "avg_steps_to_diagnosis": mean(diag_steps, 1) if diag_steps else None,
            "avg_info_gathering_ratio": mean(ratios, 2) if ratios else 0.0
        }

    return {
        "total_episodes": episode_total,
        "by_task": summary,
        "overall_avg_score": round(running_total / episode_total, 3),
        "last_updated": stamp()
    }
|
|
|
|
@app.get("/leaderboard")
def get_leaderboard():
    """
    Top-10 episodes ranked by score (ties broken by fewer steps).

    Ranks are 1-based and assigned after sorting the in-memory episode history.

    Returns:
        {"leaderboard": [{"rank", "task_id", "score", "steps", "timestamp"}, ...]}
    """
    # Ascending on (-score, steps): best score first, then fewest steps.
    ranked = sorted(
        episode_history,
        key=lambda episode: (-episode["final_score"], episode["steps_taken"]),
    )
    board = [
        {
            "rank": position,
            "task_id": episode["task_id"],
            "score": episode["final_score"],
            "steps": episode["steps_taken"],
            "timestamp": episode["timestamp"]
        }
        for position, episode in enumerate(ranked[:10], start=1)
    ]
    return {"leaderboard": board}
|
|
|
|
def _ws_dump_model(model):
    """Serialize a Pydantic model to a plain dict on both Pydantic v1 and v2."""
    return model.model_dump() if hasattr(model, "model_dump") else model.dict()


@app.websocket("/ws")
async def websocket_endpoint(websocket: WebSocket):
    """
    WebSocket interface to a private, per-connection environment.

    Each connection gets its own DevOpsEnvironment, independent of the shared
    one used by the HTTP endpoints. The client sends JSON messages with a
    "command" field:

        {"command": "reset", "task_id": "easy", "seed": 42}
            -> {"type": "observation", "data": {...}}
        {"command": "step", "action": {...Action fields...}}
            -> {"type": "step_result", "data": {observation, reward, done, info}}
        {"command": "state"}
            -> {"type": "state", "data": {...}}

    Problems are reported as {"type": "error", "message": "..."}. A bad action
    or a failing step keeps the connection open; any other unexpected failure
    is reported once (best effort) and the socket is then closed.

    Side effects:
        When a step finishes an episode, the final state is passed to
        track_episode().
    """
    await websocket.accept()

    ws_env = DevOpsEnvironment()

    try:
        while True:
            data = await websocket.receive_json()
            command = data.get("command")

            print(f"WebSocket received: {data}")

            if command == "reset":
                task_id = data.get("task_id", "easy")
                seed = data.get("seed")
                obs = await ws_env.reset(seed=seed, task_id=task_id)
                await websocket.send_json({
                    "type": "observation",
                    "data": _ws_dump_model(obs)
                })

            elif command == "step":
                if ws_env._logic is None:
                    await websocket.send_json({
                        "type": "error",
                        "message": "Call reset before step"
                    })
                    continue

                action_data = data.get("action", {})
                try:
                    action = Action(**action_data)
                    step_result = await ws_env.step(action)
                    if step_result.done:
                        track_episode(ws_env.state)
                    await websocket.send_json({
                        "type": "step_result",
                        "data": {
                            "observation": _ws_dump_model(step_result.observation),
                            "reward": step_result.reward,
                            "done": step_result.done,
                            "info": step_result.info
                        }
                    })
                except Exception as e:
                    # Invalid actions and step failures are reported to the
                    # client without dropping the connection.
                    await websocket.send_json({
                        "type": "error",
                        "message": str(e)
                    })

            elif command == "state":
                if ws_env._logic is None:
                    await websocket.send_json({
                        "type": "error",
                        "message": "Call reset before state"
                    })
                    continue

                state = ws_env.state
                await websocket.send_json({
                    "type": "state",
                    "data": _ws_dump_model(state)
                })

            else:
                await websocket.send_json({
                    "type": "error",
                    "message": f"Unrecognized command: {command}"
                })

    except WebSocketDisconnect:
        # The peer is already gone; there is nothing left to send or close.
        print("WebSocket client disconnected")
    except Exception as e:
        print(f"WebSocket error: {e}")
        # Best effort only: the connection may already be unusable. Catch
        # Exception rather than using a bare except so task cancellation and
        # interpreter shutdown still propagate.
        try:
            await websocket.send_json({
                "type": "error",
                "message": str(e)
            })
        except Exception:
            pass
        try:
            await websocket.close()
        except Exception:
            pass
|
|
|
|
| |
|
|
@app.post("/multi-agent/reset")
def multi_agent_reset(body: MultiAgentResetRequest):
    """
    Start a new dual-agent session with split observability.

    Both agents work on the same incident but receive different views:
    - Agent A (Observer): sees logs and active alerts only
    - Agent B (Responder): sees metrics and service dependencies only

    The new session is registered in the in-memory session table under its
    session_id.

    Args:
        task_id: Task to run (same valid values as POST /reset)
        seed: Deterministic seed (default: 42)

    Returns:
        session_id, agent roles, step instructions, and initial observations
        for both agents.
    """
    session = DualAgentSession(task_id=body.task_id, seed=body.seed)
    multi_agent_sessions[session.session_id] = session

    # "{session_id}" below is a literal placeholder for the client to fill in.
    how_to = {
        "agent_a": 'POST /multi-agent/step/a/{session_id} body: {"finding": "your observation"}',
        "agent_b": "POST /multi-agent/step/b/{session_id} body: Action JSON (same schema as POST /step)",
    }

    response = {
        "session_id": session.session_id,
        "task_id": body.task_id,
        "seed": body.seed,
        "agent_a_role": "observer — sees logs and alerts only",
        "agent_b_role": "responder — sees metrics and dependencies only",
        "instructions": how_to,
    }
    response["observation_a"] = session.get_observation_a()
    response["observation_b"] = session.get_observation_b()
    return response
|
|
|
|
@app.post("/multi-agent/step/a/{session_id}")
def multi_agent_step_a(session_id: str, body: AgentAStepRequest):
    """
    Agent A (Observer) shares a finding with Agent B.

    The finding text is handed to the session's step_a(); per the API
    description it is added to the shared findings log that Agent B consults
    before acting.

    Args:
        session_id: Session ID from POST /multi-agent/reset
        finding: Text description of what Agent A observed

    Returns:
        Updated findings log and current Observer-view observation.

    Raises:
        HTTPException: 404 when the session id is unknown.
    """
    session = multi_agent_sessions.get(session_id)
    if session:
        return session.step_a(body.finding)
    raise HTTPException(status_code=404, detail="Session not found")
|
|
|
|
@app.post("/multi-agent/step/b/{session_id}")
def multi_agent_step_b(session_id: str, body: Action):
    """
    Agent B (Responder) takes an action in the environment.

    Agent B works from metrics and service dependencies plus whatever Agent A
    has shared. The action is passed to the session's step_b(); its schema is
    identical to POST /step.

    Args:
        session_id: Session ID from POST /multi-agent/reset
        body: Action object (same schema as POST /step)

    Returns:
        StepResult with reward, done flag, and updated Responder-view observation.

    Raises:
        HTTPException: 404 when the session id is unknown.
    """
    session = multi_agent_sessions.get(session_id)
    if session:
        return session.step_b(body)
    raise HTTPException(status_code=404, detail="Session not found")
|
|
|
|
@app.get("/multi-agent/state/{session_id}")
def multi_agent_state(session_id: str):
    """
    Full state for a dual-agent session including both agent perspectives.

    Returns:
        Session state with findings_log, step count, done flag,
        and both Observer and Responder observations.

    Raises:
        HTTPException: 404 when the session id is unknown.
    """
    session = multi_agent_sessions.get(session_id)
    if session:
        return session.get_state()
    raise HTTPException(status_code=404, detail="Session not found")
|
|
|
|
@app.get("/multi-agent/sessions")
def list_multi_agent_sessions():
    """
    List every dual-agent session held in memory.

    Returns:
        One entry per session with session_id, task_id, current step,
        done flag, and number of findings shared by Agent A.
    """
    overview = []
    for session in multi_agent_sessions.values():
        overview.append({
            "session_id": session.session_id,
            "task_id": session.task_id,
            "step": session.step_count,
            "done": session.done,
            "findings_count": len(session.findings_log),
        })
    return overview
|
|
|
|
| |
|
|
@app.get("/curriculum/status")
def get_curriculum_status():
    """
    Agent mastery levels across all tasks.

    Passes through the curriculum engine's status report, documented as:
    rolling average score, mastery level (0–3), whether scaffolding is
    needed, and a diagnostic hint per task.

    Returns:
        {"tasks": {"easy": {"rolling_avg", "mastery_level", "scaffold_needed", "hint"}, ...},
         "recommended_task": "easy"}
    """
    status_report = curriculum_engine.get_status()
    return status_report
|
|
|
|
@app.get("/curriculum/next")
def get_next_curriculum_task():
    """
    Recommended next task for adaptive training.

    Asks the curriculum engine which task to run next; the selection rule is
    described as the lowest rolling average score among non-mastered tasks.
    Training loops can call this between episodes to follow the curriculum.

    Returns:
        {"recommended_task": "medium", "reasoning": "..."}
    """
    next_task = curriculum_engine.get_next_curriculum_task()
    return dict(
        recommended_task=next_task,
        reasoning="Lowest rolling average among non-mastered tasks.",
    )
|
|
|
|
@app.post("/curriculum/record")
def record_curriculum_episode(req: CurriculumRecordRequest):
    """
    Record an episode result to update the curriculum engine.

    Intended to be called by training loops after each episode so the
    engine's per-task status stays current.

    Args:
        task_id: Task that was just run
        score: Episode score (float, typically 0.0–1.0)

    Returns:
        {"recorded": true, "new_status": {...}} — updated task status

    Raises:
        HTTPException: 400 when the engine rejects the record with ValueError.
    """
    try:
        curriculum_engine.record_episode(req.task_id, req.score)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))

    # Re-read the status after recording so the caller sees the updated entry.
    status_by_task = curriculum_engine.get_status()["tasks"]
    return {"recorded": True, "new_status": status_by_task[req.task_id]}
|
|
|
|
@app.get("/curriculum/hint/{task_id}")
def get_curriculum_hint(task_id: str):
    """
    Get a diagnostic hint and scaffold flag for a specific task.

    Combines three curriculum-engine lookups for the task: its hint text,
    whether scaffolding is recommended, and the current mastery level.

    Args:
        task_id: One of easy/medium/hard/bonus/security/database/failover

    Returns:
        {"task_id", "hint", "scaffold_needed": bool, "mastery_level": 0–3}

    Raises:
        HTTPException: 400 when any engine lookup raises ValueError.
    """
    try:
        hint = curriculum_engine.get_hint(task_id)
        scaffold = curriculum_engine.should_scaffold(task_id)
        mastery = curriculum_engine.get_mastery(task_id)
    except ValueError as exc:
        raise HTTPException(status_code=400, detail=str(exc))
    return {
        "task_id": task_id,
        "hint": hint,
        "scaffold_needed": scaffold,
        "mastery_level": mastery,
    }
|
|
|
|
| |
|
|
@app.get("/replays")
def list_replays():
    """List available episode replays, newest first (summary fields only)."""
    summaries = [
        {
            "episode_id": replay["episode_id"],
            "task_id": replay["task_id"],
            "score": replay["final_score"],
            "resolved": replay["resolved"],
            "total_steps": replay["total_steps"],
            "timestamp": replay["timestamp"],
        }
        for replay in replay_store.values()
    ]
    # Episode ids are numeric strings; compare them as integers.
    return sorted(summaries, key=lambda item: int(item["episode_id"]), reverse=True)
|
|
|
|
@app.get("/replay/{episode_id}")
def get_replay(episode_id: str):
    """Return the stored replay record for an episode, or 404 if the id is unknown."""
    if episode_id in replay_store:
        return replay_store[episode_id]
    raise HTTPException(status_code=404, detail="Episode not found")
|
|
|
|
@app.get("/replay/{episode_id}/html", response_class=HTMLResponse)
def get_replay_html(episode_id: str):
    """
    Return an HTML timeline visualization of an episode replay.

    Renders one card per recorded step (action, reward, running reward sum and
    the observation summary) followed by a resolution banner.

    Action fields recorded from API clients (action type, service, root
    cause) are HTML-escaped before being placed in the page; inserted raw,
    they would allow markup injection into the replay view.

    Args:
        episode_id: Replay id as listed by GET /replays.

    Returns:
        HTMLResponse containing the rendered page.

    Raises:
        HTTPException: 404 if no replay is stored under episode_id.
    """
    # Function-scope import so the file-level import block stays untouched.
    from html import escape

    if episode_id not in replay_store:
        raise HTTPException(status_code=404, detail="Episode not found")
    r = replay_store[episode_id]

    def reward_color(reward: float) -> str:
        """Accent colour for a step: green above 0.2, amber if positive, red otherwise."""
        if reward > 0.2:
            return "#00ff88"
        if reward > 0:
            return "#ffaa00"
        return "#ff3355"

    def reward_bg(reward: float) -> str:
        """Translucent card background matching reward_color()."""
        if reward > 0.2:
            return "rgba(0,255,136,0.08)"
        if reward > 0:
            return "rgba(255,170,0,0.08)"
        return "rgba(255,51,85,0.08)"

    def text(value) -> str:
        """Render a value as HTML-safe text."""
        return escape(str(value))

    cards = []
    running_score = 0.0
    for s in r["steps"]:
        running_score += s["reward"]
        act = s["action"]
        raw_type = act.get("action_type", "unknown")
        # The recorded action may hold an enum member; use its value so the
        # label and the "diagnose" check do not depend on enum formatting.
        act_type = getattr(raw_type, "value", raw_type)
        service = act.get("service") or ""
        rc = act.get("root_cause") or ""
        obs = s["observation_summary"]
        col = reward_color(s["reward"])
        bg = reward_bg(s["reward"])
        reward_sign = "+" if s["reward"] > 0 else ""
        failing = ", ".join(obs["failing_services"]) if obs["failing_services"] else "none"
        rc_html = ""
        if rc and act_type == "diagnose":
            rc_html = "<div class='step-rc'>→ " + text(rc) + "</div>"
        cards.append(f"""
<div class="step-card" style="border-left-color:{col}; background:{bg}">
<div class="step-header">
<span class="step-num mono">STEP {s['step'] + 1}</span>
<span class="step-action mono">{text(act_type)} {text(service)}</span>
<span class="step-reward mono" style="color:{col}">{reward_sign}{s['reward']:.3f}</span>
<span class="step-running mono" style="color:#4d9fff">∑ {running_score:.3f}</span>
</div>
{rc_html}
<div class="step-obs mono">
failing: {text(failing)} |
alerts: {obs['alert_count']} |
evidence: {obs['evidence_count']}
</div>
</div>""")
    steps_html = "".join(cards)

    resolved_color = "#00ff88" if r["resolved"] else "#ff3355"
    resolved_text = "INCIDENT RESOLVED" if r["resolved"] else "INCIDENT ESCALATED"
    resolved_icon = "✓" if r["resolved"] else "✗"
    banner_bg = "rgba(0,255,136,0.05)" if r["resolved"] else "rgba(255,51,85,0.05)"
    score_col = "#00ff88" if r["final_score"] >= 0.7 else "#ffaa00" if r["final_score"] >= 0.4 else "#ff3355"
    timeline_html = steps_html or '<div style="color:var(--text-dim);padding:24px">No steps recorded.</div>'
    # Already known to be a replay_store key at this point; escaped anyway so
    # the template never embeds a raw path parameter.
    safe_id = text(episode_id)

    page = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>ARIA Replay #{safe_id}</title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Share+Tech+Mono&display=swap" rel="stylesheet">
<style>
:root {{--bg:#060914;--surface:#0a0f1e;--surface2:#0d1628;--border:#1a2744;--blue:#4d9fff;--cyan:#00d4ff;--green:#00ff88;--yellow:#ffaa00;--red:#ff3355;--text:#c8d8f0;--text-dim:#4a6080;}}
*{{box-sizing:border-box;margin:0;padding:0;}}
body{{background:var(--bg);color:var(--text);font-family:'Inter',sans-serif;padding:24px;max-width:900px;margin:0 auto;}}
.mono{{font-family:'Share Tech Mono',monospace;}}
.header{{background:var(--surface);border:1px solid var(--border);padding:24px;margin-bottom:24px;}}
.header-title{{font-size:22px;color:var(--blue);font-weight:700;margin-bottom:8px;}}
.header-meta{{display:flex;gap:24px;flex-wrap:wrap;}}
.meta-item{{font-size:13px;color:var(--text-dim);}}
.meta-value{{color:var(--text);font-weight:600;}}
.step-card{{border-left:3px solid;padding:12px 16px;margin-bottom:8px;border-radius:0 4px 4px 0;}}
.step-header{{display:flex;align-items:center;gap:16px;flex-wrap:wrap;margin-bottom:4px;}}
.step-num{{font-size:11px;color:var(--text-dim);min-width:60px;}}
.step-action{{font-size:13px;color:var(--cyan);flex:1;}}
.step-reward{{font-size:14px;font-weight:700;}}
.step-running{{font-size:11px;}}
.step-rc{{font-size:12px;color:var(--yellow);padding:4px 0;}}
.step-obs{{font-size:11px;color:var(--text-dim);margin-top:4px;}}
.resolution-banner{{text-align:center;padding:32px;margin-top:24px;border:2px solid;font-size:24px;font-weight:700;}}
.nav-links{{display:flex;gap:16px;margin-bottom:24px;font-size:13px;}}
.nav-links a{{color:var(--blue);text-decoration:none;}}
.nav-links a:hover{{color:var(--cyan);}}
h2{{font-size:14px;color:var(--text-dim);text-transform:uppercase;letter-spacing:0.1em;margin-bottom:16px;}}
</style>
</head>
<body>
<div class="nav-links">
<a href="/replays/html">← All Replays</a>
<a href="/live">Live NOC</a>
<a href="/progress">Progress</a>
<a href="/docs">API Docs</a>
</div>
<div class="header">
<div class="header-title">▣ Episode Replay #{safe_id}</div>
<div class="header-meta">
<div class="meta-item">TASK <span class="meta-value mono">{r['task_id'].upper()}</span></div>
<div class="meta-item">SEED <span class="meta-value mono">{r['seed']}</span></div>
<div class="meta-item">SCORE <span class="meta-value mono" style="color:{score_col}">{r['final_score']:.3f}</span></div>
<div class="meta-item">STEPS <span class="meta-value mono">{r['total_steps']}</span></div>
<div class="meta-item">TIME <span class="meta-value mono">{r['timestamp'][:19]}</span></div>
</div>
</div>
<h2>Step Timeline</h2>
{timeline_html}
<div class="resolution-banner mono" style="color:{resolved_color};border-color:{resolved_color};background:{banner_bg}">
{resolved_icon} {resolved_text} | Final Score: {r['final_score']:.3f}
</div>
</body>
</html>"""
    return HTMLResponse(page)
|
|
|
|
@app.get("/replays/html", response_class=HTMLResponse)
def list_replays_html():
    """HTML index of all stored replays, newest first, each linking to its timeline page."""

    def render_row(replay) -> str:
        """Build the table row for one replay."""
        if replay["final_score"] >= 0.7:
            score_col = "#00ff88"
        elif replay["final_score"] >= 0.4:
            score_col = "#ffaa00"
        else:
            score_col = "#ff3355"
        if replay["resolved"]:
            resolved_icon, resolved_col = "✓", "#00ff88"
        else:
            resolved_icon, resolved_col = "✗", "#ff3355"
        return f"""<tr>
<td class="mono"><a href="/replay/{replay['episode_id']}/html" style="color:#4d9fff">#{replay['episode_id']}</a></td>
<td class="mono">{replay['task_id'].upper()}</td>
<td class="mono" style="color:{score_col}">{replay['final_score']:.3f}</td>
<td class="mono" style="color:{resolved_col}">{resolved_icon}</td>
<td class="mono">{replay['total_steps']}</td>
<td class="mono" style="color:#4a6080">{replay['timestamp'][:19]}</td>
</tr>"""

    # Episode ids are numeric strings; order them as integers, newest first.
    newest_first = sorted(
        replay_store.values(),
        key=lambda replay: int(replay["episode_id"]),
        reverse=True,
    )
    rows = "".join(render_row(replay) for replay in newest_first)
    if not rows:
        rows = '<tr><td colspan="6" style="text-align:center;color:#4a6080;padding:32px">No replays yet. Complete an episode to generate one.</td></tr>'

    html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>ARIA Episode Replays</title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Share+Tech+Mono&display=swap" rel="stylesheet">
<style>
:root{{--bg:#060914;--surface:#0a0f1e;--border:#1a2744;--blue:#4d9fff;--text:#c8d8f0;--text-dim:#4a6080;}}
*{{box-sizing:border-box;margin:0;padding:0;}}
body{{background:var(--bg);color:var(--text);font-family:'Inter',sans-serif;padding:32px;max-width:900px;margin:0 auto;}}
.mono{{font-family:'Share Tech Mono',monospace;}}
h1{{font-size:22px;color:var(--blue);margin-bottom:24px;}}
table{{width:100%;border-collapse:collapse;}}
th{{text-align:left;font-size:11px;color:var(--text-dim);text-transform:uppercase;padding:8px 12px;border-bottom:1px solid var(--border);}}
td{{padding:10px 12px;border-bottom:1px solid rgba(26,39,68,0.5);font-size:13px;}}
tr:hover td{{background:rgba(77,159,255,0.04);}}
.nav-links{{display:flex;gap:16px;margin-bottom:24px;font-size:13px;}}
.nav-links a{{color:var(--blue);text-decoration:none;}}
</style>
</head>
<body>
<div class="nav-links"><a href="/live">Live NOC</a><a href="/progress">Progress</a><a href="/challenge">Challenge</a></div>
<h1>▣ Episode Replays</h1>
<table>
<thead><tr><th>ID</th><th>Task</th><th>Score</th><th>Resolved</th><th>Steps</th><th>Time</th></tr></thead>
<tbody>{rows}</tbody>
</table>
</body>
</html>"""
    return HTMLResponse(html)
|
|
|
|
| |
|
|
@app.get("/challenge", response_class=HTMLResponse)
async def challenge_page():
    """Serve the human-playable incident response challenge page.

    The page is a static single-file UI. Its script drives the environment
    through the HTTP API: ``POST /reset`` (task ``easy`` plus a seed) starts
    an incident, ``POST /step`` submits one action, and ``GET /metrics``
    supplies the agent's average ``easy`` score for the end-of-episode
    comparison.

    Returns:
        HTMLResponse: the complete challenge page.
    """
    # NOTE: this is a plain (non-raw, non-f) string, so CSS/JS braces are
    # written once and ``\\uXXXX`` reaches the browser as the JS escape
    # ``\uXXXX``.
    #
    # The script escapes every server-provided value before it is placed in
    # innerHTML, and treats network errors / non-2xx replies as failures
    # instead of counting them as played steps.
    html = """<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>ARIA Human Challenge</title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Share+Tech+Mono&display=swap" rel="stylesheet">
<style>
:root {
--bg:#060914;--surface:#0a0f1e;--surface2:#0d1628;--border:#1a2744;
--border-bright:#2a4080;--blue:#4d9fff;--cyan:#00d4ff;--green:#00ff88;
--yellow:#ffaa00;--red:#ff3355;--purple:#9d4edd;--text:#c8d8f0;--text-dim:#4a6080;
}
*{box-sizing:border-box;margin:0;padding:0;}
body{background:var(--bg);color:var(--text);font-family:'Inter',sans-serif;min-height:100vh;}
.mono{font-family:'Share Tech Mono',monospace;}
.nav{background:#000;border-bottom:1px solid var(--border);padding:0 24px;height:48px;display:flex;align-items:center;justify-content:space-between;}
.nav-logo{font-size:16px;color:var(--blue);font-weight:700;}
.nav-links{display:flex;gap:16px;font-size:12px;}
.nav-links a{color:var(--text-dim);text-decoration:none;}
.nav-links a:hover{color:var(--blue);}
.warning{background:rgba(255,170,0,0.1);border-bottom:1px solid var(--yellow);padding:8px 24px;font-size:12px;color:var(--yellow);text-align:center;}
.main{display:grid;grid-template-columns:1fr 380px;height:calc(100vh - 80px);}
.left{padding:20px;overflow-y:auto;border-right:1px solid var(--border);}
.right{padding:20px;overflow-y:auto;background:var(--surface);}
.panel{background:var(--surface2);border:1px solid var(--border);padding:16px;margin-bottom:16px;}
.panel-title{font-size:10px;color:var(--text-dim);text-transform:uppercase;letter-spacing:0.1em;margin-bottom:12px;}
table{width:100%;border-collapse:collapse;font-size:12px;}
th{text-align:left;color:var(--text-dim);font-size:10px;text-transform:uppercase;padding:4px 8px;border-bottom:1px solid var(--border);}
td{padding:6px 8px;font-family:'Share Tech Mono',monospace;font-size:12px;border-bottom:1px solid rgba(26,39,68,0.5);}
.status-down{color:var(--red);}
.status-degraded{color:var(--yellow);}
.status-healthy{color:var(--green);}
.alert-item{padding:6px 10px;border-left:3px solid;margin-bottom:6px;font-size:12px;}
.sev-critical{border-color:var(--red);background:rgba(255,51,85,0.08);}
.sev-high{border-color:#ff6600;background:rgba(255,102,0,0.08);}
.sev-warning{border-color:var(--yellow);background:rgba(255,170,0,0.08);}
.sev-info{border-color:var(--blue);background:rgba(77,159,255,0.08);}
.evidence-item{font-size:11px;color:var(--text-dim);padding:4px 0;border-bottom:1px solid rgba(26,39,68,0.3);}
.form-row{margin-bottom:12px;}
label{display:block;font-size:11px;color:var(--text-dim);text-transform:uppercase;margin-bottom:4px;}
select,input[type=text]{width:100%;background:var(--surface);border:1px solid var(--border-bright);color:var(--text);font-family:'Share Tech Mono',monospace;font-size:13px;padding:8px 10px;outline:none;}
select:focus,input[type=text]:focus{border-color:var(--blue);}
.btn-action{width:100%;background:rgba(77,159,255,0.15);border:1px solid var(--blue);color:var(--blue);font-family:'Share Tech Mono',monospace;font-size:14px;padding:12px;cursor:pointer;transition:0.2s;margin-top:8px;text-transform:uppercase;letter-spacing:0.1em;}
.btn-action:hover{background:var(--blue);color:#000;}
.btn-action:disabled{opacity:0.4;cursor:not-allowed;}
.btn-reset{background:rgba(255,51,85,0.1);border:1px solid var(--red);color:var(--red);}
.btn-reset:hover{background:var(--red);color:#000;}
.score-header{display:flex;justify-content:space-between;align-items:center;margin-bottom:16px;}
.big-score{font-size:36px;font-weight:700;font-family:'Share Tech Mono',monospace;}
.step-info{font-size:12px;color:var(--text-dim);}
.reward-flash{padding:8px 12px;margin-bottom:8px;font-size:14px;font-family:'Share Tech Mono',monospace;border-left:3px solid;}
.result-log{max-height:150px;overflow-y:auto;font-size:11px;color:var(--text-dim);font-family:'Share Tech Mono',monospace;background:var(--surface);padding:8px;margin-top:8px;white-space:pre-wrap;}
.comparison{display:grid;grid-template-columns:1fr 1fr;gap:16px;margin-top:16px;}
.cmp-card{background:var(--surface2);border:1px solid var(--border);padding:20px;text-align:center;}
.cmp-label{font-size:11px;color:var(--text-dim);text-transform:uppercase;margin-bottom:8px;}
.cmp-score{font-size:32px;font-weight:700;font-family:'Share Tech Mono',monospace;}
.cmp-steps{font-size:12px;color:var(--text-dim);margin-top:4px;}
.done-banner{text-align:center;padding:24px;border:2px solid;font-size:20px;font-weight:700;margin-bottom:16px;}
::-webkit-scrollbar{width:4px;}::-webkit-scrollbar-thumb{background:var(--border-bright);}
</style>
</head>
<body>
<div class="nav">
<div class="nav-logo mono">▣ ARIA HUMAN CHALLENGE</div>
<div class="nav-links">
<a href="/live">Live NOC</a>
<a href="/progress">Progress</a>
<a href="/replays/html">Replays</a>
<a href="/docs">API</a>
</div>
</div>
<div class="warning">⚠ Note: This resets the shared environment — agent runs will be interrupted while you play</div>
<div class="main">
<div class="left">
<div class="panel" id="task-desc-panel" style="display:none">
<div class="panel-title">► Task Description</div>
<div id="task-desc" style="font-size:13px;color:#8ab4d4"></div>
</div>
<div class="panel">
<div class="panel-title">► Infrastructure Status</div>
<table>
<thead><tr><th>Service</th><th>Status</th><th>CPU%</th><th>Mem%</th><th>Err/s</th><th>P99ms</th></tr></thead>
<tbody id="svc-tbody"><tr><td colspan="6" style="color:var(--text-dim);padding:16px">Loading...</td></tr></tbody>
</table>
</div>
<div class="panel">
<div class="panel-title">► Active Alerts <span id="alert-count" style="color:var(--red)"></span></div>
<div id="alerts-container"><div style="color:var(--text-dim);font-size:12px">No alerts</div></div>
</div>
<div class="panel">
<div class="panel-title">► Evidence Gathered <span id="evidence-count" style="color:var(--blue)"></span></div>
<div id="evidence-container"><div style="color:var(--text-dim);font-size:12px">No evidence yet</div></div>
</div>
<div id="done-panel" style="display:none">
<div class="done-banner mono" id="done-banner"></div>
<div class="comparison">
<div class="cmp-card">
<div class="cmp-label">Your Score</div>
<div class="cmp-score" id="human-score">—</div>
<div class="cmp-steps" id="human-steps"></div>
</div>
<div class="cmp-card">
<div class="cmp-label">Trained Agent (Llama-3.1-8B)</div>
<div class="cmp-score" id="agent-score">—</div>
<div class="cmp-steps">from /metrics avg</div>
</div>
</div>
<button class="btn-action" onclick="startGame(true)" style="margin-top:16px;width:100%">▶ Play Again (Random Seed)</button>
</div>
</div>
<div class="right">
<div class="score-header">
<div>
<div class="panel-title">Episode Progress</div>
<div class="big-score mono" id="score-display" style="color:var(--text-dim)">0.000</div>
</div>
<div style="text-align:right">
<div class="step-info mono" id="step-display">Step 0 / 15</div>
<div class="step-info mono" id="task-display" style="color:var(--blue)">EASY</div>
</div>
</div>
<div class="panel">
<div class="panel-title">► Take Action</div>
<div class="form-row">
<label>Action Type</label>
<select id="action-type" onchange="onActionTypeChange()">
<option value="read_logs">read_logs — Read full log output</option>
<option value="search_logs">search_logs — Search logs for keyword</option>
<option value="read_metrics">read_metrics — Read CPU/mem/latency</option>
<option value="read_runbook">read_runbook — Open a runbook</option>
<option value="diagnose">diagnose — Submit root cause diagnosis</option>
<option value="restart_service">restart_service — Restart a crashed service</option>
<option value="rollback">rollback — Roll back a deployment</option>
<option value="scale_up">scale_up — Scale up replicas</option>
<option value="alert_oncall">alert_oncall — Page on-call engineer</option>
<option value="acknowledge">acknowledge — Acknowledge an alert</option>
<option value="block_ip_range">block_ip_range — Block a CIDR range</option>
<option value="create_index">create_index — Create a DB index</option>
<option value="failover">failover — Regional failover</option>
<option value="noop">noop — No action</option>
</select>
</div>
<div class="form-row" id="svc-row">
<label>Service Name</label>
<input type="text" id="service-input" placeholder="e.g. payment-service">
</div>
<div class="form-row" id="rc-row" style="display:none">
<label>Root Cause (for diagnose)</label>
<input type="text" id="rc-input" placeholder="e.g. Memory leak in payment-service causing OOM">
</div>
<div class="form-row" id="query-row" style="display:none">
<label>Search Query</label>
<input type="text" id="query-input" placeholder="e.g. OOM">
</div>
<div class="form-row" id="rb-row" style="display:none">
<label>Runbook File</label>
<input type="text" id="rb-input" placeholder="e.g. memory_leak.md">
</div>
<div class="form-row" id="ip-row" style="display:none">
<label>IP Range</label>
<input type="text" id="ip-input" placeholder="e.g. 185.220.0.0/16">
</div>
<div class="form-row" id="reason-row" style="display:none">
<label>Reason (for alert_oncall)</label>
<input type="text" id="reason-input" placeholder="e.g. Data corruption detected">
</div>
<button class="btn-action" id="action-btn" onclick="takeAction()">▶ TAKE ACTION</button>
<div id="reward-flash" style="display:none" class="reward-flash"></div>
<div id="result-log" class="result-log" style="display:none"></div>
</div>
<div class="panel">
<div class="panel-title">► Reward History</div>
<div id="reward-history" class="mono" style="font-size:11px;max-height:120px;overflow-y:auto"></div>
</div>
<button class="btn-action btn-reset" onclick="startGame(true)" style="margin-top:8px">↻ New Incident (Random Seed)</button>
</div>
</div>
<script>
let isDone = false;
let currentSeed = 1234;
let stepCount = 0;
let totalScore = 0;
let maxSteps = 15;

// Escape a value for safe interpolation into innerHTML markup.
function esc(v) {
  return String(v == null ? '' : v)
    .replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;').replace(/'/g, '&#39;');
}

// Surface a request failure in the result log instead of failing silently.
function showError(msg) {
  const rl = document.getElementById('result-log');
  rl.style.display = 'block';
  rl.textContent = msg;
}

async function startGame(randomSeed) {
  currentSeed = randomSeed ? Math.floor(Math.random() * 99999) : 1234;
  stepCount = 0; totalScore = 0; isDone = false;
  document.getElementById('done-panel').style.display = 'none';
  document.getElementById('reward-history').innerHTML = '';
  document.getElementById('result-log').style.display = 'none';
  document.getElementById('reward-flash').style.display = 'none';
  const btn = document.getElementById('action-btn');
  btn.disabled = true;
  let obs;
  try {
    const res = await fetch('/reset', {
      method: 'POST', headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({task_id: 'easy', seed: currentSeed})
    });
    if (!res.ok) throw new Error('HTTP ' + res.status);
    obs = await res.json();
  } catch (e) {
    showError('Could not start incident: ' + e.message);
    return;
  }
  btn.disabled = false;
  maxSteps = obs.max_steps || 15;
  renderObs(obs);
  document.getElementById('step-display').textContent = 'Step 0 / ' + maxSteps;
  document.getElementById('task-display').textContent = (obs.task_id || 'easy').toUpperCase() + ' \\u00b7 SEED ' + currentSeed;
  if (obs.task_description) {
    document.getElementById('task-desc').textContent = obs.task_description;
    document.getElementById('task-desc-panel').style.display = 'block';
  }
  updateScoreDisplay(0);
}

function onActionTypeChange() {
  const at = document.getElementById('action-type').value;
  document.getElementById('rc-row').style.display = at === 'diagnose' ? 'block' : 'none';
  document.getElementById('query-row').style.display = at === 'search_logs' ? 'block' : 'none';
  document.getElementById('rb-row').style.display = at === 'read_runbook' ? 'block' : 'none';
  document.getElementById('ip-row').style.display = at === 'block_ip_range' ? 'block' : 'none';
  document.getElementById('reason-row').style.display = at === 'alert_oncall' ? 'block' : 'none';
  document.getElementById('svc-row').style.display = ['read_runbook','block_ip_range','noop'].includes(at) ? 'none' : 'block';
}

async function takeAction() {
  if (isDone) return;
  const at = document.getElementById('action-type').value;
  const svc = document.getElementById('service-input').value.trim() || null;
  const body = {action_type: at};
  if (svc) body.service = svc;
  if (at === 'diagnose') body.root_cause = document.getElementById('rc-input').value.trim();
  if (at === 'search_logs') body.query = document.getElementById('query-input').value.trim();
  if (at === 'read_runbook') body.runbook = document.getElementById('rb-input').value.trim();
  if (at === 'block_ip_range') body.ip_range = document.getElementById('ip-input').value.trim();
  if (at === 'alert_oncall') body.reason = document.getElementById('reason-input').value.trim();
  const btn = document.getElementById('action-btn');
  btn.disabled = true;
  let data;
  try {
    const res = await fetch('/step', {
      method: 'POST', headers: {'Content-Type': 'application/json'},
      body: JSON.stringify(body)
    });
    if (!res.ok) throw new Error('HTTP ' + res.status);
    data = await res.json();
  } catch (e) {
    // A failed request is not a played step: leave score and step count alone.
    showError('Action failed: ' + e.message);
    return;
  } finally {
    btn.disabled = isDone;
  }
  const reward = data.reward || 0;
  totalScore += reward; stepCount += 1;
  renderObs(data.observation);
  updateScoreDisplay(totalScore);
  document.getElementById('step-display').textContent = 'Step ' + stepCount + ' / ' + maxSteps;
  showReward(reward, data.observation && data.observation.last_action_result);
  appendRewardHistory(stepCount, at, reward);
  if (data.done) { isDone = true; btn.disabled = true; showDonePanel(totalScore, stepCount, data.observation && data.observation.incident_resolved); }
}

function renderObs(obs) {
  if (!obs) return;
  if (obs.services) {
    document.getElementById('svc-tbody').innerHTML = obs.services.map(s => {
      const sc = s.status === 'down' ? 'status-down' : s.status === 'degraded' ? 'status-degraded' : 'status-healthy';
      return '<tr><td>' + esc(s.name) + '</td><td class="' + sc + '">' + esc(String(s.status || '').toUpperCase()) + '</td><td>' + (s.cpu_percent||0).toFixed(1) + '</td><td>' + (s.memory_percent||0).toFixed(1) + '</td><td>' + (s.error_rate||0).toFixed(3) + '</td><td>' + (s.latency_p99_ms||0).toFixed(0) + '</td></tr>';
    }).join('');
  }
  const ac = document.getElementById('alerts-container');
  if (obs.active_alerts && obs.active_alerts.length > 0) {
    document.getElementById('alert-count').textContent = '(' + obs.active_alerts.length + ')';
    ac.innerHTML = obs.active_alerts.map(a => {
      const sev = (a.severity||'INFO').toLowerCase();
      const cls = sev === 'critical' ? 'sev-critical' : sev === 'high' ? 'sev-high' : sev === 'warning' ? 'sev-warning' : 'sev-info';
      return '<div class="alert-item ' + cls + ' mono"><strong>' + esc(a.severity || 'INFO') + '</strong> [' + esc(a.service) + '] ' + esc(a.message) + '</div>';
    }).join('');
  } else { document.getElementById('alert-count').textContent = ''; ac.innerHTML = '<div style="color:var(--text-dim);font-size:12px">No active alerts</div>'; }
  const ec = document.getElementById('evidence-container');
  if (obs.evidence_log && obs.evidence_log.length > 0) {
    document.getElementById('evidence-count').textContent = '(' + obs.evidence_log.length + ')';
    ec.innerHTML = obs.evidence_log.slice(-5).map(e => '<div class="evidence-item">► [step ' + esc(e.step) + '] ' + esc(e.source) + ': ' + esc(e.summary) + '</div>').join('');
  } else { document.getElementById('evidence-count').textContent = ''; ec.innerHTML = '<div style="color:var(--text-dim);font-size:12px">No evidence yet — use read_logs, read_metrics, or search_logs</div>'; }
}

function updateScoreDisplay(score) {
  const el = document.getElementById('score-display');
  el.textContent = score.toFixed(3);
  el.style.color = score >= 0.7 ? 'var(--green)' : score >= 0.4 ? 'var(--yellow)' : score > 0 ? 'var(--red)' : 'var(--text-dim)';
}

function showReward(reward, resultText) {
  const el = document.getElementById('reward-flash');
  const sign = reward > 0 ? '+' : '';
  el.style.display = 'block'; el.style.borderColor = reward > 0 ? 'var(--green)' : 'var(--red)';
  el.style.color = reward > 0 ? 'var(--green)' : 'var(--red)';
  el.style.background = reward > 0 ? 'rgba(0,255,136,0.08)' : 'rgba(255,51,85,0.08)';
  el.textContent = 'Reward: ' + sign + reward.toFixed(3);
  if (resultText) { const rl = document.getElementById('result-log'); rl.style.display = 'block'; rl.textContent = String(resultText).substring(0, 500); }
}

function appendRewardHistory(step, action, reward) {
  const el = document.getElementById('reward-history');
  const sign = reward > 0 ? '+' : '';
  const col = reward > 0 ? 'var(--green)' : 'var(--red)';
  el.innerHTML += '<div style="color:' + col + '">S' + step + ' ' + esc(action) + ' → ' + sign + reward.toFixed(3) + '</div>';
  el.scrollTop = el.scrollHeight;
}

async function showDonePanel(score, steps, resolved) {
  const banner = document.getElementById('done-banner');
  banner.style.color = resolved ? 'var(--green)' : 'var(--red)';
  banner.style.borderColor = resolved ? 'var(--green)' : 'var(--red)';
  banner.style.background = resolved ? 'rgba(0,255,136,0.05)' : 'rgba(255,51,85,0.05)';
  banner.textContent = (resolved ? '\\u2713 INCIDENT RESOLVED' : '\\u2717 INCIDENT ESCALATED') + ' \\u2014 Score: ' + score.toFixed(3);
  document.getElementById('human-score').textContent = score.toFixed(3);
  document.getElementById('human-score').style.color = score >= 0.7 ? 'var(--green)' : score >= 0.4 ? 'var(--yellow)' : 'var(--red)';
  document.getElementById('human-steps').textContent = steps + ' steps taken';
  try {
    const m = await fetch('/metrics'); const mdata = await m.json();
    const easyData = mdata.by_task && mdata.by_task['easy'];
    if (easyData) { document.getElementById('agent-score').textContent = easyData.avg_score.toFixed(3); document.getElementById('agent-score').style.color = easyData.avg_score >= 0.7 ? 'var(--green)' : easyData.avg_score >= 0.4 ? 'var(--yellow)' : 'var(--red)'; }
    else { document.getElementById('agent-score').textContent = 'N/A'; }
  } catch(e) { document.getElementById('agent-score').textContent = 'N/A'; }
  document.getElementById('done-panel').style.display = 'block';
}

startGame(false);
</script>
</body>
</html>"""
    return HTMLResponse(html)
|
|
|
|
| |
|
|
@app.get("/progress", response_class=HTMLResponse)
def progress_page():
    """Render the training-progress and live-performance dashboard.

    The page has three sections: per-task live average score next to a fixed
    random baseline, a static table of recorded training results, and a
    canvas timeline of the most recent (up to 20) episode scores taken from
    ``episode_history``. Live numbers come from ``get_metrics()``.

    Returns:
        HTMLResponse: the complete dashboard page.
    """
    # Local import, matching the file's existing function-scope import style.
    import json

    metrics = get_metrics()
    by_task = metrics.get("by_task", {})

    BASELINES = {
        "easy": 0.05, "medium": 0.03, "hard": 0.01,
        "bonus": 0.01, "security": 0.01, "database": 0.01, "failover": 0.01,
    }
    # (task, baseline score, fine-tuned score) rows shown in section 2.
    TRAINING_RESULTS = [
        ("easy", 0.320, 0.685),
        ("medium", 0.050, 0.378),
        ("hard", 0.190, 0.869),
        ("bonus", 0.152, 0.682),
    ]
    ALL_TASKS = ["easy", "medium", "hard", "bonus", "security", "database", "failover"]

    def bar_width(value: float) -> float:
        """Convert a 0-1 score into a CSS width percentage clamped to 0-100."""
        return round(min(max(value, 0.0), 1.0) * 100, 1)

    def bar_row(task: str, score: float, baseline: float) -> str:
        """Return the HTML for one task's RANDOM-vs-ARIA pair of bars."""
        pct = bar_width(score)
        bpct = bar_width(baseline)
        col = "#00ff88" if score >= 0.7 else "#ffaa00" if score >= 0.4 else "#ff3355"
        return f"""
<div class="bar-group">
<div class="bar-label mono">{task.upper()}</div>
<div class="bar-tracks">
<div class="bar-track">
<span class="bar-tag">RANDOM</span>
<div class="bar-bg"><div class="bar-fill" style="width:{bpct}%;background:#4a6080"></div></div>
<span class="bar-val mono" style="color:#4a6080">{baseline:.3f}</span>
</div>
<div class="bar-track">
<span class="bar-tag">ARIA</span>
<div class="bar-bg"><div class="bar-fill" style="width:{pct}%;background:{col}"></div></div>
<span class="bar-val mono" style="color:{col}">{score:.3f}</span>
</div>
</div>
</div>"""

    bars = "".join(
        bar_row(task, by_task.get(task, {}).get("avg_score", 0.0), BASELINES.get(task, 0.01))
        for task in ALL_TASKS
    )

    # ``:+.3f`` emits the sign itself, so a regression would read "-0.100"
    # rather than "+-0.100".
    training_rows = "".join(
        f"""<tr>
<td class="mono">{task.upper()}</td>
<td class="mono" style="color:#4a6080">{base:.3f}</td>
<td class="mono" style="color:#4d9fff">{finetuned:.3f}</td>
<td class="mono" style="color:#00ff88">{finetuned - base:+.3f} ✓</td>
</tr>"""
        for task, base, finetuned in TRAINING_RESULTS
    )

    recent = list(episode_history)[-20:]
    # Serialize with json.dumps over plain floats: str(list) uses Python repr,
    # which is not valid JavaScript for nan/inf or non-builtin float types.
    recent_scores_js = json.dumps([round(float(r["final_score"]), 3) for r in recent])
    total_eps = metrics.get("total_episodes", 0)
    overall = metrics.get("overall_avg_score", 0.0)
    overall_col = "#00ff88" if overall >= 0.7 else "#ffaa00" if overall >= 0.4 else "#4d9fff"

    # NOTE: the page script re-fetches /metrics every 30s but only updates the
    # two live stat cards; the timeline is drawn once from SCORES at load.
    # Literal CSS/JS braces are doubled because this is an f-string.
    html = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>ARIA Progress</title>
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@400;600;700&family=Share+Tech+Mono&display=swap" rel="stylesheet">
<style>
:root{{--bg:#060914;--surface:#0a0f1e;--surface2:#0d1628;--border:#1a2744;--blue:#4d9fff;--cyan:#00d4ff;--green:#00ff88;--yellow:#ffaa00;--red:#ff3355;--text:#c8d8f0;--text-dim:#4a6080;}}
*{{box-sizing:border-box;margin:0;padding:0;}}
body{{background:var(--bg);color:var(--text);font-family:'Inter',sans-serif;}}
.mono{{font-family:'Share Tech Mono',monospace;}}
.nav{{background:#000;border-bottom:1px solid var(--border);padding:0 32px;height:48px;display:flex;align-items:center;justify-content:space-between;}}
.nav-logo{{font-size:16px;color:var(--blue);font-weight:700;}}
.nav-links{{display:flex;gap:16px;font-size:12px;}}
.nav-links a{{color:var(--text-dim);text-decoration:none;}}
.nav-links a:hover{{color:var(--blue);}}
.page{{max-width:960px;margin:0 auto;padding:32px;}}
h1{{font-size:22px;color:var(--blue);margin-bottom:8px;}}
.subtitle{{font-size:13px;color:var(--text-dim);margin-bottom:40px;}}
h2{{font-size:13px;color:var(--text-dim);text-transform:uppercase;letter-spacing:0.1em;margin-bottom:20px;padding-bottom:8px;border-bottom:1px solid var(--border);}}
section{{margin-bottom:48px;}}
.stats-row{{display:flex;gap:24px;margin-bottom:32px;flex-wrap:wrap;}}
.stat-card{{background:var(--surface);border:1px solid var(--border);padding:20px 24px;flex:1;min-width:140px;}}
.stat-label{{font-size:10px;color:var(--text-dim);text-transform:uppercase;margin-bottom:6px;}}
.stat-value{{font-size:28px;font-weight:700;font-family:'Share Tech Mono',monospace;}}
.bar-group{{margin-bottom:20px;}}
.bar-label{{font-size:11px;color:var(--text-dim);text-transform:uppercase;margin-bottom:8px;letter-spacing:0.08em;}}
.bar-tracks{{display:flex;flex-direction:column;gap:6px;}}
.bar-track{{display:flex;align-items:center;gap:10px;}}
.bar-tag{{font-size:9px;color:var(--text-dim);width:50px;text-align:right;font-family:'Share Tech Mono',monospace;}}
.bar-bg{{flex:1;height:12px;background:var(--surface);border:1px solid var(--border);overflow:hidden;}}
.bar-fill{{height:100%;transition:width 0.6s ease;}}
.bar-val{{font-size:12px;width:50px;text-align:right;}}
table{{width:100%;border-collapse:collapse;}}
th{{text-align:left;font-size:11px;color:var(--text-dim);text-transform:uppercase;padding:8px 12px;border-bottom:1px solid var(--border);}}
td{{padding:10px 12px;border-bottom:1px solid rgba(26,39,68,0.5);}}
.canvas-wrap{{background:var(--surface);border:1px solid var(--border);padding:16px;}}
canvas{{display:block;width:100%;max-width:880px;}}
.canvas-label{{font-size:11px;color:var(--text-dim);margin-top:8px;text-align:center;font-family:'Share Tech Mono',monospace;}}
</style>
</head>
<body>
<div class="nav">
<div class="nav-logo mono">▣ ARIA PROGRESS</div>
<div class="nav-links">
<a href="/live">Live NOC</a>
<a href="/challenge">Challenge</a>
<a href="/replays/html">Replays</a>
<a href="/docs">API Docs</a>
</div>
</div>
<div class="page">
<h1>Training Progress & Live Performance</h1>
<div class="subtitle">Llama-3.1-8B fine-tuned with GRPO on 7 task types · 14 actions · Dense reward shaping</div>
<div class="stats-row">
<div class="stat-card"><div class="stat-label">Live Episodes</div><div class="stat-value" id="live-eps" style="color:var(--blue)">{total_eps}</div></div>
<div class="stat-card"><div class="stat-label">Avg Live Score</div><div class="stat-value" id="live-avg" style="color:{overall_col}">{overall:.3f}</div></div>
<div class="stat-card"><div class="stat-label">Training Episodes</div><div class="stat-value" style="color:var(--blue)">160</div></div>
<div class="stat-card"><div class="stat-label">Model</div><div class="stat-value" style="font-size:14px;color:var(--cyan)">8B GRPO</div></div>
</div>
<section>
<h2>Section 1 — Live Task Performance vs Random Baseline</h2>
{bars}
</section>
<section>
<h2>Section 2 — Training Improvement (Llama-3.1-8B, GRPO)</h2>
<table>
<thead><tr><th>Task</th><th>Baseline</th><th>Fine-tuned</th><th>Improvement</th></tr></thead>
<tbody>{training_rows}</tbody>
</table>
</section>
<section>
<h2>Section 3 — Live Episode Score Timeline</h2>
<div class="canvas-wrap">
<canvas id="timeline-canvas" width="880" height="120"></canvas>
<div class="canvas-label mono" id="canvas-label">Last 20 episodes · updates every 30s</div>
</div>
</section>
</div>
<script>
const SCORES = {recent_scores_js};
function drawTimeline(scores) {{
  const canvas = document.getElementById('timeline-canvas');
  canvas.width = canvas.parentElement.clientWidth - 32;
  const ctx = canvas.getContext('2d');
  const W = canvas.width, H = 120;
  ctx.clearRect(0, 0, W, H);
  if (scores.length === 0) {{
    ctx.fillStyle = '#4a6080'; ctx.font = '13px Share Tech Mono, monospace';
    ctx.textAlign = 'center'; ctx.fillText('No episodes yet', W/2, H/2); return;
  }}
  const pad = 30, plotW = W - pad * 2, plotH = H - pad * 2;
  ctx.strokeStyle = '#1a2744'; ctx.lineWidth = 1;
  [0, 0.25, 0.5, 0.75, 1.0].forEach(v => {{
    const y = pad + plotH - v * plotH;
    ctx.beginPath(); ctx.moveTo(pad, y); ctx.lineTo(W - pad, y); ctx.stroke();
    ctx.fillStyle = '#4a6080'; ctx.font = '10px Share Tech Mono, monospace';
    ctx.textAlign = 'right'; ctx.fillText(v.toFixed(2), pad - 4, y + 3);
  }});
  const n = scores.length;
  const pts = scores.map((s, i) => [pad + (i / Math.max(n-1, 1)) * plotW, pad + plotH - Math.min(Math.max(s, 0), 1) * plotH]);
  ctx.strokeStyle = '#4d9fff'; ctx.lineWidth = 2; ctx.lineJoin = 'round';
  ctx.beginPath(); pts.forEach(([x,y], i) => i === 0 ? ctx.moveTo(x,y) : ctx.lineTo(x,y)); ctx.stroke();
  pts.forEach(([x,y], i) => {{
    const s = scores[i];
    ctx.fillStyle = s >= 0.7 ? '#00ff88' : s >= 0.4 ? '#ffaa00' : '#ff3355';
    ctx.beginPath(); ctx.arc(x, y, 4, 0, Math.PI*2); ctx.fill();
  }});
}}
drawTimeline(SCORES);
async function refresh() {{
  try {{
    const res = await fetch('/metrics'); const data = await res.json();
    document.getElementById('live-eps').textContent = data.total_episodes || 0;
    const avg = data.overall_avg_score || 0;
    document.getElementById('live-avg').textContent = avg.toFixed(3);
  }} catch(e) {{}}
}}
setInterval(refresh, 30000);
</script>
</body>
</html>"""
    return HTMLResponse(html)
|
|