# openenv.yaml — OpenEnv Specification for Adaptive Alert Triage
# Matches the actual implementation in src/adaptive_alert_triage/
# Validated against: env.py, models.py, tasks/easy.py, tasks/medium.py, tasks/hard.py

name: "AdaptiveAlertTriage"
version: "0.1.0"
description: |
  A partially-observable RL environment that simulates real-time IT alert triage
  and incident response. An agent receives a continuous stream of system alerts
  and must decide — for each one — whether to INVESTIGATE, IGNORE, ESCALATE, or
  DELAY, under time pressure, resource constraints, and the risk of cascading
  failures from unhandled correlated alerts.

  This environment models a task performed daily by DevOps and SOC engineers:
  triaging noisy monitoring signals while preventing real incidents from
  escalating into outages.

authors:
  - name: "Scalar Hackathon Team"
    email: "team@scalar.com"

license: "MIT"

tags:
  - reinforcement-learning
  - openenv
  - alert-triage
  - incident-response
  - partial-observability
  - resource-constraints
  - cascading-failures

# ── Environment class ─────────────────────────────────────────────────────────
environment:
  module: "adaptive_alert_triage.env"
  class: "AdaptiveAlertTriageEnv"
  # Constructor accepts: task_id ("easy"|"medium"|"hard"), seed (int, optional)

# ── OpenEnv interface ──────────────────────────────────────────────────────────
# All three methods are implemented in AdaptiveAlertTriageEnv
interface:
  reset:
    signature: "reset(seed=None, options=None) -> Observation"
    description: |
      Resets the episode. Generates an initial batch of synthetic alerts using
      the task-specific correlation_probability. Returns an Observation with
      alerts stripped of hidden fields (true_severity, is_correlated).
  step:
    signature: "step(action: Action) -> (Observation, Reward, done: bool, info: dict)"
    description: |
      Processes one Action, updates the alert queue, checks for failures,
      generates new alerts, and returns the next observation. The info dict
      always contains: processed_alerts, correlation_groups, failures_this_step,
      system_failure, action_correct, cumulative_reward, step, failures_count.
  state:
    signature: "state() -> EpisodeState"
    description: |
      Returns the full internal EpisodeState, including the hidden ground truth
      (true_severities, correlation_groups, false_positives, pending_failures).
      For evaluation and replay only — never exposed to the agent during training.

# ── Configuration ──────────────────────────────────────────────────────────────
config:
  actions:
    - "INVESTIGATE"
    - "IGNORE"
    - "ESCALATE"
    - "DELAY"

# ── Observation space ──────────────────────────────────────────────────────────
observation:
  type: "Pydantic BaseModel (Observation)"
  fields:
    alerts:
      type: "List[Alert]"
      description: "Active alerts awaiting triage. Each Alert has id, visible_severity, confidence, alert_type, age."
      hidden_fields: "true_severity, is_correlated — stripped before being returned to the agent"
    system_load:
      type: "float [0.0, 1.0]"
      description: "Current infrastructure utilisation"
    queue_length:
      type: "int >= 0"
      description: "Number of active alerts in the queue"
    time_remaining:
      type: "int >= 0"
      description: "Steps left before the episode ends"
    episode_step:
      type: "int >= 0"
      description: "Current step index (0-based)"
    resource_budget:
      type: "Optional[int]"
      description: "Remaining INVESTIGATE actions this step. None = unconstrained (easy task)."
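# Example: a minimal episode loop against the interface, observation, and action
# models documented in this file (illustrative sketch only, not part of the
# OpenEnv schema; the `adaptive_alert_triage.models` import path for Action and
# the naive severity-threshold policy are assumptions, not part of this spec).
#
#   from adaptive_alert_triage.env import AdaptiveAlertTriageEnv
#   from adaptive_alert_triage.models import Action  # assumed location of the Action model
#
#   env = AdaptiveAlertTriageEnv(task_id="easy", seed=0)
#   obs = env.reset(seed=0)
#   done, info = False, {}
#   while not done and obs.alerts:
#       alert = obs.alerts[0]  # triage the first alert in the queue
#       kind = "INVESTIGATE" if alert.visible_severity >= 0.75 else "IGNORE"
#       action = Action(alert_id=alert.id, action_type=kind, metadata={})
#       obs, reward, done, info = env.step(action)
#   print(info.get("cumulative_reward"), info.get("failures_count"))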
# ── Action space ───────────────────────────────────────────────────────────────
action:
  type: "Pydantic BaseModel (Action)"
  fields:
    alert_id:
      type: "str"
      description: "ID of the target alert — must match an ID in current observation.alerts"
    action_type:
      type: "Literal['INVESTIGATE','IGNORE','ESCALATE','DELAY']"
      description: |
        INVESTIGATE — allocates resources to diagnose; counts against resource_budget
        IGNORE — dismisses alert as noise (best for false positives)
        ESCALATE — routes to specialist team (no budget cost)
        DELAY — keeps alert in queue for re-evaluation next step
    metadata:
      type: "Dict[str, Any]"
      description: "Optional context bag (e.g. reasoning from LLM agents)"

# ── Reward ─────────────────────────────────────────────────────────────────────
reward:
  type: "Pydantic BaseModel (Reward)"
  description: "Dense, shaped reward decomposed into named components"
  schedule:
    critical_handled: "+10.0 — INVESTIGATE or ESCALATE on critical alert (true_severity >= 0.75)"
    failure_prevented: "+5.0 — correlated alert handled (prevents cascade)"
    false_positive_ignored: "+3.0 — IGNORE on a false positive"
    medium_handled: "+2.0 * true_severity — INVESTIGATE on medium alert"
    unnecessary_invest: "-2.0 — INVESTIGATE on a false positive"
    missed_critical: "-8.0 — IGNORE on a critical alert"
    risky_delay: "-2.4 — DELAY on a critical alert"
  task_multipliers: "easy=1.0, medium=1.1, hard=1.2"
  range: [-8.0, 15.0]  # per step before task multiplier; cascade bonus included in max
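# Example: how the schedule above composes into a per-step scalar (illustrative
# sketch only; the env actually returns a Reward model with named components,
# and the branch precedence used here is an assumption, not part of this spec).
#
#   def shaped_reward(action_type, true_severity, is_false_positive,
#                     is_correlated, task="easy"):
#       multiplier = {"easy": 1.0, "medium": 1.1, "hard": 1.2}[task]
#       critical = true_severity >= 0.75
#       handled = action_type in ("INVESTIGATE", "ESCALATE")
#       r = 0.0
#       if handled and critical:
#           r += 10.0                        # critical_handled
#       elif action_type == "INVESTIGATE" and is_false_positive:
#           r -= 2.0                         # unnecessary_invest
#       elif action_type == "INVESTIGATE":
#           r += 2.0 * true_severity         # medium_handled
#       elif action_type == "IGNORE" and is_false_positive:
#           r += 3.0                         # false_positive_ignored
#       elif action_type == "IGNORE" and critical:
#           r -= 8.0                         # missed_critical
#       elif action_type == "DELAY" and critical:
#           r -= 2.4                         # risky_delay
#       if handled and is_correlated:
#           r += 5.0                         # failure_prevented (cascade bonus)
#       return r * multiplier
#
# The documented per-step range [-8.0, 15.0] is pre-multiplier: the maximum is
# critical_handled + failure_prevented = 15.0, the minimum is missed_critical = -8.0.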
# ── Tasks ──────────────────────────────────────────────────────────────────────
tasks:
  - id: "easy"
    name: "Basic Alert Prioritisation"
    description: |
      Classify and respond to independent alerts with no resource constraint.
      The agent must learn to INVESTIGATE/ESCALATE critical alerts
      (true_severity >= 0.75) and IGNORE false positives (< 0.30).
      DELAY is always wrong in this task.
    difficulty: 1
    max_steps: 30
    failure_threshold: 5
    max_investigations_per_step: null  # unconstrained
    correlation_probability: 0.10
    success_threshold: 0.70  # correct_actions / total_actions >= 0.70
    grader: "tasks.easy.EasyTaskGrader"
    grading_formula: "score = (correct_actions / total_actions) * 0.98 + 0.01"

  - id: "medium"
    name: "Resource-Constrained Triage"
    description: |
      Triage under a hard per-step investigation budget of K=3. The agent must
      prioritise high-value investigations over false positives and use ESCALATE
      when the budget is exhausted. The grader penalises wasting budget on false
      positives and missing critical alerts.
    difficulty: 2
    max_steps: 40
    failure_threshold: 5
    max_investigations_per_step: 3
    correlation_probability: 0.20
    success_threshold: 0.55
    grader: "tasks.medium.MediumTaskGrader"
    grading_formula: |
      raw = resolved_score / max_possible_score
      fp_penalty = 0.30 * (unnecessary_investigations / total_investigations)
      miss_penalty = 0.20 * (critical_missed / max(critical_total, 1))
      penalised = raw - fp_penalty - miss_penalty
      score = (penalised * 0.6) + 0.35

  - id: "hard"
    name: "Cascading Failure Prevention"
    description: |
      Detect and stop correlated alert chains before they cascade into system
      failures. Chains arrive sequentially: the trigger at step N, a child at
      step N+k if the trigger was missed. The agent cannot observe is_correlated
      and must infer correlation from visible patterns. The stability multiplier
      drops sharply with each system failure.
    difficulty: 3
    max_steps: 50
    failure_threshold: 3  # stricter than easy/medium
    max_investigations_per_step: 3
    correlation_probability: 0.40
    success_threshold: 0.50
    grader: "tasks.hard.HardTaskGrader"
    grading_formula: |
      chain_score = Σ stop_reward(position) × severity_weight
      stability = {0 failures: 1.0, 1: 0.80, 2: 0.60, 3: 0.30, 4+: 0.00}
      raw = (chain_score / max_possible) * stability
      score = (raw * 0.98) + 0.01
# (A worked Python sketch of the medium and hard grading formulas appears at the
#  end of this file.)

# ── Evaluation metrics (produced by graders) ───────────────────────────────────
metrics:
  - name: "correct_action_rate"
    description: "Fraction of actions matching the optimal ground-truth policy"
    range: [0.0, 1.0]
    tasks: ["easy"]
  - name: "resolved_score"
    description: "Weighted resolution quality normalised by the maximum possible"
    range: [0.0, 1.0]
    tasks: ["medium"]
  - name: "resource_efficiency"
    description: "Ratio of productive investigations to total INVESTIGATE actions"
    range: [0.0, 1.0]
    tasks: ["medium"]
  - name: "chain_detection_rate"
    description: "Fraction of correlated chains stopped before a system failure"
    range: [0.0, 1.0]
    tasks: ["hard"]
  - name: "system_failures"
    description: "Number of system failures triggered (lower is better)"
    range: [0, 10]
    tasks: ["hard"]
  - name: "stability_score"
    description: "Stability multiplier based on the failure count"
    range: [0.0, 1.0]
    tasks: ["hard"]

# ── Baseline agents ─────────────────────────────────────────────────────────────
baselines:
  - name: "rule_based"
    module: "agents.baseline"
    class: "RuleBasedAgent"
    type: "threshold"
    description: "Simple severity/confidence thresholding policy"
    scores:
      easy: 0.539
      medium: 0.618
      hard: 0.355
  - name: "improved_rule_based"
    module: "agents.baseline"
    class: "ImprovedRuleBasedAgent"
    type: "threshold"
    description: "Rule-based with age urgency, system-load awareness, and a resource-budget guard"
    scores:
      easy: 0.250
      medium: 0.355
      hard: 0.068
  - name: "ppo_lstm"
    module: "rl_agent"
    class: "PPOTrainer"
    type: "rl"
    description: "PPO with LSTM memory — pure numpy, trained for 300+ episodes per task"
    scores:
      easy: 0.665
      medium: 0.931
      hard: 0.325
  - name: "llm_openai"
    module: "inference"
    class: "LLMTriageAgent"
    type: "llm"
    description: "OpenAI-compatible LLM agent configured via API_BASE_URL / MODEL_NAME / HF_TOKEN"

# ── Infra / Docker ──────────────────────────────────────────────────────────────
docker:
  image: "adaptive-alert-triage:latest"
  build: "docker build -t adaptive-alert-triage ."
  run: "docker run -p 8000:8000 adaptive-alert-triage"
  entrypoint: "uvicorn src.adaptive_alert_triage.server:app --host 0.0.0.0 --port 8000"

# ── Setup and validation ────────────────────────────────────────────────────────
setup:
  python: ">=3.9"
  install: "pip install -e ."
  pythonpath: "src"
  test: "pytest tests/"
  validate: "openenv validate"
  baseline: "python inference.py --n 3"

api_version: "1.0"
framework: "openenv"

documentation:
  readme: "README.md"
  baseline: "inference.py"
  agents: "agents/"
  tasks: "tasks/"
  api_docs: "src/adaptive_alert_triage/"
  server: "src/adaptive_alert_triage/server.py"
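# Example: the medium and hard grading formulas from the tasks section, written
# out as Python (illustrative sketch only; the function and argument names are
# hypothetical, and the actual graders live in tasks/medium.py and tasks/hard.py).
#
#   def medium_task_score(resolved_score, max_possible_score,
#                         unnecessary_investigations, total_investigations,
#                         critical_missed, critical_total):
#       raw = resolved_score / max_possible_score
#       # division guard added here; the spec formula assumes >= 1 investigation
#       fp_penalty = 0.30 * (unnecessary_investigations / max(total_investigations, 1))
#       miss_penalty = 0.20 * (critical_missed / max(critical_total, 1))
#       penalised = raw - fp_penalty - miss_penalty
#       return (penalised * 0.6) + 0.35
#
#   def hard_task_score(chain_score, max_possible, failures):
#       stability_table = {0: 1.0, 1: 0.80, 2: 0.60, 3: 0.30}
#       stability = stability_table.get(failures, 0.00)  # 4+ failures -> 0.00
#       raw = (chain_score / max_possible) * stability
#       return (raw * 0.98) + 0.01
#
# For instance, hard_task_score(chain_score=6.0, max_possible=10.0, failures=1)
# gives (0.6 * 0.80) * 0.98 + 0.01 = 0.4804, just below the hard task's
# success_threshold of 0.50.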