# NOTE(review): the next four lines are web-page scrape residue (repo header,
# avatar caption, commit message, commit SHA) — commented out so the file parses as YAML.
# scaler-openenv / openenv.yaml
# Hacktrix-121's picture
# grader fixes
# c18a9d1
# openenv.yaml — OpenEnv Specification for Adaptive Alert Triage
# Matches the actual implementation in src/adaptive_alert_triage/
# Validated against: env.py, models.py, tasks/easy.py, tasks/medium.py, tasks/hard.py

name: "AdaptiveAlertTriage"
version: "0.1.0"

# Literal block scalar: line breaks are preserved verbatim.
description: |
  A partially-observable RL environment that simulates real-time IT alert triage
  and incident response. An agent receives a continuous stream of system alerts
  and must decide — for each one — whether to INVESTIGATE, IGNORE, ESCALATE, or
  DELAY, under time pressure, resource constraints, and the risk of cascading
  failures from unhandled correlated alerts.
  This environment models a task performed daily by DevOps and SOC engineers:
  triaging noisy monitoring signals while preventing real incidents from
  escalating into outages.

authors:
  - name: "Scalar Hackathon Team"
    email: "team@scalar.com"

license: "MIT"

tags:
  - reinforcement-learning
  - openenv
  - alert-triage
  - incident-response
  - partial-observability
  - resource-constraints
  - cascading-failures
# ── Environment class ─────────────────────────────────────────────────────────
environment:
  module: "adaptive_alert_triage.env"
  class: "AdaptiveAlertTriageEnv"
  # Constructor accepts: task_id ("easy"|"medium"|"hard"), seed (int, optional)
# ── OpenEnv interface ─────────────────────────────────────────────────────────
# All three methods are implemented in AdaptiveAlertTriageEnv
interface:
  reset:
    signature: "reset(seed=None, options=None) -> Observation"
    description: |
      Resets the episode. Generates an initial batch of synthetic alerts
      using the task-specific correlation_probability. Returns an Observation
      with alerts stripped of hidden fields (true_severity, is_correlated).
  step:
    signature: "step(action: Action) -> (Observation, Reward, done: bool, info: dict)"
    description: |
      Processes one Action, updates alert queue, checks for failures, generates
      new alerts, and returns the next observation. The info dict always
      contains: processed_alerts, correlation_groups, failures_this_step,
      system_failure, action_correct, cumulative_reward, step, failures_count.
  state:
    signature: "state() -> EpisodeState"
    description: |
      Returns the full internal EpisodeState including hidden ground-truth
      (true_severities, correlation_groups, false_positives, pending_failures).
      For evaluation and replay only — never exposed to the agent during training.
# ── Configuration ─────────────────────────────────────────────────────────────
config:
  # Discrete action vocabulary accepted by Action.action_type.
  actions:
    - "INVESTIGATE"
    - "IGNORE"
    - "ESCALATE"
    - "DELAY"
# ── Observation space ─────────────────────────────────────────────────────────
observation:
  type: "Pydantic BaseModel (Observation)"
  fields:
    alerts:
      type: "List[Alert]"
      description: "Active alerts awaiting triage. Each Alert has id, visible_severity, confidence, alert_type, age."
      hidden_fields: "true_severity, is_correlated — stripped before returned to agent"
    system_load:
      type: "float [0.0, 1.0]"
      description: "Current infrastructure utilisation"
    queue_length:
      type: "int >= 0"
      description: "Number of active alerts in queue"
    time_remaining:
      type: "int >= 0"
      description: "Steps left before episode ends"
    episode_step:
      type: "int >= 0"
      description: "Current step index (0-based)"
    resource_budget:
      type: "Optional[int]"
      description: "Remaining INVESTIGATE actions this step. None = unconstrained (easy task)."
# ── Action space ──────────────────────────────────────────────────────────────
action:
  type: "Pydantic BaseModel (Action)"
  fields:
    alert_id:
      type: "str"
      description: "ID of the target alert — must match an ID in current observation.alerts"
    action_type:
      type: "Literal['INVESTIGATE','IGNORE','ESCALATE','DELAY']"
      description: |
        INVESTIGATE — allocates resources to diagnose; counts against resource_budget
        IGNORE — dismisses alert as noise (best for false positives)
        ESCALATE — routes to specialist team (no budget cost)
        DELAY — keeps alert in queue for re-evaluation next step
    metadata:
      type: "Dict[str, Any]"
      description: "Optional context bag (e.g. reasoning from LLM agents)"
# ── Reward ────────────────────────────────────────────────────────────────────
reward:
  type: "Pydantic BaseModel (Reward)"
  description: "Dense, shaped reward decomposed into named components"
  schedule:
    critical_handled: "+10.0 — INVESTIGATE or ESCALATE on critical alert (true_severity >= 0.75)"
    failure_prevented: "+5.0 — correlated alert handled (prevents cascade)"
    false_positive_ignored: "+3.0 — IGNORE on a false positive"
    medium_handled: "+2.0 * true_severity — INVESTIGATE on medium alert"
    unnecessary_invest: "-2.0 — INVESTIGATE on a false positive"
    missed_critical: "-8.0 — IGNORE on a critical alert"
    risky_delay: "-2.4 — DELAY on a critical alert"
  task_multipliers: "easy=1.0, medium=1.1, hard=1.2"
  range: [-8.0, 15.0]  # per step before task multiplier; cascade bonus included in max
# ── Tasks ─────────────────────────────────────────────────────────────────────
tasks:
  - id: "easy"
    name: "Basic Alert Prioritisation"
    description: |
      Classify and respond to independent alerts with no resource constraint.
      The agent must learn to INVESTIGATE/ESCALATE critical alerts
      (true_severity >= 0.75) and IGNORE false positives (< 0.30).
      DELAY is always wrong in this task.
    difficulty: 1
    max_steps: 30
    failure_threshold: 5
    max_investigations_per_step: null  # unconstrained
    correlation_probability: 0.10
    success_threshold: 0.70  # correct_actions / total_actions >= 0.70
    grader: "tasks.easy.EasyTaskGrader"
    grading_formula: "score = (correct_actions / total_actions) * 0.98 + 0.01"

  - id: "medium"
    name: "Resource-Constrained Triage"
    description: |
      Triage under a hard per-step investigation budget of K=3.
      Agent must prioritise high-value investigations over false positives
      and use ESCALATE when budget is exhausted. Grader penalises wasting
      budget on FPs and missing critical alerts.
    difficulty: 2
    max_steps: 40
    failure_threshold: 5
    max_investigations_per_step: 3
    correlation_probability: 0.20
    success_threshold: 0.55
    grader: "tasks.medium.MediumTaskGrader"
    grading_formula: |
      raw = resolved_score / max_possible_score
      fp_penalty = 0.30 * (unnecessary_investigations / total_investigations)
      miss_penalty = 0.20 * (critical_missed / max(critical_total, 1))
      penalised = raw - fp_penalty - miss_penalty
      score = (penalised * 0.6) + 0.35

  - id: "hard"
    name: "Cascading Failure Prevention"
    description: |
      Detect and stop correlated alert chains before they cascade into
      system failures. Chains arrive sequentially: trigger at step N,
      child at step N+k if trigger was missed. Agent cannot observe
      is_correlated — must infer from visible patterns. Stability
      multiplier drops sharply with each system failure.
    difficulty: 3
    max_steps: 50
    failure_threshold: 3  # stricter than easy/medium
    max_investigations_per_step: 3
    correlation_probability: 0.40
    success_threshold: 0.50
    grader: "tasks.hard.HardTaskGrader"
    grading_formula: |
      chain_score = Σ stop_reward(position) × severity_weight
      stability = {0 failures: 1.0, 1: 0.80, 2: 0.60, 3: 0.30, 4+: 0.00}
      raw = (chain_score / max_possible) * stability
      score = (raw * 0.98) + 0.01
# ── Evaluation metrics (produced by graders) ──────────────────────────────────
metrics:
  - name: "correct_action_rate"
    description: "Fraction of actions matching the optimal ground-truth policy"
    range: [0.0, 1.0]
    tasks: ["easy"]
  - name: "resolved_score"
    description: "Weighted resolution quality normalised by max possible"
    range: [0.0, 1.0]
    tasks: ["medium"]
  - name: "resource_efficiency"
    description: "Ratio of productive investigations to total INVESTIGATE actions"
    range: [0.0, 1.0]
    tasks: ["medium"]
  - name: "chain_detection_rate"
    description: "Fraction of correlated chains stopped before system failure"
    range: [0.0, 1.0]
    tasks: ["hard"]
  - name: "system_failures"
    description: "Number of system failures triggered (lower is better)"
    range: [0, 10]
    tasks: ["hard"]
  - name: "stability_score"
    description: "Stability multiplier based on failure count"
    range: [0.0, 1.0]
    tasks: ["hard"]
# ── Baseline agents ───────────────────────────────────────────────────────────
baselines:
  - name: "rule_based"
    module: "agents.baseline"
    class: "RuleBasedAgent"
    type: "threshold"
    description: "Simple severity/confidence thresholding policy"
    scores:
      easy: 0.539
      medium: 0.618
      hard: 0.355
  - name: "improved_rule_based"
    module: "agents.baseline"
    class: "ImprovedRuleBasedAgent"
    type: "threshold"
    description: "Rule-based with age-urgency, system-load awareness, resource budget guard"
    scores:
      easy: 0.250
      medium: 0.355
      hard: 0.068
  - name: "ppo_lstm"
    module: "rl_agent"
    class: "PPOTrainer"
    type: "rl"
    description: "PPO with LSTM memory — pure numpy, trained 300+ episodes per task"
    scores:
      easy: 0.665
      medium: 0.931
      hard: 0.325
  - name: "llm_openai"
    module: "inference"
    class: "LLMTriageAgent"
    type: "llm"
    description: "OpenAI-compatible LLM agent via API_BASE_URL / MODEL_NAME / HF_TOKEN"
    # NOTE(review): no published scores for this baseline in the source — left unset.
# ── Infra / Docker ────────────────────────────────────────────────────────────
docker:
  image: "adaptive-alert-triage:latest"
  build: "docker build -t adaptive-alert-triage ."
  run: "docker run -p 8000:8000 adaptive-alert-triage"
  entrypoint: "uvicorn src.adaptive_alert_triage.server:app --host 0.0.0.0 --port 8000"
# ── Setup and validation ──────────────────────────────────────────────────────
setup:
  python: ">=3.9"
  install: "pip install -e ."
  pythonpath: "src"
  test: "pytest tests/"
  validate: "openenv validate"
  baseline: "python inference.py --n 3"

# NOTE(review): api_version/framework read as top-level spec fields, not setup
# children — confirm against the OpenEnv schema.
api_version: "1.0"
framework: "openenv"

documentation:
  readme: "README.md"
  baseline: "inference.py"
  agents: "agents/"
  tasks: "tasks/"
  api_docs: "src/adaptive_alert_triage/"
  server: "src/adaptive_alert_triage/server.py"