---
# openenv.yaml — OpenEnv Specification for Adaptive Alert Triage
# Matches the actual implementation in src/adaptive_alert_triage/
# Validated against: env.py, models.py, tasks/easy.py, tasks/medium.py, tasks/hard.py

# Package identity. `version` is quoted so it is always read as a string.
name: "AdaptiveAlertTriage"
version: "0.1.0"
# Long-form summary. The literal block (`|`) keeps line breaks as written.
# The em dashes below were previously stored as mis-decoded bytes
# (UTF-8 read as Windows-1253) and have been restored.
description: |
  A partially-observable RL environment that simulates real-time IT alert triage
  and incident response.  An agent receives a continuous stream of system alerts
  and must decide — for each one — whether to INVESTIGATE, IGNORE, ESCALATE, or
  DELAY, under time pressure, resource constraints, and the risk of cascading
  failures from unhandled correlated alerts.

  This environment models a task performed daily by DevOps and SOC engineers:
  triaging noisy monitoring signals while preventing real incidents from
  escalating into outages.

# Maintainer contact details (one entry per author).
authors:
  - name: 'Scalar Hackathon Team'
    email: 'team@scalar.com'

# License identifier for the project.
license: 'MIT'

# Free-form keywords describing the environment.
tags:
  - 'reinforcement-learning'
  - 'openenv'
  - 'alert-triage'
  - 'incident-response'
  - 'partial-observability'
  - 'resource-constraints'
  - 'cascading-failures'

# ── Environment class ─────────────────────────────────────────────────────────
environment:
  module: "adaptive_alert_triage.env"
  class: "AdaptiveAlertTriageEnv"
  # Constructor accepts: task_id ("easy"|"medium"|"hard"), seed (int, optional)

# ── OpenEnv interface ─────────────────────────────────────────────────────────
# All three methods are implemented in AdaptiveAlertTriageEnv
interface:
  reset:
    signature: "reset(seed=None, options=None) -> Observation"
    description: |
      Resets the episode.  Generates an initial batch of synthetic alerts
      using the task-specific correlation_probability.  Returns an Observation
      with alerts stripped of hidden fields (true_severity, is_correlated).
  step:
    signature: "step(action: Action) -> (Observation, Reward, done: bool, info: dict)"
    description: |
      Processes one Action, updates alert queue, checks for failures, generates
      new alerts, and returns the next observation.  The info dict always
      contains: processed_alerts, correlation_groups, failures_this_step,
      system_failure, action_correct, cumulative_reward, step, failures_count.
  state:
    signature: "state() -> EpisodeState"
    description: |
      Returns the full internal EpisodeState including hidden ground-truth
      (true_severities, correlation_groups, false_positives, pending_failures).
      For evaluation and replay only β€” never exposed to the agent during training.

# ── Configuration ─────────────────────────────────────────────────────────────
config:
  actions:
    - "INVESTIGATE"
    - "IGNORE"
    - "ESCALATE"
    - "DELAY"

# ── Observation space ─────────────────────────────────────────────────────────
observation:
  type: "Pydantic BaseModel (Observation)"
  fields:
    alerts:
      type: "List[Alert]"
      description: "Active alerts awaiting triage. Each Alert has id, visible_severity, confidence, alert_type, age."
      hidden_fields: "true_severity, is_correlated β€” stripped before returned to agent"
    system_load:
      type: "float [0.0, 1.0]"
      description: "Current infrastructure utilisation"
    queue_length:
      type: "int >= 0"
      description: "Number of active alerts in queue"
    time_remaining:
      type: "int >= 0"
      description: "Steps left before episode ends"
    episode_step:
      type: "int >= 0"
      description: "Current step index (0-based)"
    resource_budget:
      type: "Optional[int]"
      description: "Remaining INVESTIGATE actions this step. None = unconstrained (easy task)."

# ── Action space ──────────────────────────────────────────────────────────────
action:
  type: "Pydantic BaseModel (Action)"
  fields:
    alert_id:
      type: "str"
      description: "ID of the target alert β€” must match an ID in current observation.alerts"
    action_type:
      type: "Literal['INVESTIGATE','IGNORE','ESCALATE','DELAY']"
      description: |
        INVESTIGATE β€” allocates resources to diagnose; counts against resource_budget
        IGNORE      β€” dismisses alert as noise (best for false positives)
        ESCALATE    β€” routes to specialist team (no budget cost)
        DELAY       β€” keeps alert in queue for re-evaluation next step
    metadata:
      type: "Dict[str, Any]"
      description: "Optional context bag (e.g. reasoning from LLM agents)"

# ── Reward ────────────────────────────────────────────────────────────────────
reward:
  type: "Pydantic BaseModel (Reward)"
  description: "Dense, shaped reward decomposed into named components"
  schedule:
    critical_handled: "+10.0  β€” INVESTIGATE or ESCALATE on critical alert (true_severity >= 0.75)"
    failure_prevented: "+5.0   β€” correlated alert handled (prevents cascade)"
    false_positive_ignored: "+3.0   β€” IGNORE on a false positive"
    medium_handled: "+2.0 * true_severity  β€” INVESTIGATE on medium alert"
    unnecessary_invest: "-2.0   β€” INVESTIGATE on a false positive"
    missed_critical: "-8.0   β€” IGNORE on a critical alert"
    risky_delay: "-2.4   β€” DELAY on a critical alert"
    task_multipliers: "easy=1.0, medium=1.1, hard=1.2"
  range: [-8.0, 15.0] # per step before task multiplier; cascade bonus included in max

# ── Tasks ─────────────────────────────────────────────────────────────────────
tasks:
  - id: "easy"
    name: "Basic Alert Prioritisation"
    description: |
      Classify and respond to independent alerts with no resource constraint.
      The agent must learn to INVESTIGATE/ESCALATE critical alerts
      (true_severity >= 0.75) and IGNORE false positives (< 0.30).
      DELAY is always wrong in this task.
    difficulty: 1
    max_steps: 30
    failure_threshold: 5
    max_investigations_per_step: null # unconstrained
    correlation_probability: 0.10
    success_threshold: 0.70 # correct_actions / total_actions >= 0.70
    grader: "tasks.easy.EasyTaskGrader"
    grading_formula: "score = (correct_actions / total_actions) * 0.98 + 0.01"

  - id: 'medium'
    name: 'Resource-Constrained Triage'
    description: |
      Triage under a hard per-step investigation budget of K=3.
      Agent must prioritise high-value investigations over false positives
      and use ESCALATE when budget is exhausted.  Grader penalises wasting
      budget on FPs and missing critical alerts.
    difficulty: 2
    max_steps: 40
    failure_threshold: 5
    # The K=3 budget named in the description above.
    max_investigations_per_step: 3
    correlation_probability: 0.20
    success_threshold: 0.55
    grader: 'tasks.medium.MediumTaskGrader'
    # NOTE(review): fp_penalty divides by total_investigations with no guard
    # shown here, unlike miss_penalty's max(critical_total, 1) — confirm the
    # grader handles an episode with zero INVESTIGATE actions.
    grading_formula: |
      raw = resolved_score / max_possible_score
      fp_penalty = 0.30 * (unnecessary_investigations / total_investigations)
      miss_penalty = 0.20 * (critical_missed / max(critical_total, 1))
      penalised = raw - fp_penalty - miss_penalty
      score = (penalised * 0.6) + 0.35

  - id: "hard"
    name: "Cascading Failure Prevention"
    # The em dash below, and the summation/multiplication signs in
    # grading_formula, were previously stored as mis-decoded bytes
    # (UTF-8 read as Windows-1253) and have been restored.
    description: |
      Detect and stop correlated alert chains before they cascade into
      system failures.  Chains arrive sequentially: trigger at step N,
      child at step N+k if trigger was missed.  Agent cannot observe
      is_correlated — must infer from visible patterns.  Stability
      multiplier drops sharply with each system failure.
    difficulty: 3
    max_steps: 50
    failure_threshold: 3  # stricter than easy/medium
    max_investigations_per_step: 3
    correlation_probability: 0.40
    success_threshold: 0.50
    grader: "tasks.hard.HardTaskGrader"
    # chain_score sums per-chain stop rewards weighted by severity; the
    # result is scaled by a stability factor looked up from the failure count.
    grading_formula: |
      chain_score = Σ stop_reward(position) × severity_weight
      stability   = {0 failures: 1.0, 1: 0.80, 2: 0.60, 3: 0.30, 4+: 0.00}
      raw         = (chain_score / max_possible) * stability
      score       = (raw * 0.98) + 0.01

# ── Evaluation metrics (produced by graders) ──────────────────────────────────
metrics:
  - name: "correct_action_rate"
    description: "Fraction of actions matching the optimal ground-truth policy"
    range: [0.0, 1.0]
    tasks: ["easy"]

  - name: "resolved_score"
    description: "Weighted resolution quality normalised by max possible"
    range: [0.0, 1.0]
    tasks: ["medium"]

  - name: "resource_efficiency"
    description: "Ratio of productive investigations to total INVESTIGATE actions"
    range: [0.0, 1.0]
    tasks: ["medium"]

  - name: "chain_detection_rate"
    description: "Fraction of correlated chains stopped before system failure"
    range: [0.0, 1.0]
    tasks: ["hard"]

  - name: "system_failures"
    description: "Number of system failures triggered (lower is better)"
    range: [0, 10]
    tasks: ["hard"]

  - name: "stability_score"
    description: "Stability multiplier based on failure count"
    range: [0.0, 1.0]
    tasks: ["hard"]

# ── Baseline agents ───────────────────────────────────────────────────────────
baselines:
  - name: "rule_based"
    module: "agents.baseline"
    class: "RuleBasedAgent"
    type: "threshold"
    description: "Simple severity/confidence thresholding policy"
    scores:
      easy: 0.539
      medium: 0.618
      hard: 0.355

  - name: "improved_rule_based"
    module: "agents.baseline"
    class: "ImprovedRuleBasedAgent"
    type: "threshold"
    description: "Rule-based with age-urgency, system-load awareness, resource budget guard"
    scores:
      easy: 0.250
      medium: 0.355
      hard: 0.068

  - name: "ppo_lstm"
    module: "rl_agent"
    class: "PPOTrainer"
    type: "rl"
    description: "PPO with LSTM memory β€” pure numpy, trained 300+ episodes per task"
    scores:
      easy: 0.665
      medium: 0.931
      hard: 0.325

  - name: "llm_openai"
    module: "inference"
    class: "LLMTriageAgent"
    type: "llm"
    description: "OpenAI-compatible LLM agent via API_BASE_URL / MODEL_NAME / HF_TOKEN"

# ── Infra / Docker ────────────────────────────────────────────────────────────
docker:
  image: "adaptive-alert-triage:latest"
  build: "docker build -t adaptive-alert-triage ."
  run: "docker run -p 8000:8000 adaptive-alert-triage"
  entrypoint: "uvicorn src.adaptive_alert_triage.server:app --host 0.0.0.0 --port 8000"

# ── Setup and validation ──────────────────────────────────────────────────────
setup:
  python: ">=3.9"
  install: "pip install -e ."
  pythonpath: "src"
  test: "pytest tests/"
  validate: "openenv validate"
  baseline: "python inference.py --n 3"

# Spec identifiers. api_version is quoted so it stays the string "1.0"
# and is not parsed as a float.
api_version: '1.0'
framework: 'openenv'

# Pointers to documentation and reference sources
# (presumably relative to the repository root — confirm).
documentation:
  readme: 'README.md'
  baseline: 'inference.py'
  agents: 'agents/'
  tasks: 'tasks/'
  api_docs: 'src/adaptive_alert_triage/'
  server: 'src/adaptive_alert_triage/server.py'