File size: 614 Bytes
5fe9036
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# Environment metadata: identifier, version string, and a one-line summary.
name: sre-incident-response
version: '1.0.0'
# Folded scalar (>-): the two lines below are joined with a single space and
# the trailing newline is stripped, so the value is one line of text.
description: >-
  SRE Incident Response environment — train AI agents to diagnose
  and fix production incidents

# Incident scenarios defined for this environment, one list entry per task.
# Each entry carries a short id, a display name, a difficulty label, and a
# max_steps value; max_steps grows with difficulty (15, 25, 35).
# NOTE(review): max_steps is presumably the per-episode step limit — confirm
# against the code that consumes this file.
tasks:
  # Easy tier.
  - id: easy
    name: Single Service OOM Crash
    difficulty: easy
    max_steps: 15
  # Medium tier.
  - id: medium
    name: Cascading Database Deadlock
    difficulty: medium
    max_steps: 25
  # Hard tier.
  - id: hard
    name: Concurrent Faults with Misleading Evidence
    difficulty: hard
    max_steps: 35

# Maps each interface role (action, observation, reward, state) to a dotted
# name under `models`.
# NOTE(review): these look like references to classes that the environment
# loader resolves — confirm how the consumer imports them.
models:
  action: models.Action
  observation: models.Observation
  reward: models.Reward
  state: models.State

# Server settings: a numeric port and the application entrypoint string.
runtime:
  port: 8000
  # Quoted so the colon-containing value is unambiguously a string.
  # NOTE(review): the value looks like a "module:attribute" application
  # reference — confirm against whatever launches the server.
  entrypoint: 'server.app:app'