Spaces:
Sleeping
Sleeping
File size: 7,143 Bytes
34bd75f | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 | """
Interactive simulation runner — demonstrates OnCallEnv with optimal agent strategies.
Shows step-by-step investigation and remediation for all 6 tasks.
"""
import sys
sys.path.insert(0, ".")
from environment import OnCallEnvironment
from models import Action
from graders import grade_task
def banner(text: str, *, width: int = 70) -> None:
    """Print *text* as a section heading framed by two rules of ``=``.

    Output is a blank line, a rule, the text prefixed with one space, and a
    closing rule.

    Args:
        text: Heading to display between the rules.
        width: Number of ``=`` characters in each rule (default 70).
    """
    rule = "=" * width
    print("\n" + rule)
    print(f" {text}")
    print(rule)
def run_task(env: OnCallEnvironment, task_id: str, actions: list[str], *, preview_lines: int = 5):
    """Run a task with a predefined action sequence and print each step.

    The environment is reset for ``task_id``, the initial observation (goal,
    step budget, services, alerts) is printed, and each command in
    ``actions`` is submitted in order. Replay stops early as soon as a step
    reports ``done``. Afterwards the final state is printed together with the
    score computed by ``grade_task``.

    Args:
        env: Environment providing ``reset(task_id)``, ``step(action)`` and
            ``state()``.
        task_id: Identifier passed to ``env.reset`` and ``grade_task``.
        actions: Command strings, each wrapped in an ``Action`` and stepped.
        preview_lines: Maximum number of result lines echoed per step; any
            remainder is summarised as a count (default 5).

    Returns:
        ``state.score`` as reported by the environment after the last step.
    """
    obs = env.reset(task_id)
    print(f"\n Task: {obs.task_id}")
    print(f" Goal: {obs.goal}")
    print(f" Max steps: {obs.max_steps}")
    print(f" Services: {', '.join(obs.services)}")
    print(" Alerts:")
    for alert in obs.alerts:
        print(f" [{alert.severity.upper()}] {alert.service}: {alert.message}")
    print()

    for i, cmd in enumerate(actions, 1):
        resp = env.step(Action(command=cmd))
        obs = resp.observation
        status = "ERROR" if obs.last_action_error else "OK"
        print(f" Step {i}: {cmd}")

        # Echo only the head of the result so long outputs stay readable.
        lines = (obs.last_action_result or "").split("\n")
        for line in lines[:preview_lines]:
            print(f" | {line}")
        if len(lines) > preview_lines:
            print(f" | ... ({len(lines) - preview_lines} more lines)")

        print(f" [{status}] Running score: {resp.reward.total}")
        print()

        # The environment may end the episode before the script is exhausted.
        if resp.done:
            print(f" >>> Episode finished: {resp.info.get('reason', '?')}")
            break

    state = env.state()
    grader_score = grade_task(task_id, state)
    print("\n -- Final Results --")
    print(f" Env Score: {state.score}")
    print(f" Grader Score: {grader_score}")
    print(" Breakdown:")
    for k, v in state.reward_breakdown.items():
        print(f" {k:20s} {v:+.3f}")
    print(f" Steps used: {state.step}")
    print(f" Root cause ID: {state.root_cause_identified}")
    print(f" Remediation: {state.remediation_applied}")
    return state.score
def main():
    """Replay the scripted action sequence for all six tasks and print a summary.

    A single ``OnCallEnvironment`` is reused across tasks; ``run_task`` resets
    it for each one. After all tasks have run, each task's score and the
    average are printed.
    """
    env = OnCallEnvironment()

    # (banner title, task id, scripted commands), in execution order. Keeping
    # the task id in one place guarantees the summary key always matches the
    # task that was actually run.
    scenarios = [
        # -- EASY --
        (
            "EASY: Memory Leak in Payment Service",
            "easy_memory_leak",
            [
                "check_logs payment-service",
                "check_metrics payment-service",
                "check_metrics api-gateway",
                "restart_service payment-service",
                "mark_resolved payment-service memory leak OOM out of memory causing repeated kills",
            ],
        ),
        # -- MEDIUM --
        (
            "MEDIUM: Cascading Connection Pool Exhaustion",
            "medium_cascading_failure",
            [
                "check_metrics api-gateway",
                "check_logs api-gateway",
                "check_dependencies api-gateway",
                "check_metrics order-service",
                "check_logs order-service",
                "check_config order-service",
                "update_config order-service db_pool_size 50",
                "mark_resolved order-service connection pool exhausted db_pool_size config changed to 5 by auto-scaler",
            ],
        ),
        # -- HARD (Cache) --
        (
            "HARD: Subtle Cache Bug Causing Cross-Service Degradation",
            "hard_cache_degradation",
            [
                "check_metrics api-gateway",
                "check_metrics order-service",
                "check_metrics product-service",
                "check_metrics cache-service",
                "check_logs cache-service",
                "check_deploy_history cache-service",
                "check_metrics postgres-primary",
                "rollback_deploy cache-service",
                "mark_resolved cache-service deployment changed key hashing algorithm causing 60% cache miss rate",
            ],
        ),
        # -- MEDIUM (DNS) --
        (
            "MEDIUM: DNS Misconfiguration Causing Intermittent Failures",
            "medium_dns_misconfiguration",
            [
                "check_metrics order-service",
                "check_logs order-service",
                "check_config order-service",
                "check_metrics inventory-service",
                "check_metrics api-gateway",
                "update_config order-service inventory_host inventory-service.internal",
                "mark_resolved order-service dns hostname misconfiguration inventory_host pointed to decommissioned host",
            ],
        ),
        # -- HARD (Replication) --
        (
            "HARD: Database Replication Lag from Runaway Batch Job",
            "hard_replication_lag",
            [
                "check_metrics user-service",
                "check_logs user-service",
                "check_metrics order-service",
                "check_logs order-service",
                "check_metrics postgres-primary",
                "check_logs postgres-primary",
                "check_config postgres-primary",
                "check_metrics postgres-replica",
                "update_config postgres-primary batch_job_enabled false",
                "mark_resolved postgres-primary batch job nightly_aggregation running during peak hours causing replication lag",
            ],
        ),
        # -- EXPERT (Multi-Root-Cause) --
        (
            "EXPERT: Simultaneous Bad Deployment and Config Drift",
            "expert_multi_root_cause",
            [
                "check_metrics api-gateway",
                "check_logs api-gateway",
                "check_metrics search-service",
                "check_logs search-service",
                "check_deploy_history search-service",
                "check_metrics order-service",
                "check_logs order-service",
                "check_config order-service",
                "check_metrics elasticsearch",
                "rollback_deploy search-service",
                "update_config order-service db_pool_size 50",
                "mark_resolved search-service bad deployment v3.1.0 broke elasticsearch query AND order-service db_pool_size config reduced to 3 by capacity-planner both issues fixed",
            ],
        ),
    ]

    scores = {}
    for title, task_id, actions in scenarios:
        banner(title)
        scores[task_id] = run_task(env, task_id, actions)

    # -- SUMMARY --
    banner("SIMULATION SUMMARY")
    for tid, score in scores.items():
        print(f" {tid:35s} {score:.3f}")
    # Guard the division so an emptied scenario table cannot crash the summary.
    avg = sum(scores.values(), 0.0) / len(scores) if scores else 0.0
    print(f" {'':35s} -----")
    print(f" {'Average':35s} {avg:.3f}")
    print()
# Run the simulation only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|