Spaces:
Sleeping
Sleeping
| """ | |
| Interactive simulation runner — demonstrates OnCallEnv with optimal agent strategies. | |
| Shows step-by-step investigation and remediation for all 6 tasks. | |
| """ | |
| import sys | |
| sys.path.insert(0, ".") | |
| from environment import OnCallEnvironment | |
| from models import Action | |
| from graders import grade_task | |
def banner(text: str, width: int = 70) -> None:
    """Print *text* as a section heading framed by two rules of ``=``.

    A blank line is emitted before the top rule so consecutive sections are
    visually separated.

    Args:
        text: Heading to display between the two rules.
        width: Number of ``=`` characters in each rule. Defaults to 70,
            the width this script has always used.
    """
    rule = "=" * width
    print("\n" + rule)
    print(f" {text}")
    print(rule)
def run_task(env: OnCallEnvironment, task_id: str, actions: list[str]):
    """Replay a scripted command list against one task and narrate each step.

    The environment is reset to ``task_id``, the opening observation is
    printed, and every command in ``actions`` is submitted in order until the
    list runs out or the environment reports the episode as done. A final
    report (environment score, grader score, reward breakdown) is then printed.

    Args:
        env: Environment instance to drive; it is reset at the start.
        task_id: Identifier of the task to load.
        actions: Command strings, each wrapped in an ``Action`` and stepped.

    Returns:
        The ``score`` attribute of the environment state after the run.
    """
    # Only the first few lines of each command's output are echoed.
    preview_limit = 5

    opening = env.reset(task_id)
    print(f"\n Task: {opening.task_id}")
    print(f" Goal: {opening.goal}")
    print(f" Max steps: {opening.max_steps}")
    print(f" Services: {', '.join(opening.services)}")
    print(" Alerts:")
    for alert in opening.alerts:
        print(f" [{alert.severity.upper()}] {alert.service}: {alert.message}")
    print()

    for step_no, command in enumerate(actions, start=1):
        outcome = env.step(Action(command=command))
        observation = outcome.observation

        print(f" Step {step_no}: {command}")

        output_lines = (observation.last_action_result or "").split("\n")
        hidden = len(output_lines) - preview_limit
        for text in output_lines[:preview_limit]:
            print(f" | {text}")
        if hidden > 0:
            print(f" | ... ({hidden} more lines)")

        status = "OK" if not observation.last_action_error else "ERROR"
        print(f" [{status}] Running score: {outcome.reward.total}")
        print()

        if outcome.done:
            print(f" >>> Episode finished: {outcome.info.get('reason', '?')}")
            break

    final_state = env.state()
    grader_score = grade_task(task_id, final_state)

    print("\n -- Final Results --")
    print(f" Env Score: {final_state.score}")
    print(f" Grader Score: {grader_score}")
    print(" Breakdown:")
    for component, value in final_state.reward_breakdown.items():
        print(f" {component:20s} {value:+.3f}")
    print(f" Steps used: {final_state.step}")
    print(f" Root cause ID: {final_state.root_cause_identified}")
    print(f" Remediation: {final_state.remediation_applied}")
    return final_state.score
def main():
    """Run every scripted scenario in turn, then print a score summary.

    Each scenario is a (banner title, task id, command list) triple. They are
    replayed in the listed order against a single shared environment, and the
    score returned by ``run_task`` is recorded under the task id. The summary
    lists each score and their arithmetic mean.
    """
    scenarios = [
        # Easy
        (
            "EASY: Memory Leak in Payment Service",
            "easy_memory_leak",
            [
                "check_logs payment-service",
                "check_metrics payment-service",
                "check_metrics api-gateway",
                "restart_service payment-service",
                "mark_resolved payment-service memory leak OOM out of memory causing repeated kills",
            ],
        ),
        # Medium
        (
            "MEDIUM: Cascading Connection Pool Exhaustion",
            "medium_cascading_failure",
            [
                "check_metrics api-gateway",
                "check_logs api-gateway",
                "check_dependencies api-gateway",
                "check_metrics order-service",
                "check_logs order-service",
                "check_config order-service",
                "update_config order-service db_pool_size 50",
                "mark_resolved order-service connection pool exhausted db_pool_size config changed to 5 by auto-scaler",
            ],
        ),
        # Hard (cache)
        (
            "HARD: Subtle Cache Bug Causing Cross-Service Degradation",
            "hard_cache_degradation",
            [
                "check_metrics api-gateway",
                "check_metrics order-service",
                "check_metrics product-service",
                "check_metrics cache-service",
                "check_logs cache-service",
                "check_deploy_history cache-service",
                "check_metrics postgres-primary",
                "rollback_deploy cache-service",
                "mark_resolved cache-service deployment changed key hashing algorithm causing 60% cache miss rate",
            ],
        ),
        # Medium (DNS)
        (
            "MEDIUM: DNS Misconfiguration Causing Intermittent Failures",
            "medium_dns_misconfiguration",
            [
                "check_metrics order-service",
                "check_logs order-service",
                "check_config order-service",
                "check_metrics inventory-service",
                "check_metrics api-gateway",
                "update_config order-service inventory_host inventory-service.internal",
                "mark_resolved order-service dns hostname misconfiguration inventory_host pointed to decommissioned host",
            ],
        ),
        # Hard (replication)
        (
            "HARD: Database Replication Lag from Runaway Batch Job",
            "hard_replication_lag",
            [
                "check_metrics user-service",
                "check_logs user-service",
                "check_metrics order-service",
                "check_logs order-service",
                "check_metrics postgres-primary",
                "check_logs postgres-primary",
                "check_config postgres-primary",
                "check_metrics postgres-replica",
                "update_config postgres-primary batch_job_enabled false",
                "mark_resolved postgres-primary batch job nightly_aggregation running during peak hours causing replication lag",
            ],
        ),
        # Expert (multiple root causes)
        (
            "EXPERT: Simultaneous Bad Deployment and Config Drift",
            "expert_multi_root_cause",
            [
                "check_metrics api-gateway",
                "check_logs api-gateway",
                "check_metrics search-service",
                "check_logs search-service",
                "check_deploy_history search-service",
                "check_metrics order-service",
                "check_logs order-service",
                "check_config order-service",
                "check_metrics elasticsearch",
                "rollback_deploy search-service",
                "update_config order-service db_pool_size 50",
                "mark_resolved search-service bad deployment v3.1.0 broke elasticsearch query AND order-service db_pool_size config reduced to 3 by capacity-planner both issues fixed",
            ],
        ),
    ]

    env = OnCallEnvironment()
    scores = {}
    for title, task_id, commands in scenarios:
        banner(title)
        scores[task_id] = run_task(env, task_id, commands)

    # Summary: one row per task, then the mean of all recorded scores.
    banner("SIMULATION SUMMARY")
    running_total = 0.0
    for name, value in scores.items():
        print(f" {name:35s} {value:.3f}")
        running_total += value
    mean_score = running_total / len(scores)
    print(f" {'':35s} -----")
    print(f" {'Average':35s} {mean_score:.3f}")
    print()
if __name__ == "__main__":
    # Run the demo only when executed as a script, not when imported.
    main()