|
|
|
|
|
import requests |
|
|
import pandas as pd |
|
|
from datetime import datetime |
|
|
|
|
|
class AgenticSupervisor: |
|
|
def __init__(self, config_path="supervisor_config.yml"): |
|
|
self.config = self._load_config(config_path) |
|
|
self.df = pd.read_csv(self.config['master_plan_path']) |
|
|
|
|
|
def _load_config(self, path): |
|
|
try: |
|
|
import yaml |
|
|
with open(path) as f: |
|
|
return yaml.safe_load(f) |
|
|
except Exception as e: |
|
|
self._alert(f"Config load failed: {str(e)}", critical=True) |
|
|
|
|
|
def _alert(self, message, critical=False): |
|
|
if critical: |
|
|
requests.post(self.config['alerts']['pagerduty'], json={ |
|
|
"title": "CRITICAL: Agentic Supervisor", |
|
|
"message": message |
|
|
}) |
|
|
print(f"[{datetime.now()}] {message}") |
|
|
|
|
|
def check_workflows(self): |
|
|
for _, row in self.df.iterrows(): |
|
|
try: |
|
|
resp = requests.get( |
|
|
f"{self.config['workflow_api']}/status/{row['Component']}", |
|
|
timeout=5 |
|
|
) |
|
|
if resp.status_code != 200: |
|
|
self._alert(f"Workflow DOWN: {row['Component']}", critical=True) |
|
|
except Exception as e: |
|
|
self._alert(f"Workflow check failed: {str(e)}") |
|
|
|
|
|
def enforce_sla(self): |
|
|
for _, row in self.df[self.df['SLA_Penalty'].notna()].iterrows(): |
|
|
metric = get_metric(row['Component']) |
|
|
if metric > float(row['SLA_Threshold']): |
|
|
self._alert(f"SLA Violation: {row['Component']} at {metric}") |
|
|
|
|
|
if __name__ == "__main__": |
|
|
supervisor = AgenticSupervisor() |
|
|
supervisor.check_workflows() |
|
|
supervisor.enforce_sla() |
|
|
|