Spaces:

Arijit-07
/

aria-training

Paused

App Files Files Community

Arijit-07 committed on Apr 25

Commit

ead0ec3

verified ·

1 Parent(s): 9a2f12c

Update train_model.py

Browse files

Files changed (1) hide show

train_model.py +71 -3

train_model.py CHANGED Viewed

@@ -100,6 +100,42 @@ def env_state():
     r.raise_for_status()
     return r.json()
 health = requests.get(f'{BASE_URL}/health', timeout=15).json()
 print(f'✅ Environment: {health}')
 test_obs = env_reset('easy', seed=0)
@@ -111,9 +147,40 @@ print('Config loaded:')
 for k, v in CONFIG.items():
     print(f'  {k}: {v}')
-SYSTEM_PROMPT = """You are an autonomous DevOps incident response agent.
-Return exactly one JSON object describing the next action to take.
-The JSON object must include an "action_type" field."""
 def observation_to_prompt(obs, task_id):
     return (
@@ -232,6 +299,7 @@ def run_episode(task_id, seed=None, verbose=False):
         if done: break
         action, _ = generate_action(obs, task_id)
         if verbose: print(f'  Step {step+1}: {action}')
         result = env_step(action)
         total_reward += result.get('reward', 0.0)
         obs = result.get('observation', obs)

     r.raise_for_status()
     return r.json()
# Action types that sanitize_action lets through unchanged; anything else
# is replaced by a "noop" action.
VALID_ACTIONS = {
    "diagnose", "read_logs", "read_metrics", "read_runbook",
    "search_logs", "restart_service", "rollback", "scale_up",
    "alert_oncall", "acknowledge", "noop", "block_ip_range",
    "create_index", "failover",
}

# Frequently produced wrong action names mapped to their valid spelling.
_ACTION_ALIASES = {"read_service_logs": "read_logs"}

# Optional fields copied verbatim from the raw action when present.
_PASSTHROUGH_KEYS = (
    "root_cause", "runbook", "version", "reason",
    "query", "ip_range", "table", "column", "target_region",
)


def sanitize_action(action):
    """Return a cleaned copy of a generated action.

    The result always contains an ``"action_type"`` drawn from
    ``VALID_ACTIONS``. Unknown fields are dropped, ``service_name`` is
    accepted as a fallback spelling of ``service``, and the known optional
    fields are copied through unchanged.

    Args:
        action: The raw action, normally a dict. Any other type is
            treated as unusable.

    Returns:
        A new dict. It is ``{"action_type": "noop"}`` when ``action`` is
        not a dict, when its ``action_type`` is missing or not a string,
        or when the normalized type is not in ``VALID_ACTIONS``.
    """
    if not isinstance(action, dict):
        return {"action_type": "noop"}

    raw_type = action.get("action_type")
    # A present-but-null or numeric action_type would crash on .lower();
    # treat it like any other unusable action instead of raising.
    if not isinstance(raw_type, str):
        return {"action_type": "noop"}

    # Tolerate case differences and stray whitespace around the name.
    action_type = raw_type.strip().lower()
    action_type = _ACTION_ALIASES.get(action_type, action_type)
    if action_type not in VALID_ACTIONS:
        return {"action_type": "noop"}

    clean = {"action_type": action_type}

    # Prefer "service"; fall back to "service_name" when it is missing or empty.
    service = action.get("service") or action.get("service_name")
    if service:
        clean["service"] = service

    for key in _PASSTHROUGH_KEYS:
        if key in action:
            clean[key] = action[key]
    return clean
 health = requests.get(f'{BASE_URL}/health', timeout=15).json()
 print(f'✅ Environment: {health}')
 test_obs = env_reset('easy', seed=0)
 for k, v in CONFIG.items():
     print(f'  {k}: {v}')
+SYSTEM_PROMPT = """
+You are an autonomous DevOps agent.
+You MUST return ONLY valid JSON.
+STRICT RULES:
+- action_type MUST be one of:
+  diagnose, read_logs, read_metrics, read_runbook, search_logs,
+  restart_service, rollback, scale_up, alert_oncall, acknowledge,
+  noop, block_ip_range, create_index, failover
+- Use EXACT parameter names:
+  service (NOT service_name)
+  root_cause
+  runbook
+  version
+  reason
+  query
+  ip_range
+  table
+  column
+  target_region
+- DO NOT invent new fields
+- DO NOT change names
+- DO NOT use service_name
+- Always output valid JSON only
+Example:
+{
+  "action_type": "read_logs",
+  "service": "order-service"
+}
+"""
 def observation_to_prompt(obs, task_id):
     return (
         if done: break
         action, _ = generate_action(obs, task_id)
         if verbose: print(f'  Step {step+1}: {action}')
+        action = sanitize_action(action)
         result = env_step(action)
         total_reward += result.get('reward', 0.0)
         obs = result.get('observation', obs)