petter2025 committed on
Commit
9186179
·
verified ·
1 Parent(s): 3e50ac5

Update healing_policies.py

Browse files
Files changed (1) hide show
  1. healing_policies.py +16 -9
healing_policies.py CHANGED
@@ -1,4 +1,4 @@
1
- import datetime # ← ADD THIS IMPORT
2
  from models import HealingPolicy, HealingAction, EventSeverity
3
  from typing import Dict, List
4
 
@@ -8,8 +8,7 @@ DEFAULT_HEALING_POLICIES = [
8
  name="high_latency_restart",
9
  conditions={
10
  "latency_p99": {"operator": ">", "value": 300},
11
- "error_rate": {"operator": "<", "value": 0.05},
12
- "severity": {"operator": "in", "value": [EventSeverity.MEDIUM, EventSeverity.HIGH]}
13
  },
14
  actions=[HealingAction.RESTART_CONTAINER],
15
  priority=2
@@ -17,8 +16,7 @@ DEFAULT_HEALING_POLICIES = [
17
  HealingPolicy(
18
  name="cascading_failure",
19
  conditions={
20
- "error_rate": {"operator": ">", "value": 0.3},
21
- "upstream_deps": {"operator": "not_empty", "value": True}
22
  },
23
  actions=[HealingAction.CIRCUIT_BREAKER, HealingAction.ALERT_TEAM],
24
  priority=1
@@ -26,8 +24,8 @@ DEFAULT_HEALING_POLICIES = [
26
  HealingPolicy(
27
  name="resource_exhaustion",
28
  conditions={
29
- "cpu_util": {"operator": ">", "value": 0.9},
30
- "memory_util": {"operator": ">", "value": 0.9}
31
  },
32
  actions=[HealingAction.SCALE_OUT, HealingAction.ALERT_TEAM],
33
  priority=1
@@ -36,10 +34,19 @@ DEFAULT_HEALING_POLICIES = [
36
  name="moderate_performance_issue",
37
  conditions={
38
  "latency_p99": {"operator": ">", "value": 200},
39
- "error_rate": {"operator": ">", "value": 0.1}
40
  },
41
  actions=[HealingAction.TRAFFIC_SHIFT],
42
  priority=3
 
 
 
 
 
 
 
 
 
43
  )
44
  ]
45
 
@@ -58,7 +65,7 @@ class PolicyEngine:
58
 
59
  # Check cooldown
60
  policy_key = f"{policy.name}_{event.component}"
61
- current_time = datetime.datetime.now().timestamp() # ← THIS NEEDS datetime IMPORT
62
  last_exec = self.last_execution.get(policy_key, 0)
63
 
64
  if current_time - last_exec < policy.cool_down_seconds:
 
1
+ import datetime
2
  from models import HealingPolicy, HealingAction, EventSeverity
3
  from typing import Dict, List
4
 
 
8
  name="high_latency_restart",
9
  conditions={
10
  "latency_p99": {"operator": ">", "value": 300},
11
+ "error_rate": {"operator": "<", "value": 0.1},
 
12
  },
13
  actions=[HealingAction.RESTART_CONTAINER],
14
  priority=2
 
16
  HealingPolicy(
17
  name="cascading_failure",
18
  conditions={
19
+ "error_rate": {"operator": ">", "value": 0.15},
 
20
  },
21
  actions=[HealingAction.CIRCUIT_BREAKER, HealingAction.ALERT_TEAM],
22
  priority=1
 
24
  HealingPolicy(
25
  name="resource_exhaustion",
26
  conditions={
27
+ "cpu_util": {"operator": ">", "value": 0.85},
28
+ "memory_util": {"operator": ">", "value": 0.85}
29
  },
30
  actions=[HealingAction.SCALE_OUT, HealingAction.ALERT_TEAM],
31
  priority=1
 
34
  name="moderate_performance_issue",
35
  conditions={
36
  "latency_p99": {"operator": ">", "value": 200},
37
+ "error_rate": {"operator": ">", "value": 0.05}
38
  },
39
  actions=[HealingAction.TRAFFIC_SHIFT],
40
  priority=3
41
+ ),
42
+ HealingPolicy(
43
+ name="critical_failure",
44
+ conditions={
45
+ "latency_p99": {"operator": ">", "value": 500},
46
+ "error_rate": {"operator": ">", "value": 0.1}
47
+ },
48
+ actions=[HealingAction.RESTART_CONTAINER, HealingAction.ALERT_TEAM, HealingAction.TRAFFIC_SHIFT],
49
+ priority=1
50
  )
51
  ]
52
 
 
65
 
66
  # Check cooldown
67
  policy_key = f"{policy.name}_{event.component}"
68
+ current_time = datetime.datetime.now().timestamp()
69
  last_exec = self.last_execution.get(policy_key, 0)
70
 
71
  if current_time - last_exec < policy.cool_down_seconds: