petter2025 committed on
Commit
00342ac
·
verified ·
1 Parent(s): 4a3065f

Create healing_policies.py

Browse files
Files changed (1) hide show
  1. healing_policies.py +116 -0
healing_policies.py ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from models import HealingPolicy, HealingAction, EventSeverity
2
+ from typing import Dict, List
3
+
4
# Built-in policy set, used by PolicyEngine when it is given no policies.
# Each key in ``conditions`` names an event attribute; a policy applies only
# when every one of its conditions holds.
DEFAULT_HEALING_POLICIES = [
    # p99 latency above 300 while the error rate is still below 0.05,
    # for MEDIUM or HIGH severity events.
    HealingPolicy(
        name="high_latency_restart",
        conditions={
            "latency_p99": {"operator": ">", "value": 300},
            "error_rate": {"operator": "<", "value": 0.05},
            "severity": {
                "operator": "in",
                "value": [EventSeverity.MEDIUM, EventSeverity.HIGH],
            },
        },
        actions=[HealingAction.RESTART_CONTAINER],
        priority=2,
    ),
    # Error rate above 0.3 on an event that lists upstream dependencies.
    HealingPolicy(
        name="cascading_failure",
        conditions={
            "error_rate": {"operator": ">", "value": 0.3},
            "upstream_deps": {"operator": "not_empty", "value": True},
        },
        actions=[HealingAction.CIRCUIT_BREAKER, HealingAction.ALERT_TEAM],
        priority=1,
    ),
    # CPU and memory utilisation both above 0.9.
    HealingPolicy(
        name="resource_exhaustion",
        conditions={
            "cpu_util": {"operator": ">", "value": 0.9},
            "memory_util": {"operator": ">", "value": 0.9},
        },
        actions=[HealingAction.SCALE_OUT, HealingAction.ALERT_TEAM],
        priority=1,
    ),
    # p99 latency above 200 together with an error rate above 0.1.
    HealingPolicy(
        name="moderate_performance_issue",
        conditions={
            "latency_p99": {"operator": ">", "value": 200},
            "error_rate": {"operator": ">", "value": 0.1},
        },
        actions=[HealingAction.TRAFFIC_SHIFT],
        priority=3,
    ),
]
44
+
45
class PolicyEngine:
    """Select healing actions for an event by matching it against policies.

    Policies are checked in list order; this class does not read a policy's
    ``priority``. After a policy matches an event for a given component, that
    (policy, component) pair is skipped until the policy's
    ``cool_down_seconds`` have elapsed.
    """

    def __init__(self, policies: List[HealingPolicy] = None):
        """Create an engine.

        Args:
            policies: Policies to evaluate. ``None`` or an empty list falls
                back to ``DEFAULT_HEALING_POLICIES``.
        """
        self.policies = policies or DEFAULT_HEALING_POLICIES
        # "<policy name>_<event component>" -> POSIX timestamp of last match.
        self.last_execution: Dict[str, float] = {}

    def evaluate_policies(self, event) -> List[HealingAction]:
        """Return the actions of every enabled, off-cooldown matching policy.

        Args:
            event: Object exposing ``component`` plus the attributes named in
                the policies' conditions.

        Returns:
            The matching policies' actions in policy order with duplicates
            removed, or ``[HealingAction.NO_ACTION]`` when nothing matched.
        """
        # Function-scope import: the imports visible at the top of this module
        # do not include ``datetime``, so the timestamp call below would
        # otherwise raise NameError.
        import datetime

        # One timestamp per evaluation so all policies share the same "now".
        now = datetime.datetime.now().timestamp()
        matched_actions = []

        for policy in self.policies:
            if not policy.enabled:
                continue

            policy_key = f"{policy.name}_{event.component}"
            last_exec = self.last_execution.get(policy_key, 0)
            if now - last_exec < policy.cool_down_seconds:
                continue

            if self._evaluate_conditions(policy.conditions, event):
                matched_actions.extend(policy.actions)
                # The cooldown starts when the policy matches; this method
                # does not itself execute the returned actions.
                self.last_execution[policy_key] = now

        # dict.fromkeys drops duplicates while keeping first-seen order.
        unique_actions = list(dict.fromkeys(matched_actions))
        return unique_actions or [HealingAction.NO_ACTION]

    def _evaluate_conditions(self, conditions: Dict, event) -> bool:
        """Return True when the event satisfies every condition.

        Args:
            conditions: Mapping of event attribute name to a dict with
                ``"operator"`` and ``"value"`` keys.
            event: Object whose attributes are compared; a missing attribute
                is treated as ``None``.
        """
        return all(
            self._compare_values(
                getattr(event, field, None),
                condition["operator"],
                condition["value"],
            )
            for field, condition in conditions.items()
        )

    def _compare_values(self, event_value, operator: str, condition_value) -> bool:
        """Apply a single comparison operator.

        Supported operators are ``>``, ``<``, ``>=``, ``<=``, ``==``, ``in``
        and ``not_empty``. Unknown operators and comparisons that raise
        ``TypeError``/``ValueError`` (for example ordering ``None`` against a
        number) evaluate to ``False``.
        """
        try:
            if operator == ">":
                return event_value > condition_value
            if operator == "<":
                return event_value < condition_value
            if operator == ">=":
                return event_value >= condition_value
            if operator == "<=":
                return event_value <= condition_value
            if operator == "==":
                return event_value == condition_value
            if operator == "in":
                return event_value in condition_value
            if operator == "not_empty":
                # Compare truthiness against the expected flag. The earlier
                # form ``len(x) > 0 == flag`` was a chained comparison
                # (``len(x) > 0 and 0 == flag``) and could never be True for
                # flag=True.
                return bool(event_value) == condition_value
            return False
        except (TypeError, ValueError):
            return False