petter2025 committed on
Commit
20769ca
·
verified ·
1 Parent(s): 3f86dde

Update policy_engine.py

Browse files
Files changed (1) hide show
  1. policy_engine.py +12 -12
policy_engine.py CHANGED
@@ -4,11 +4,11 @@ Policy Engine for Automated Healing Actions.
4
 
5
  import threading
6
  import logging
7
- import datetime # <-- ADDED
8
  from collections import OrderedDict
9
  from typing import Dict, List, Optional, Any
10
 
11
- from agentic_reliability_framework.core.models.event import HealingPolicy, HealingAction, ReliabilityEvent
12
 
13
  logger = logging.getLogger(__name__)
14
 
@@ -16,7 +16,7 @@ logger = logging.getLogger(__name__)
16
  DEFAULT_HEALING_POLICIES = [
17
  HealingPolicy(
18
  name="high_latency_restart",
19
- conditions=[{"metric": "latency_p99", "operator": "gt", "threshold": 500.0}],
20
  actions=[HealingAction.RESTART_CONTAINER, HealingAction.ALERT_TEAM],
21
  priority=1,
22
  cool_down_seconds=300,
@@ -24,7 +24,7 @@ DEFAULT_HEALING_POLICIES = [
24
  ),
25
  HealingPolicy(
26
  name="critical_error_rate_rollback",
27
- conditions=[{"metric": "error_rate", "operator": "gt", "threshold": 0.3}],
28
  actions=[HealingAction.ROLLBACK, HealingAction.CIRCUIT_BREAKER, HealingAction.ALERT_TEAM],
29
  priority=1,
30
  cool_down_seconds=600,
@@ -32,7 +32,7 @@ DEFAULT_HEALING_POLICIES = [
32
  ),
33
  HealingPolicy(
34
  name="high_error_rate_traffic_shift",
35
- conditions=[{"metric": "error_rate", "operator": "gt", "threshold": 0.15}],
36
  actions=[HealingAction.TRAFFIC_SHIFT, HealingAction.ALERT_TEAM],
37
  priority=2,
38
  cool_down_seconds=300,
@@ -41,8 +41,8 @@ DEFAULT_HEALING_POLICIES = [
41
  HealingPolicy(
42
  name="resource_exhaustion_scale",
43
  conditions=[
44
- {"metric": "cpu_util", "operator": "gt", "threshold": 0.9},
45
- {"metric": "memory_util", "operator": "gt", "threshold": 0.9}
46
  ],
47
  actions=[HealingAction.SCALE_OUT],
48
  priority=2,
@@ -51,7 +51,7 @@ DEFAULT_HEALING_POLICIES = [
51
  ),
52
  HealingPolicy(
53
  name="moderate_latency_circuit_breaker",
54
- conditions=[{"metric": "latency_p99", "operator": "gt", "threshold": 300.0}],
55
  actions=[HealingAction.CIRCUIT_BREAKER],
56
  priority=3,
57
  cool_down_seconds=180,
@@ -105,11 +105,11 @@ class PolicyEngine:
105
  unique.append(a)
106
  return unique if unique else [HealingAction.NO_ACTION]
107
 
108
- def _evaluate_conditions(self, conditions: List[Dict[str, Any]], event: ReliabilityEvent) -> bool:
109
  for cond in conditions:
110
- metric = cond["metric"]
111
- op = cond["operator"]
112
- thresh = cond["threshold"]
113
  val = getattr(event, metric, None)
114
  if val is None:
115
  return False
 
4
 
5
  import threading
6
  import logging
7
+ import datetime
8
  from collections import OrderedDict
9
  from typing import Dict, List, Optional, Any
10
 
11
+ from agentic_reliability_framework.core.models.event import HealingPolicy, HealingAction, ReliabilityEvent, PolicyCondition
12
 
13
  logger = logging.getLogger(__name__)
14
 
 
16
  DEFAULT_HEALING_POLICIES = [
17
  HealingPolicy(
18
  name="high_latency_restart",
19
+ conditions=[PolicyCondition(metric="latency_p99", operator="gt", threshold=500.0)],
20
  actions=[HealingAction.RESTART_CONTAINER, HealingAction.ALERT_TEAM],
21
  priority=1,
22
  cool_down_seconds=300,
 
24
  ),
25
  HealingPolicy(
26
  name="critical_error_rate_rollback",
27
+ conditions=[PolicyCondition(metric="error_rate", operator="gt", threshold=0.3)],
28
  actions=[HealingAction.ROLLBACK, HealingAction.CIRCUIT_BREAKER, HealingAction.ALERT_TEAM],
29
  priority=1,
30
  cool_down_seconds=600,
 
32
  ),
33
  HealingPolicy(
34
  name="high_error_rate_traffic_shift",
35
+ conditions=[PolicyCondition(metric="error_rate", operator="gt", threshold=0.15)],
36
  actions=[HealingAction.TRAFFIC_SHIFT, HealingAction.ALERT_TEAM],
37
  priority=2,
38
  cool_down_seconds=300,
 
41
  HealingPolicy(
42
  name="resource_exhaustion_scale",
43
  conditions=[
44
+ PolicyCondition(metric="cpu_util", operator="gt", threshold=0.9),
45
+ PolicyCondition(metric="memory_util", operator="gt", threshold=0.9)
46
  ],
47
  actions=[HealingAction.SCALE_OUT],
48
  priority=2,
 
51
  ),
52
  HealingPolicy(
53
  name="moderate_latency_circuit_breaker",
54
+ conditions=[PolicyCondition(metric="latency_p99", operator="gt", threshold=300.0)],
55
  actions=[HealingAction.CIRCUIT_BREAKER],
56
  priority=3,
57
  cool_down_seconds=180,
 
105
  unique.append(a)
106
  return unique if unique else [HealingAction.NO_ACTION]
107
 
108
+ def _evaluate_conditions(self, conditions: List[PolicyCondition], event: ReliabilityEvent) -> bool:
109
  for cond in conditions:
110
+ metric = cond.metric
111
+ op = cond.operator
112
+ thresh = cond.threshold
113
  val = getattr(event, metric, None)
114
  if val is None:
115
  return False