petter2025 committed on
Commit
f61cb1c
·
verified ·
1 Parent(s): 666a364

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +97 -6
app.py CHANGED
@@ -1656,12 +1656,17 @@ class EnhancedReliabilityEngine:
1656
 
1657
  logger.info(f"Event processed: {result['status']} with {result['severity']} severity")
1658
 
1659
- # Enhance with Claude AI reasoning (optional layer)
1660
- try:
1661
- result = await self.enhance_with_claude(event, result)
1662
- except Exception as e:
1663
- logger.error(f"Failed to enhance with Claude: {e}")
1664
- # Continue without enhancement
 
 
 
 
 
1665
 
1666
  return result
1667
 
@@ -1669,6 +1674,92 @@ class EnhancedReliabilityEngine:
1669
  enhanced_engine = EnhancedReliabilityEngine()
1670
 
1671
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1672
  # === Rate Limiting ===
1673
  class RateLimiter:
1674
  """Simple rate limiter for request throttling"""
 
1656
 
1657
  logger.info(f"Event processed: {result['status']} with {result['severity']} severity")
1658
 
1659
+ # Track business metrics for ROI dashboard
1660
+ if is_anomaly and business_impact:
1661
+ auto_healed = len(healing_actions) > 0 and healing_actions[0] != HealingAction.NO_ACTION
1662
+ business_metrics.record_incident(
1663
+ severity=event.severity.value,
1664
+ auto_healed=auto_healed,
1665
+ revenue_loss=business_impact['revenue_loss_estimate'],
1666
+ detection_time_seconds=120.0 # Assume 2 min detection
1667
+ )
1668
+
1669
+ logger.info(f"Event processed: {result['status']} with {result['severity']} severity")
1670
 
1671
  return result
1672
 
 
1674
  enhanced_engine = EnhancedReliabilityEngine()
1675
 
1676
 
1677
+ # === Global Metrics Tracker for ROI Dashboard ===
1678
class BusinessMetricsTracker:
    """Track cumulative business metrics for the ROI dashboard.

    Every recorded incident updates running totals (incident count,
    auto-healed count, revenue at risk, revenue saved) and appends its
    detection time so an average can be reported. All state changes and
    reads happen under a re-entrant lock.
    """

    # Baseline response time (minutes) that savings are measured against.
    # Kept in one place so record_incident() and get_metrics() cannot drift.
    INDUSTRY_AVG_RESPONSE_MINUTES = 14

    def __init__(self):
        """Create a tracker with all counters at zero."""
        self.total_incidents = 0
        self.incidents_auto_healed = 0
        self.total_revenue_saved = 0.0
        self.total_revenue_at_risk = 0.0
        self.detection_times = []
        self._lock = threading.RLock()
        logger.info("Initialized BusinessMetricsTracker")

    def record_incident(
        self,
        severity: str,
        auto_healed: bool,
        revenue_loss: float,
        detection_time_seconds: float = 120.0  # 2 minutes default
    ):
        """Record an incident and update the cumulative metrics.

        Args:
            severity: Severity label of the incident. Accepted for the
                caller's convenience; it is not used in the calculations.
            auto_healed: Whether the incident was healed automatically.
            revenue_loss: Estimated revenue actually lost for this incident.
            detection_time_seconds: Time taken to detect/respond, in seconds.
        """
        with self._lock:
            self.total_incidents += 1

            if auto_healed:
                self.incidents_auto_healed += 1

            # Compare what would have been lost at the industry-average
            # response time against what was actually lost.
            arf_response_minutes = detection_time_seconds / 60

            # The divisor is floored at one minute so that very short
            # detection times do not inflate the per-minute loss rate.
            revenue_per_minute = revenue_loss / max(1, arf_response_minutes)
            traditional_loss = (
                revenue_per_minute * self.INDUSTRY_AVG_RESPONSE_MINUTES
            )
            revenue_saved = traditional_loss - revenue_loss

            self.total_revenue_at_risk += traditional_loss
            self.total_revenue_saved += revenue_saved

            self.detection_times.append(detection_time_seconds)

            # Plain "$" here: "\$" is an invalid escape sequence that would
            # put a stray backslash in the log output.
            logger.info(
                f"Recorded incident: auto_healed={auto_healed}, "
                f"saved=${revenue_saved:.2f}"
            )

    def get_metrics(self) -> dict:
        """Return a snapshot of the current cumulative metrics.

        Returns:
            A dict with the raw counters, the auto-heal rate as a
            percentage (0 when nothing has been recorded), the average
            detection time in seconds and minutes (120.0 seconds when
            nothing has been recorded), and the percentage improvement of
            that average over the industry baseline.
        """
        with self._lock:
            auto_heal_rate = (
                (self.incidents_auto_healed / self.total_incidents * 100)
                if self.total_incidents > 0 else 0
            )

            avg_detection_time = (
                sum(self.detection_times) / len(self.detection_times)
                if self.detection_times else 120.0
            )
            avg_detection_minutes = avg_detection_time / 60
            baseline = self.INDUSTRY_AVG_RESPONSE_MINUTES

            return {
                "total_incidents": self.total_incidents,
                "incidents_auto_healed": self.incidents_auto_healed,
                "auto_heal_rate": auto_heal_rate,
                "total_revenue_saved": self.total_revenue_saved,
                "total_revenue_at_risk": self.total_revenue_at_risk,
                "avg_detection_time_seconds": avg_detection_time,
                "avg_detection_time_minutes": avg_detection_minutes,
                # Percentage faster than the industry baseline.
                "time_improvement": (
                    (baseline - avg_detection_minutes) / baseline * 100
                ),
            }

    def reset(self):
        """Reset all metrics (for demo purposes)."""
        with self._lock:
            self.total_incidents = 0
            self.incidents_auto_healed = 0
            self.total_revenue_saved = 0.0
            self.total_revenue_at_risk = 0.0
            self.detection_times = []
            logger.info("Reset BusinessMetricsTracker")
1758
+
1759
+
1760
+ # Initialize global tracker
1761
+ business_metrics = BusinessMetricsTracker()
1762
+
1763
  # === Rate Limiting ===
1764
  class RateLimiter:
1765
  """Simple rate limiter for request throttling"""