Update app.py
Browse files
app.py
CHANGED
|
@@ -1656,12 +1656,17 @@ class EnhancedReliabilityEngine:
|
|
| 1656 |
|
| 1657 |
logger.info(f"Event processed: {result['status']} with {result['severity']} severity")
|
| 1658 |
|
| 1659 |
-
#
|
| 1660 |
-
|
| 1661 |
-
|
| 1662 |
-
|
| 1663 |
-
|
| 1664 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1665 |
|
| 1666 |
return result
|
| 1667 |
|
|
@@ -1669,6 +1674,92 @@ class EnhancedReliabilityEngine:
|
|
| 1669 |
enhanced_engine = EnhancedReliabilityEngine()
|
| 1670 |
|
| 1671 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1672 |
# === Rate Limiting ===
|
| 1673 |
class RateLimiter:
|
| 1674 |
"""Simple rate limiter for request throttling"""
|
|
|
|
| 1656 |
|
| 1657 |
logger.info(f"Event processed: {result['status']} with {result['severity']} severity")
|
| 1658 |
|
| 1659 |
+
# Track business metrics for ROI dashboard
|
| 1660 |
+
if is_anomaly and business_impact:
|
| 1661 |
+
auto_healed = len(healing_actions) > 0 and healing_actions[0] != HealingAction.NO_ACTION
|
| 1662 |
+
business_metrics.record_incident(
|
| 1663 |
+
severity=event.severity.value,
|
| 1664 |
+
auto_healed=auto_healed,
|
| 1665 |
+
revenue_loss=business_impact['revenue_loss_estimate'],
|
| 1666 |
+
detection_time_seconds=120.0 # Assume 2 min detection
|
| 1667 |
+
)
|
| 1668 |
+
|
| 1669 |
+
logger.info(f"Event processed: {result['status']} with {result['severity']} severity")
|
| 1670 |
|
| 1671 |
return result
|
| 1672 |
|
|
|
|
| 1674 |
enhanced_engine = EnhancedReliabilityEngine()
|
| 1675 |
|
| 1676 |
|
| 1677 |
+
# === Global Metrics Tracker for ROI Dashboard ===
class BusinessMetricsTracker:
    """Track cumulative business metrics for the ROI dashboard.

    Accumulates incident counts, estimated revenue saved/at-risk, and
    detection times across the process lifetime. All mutating and reading
    methods take ``self._lock`` (an ``threading.RLock``), so a single
    instance may be shared across request-handler threads.
    """

    # Industry-average incident response time (minutes) used as the
    # baseline when estimating what a traditional-monitoring outage
    # would have cost. Single source of truth for the "14 min" figure
    # used by both record_incident() and get_metrics().
    INDUSTRY_AVG_RESPONSE_MINUTES = 14

    def __init__(self):
        self.total_incidents = 0
        self.incidents_auto_healed = 0
        self.total_revenue_saved = 0.0
        self.total_revenue_at_risk = 0.0
        # Per-incident detection latencies, in seconds.
        self.detection_times = []
        self._lock = threading.RLock()
        logger.info("Initialized BusinessMetricsTracker")

    def record_incident(
        self,
        severity: str,
        auto_healed: bool,
        revenue_loss: float,
        detection_time_seconds: float = 120.0  # 2 minutes default
    ):
        """Record an incident and update cumulative metrics.

        Args:
            severity: Severity label of the incident. Currently accepted
                for interface stability but not aggregated anywhere in
                this tracker.
            auto_healed: Whether an automatic healing action resolved it.
            revenue_loss: Estimated revenue actually lost ($) for this
                incident at our (ARF) response speed.
            detection_time_seconds: How long detection took; defaults to
                120 s (2 min).
        """
        with self._lock:
            self.total_incidents += 1

            if auto_healed:
                self.incidents_auto_healed += 1

            # Compare what revenue WOULD have been lost at the industry
            # average response time vs what we actually lost at ARF speed.
            arf_response_minutes = detection_time_seconds / 60
            # Guard against sub-minute response times inflating the
            # per-minute rate (divisor is clamped to at least 1 minute).
            revenue_per_minute = revenue_loss / max(1, arf_response_minutes)
            traditional_loss = revenue_per_minute * self.INDUSTRY_AVG_RESPONSE_MINUTES

            self.total_revenue_at_risk += traditional_loss
            self.total_revenue_saved += (traditional_loss - revenue_loss)

            self.detection_times.append(detection_time_seconds)

            # Fix: original used an invalid "\$" escape, which logged a
            # stray backslash; a bare "$" is the intended output.
            logger.info(
                f"Recorded incident: auto_healed={auto_healed}, "
                f"saved=${traditional_loss - revenue_loss:.2f}"
            )

    def get_metrics(self) -> dict:
        """Return a snapshot dict of the cumulative metrics.

        Rates fall back to safe defaults when nothing has been recorded:
        auto_heal_rate is 0 with no incidents, and the average detection
        time defaults to 120 s with no samples.
        """
        with self._lock:
            auto_heal_rate = (
                (self.incidents_auto_healed / self.total_incidents * 100)
                if self.total_incidents > 0 else 0
            )

            avg_detection_time = (
                sum(self.detection_times) / len(self.detection_times)
                if self.detection_times else 120.0
            )

            baseline = self.INDUSTRY_AVG_RESPONSE_MINUTES
            return {
                "total_incidents": self.total_incidents,
                "incidents_auto_healed": self.incidents_auto_healed,
                "auto_heal_rate": auto_heal_rate,
                "total_revenue_saved": self.total_revenue_saved,
                "total_revenue_at_risk": self.total_revenue_at_risk,
                "avg_detection_time_seconds": avg_detection_time,
                "avg_detection_time_minutes": avg_detection_time / 60,
                # Percent improvement vs the industry baseline; negative
                # if we are slower than the baseline.
                "time_improvement": (
                    (baseline - (avg_detection_time / 60)) / baseline * 100
                )
            }

    def reset(self):
        """Reset all metrics to zero (for demo purposes)."""
        with self._lock:
            self.total_incidents = 0
            self.incidents_auto_healed = 0
            self.total_revenue_saved = 0.0
            self.total_revenue_at_risk = 0.0
            self.detection_times = []
            logger.info("Reset BusinessMetricsTracker")
|
| 1758 |
+
|
| 1759 |
+
|
| 1760 |
+
# Module-level singleton: accumulates ROI metrics across all processed
# events; referenced by the event-processing path above at call time.
business_metrics = BusinessMetricsTracker()
|
| 1762 |
+
|
| 1763 |
# === Rate Limiting ===
|
| 1764 |
class RateLimiter:
|
| 1765 |
"""Simple rate limiter for request throttling"""
|