Update app.py
Browse files
app.py
CHANGED
|
@@ -3,7 +3,6 @@ import json
|
|
| 3 |
import logging
|
| 4 |
import logging.handlers
|
| 5 |
import numpy as np
|
| 6 |
-
import pandas as pd
|
| 7 |
from datetime import datetime
|
| 8 |
from typing import Dict, Any, List, Optional, Tuple
|
| 9 |
import threading
|
|
@@ -14,6 +13,7 @@ import contextlib
|
|
| 14 |
import signal
|
| 15 |
import sys
|
| 16 |
import functools
|
|
|
|
| 17 |
from scipy.stats import beta
|
| 18 |
import plotly.graph_objects as go
|
| 19 |
|
|
@@ -69,6 +69,7 @@ console_handler.setFormatter(logging.Formatter('%(asctime)s - %(name)s - %(level
|
|
| 69 |
|
| 70 |
logger.addHandler(file_handler)
|
| 71 |
logger.addHandler(console_handler)
|
|
|
|
| 72 |
|
| 73 |
# ----------------------------------------------------------------------
|
| 74 |
# SQLite persistence with secure permissions
|
|
@@ -89,9 +90,12 @@ def init_db():
|
|
| 89 |
)
|
| 90 |
''')
|
| 91 |
conn.commit()
|
| 92 |
-
# Restrict permissions (owner read/write only)
|
| 93 |
-
|
| 94 |
-
|
|
|
|
|
|
|
|
|
|
| 95 |
|
| 96 |
def save_decision_to_db(decision: dict, risk: float):
|
| 97 |
"""Insert a decision into the database."""
|
|
@@ -181,8 +185,9 @@ def refresh_history_from_db():
|
|
| 181 |
for ts, dec, risk in decisions:
|
| 182 |
decision_history.append((ts, dec, risk))
|
| 183 |
risk_history.append((ts, risk))
|
| 184 |
-
|
| 185 |
-
|
|
|
|
| 186 |
|
| 187 |
# ----------------------------------------------------------------------
|
| 188 |
# Memory monitoring (daemon thread with graceful stop)
|
|
@@ -219,19 +224,42 @@ def memory_monitor_loop():
|
|
| 219 |
logger.info("Process memory: unknown")
|
| 220 |
except Exception as e:
|
| 221 |
logger.error(f"Memory logging error: {e}")
|
| 222 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 223 |
|
| 224 |
# ----------------------------------------------------------------------
|
| 225 |
-
# Bayesian Risk Engine (Beta‑Binomial)
|
| 226 |
# ----------------------------------------------------------------------
|
| 227 |
class BayesianRiskEngine:
|
| 228 |
-
def __init__(self, alpha=ALPHA_PRIOR, beta=BETA_PRIOR):
|
| 229 |
self.alpha = alpha
|
| 230 |
self.beta = beta
|
|
|
|
|
|
|
|
|
|
|
|
|
| 231 |
|
| 232 |
def update(self, failures, successes):
|
| 233 |
-
|
| 234 |
-
self.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 235 |
|
| 236 |
def risk(self):
|
| 237 |
return self.alpha / (self.alpha + self.beta)
|
|
@@ -259,7 +287,7 @@ class PolicyEngine:
|
|
| 259 |
return "escalate", f"Risk in escalation zone ({self.thresholds['low']}-{self.thresholds['high']})"
|
| 260 |
|
| 261 |
# ----------------------------------------------------------------------
|
| 262 |
-
# Infrastructure analysis (synchronous, with validation)
|
| 263 |
# ----------------------------------------------------------------------
|
| 264 |
def handle_infra_with_governance(fault_type: str, context_window: int, session_state: dict):
|
| 265 |
start_time = time.time()
|
|
@@ -278,7 +306,9 @@ def handle_infra_with_governance(fault_type: str, context_window: int, session_s
|
|
| 278 |
}
|
| 279 |
failures, successes = fault_map.get(fault_type, (1, 99))
|
| 280 |
|
| 281 |
-
|
|
|
|
|
|
|
| 282 |
risk_engine.update(failures, successes)
|
| 283 |
risk = risk_engine.risk()
|
| 284 |
ci_low, ci_high = risk_engine.risk_interval(0.95)
|
|
@@ -296,7 +326,8 @@ def handle_infra_with_governance(fault_type: str, context_window: int, session_s
|
|
| 296 |
"posterior_parameters": {
|
| 297 |
"alpha": risk_engine.alpha,
|
| 298 |
"beta": risk_engine.beta
|
| 299 |
-
}
|
|
|
|
| 300 |
}
|
| 301 |
output = {
|
| 302 |
**analysis_result,
|
|
@@ -319,12 +350,14 @@ def handle_infra_with_governance(fault_type: str, context_window: int, session_s
|
|
| 319 |
|
| 320 |
def autonomous_control_decision(risk, risk_engine, policy_engine):
|
| 321 |
action, reason = policy_engine.evaluate(risk)
|
|
|
|
|
|
|
| 322 |
decision = {
|
| 323 |
"timestamp": datetime.utcnow().isoformat(),
|
| 324 |
"approved": action == "approve",
|
| 325 |
"actions": ["escalate_human"] if action == "escalate" else [],
|
| 326 |
"reason": reason,
|
| 327 |
-
"risk_level":
|
| 328 |
}
|
| 329 |
update_dashboard_data(decision, risk)
|
| 330 |
return decision
|
|
@@ -602,6 +635,15 @@ with gr.Blocks(title=f"ARF v{VERSION} – Bayesian Risk Scoring Demo") as demo:
|
|
| 602 |
value="none",
|
| 603 |
label="Inject Fault"
|
| 604 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 605 |
infra_btn = gr.Button("Evaluate Intent")
|
| 606 |
with gr.Column():
|
| 607 |
infra_output = gr.JSON(label="Analysis Result")
|
|
@@ -674,14 +716,14 @@ with gr.Blocks(title=f"ARF v{VERSION} – Bayesian Risk Scoring Demo") as demo:
|
|
| 674 |
|
| 675 |
<div style="text-align: center; margin-top: 2rem;">
|
| 676 |
<a href="https://calendly.com/petter2025us/30min" target="_blank" style="background: #764ba2; color: white; padding: 12px 24px; text-decoration: none; border-radius: 8px; font-weight: bold; margin-right: 1rem;">📅 Book a Demo</a>
|
| 677 |
-
<a href="mailto:petter2025us@outlook.com" style="background: #667eea; color: white; padding: 12px 24px; text-decoration: none; border-radius: 8px; font-weight: bold;">📧
|
| 678 |
</div>
|
| 679 |
""")
|
| 680 |
|
| 681 |
# Wire events
|
| 682 |
infra_btn.click(
|
| 683 |
fn=handle_infra_with_governance,
|
| 684 |
-
inputs=[infra_fault,
|
| 685 |
outputs=[infra_output, infra_state]
|
| 686 |
)
|
| 687 |
|
|
@@ -695,9 +737,7 @@ with gr.Blocks(title=f"ARF v{VERSION} – Bayesian Risk Scoring Demo") as demo:
|
|
| 695 |
# Launch
|
| 696 |
# ----------------------------------------------------------------------
|
| 697 |
if __name__ == "__main__":
|
| 698 |
-
# Enable queue and add metrics route if available
|
| 699 |
demo.queue()
|
| 700 |
-
if PROMETHEUS_AVAILABLE:
|
| 701 |
-
# Access the underlying FastAPI app after queueing
|
| 702 |
demo.app.add_api_route("/metrics", lambda: (generate_latest(), 200, {"Content-Type": CONTENT_TYPE_LATEST}), methods=["GET"])
|
| 703 |
demo.launch(theme="soft", server_name="0.0.0.0", server_port=7860)
|
|
|
|
| 3 |
import logging
|
| 4 |
import logging.handlers
|
| 5 |
import numpy as np
|
|
|
|
| 6 |
from datetime import datetime
|
| 7 |
from typing import Dict, Any, List, Optional, Tuple
|
| 8 |
import threading
|
|
|
|
| 13 |
import signal
|
| 14 |
import sys
|
| 15 |
import functools
|
| 16 |
+
from collections import deque
|
| 17 |
from scipy.stats import beta
|
| 18 |
import plotly.graph_objects as go
|
| 19 |
|
|
|
|
| 69 |
|
| 70 |
logger.addHandler(file_handler)
|
| 71 |
logger.addHandler(console_handler)
|
| 72 |
+
logger.propagate = False # Prevent duplicate logs
|
| 73 |
|
| 74 |
# ----------------------------------------------------------------------
|
| 75 |
# SQLite persistence with secure permissions
|
|
|
|
| 90 |
)
|
| 91 |
''')
|
| 92 |
conn.commit()
|
| 93 |
+
# Restrict permissions (owner read/write only) – best effort
|
| 94 |
+
try:
|
| 95 |
+
os.chmod(DB_PATH, 0o600)
|
| 96 |
+
except Exception as e:
|
| 97 |
+
logger.warning(f"Could not set secure permissions on DB: {e}")
|
| 98 |
+
logger.info(f"Database initialized at {DB_PATH}")
|
| 99 |
|
| 100 |
def save_decision_to_db(decision: dict, risk: float):
|
| 101 |
"""Insert a decision into the database."""
|
|
|
|
| 185 |
for ts, dec, risk in decisions:
|
| 186 |
decision_history.append((ts, dec, risk))
|
| 187 |
risk_history.append((ts, risk))
|
| 188 |
+
# After loading, set the Prometheus gauge to the latest risk
|
| 189 |
+
if PROMETHEUS_AVAILABLE and risk_history:
|
| 190 |
+
prom_risk_gauge.set(risk_history[-1][1])
|
| 191 |
|
| 192 |
# ----------------------------------------------------------------------
|
| 193 |
# Memory monitoring (daemon thread with graceful stop)
|
|
|
|
| 224 |
logger.info("Process memory: unknown")
|
| 225 |
except Exception as e:
|
| 226 |
logger.error(f"Memory logging error: {e}")
|
| 227 |
+
# Sleep in small intervals to react quickly to shutdown
|
| 228 |
+
for _ in range(60):
|
| 229 |
+
if shutdown_event.is_set():
|
| 230 |
+
break
|
| 231 |
+
time.sleep(1)
|
| 232 |
|
| 233 |
# ----------------------------------------------------------------------
|
| 234 |
+
# Bayesian Risk Engine (Beta‑Binomial) with sliding window
|
| 235 |
# ----------------------------------------------------------------------
|
| 236 |
class BayesianRiskEngine:
|
| 237 |
+
def __init__(self, alpha=ALPHA_PRIOR, beta=BETA_PRIOR, maxlen=None):
|
| 238 |
self.alpha = alpha
|
| 239 |
self.beta = beta
|
| 240 |
+
self.maxlen = maxlen
|
| 241 |
+
self.events = deque(maxlen=maxlen) # store (failures, successes)
|
| 242 |
+
self.total_failures = 0
|
| 243 |
+
self.total_successes = 0
|
| 244 |
|
| 245 |
def update(self, failures, successes):
|
| 246 |
+
# Add new event
|
| 247 |
+
self.events.append((failures, successes))
|
| 248 |
+
self.total_failures += failures
|
| 249 |
+
self.total_successes += successes
|
| 250 |
+
|
| 251 |
+
# Once the deque already holds maxlen events, each further append silently evicts the oldest one,
|
| 252 |
+
# but we need to subtract it from totals.
|
| 253 |
+
if self.maxlen is not None and len(self.events) == self.maxlen:
|
| 254 |
+
# The deque automatically discards the leftmost when full, but we have to
|
| 255 |
+
# manually adjust totals to reflect the discarded event.
|
| 256 |
+
# append() does not return the evicted event, so recompute both totals from the deque instead.
|
| 257 |
+
self.total_failures = sum(f for f, _ in self.events)
|
| 258 |
+
self.total_successes = sum(s for _, s in self.events)
|
| 259 |
+
|
| 260 |
+
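# Posterior parameters: alpha = ALPHA_PRIOR + total failures, beta = BETA_PRIOR + total successes (module-level priors, not the constructor arguments)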
|
| 261 |
+
self.alpha = ALPHA_PRIOR + self.total_failures
|
| 262 |
+
self.beta = BETA_PRIOR + self.total_successes
|
| 263 |
|
| 264 |
def risk(self):
|
| 265 |
return self.alpha / (self.alpha + self.beta)
|
|
|
|
| 287 |
return "escalate", f"Risk in escalation zone ({self.thresholds['low']}-{self.thresholds['high']})"
|
| 288 |
|
| 289 |
# ----------------------------------------------------------------------
|
| 290 |
+
# Infrastructure analysis (synchronous, with validation and sliding window)
|
| 291 |
# ----------------------------------------------------------------------
|
| 292 |
def handle_infra_with_governance(fault_type: str, context_window: int, session_state: dict):
|
| 293 |
start_time = time.time()
|
|
|
|
| 306 |
}
|
| 307 |
failures, successes = fault_map.get(fault_type, (1, 99))
|
| 308 |
|
| 309 |
+
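# context_window > 0 caps the engine's event window at that many events; otherwise it is unbounded. NOTE(review): the engine is created on the next lines, so the window starts empty here - confirm that is intended.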
|
| 310 |
+
maxlen = context_window if context_window > 0 else None
|
| 311 |
+
risk_engine = BayesianRiskEngine(maxlen=maxlen)
|
| 312 |
risk_engine.update(failures, successes)
|
| 313 |
risk = risk_engine.risk()
|
| 314 |
ci_low, ci_high = risk_engine.risk_interval(0.95)
|
|
|
|
| 326 |
"posterior_parameters": {
|
| 327 |
"alpha": risk_engine.alpha,
|
| 328 |
"beta": risk_engine.beta
|
| 329 |
+
},
|
| 330 |
+
"context_window": context_window
|
| 331 |
}
|
| 332 |
output = {
|
| 333 |
**analysis_result,
|
|
|
|
| 350 |
|
| 351 |
def autonomous_control_decision(risk, risk_engine, policy_engine):
|
| 352 |
action, reason = policy_engine.evaluate(risk)
|
| 353 |
+
# Use configurable thresholds for risk level
|
| 354 |
+
risk_level = "low" if risk < LOW_THRESHOLD else "medium" if risk < HIGH_THRESHOLD else "high"
|
| 355 |
decision = {
|
| 356 |
"timestamp": datetime.utcnow().isoformat(),
|
| 357 |
"approved": action == "approve",
|
| 358 |
"actions": ["escalate_human"] if action == "escalate" else [],
|
| 359 |
"reason": reason,
|
| 360 |
+
"risk_level": risk_level
|
| 361 |
}
|
| 362 |
update_dashboard_data(decision, risk)
|
| 363 |
return decision
|
|
|
|
| 635 |
value="none",
|
| 636 |
label="Inject Fault"
|
| 637 |
)
|
| 638 |
+
# Use a Number component to allow user to set context window
|
| 639 |
+
context_window_input = gr.Number(
|
| 640 |
+
value=50,
|
| 641 |
+
label="Context Window (number of recent events)",
|
| 642 |
+
minimum=1,
|
| 643 |
+
maximum=1000,
|
| 644 |
+
step=1,
|
| 645 |
+
info="How many past incidents to consider for risk calculation (0 = unlimited)"
|
| 646 |
+
)
|
| 647 |
infra_btn = gr.Button("Evaluate Intent")
|
| 648 |
with gr.Column():
|
| 649 |
infra_output = gr.JSON(label="Analysis Result")
|
|
|
|
| 716 |
|
| 717 |
<div style="text-align: center; margin-top: 2rem;">
|
| 718 |
<a href="https://calendly.com/petter2025us/30min" target="_blank" style="background: #764ba2; color: white; padding: 12px 24px; text-decoration: none; border-radius: 8px; font-weight: bold; margin-right: 1rem;">📅 Book a Demo</a>
|
| 719 |
+
<a href="mailto:petter2025us@outlook.com" style="background: #667eea; color: white; padding: 12px 24px; text-decoration: none; border-radius: 8px; font-weight: bold;">📧 Email me</a>
|
| 720 |
</div>
|
| 721 |
""")
|
| 722 |
|
| 723 |
# Wire events
|
| 724 |
infra_btn.click(
|
| 725 |
fn=handle_infra_with_governance,
|
| 726 |
+
inputs=[infra_fault, context_window_input, infra_state],
|
| 727 |
outputs=[infra_output, infra_state]
|
| 728 |
)
|
| 729 |
|
|
|
|
| 737 |
# Launch
|
| 738 |
# ----------------------------------------------------------------------
|
| 739 |
if __name__ == "__main__":
|
|
|
|
| 740 |
demo.queue()
|
| 741 |
+
if PROMETHEUS_AVAILABLE and hasattr(demo, 'app') and demo.app:
|
|
|
|
| 742 |
demo.app.add_api_route("/metrics", lambda: (generate_latest(), 200, {"Content-Type": CONTENT_TYPE_LATEST}), methods=["GET"])
|
| 743 |
demo.launch(theme="soft", server_name="0.0.0.0", server_port=7860)
|