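# File-viewer residue kept for provenance (not part of the program):
# author: petter2025 | last commit: "Update app.py" (aa09497, verified)
# viewer links: raw, history, blame | size: 3.92 kB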
import gradio as gr
import asyncio
import json
import logging
import traceback
from agentic_reliability_framework.runtime.engine import EnhancedReliabilityEngine
# Root logging at DEBUG so that every record is emitted with timestamp,
# logger name and level.
logging.basicConfig(
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
    level=logging.DEBUG,
)
logger = logging.getLogger(__name__)

# Build the shared engine once at import time. Any failure is logged together
# with its traceback and leaves ``engine`` set to None, so the module still
# imports and ``analyze`` can report the problem instead of crashing.
try:
    logger.info("Initializing EnhancedReliabilityEngine...")
    engine = EnhancedReliabilityEngine()
    logger.info("Engine initialized successfully.")
except Exception as init_error:
    logger.error(
        f"Failed to initialize engine: {init_error}\n{traceback.format_exc()}"
    )
    engine = None
def _optional_float(value, default=None):
    """Return ``float(value)``, or *default* when *value* is None or ""."""
    if value is None or value == "":
        return default
    return float(value)


async def analyze(component, latency, error_rate, throughput, cpu_util, memory_util):
    """Call the ARF v4 engine with telemetry data and return the result as JSON text.

    Args:
        component: Component name, forwarded unchanged to the engine.
        latency: Latency value; converted with ``float``.
        error_rate: Error rate; converted with ``float``.
        throughput: Throughput; falls back to 1000.0 only when missing
            (None or an empty string).
        cpu_util: CPU utilization; forwarded as None only when missing.
        memory_util: Memory utilization; forwarded as None only when missing.

    Returns:
        A JSON string (indent=2). On success it is the serialized engine
        result; otherwise an object with an ``"error"`` key (plus
        ``"traceback"`` when an exception was raised during analysis).
    """
    if engine is None:
        return json.dumps({"error": "Engine failed to initialize. Check logs."}, indent=2)

    try:
        logger.info(f"Analyzing: component={component}, latency={latency}, error_rate={error_rate}, throughput={throughput}, cpu={cpu_util}, mem={memory_util}")
        # A reading of 0 is real telemetry, not "missing": test for absence
        # explicitly instead of relying on truthiness, which would turn a 0
        # slider value into None and a 0 throughput into the 1000.0 default.
        result = await engine.process_event_enhanced(
            component=component,
            latency=float(latency),
            error_rate=float(error_rate),
            throughput=_optional_float(throughput, 1000.0),
            cpu_util=_optional_float(cpu_util),
            memory_util=_optional_float(memory_util),
        )
        logger.info("Analysis completed successfully.")
        return json.dumps(result, indent=2)
    except Exception as e:
        # Covers conversion errors, engine failures and non-serializable
        # results alike; format the traceback once and reuse it.
        trace = traceback.format_exc()
        logger.error(f"Error during analysis: {e}\n{trace}")
        # NOTE(review): the traceback is returned to the caller, which exposes
        # internals in the UI - confirm this is intended outside of a demo.
        return json.dumps({"error": str(e), "traceback": trace}, indent=2)
def sync_analyze(*args):
    """Blocking adapter for Gradio: run ``analyze`` to completion.

    All positional arguments are passed straight through to ``analyze``;
    the coroutine's return value is returned unchanged.
    """
    pending = analyze(*args)
    return asyncio.run(pending)
# Gradio UI: telemetry inputs on the left, JSON result on the right.
with gr.Blocks(theme="soft", title="ARF v4 – Reliability Lab") as demo:
    # Page header.
    gr.Markdown("""
# 🧠 Agentic Reliability Framework v4
**Hybrid Bayesian + HMC intelligence for infrastructure reliability**
Enter telemetry below to see ARF's advisory analysis. All outputs are **OSS advisory only** – no execution.
""")

    with gr.Row():
        # Left column: one widget per argument of ``analyze``.
        with gr.Column():
            component = gr.Dropdown(
                label="Component",
                value="api-service",
                choices=[
                    "api-service",
                    "auth-service",
                    "payment-service",
                    "database",
                    "cache-service",
                ],
            )
            latency = gr.Slider(minimum=10, maximum=1000, value=100, label="Latency P99 (ms)")
            error_rate = gr.Slider(minimum=0, maximum=0.5, value=0.02, step=0.001, label="Error Rate")
            throughput = gr.Number(label="Throughput (req/s)", value=1000)
            cpu_util = gr.Slider(minimum=0, maximum=1, value=0.4, label="CPU Utilization")
            memory_util = gr.Slider(minimum=0, maximum=1, value=0.3, label="Memory Utilization")
            submit = gr.Button("🚀 Analyze", variant="primary")

        # Right column: rendered analysis result.
        with gr.Column():
            output = gr.JSON(label="ARF Analysis Result")

    # The input order must match the positional parameters of ``analyze``.
    submit.click(
        sync_analyze,
        [component, latency, error_rate, throughput, cpu_util, memory_util],
        output,
    )

    # Footer with background information and links.
    gr.Markdown("""
---
### 📚 About This Demo
- Uses the full **ARF v4 engine** (`EnhancedReliabilityEngine`)
- Risk scores combine **online conjugate priors** + **offline HMC** (if trained)
- Multi‑agent system runs in parallel (detective, diagnostician, predictive)
- Optional Claude synthesis (if `ANTHROPIC_API_KEY` is set)
[📖 Tutorial](https://github.com/petter2025us/agentic-reliability-framework/blob/main/TUTORIAL.md) |
[🐙 GitHub](https://github.com/petter2025us/agentic-reliability-framework) |
[💼 Enterprise](mailto:petter2025us@outlook.com)
""")
if __name__ == "__main__":
    # Start the web server only when executed as a script: listen on all
    # network interfaces, fixed port 7860.
    demo.launch(
        server_port=7860,
        server_name="0.0.0.0",
    )