petter2025 committed on
Commit
c2deb84
·
verified ·
1 Parent(s): adfc2a0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +107 -44
app.py CHANGED
@@ -3,13 +3,21 @@ import asyncio
3
  import json
4
  import logging
5
  import traceback
 
 
 
 
6
  from agentic_reliability_framework.runtime.engine import EnhancedReliabilityEngine
7
 
8
- # Configure logging to show details
 
 
 
 
9
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
10
  logger = logging.getLogger(__name__)
11
 
12
- # Initialize the engine
13
  try:
14
  logger.info("Initializing EnhancedReliabilityEngine...")
15
  engine = EnhancedReliabilityEngine()
@@ -18,12 +26,15 @@ except Exception as e:
18
  logger.error(f"Failed to initialize engine: {e}\n{traceback.format_exc()}")
19
  engine = None
20
 
21
- async def analyze(component, latency, error_rate, throughput, cpu_util, memory_util):
22
- """Call the ARF v4 engine with telemetry data."""
 
 
 
 
23
  if engine is None:
24
  return json.dumps({"error": "Engine failed to initialize. Check logs."}, indent=2)
25
  try:
26
- logger.info(f"Analyzing: component={component}, latency={latency}, error_rate={error_rate}, throughput={throughput}, cpu={cpu_util}, mem={memory_util}")
27
  result = await engine.process_event_enhanced(
28
  component=component,
29
  latency=float(latency),
@@ -32,57 +43,109 @@ async def analyze(component, latency, error_rate, throughput, cpu_util, memory_u
32
  cpu_util=float(cpu_util) if cpu_util else None,
33
  memory_util=float(memory_util) if memory_util else None
34
  )
35
- logger.info("Analysis completed successfully.")
36
  return json.dumps(result, indent=2)
37
  except Exception as e:
38
- logger.error(f"Error during analysis: {e}\n{traceback.format_exc()}")
39
  return json.dumps({"error": str(e), "traceback": traceback.format_exc()}, indent=2)
40
 
41
- def sync_analyze(*args):
42
- """Synchronous wrapper for Gradio."""
43
- return asyncio.run(analyze(*args))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
 
45
- # Define the Gradio interface
 
 
 
 
 
 
46
  with gr.Blocks(title="ARF v4 – Reliability Lab", theme="soft") as demo:
47
- gr.Markdown("""
48
- # 🧠 Agentic Reliability Framework v4
49
- **Hybrid Bayesian + HMC intelligence for infrastructure reliability**
50
-
51
- Enter telemetry below to see ARF's advisory analysis. All outputs are **OSS advisory only** – no execution.
52
- """)
53
 
54
- with gr.Row():
55
- with gr.Column():
56
- component = gr.Dropdown(
57
- choices=["api-service", "auth-service", "payment-service", "database", "cache-service"],
58
- value="api-service",
59
- label="Component"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  )
61
- latency = gr.Slider(10, 1000, value=100, label="Latency P99 (ms)")
62
- error_rate = gr.Slider(0, 0.5, value=0.02, step=0.001, label="Error Rate")
63
- throughput = gr.Number(value=1000, label="Throughput (req/s)")
64
- cpu_util = gr.Slider(0, 1, value=0.4, label="CPU Utilization")
65
- memory_util = gr.Slider(0, 1, value=0.3, label="Memory Utilization")
66
- submit = gr.Button("🚀 Analyze", variant="primary")
67
 
68
- with gr.Column():
69
- output = gr.JSON(label="ARF Analysis Result")
70
-
71
- submit.click(
72
- fn=sync_analyze,
73
- inputs=[component, latency, error_rate, throughput, cpu_util, memory_util],
74
- outputs=output
75
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
76
 
77
  gr.Markdown("""
78
  ---
79
- ### 📚 About This Demo
80
- - Uses the full **ARF v4 engine** (`EnhancedReliabilityEngine`)
81
- - Risk scores combine **online conjugate priors** + **offline HMC** (if trained)
82
- - Multi‑agent system runs in parallel (detective, diagnostician, predictive)
83
- - Optional Claude synthesis (if `ANTHROPIC_API_KEY` is set)
84
-
85
- [📖 Tutorial](https://github.com/petter2025us/agentic-reliability-framework/blob/main/TUTORIAL.md) |
86
  [🐙 GitHub](https://github.com/petter2025us/agentic-reliability-framework) |
87
  [💼 Enterprise](mailto:petter2025us@outlook.com)
88
  """)
 
3
  import json
4
  import logging
5
  import traceback
6
+ import random
7
+ from datetime import datetime
8
+
9
+ # Import the base engine
10
  from agentic_reliability_framework.runtime.engine import EnhancedReliabilityEngine
11
 
12
+ # Import our new AI components
13
+ from ai_event import AIEvent
14
+ from hallucination_detective import HallucinationDetectiveAgent
15
+ from memory_drift_diagnostician import MemoryDriftDiagnosticianAgent
16
+
17
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
18
  logger = logging.getLogger(__name__)
19
 
20
+ # Initialize the engine (for infrastructure analysis)
21
  try:
22
  logger.info("Initializing EnhancedReliabilityEngine...")
23
  engine = EnhancedReliabilityEngine()
 
26
  logger.error(f"Failed to initialize engine: {e}\n{traceback.format_exc()}")
27
  engine = None
28
 
29
+ # AI agents (initialize once)
30
+ hallucination_detective = HallucinationDetectiveAgent()
31
+ memory_drift_diagnostician = MemoryDriftDiagnosticianAgent()
32
+
33
+ async def analyze_infrastructure(component, latency, error_rate, throughput, cpu_util, memory_util):
34
+ """Original infrastructure analysis."""
35
  if engine is None:
36
  return json.dumps({"error": "Engine failed to initialize. Check logs."}, indent=2)
37
  try:
 
38
  result = await engine.process_event_enhanced(
39
  component=component,
40
  latency=float(latency),
 
43
  cpu_util=float(cpu_util) if cpu_util else None,
44
  memory_util=float(memory_util) if memory_util else None
45
  )
 
46
  return json.dumps(result, indent=2)
47
  except Exception as e:
48
+ logger.error(f"Infrastructure analysis error: {e}\n{traceback.format_exc()}")
49
  return json.dumps({"error": str(e), "traceback": traceback.format_exc()}, indent=2)
50
 
51
async def analyze_ai(component, prompt, model_name, model_version, confidence, perplexity, retrieval_score):
    """Run the AI reliability agents on a simulated model interaction.

    No real model is called: the response is a mock string built from
    ``prompt`` and the latency figures are random placeholders.

    Args:
        component: Stored as the event's ``component``.
        prompt: Prompt text; also echoed into the mock response.
        model_name: Stored as the event's ``model_name``.
        model_version: Stored as the event's ``model_version``.
        confidence: Stored as the event's ``confidence``.
        perplexity: Stored as the event's ``perplexity``.
        retrieval_score: Single score, wrapped in a one-element list for
            the event's ``retrieval_scores``.

    Returns:
        A JSON string (``indent=2``). On success it contains the keys
        ``hallucination_detection``, ``memory_drift_detection`` and
        ``response``. If anything raises, it contains ``error`` and
        ``traceback`` instead; the exception is logged, not re-raised.
    """
    # Function-scope import keeps this block self-contained; the module only
    # imports ``datetime`` from the ``datetime`` package.
    from datetime import timezone

    try:
        # Simulate a response (in a real app, call an actual model).
        response = f"Mock response to: {prompt}"

        event = AIEvent(
            # ``datetime.utcnow()`` is deprecated since Python 3.12. This
            # yields the same naive-UTC value, so the event receives exactly
            # what it received before.
            timestamp=datetime.now(timezone.utc).replace(tzinfo=None),
            component=component,
            service_mesh="ai",
            latency_p99=random.uniform(100, 500),  # placeholder telemetry
            error_rate=0.0,
            throughput=1,
            cpu_util=None,
            memory_util=None,
            model_name=model_name,
            model_version=model_version,
            prompt=prompt,
            response=response,
            response_length=len(response),
            confidence=confidence,
            perplexity=perplexity,
            retrieval_scores=[retrieval_score],
            user_feedback=None,
            latency_ms=random.uniform(200, 800),  # placeholder telemetry
        )

        # Run the agents one after the other on the same event.
        hallu_result = await hallucination_detective.analyze(event)
        drift_result = await memory_drift_diagnostician.analyze(event)

        result = {
            "hallucination_detection": hallu_result,
            "memory_drift_detection": drift_result,
            "response": response,
        }
        return json.dumps(result, indent=2)
    except Exception as e:
        # UI boundary: report the failure to the caller as JSON rather than
        # letting it propagate. ``logger.exception`` attaches the traceback.
        logger.exception("AI analysis error: %s", e)
        return json.dumps({"error": str(e), "traceback": traceback.format_exc()}, indent=2)
90
 
91
def sync_infrastructure(*args):
    """Blocking wrapper around ``analyze_infrastructure``.

    Forwards all positional arguments unchanged, runs the coroutine to
    completion with ``asyncio.run`` and returns its result.
    """
    pending = analyze_infrastructure(*args)
    return asyncio.run(pending)
93
+
94
def sync_ai(*args):
    """Blocking wrapper around ``analyze_ai``.

    Forwards all positional arguments unchanged, runs the coroutine to
    completion with ``asyncio.run`` and returns its result.
    """
    pending = analyze_ai(*args)
    return asyncio.run(pending)
96
+
97
+ # Build the Gradio interface with tabs
98
  with gr.Blocks(title="ARF v4 – Reliability Lab", theme="soft") as demo:
99
+ gr.Markdown("# 🧠 Agentic Reliability Framework v4\n**Infrastructure & AI Reliability**")
 
 
 
 
 
100
 
101
+ with gr.Tabs():
102
+ with gr.TabItem("Infrastructure"):
103
+ gr.Markdown("Enter telemetry to analyze infrastructure incidents.")
104
+ with gr.Row():
105
+ with gr.Column():
106
+ component = gr.Dropdown(
107
+ choices=["api-service", "auth-service", "payment-service", "database", "cache-service"],
108
+ value="api-service", label="Component"
109
+ )
110
+ latency = gr.Slider(10, 1000, value=100, label="Latency P99 (ms)")
111
+ error_rate = gr.Slider(0, 0.5, value=0.02, step=0.001, label="Error Rate")
112
+ throughput = gr.Number(value=1000, label="Throughput (req/s)")
113
+ cpu_util = gr.Slider(0, 1, value=0.4, label="CPU Utilization")
114
+ memory_util = gr.Slider(0, 1, value=0.3, label="Memory Utilization")
115
+ infra_submit = gr.Button("Analyze Infrastructure", variant="primary")
116
+ with gr.Column():
117
+ infra_output = gr.JSON(label="Analysis Result")
118
+ infra_submit.click(
119
+ fn=sync_infrastructure,
120
+ inputs=[component, latency, error_rate, throughput, cpu_util, memory_util],
121
+ outputs=infra_output
122
  )
 
 
 
 
 
 
123
 
124
+ with gr.TabItem("AI Reliability"):
125
+ gr.Markdown("Simulate an AI query to detect hallucinations and memory drift.")
126
+ with gr.Row():
127
+ with gr.Column():
128
+ ai_component = gr.Dropdown(
129
+ choices=["chat", "code", "summary"], label="Task Type", value="chat"
130
+ )
131
+ prompt = gr.Textbox(label="Prompt", value="What is the capital of France?")
132
+ model_name = gr.Dropdown(["gpt-3.5", "gpt-4", "claude"], label="Model", value="gpt-4")
133
+ model_version = gr.Textbox(value="v1", label="Version")
134
+ confidence = gr.Slider(0, 1, value=0.95, label="Model Confidence")
135
+ perplexity = gr.Slider(0, 50, value=5, label="Perplexity")
136
+ retrieval_score = gr.Slider(0, 1, value=0.8, label="Retrieval Score")
137
+ ai_submit = gr.Button("Analyze AI", variant="primary")
138
+ with gr.Column():
139
+ ai_output = gr.JSON(label="Analysis Result")
140
+ ai_submit.click(
141
+ fn=sync_ai,
142
+ inputs=[ai_component, prompt, model_name, model_version, confidence, perplexity, retrieval_score],
143
+ outputs=ai_output
144
+ )
145
 
146
  gr.Markdown("""
147
  ---
148
+ [📖 Tutorial](https://github.com/petter2025us/agentic-reliability-framework/blob/main/TUTORIAL.md) |
 
 
 
 
 
 
149
  [🐙 GitHub](https://github.com/petter2025us/agentic-reliability-framework) |
150
  [💼 Enterprise](mailto:petter2025us@outlook.com)
151
  """)