petter2025 committed on
Commit
e93eabb
·
verified ·
1 Parent(s): fa2c5e7

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +123 -88
app.py CHANGED
@@ -3,21 +3,24 @@ import asyncio
3
  import json
4
  import logging
5
  import traceback
6
- import random
 
7
  from datetime import datetime
 
 
8
 
9
- # Import the base engine
10
  from agentic_reliability_framework.runtime.engine import EnhancedReliabilityEngine
11
-
12
- # Import our new AI components
13
- from ai_event import AIEvent
14
  from hallucination_detective import HallucinationDetectiveAgent
15
  from memory_drift_diagnostician import MemoryDriftDiagnosticianAgent
 
 
 
16
 
17
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
18
  logger = logging.getLogger(__name__)
19
 
20
- # Initialize the engine (for infrastructure analysis)
21
  try:
22
  logger.info("Initializing EnhancedReliabilityEngine...")
23
  engine = EnhancedReliabilityEngine()
@@ -26,128 +29,160 @@ except Exception as e:
26
  logger.error(f"Failed to initialize engine: {e}\n{traceback.format_exc()}")
27
  engine = None
28
 
29
- # AI agents (initialize once)
30
- hallucination_detective = HallucinationDetectiveAgent()
 
 
 
 
 
 
 
 
 
 
 
 
31
  memory_drift_diagnostician = MemoryDriftDiagnosticianAgent()
32
 
33
- async def analyze_infrastructure(component, latency, error_rate, throughput, cpu_util, memory_util):
34
- """Original infrastructure analysis."""
35
- if engine is None:
36
- return json.dumps({"error": "Engine failed to initialize. Check logs."}, indent=2)
 
 
 
 
 
 
 
37
  try:
38
- result = await engine.process_event_enhanced(
39
- component=component,
40
- latency=float(latency),
41
- error_rate=float(error_rate),
42
- throughput=float(throughput) if throughput else 1000.0,
43
- cpu_util=float(cpu_util) if cpu_util else None,
44
- memory_util=float(memory_util) if memory_util else None
 
 
45
  )
46
- return json.dumps(result, indent=2)
 
 
 
47
  except Exception as e:
48
- logger.error(f"Infrastructure analysis error: {e}\n{traceback.format_exc()}")
49
- return json.dumps({"error": str(e), "traceback": traceback.format_exc()}, indent=2)
50
 
51
- async def analyze_ai(component, prompt, model_name, model_version, confidence, perplexity, retrieval_score):
52
- """AI reliability analysis."""
53
  try:
54
- # Simulate a response (in a real app, call an actual model)
55
- response = f"Mock response to: {prompt}"
 
 
 
56
  # Create AIEvent
57
  event = AIEvent(
58
  timestamp=datetime.utcnow(),
59
- component=component,
60
  service_mesh="ai",
61
- latency_p99=random.uniform(100, 500),
62
  error_rate=0.0,
63
  throughput=1,
64
  cpu_util=None,
65
  memory_util=None,
66
- model_name=model_name,
67
- model_version=model_version,
 
68
  prompt=prompt,
69
  response=response,
70
  response_length=len(response),
71
  confidence=confidence,
72
- perplexity=perplexity,
73
- retrieval_scores=[retrieval_score],
74
  user_feedback=None,
75
- latency_ms=random.uniform(200, 800)
76
  )
 
 
 
77
  # Run agents
78
  hallu_result = await hallucination_detective.analyze(event)
79
  drift_result = await memory_drift_diagnostician.analyze(event)
80
- # Combine results
 
 
 
81
  result = {
 
 
82
  "hallucination_detection": hallu_result,
83
  "memory_drift_detection": drift_result,
84
- "response": response
85
  }
86
  return json.dumps(result, indent=2)
87
  except Exception as e:
88
  logger.error(f"AI analysis error: {e}\n{traceback.format_exc()}")
89
  return json.dumps({"error": str(e), "traceback": traceback.format_exc()}, indent=2)
90
 
91
- def sync_infrastructure(*args):
92
- return asyncio.run(analyze_infrastructure(*args))
93
-
94
- def sync_ai(*args):
95
- return asyncio.run(analyze_ai(*args))
 
 
 
 
 
96
 
97
- # Build the Gradio interface with tabs
98
- with gr.Blocks(title="ARF v4 – Reliability Lab", theme="soft") as demo:
99
- gr.Markdown("# 🧠 Agentic Reliability Framework v4\n**Infrastructure & AI Reliability**")
100
 
101
- with gr.Tabs():
102
- with gr.TabItem("Infrastructure"):
103
- gr.Markdown("Enter telemetry to analyze infrastructure incidents.")
104
- with gr.Row():
105
- with gr.Column():
106
- component = gr.Dropdown(
107
- choices=["api-service", "auth-service", "payment-service", "database", "cache-service"],
108
- value="api-service", label="Component"
109
- )
110
- latency = gr.Slider(10, 1000, value=100, label="Latency P99 (ms)")
111
- error_rate = gr.Slider(0, 0.5, value=0.02, step=0.001, label="Error Rate")
112
- throughput = gr.Number(value=1000, label="Throughput (req/s)")
113
- cpu_util = gr.Slider(0, 1, value=0.4, label="CPU Utilization")
114
- memory_util = gr.Slider(0, 1, value=0.3, label="Memory Utilization")
115
- infra_submit = gr.Button("Analyze Infrastructure", variant="primary")
116
- with gr.Column():
117
- infra_output = gr.JSON(label="Analysis Result")
118
- infra_submit.click(
119
- fn=sync_infrastructure,
120
- inputs=[component, latency, error_rate, throughput, cpu_util, memory_util],
121
- outputs=infra_output
122
  )
123
-
124
- with gr.TabItem("AI Reliability"):
125
- gr.Markdown("Simulate an AI query to detect hallucinations and memory drift.")
126
- with gr.Row():
127
- with gr.Column():
128
- ai_component = gr.Dropdown(
129
- choices=["chat", "code", "summary"], label="Task Type", value="chat"
130
- )
131
- prompt = gr.Textbox(label="Prompt", value="What is the capital of France?")
132
- model_name = gr.Dropdown(["gpt-3.5", "gpt-4", "claude"], label="Model", value="gpt-4")
133
- model_version = gr.Textbox(value="v1", label="Version")
134
- confidence = gr.Slider(0, 1, value=0.95, label="Model Confidence")
135
- perplexity = gr.Slider(0, 50, value=5, label="Perplexity")
136
- retrieval_score = gr.Slider(0, 1, value=0.8, label="Retrieval Score")
137
- ai_submit = gr.Button("Analyze AI", variant="primary")
138
- with gr.Column():
139
- ai_output = gr.JSON(label="Analysis Result")
140
- ai_submit.click(
141
- fn=sync_ai,
142
- inputs=[ai_component, prompt, model_name, model_version, confidence, perplexity, retrieval_score],
143
- outputs=ai_output
144
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
145
 
146
  gr.Markdown("""
147
  ---
148
- [📖 Tutorial](https://github.com/petter2025us/agentic-reliability-framework/blob/main/TUTORIAL.md) |
149
- [🐙 GitHub](https://github.com/petter2025us/agentic-reliability-framework) |
150
- [💼 Enterprise](mailto:petter2025us@outlook.com)
 
151
  """)
152
 
153
  if __name__ == "__main__":
 
3
  import json
4
  import logging
5
  import traceback
6
+ import os
7
+ import numpy as np
8
  from datetime import datetime
9
+ from transformers import pipeline, set_seed
10
+ import torch
11
 
12
+ # Import our components
13
  from agentic_reliability_framework.runtime.engine import EnhancedReliabilityEngine
 
 
 
14
  from hallucination_detective import HallucinationDetectiveAgent
15
  from memory_drift_diagnostician import MemoryDriftDiagnosticianAgent
16
+ from ai_event import AIEvent
17
+ from ai_risk_engine import AIRiskEngine
18
+ from nli_detector import NLIDetector
19
 
20
  logging.basicConfig(level=logging.DEBUG, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s')
21
  logger = logging.getLogger(__name__)
22
 
23
+ # Initialize infrastructure engine (optional)
24
  try:
25
  logger.info("Initializing EnhancedReliabilityEngine...")
26
  engine = EnhancedReliabilityEngine()
 
29
  logger.error(f"Failed to initialize engine: {e}\n{traceback.format_exc()}")
30
  engine = None
31
 
32
+ # Load generative model (small autoregressive)
33
+ gen_model_name = "microsoft/DialoGPT-small"
34
+ try:
35
+ generator = pipeline('text-generation', model=gen_model_name, device=0 if torch.cuda.is_available() else -1)
36
+ logger.info(f"Generator {gen_model_name} loaded.")
37
+ except Exception as e:
38
+ logger.error(f"Failed to load generator: {e}")
39
+ generator = None
40
+
41
+ # Load NLI detector
42
+ nli_detector = NLIDetector()
43
+
44
+ # AI agents
45
+ hallucination_detective = HallucinationDetectiveAgent(nli_detector=nli_detector)
46
  memory_drift_diagnostician = MemoryDriftDiagnosticianAgent()
47
 
48
+ # AI risk engine
49
+ ai_risk_engine = AIRiskEngine()
50
+
51
+ # In‑memory storage for last event to attach feedback
52
+ last_ai_event = None
53
+ last_ai_category = None
54
+
55
+ async def generate_response(prompt: str, max_length: int = 100) -> tuple:
56
+ """Generate response using the small autoregressive model."""
57
+ if generator is None:
58
+ return "[Model not loaded]", 0.0, "Model loading failed"
59
  try:
60
+ loop = asyncio.get_event_loop()
61
+ # We need to compute confidence; text-generation pipeline returns text but not logits.
62
+ # For simplicity, we'll set confidence based on a heuristic (e.g., generation length?).
63
+ # Alternatively, use a model that returns probabilities.
64
+ # Let's use a simple placeholder: confidence = 0.8 if generation succeeds.
65
+ # In practice, we'd need to access logits.
66
+ result = await loop.run_in_executor(
67
+ None,
68
+ lambda: generator(prompt, max_new_tokens=max_length, return_full_text=False)
69
  )
70
+ response = result[0]['generated_text']
71
+ # Placeholder confidence
72
+ confidence = 0.8
73
+ return response, confidence, ""
74
  except Exception as e:
75
+ logger.error(f"Generation error: {e}")
76
+ return "", 0.0, str(e)
77
 
78
+ async def analyze_ai(task_type, prompt):
79
+ global last_ai_event, last_ai_category
80
  try:
81
+ # Generate response
82
+ response, confidence, error = await generate_response(prompt)
83
+ if error:
84
+ return json.dumps({"error": error}, indent=2)
85
+
86
  # Create AIEvent
87
  event = AIEvent(
88
  timestamp=datetime.utcnow(),
89
+ component="ai",
90
  service_mesh="ai",
91
+ latency_p99=0,
92
  error_rate=0.0,
93
  throughput=1,
94
  cpu_util=None,
95
  memory_util=None,
96
+ action_category=task_type,
97
+ model_name=gen_model_name,
98
+ model_version="latest",
99
  prompt=prompt,
100
  response=response,
101
  response_length=len(response),
102
  confidence=confidence,
103
+ perplexity=None,
104
+ retrieval_scores=None,
105
  user_feedback=None,
106
+ latency_ms=0
107
  )
108
+ last_ai_event = event
109
+ last_ai_category = task_type
110
+
111
  # Run agents
112
  hallu_result = await hallucination_detective.analyze(event)
113
  drift_result = await memory_drift_diagnostician.analyze(event)
114
+
115
+ # Get current risk metrics
116
+ risk_metrics = ai_risk_engine.risk_score(task_type)
117
+
118
  result = {
119
+ "response": response,
120
+ "confidence": confidence,
121
  "hallucination_detection": hallu_result,
122
  "memory_drift_detection": drift_result,
123
+ "risk_metrics": risk_metrics
124
  }
125
  return json.dumps(result, indent=2)
126
  except Exception as e:
127
  logger.error(f"AI analysis error: {e}\n{traceback.format_exc()}")
128
  return json.dumps({"error": str(e), "traceback": traceback.format_exc()}, indent=2)
129
 
130
+ def feedback(thumbs_up: bool):
131
+ """Handle user feedback to update Beta priors."""
132
+ global last_ai_category, last_ai_event
133
+ if last_ai_category is None:
134
+ return "No previous analysis to rate."
135
+ ai_risk_engine.update_outcome(last_ai_category, success=thumbs_up)
136
+ # Optionally, also update the event with feedback
137
+ if last_ai_event:
138
+ last_ai_event.user_feedback = thumbs_up
139
+ return f"Feedback recorded: {'👍' if thumbs_up else '👎'} for {last_ai_category}."
140
 
141
+ # Build the Gradio interface
142
+ with gr.Blocks(title="ARF v4 – AI Reliability Lab", theme="soft") as demo:
143
+ gr.Markdown("# 🧠 ARF v4 AI Reliability Lab\n**Detect hallucinations and drift in generative AI**")
144
 
145
+ with gr.Row():
146
+ with gr.Column():
147
+ task_type = gr.Dropdown(
148
+ choices=["chat", "code", "summary"],
149
+ value="chat",
150
+ label="Task Type"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
151
  )
152
+ prompt = gr.Textbox(
153
+ label="Prompt",
154
+ value="What is the capital of France?",
155
+ lines=3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  )
157
+ analyze_btn = gr.Button("Analyze", variant="primary")
158
+ with gr.Column():
159
+ output = gr.JSON(label="Analysis Result")
160
+
161
+ with gr.Row():
162
+ feedback_btn_up = gr.Button("👍 Correct")
163
+ feedback_btn_down = gr.Button("👎 Incorrect")
164
+ feedback_msg = gr.Textbox(label="Feedback", interactive=False)
165
+
166
+ analyze_btn.click(
167
+ fn=analyze_ai,
168
+ inputs=[task_type, prompt],
169
+ outputs=output
170
+ )
171
+ feedback_btn_up.click(
172
+ fn=lambda: feedback(True),
173
+ outputs=feedback_msg
174
+ )
175
+ feedback_btn_down.click(
176
+ fn=lambda: feedback(False),
177
+ outputs=feedback_msg
178
+ )
179
 
180
  gr.Markdown("""
181
  ---
182
+ - **Model**: `microsoft/DialoGPT-small` (autoregressive, 117M params)
183
+ - **NLI Detector**: `typeform/distilroberta-base-mnli` (82M params)
184
+ - **Risk engine**: Beta conjugate priors per task category
185
+ - **Feedback** updates the posterior distribution
186
  """)
187
 
188
  if __name__ == "__main__":