"""Gradio front end for the Agentic Reliability Framework (ARF) v4 demo.

The app exposes two tabs:

* **Infrastructure** - forwards telemetry entered by the user to
  ``EnhancedReliabilityEngine.process_event_enhanced``.
* **AI Reliability** - wraps a mock model response in an ``AIEvent`` and runs
  the hallucination and memory-drift agents over it.

Both handlers return a JSON string that is rendered by a ``gr.JSON`` component.
"""
import asyncio
import json
import logging
import random
import traceback
from datetime import datetime

import gradio as gr

# Import the base engine
from agentic_reliability_framework.runtime.engine import EnhancedReliabilityEngine

# Import our new AI components
from ai_event import AIEvent
from hallucination_detective import HallucinationDetectiveAgent
from memory_drift_diagnostician import MemoryDriftDiagnosticianAgent

logging.basicConfig(
    level=logging.DEBUG,
    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
logger = logging.getLogger(__name__)

# Initialize the engine (for infrastructure analysis). A failure here is
# logged rather than raised so the UI can still start; the infrastructure
# handler reports the problem to the user instead.
try:
    logger.info("Initializing EnhancedReliabilityEngine...")
    engine = EnhancedReliabilityEngine()
    logger.info("Engine initialized successfully.")
except Exception as e:
    logger.error(f"Failed to initialize engine: {e}\n{traceback.format_exc()}")
    engine = None

# AI agents (initialize once)
hallucination_detective = HallucinationDetectiveAgent()
memory_drift_diagnostician = MemoryDriftDiagnosticianAgent()


def _optional_float(value):
    """Return ``float(value)``, or ``None`` when *value* is missing.

    Only ``None`` and the empty string count as missing. A numeric ``0`` is a
    real reading and is converted like any other number.
    """
    if value is None or value == "":
        return None
    return float(value)


def _failure_payload(label, exc):
    """Log *exc* under *label* and return the JSON error string for the UI.

    Must be called from inside an ``except`` block so that
    ``traceback.format_exc()`` sees the active exception.
    """
    # NOTE(review): the traceback is returned to the browser as well as
    # logged; reconsider if this app is exposed beyond a demo setting.
    trace = traceback.format_exc()
    logger.error(f"{label}: {exc}\n{trace}")
    return json.dumps({"error": str(exc), "traceback": trace}, indent=2)


async def analyze_infrastructure(component, latency, error_rate, throughput, cpu_util, memory_util):
    """Run the reliability engine on one infrastructure telemetry sample.

    Args:
        component: Name of the component the telemetry belongs to.
        latency: Latency value; converted with ``float``.
        error_rate: Error rate; converted with ``float``.
        throughput: Throughput; falls back to ``1000.0`` when falsy.
        cpu_util: CPU utilization, or ``None``/empty when unknown.
        memory_util: Memory utilization, or ``None``/empty when unknown.

    Returns:
        A JSON string: the engine result on success, otherwise an object with
        ``error`` (and, for runtime failures, ``traceback``) keys.
    """
    if engine is None:
        return json.dumps({"error": "Engine failed to initialize. Check logs."}, indent=2)
    try:
        result = await engine.process_event_enhanced(
            component=component,
            latency=float(latency),
            error_rate=float(error_rate),
            # NOTE(review): a throughput of 0 is still replaced by 1000.0, as
            # before; how the engine handles zero throughput is not visible
            # here, so this fallback is intentionally left unchanged.
            throughput=float(throughput) if throughput else 1000.0,
            # A utilization of exactly 0 is a valid slider value and must be
            # passed through as 0.0 rather than being treated as "missing".
            cpu_util=_optional_float(cpu_util),
            memory_util=_optional_float(memory_util),
        )
        return json.dumps(result, indent=2)
    except Exception as e:
        return _failure_payload("Infrastructure analysis error", e)


async def analyze_ai(component, prompt, model_name, model_version, confidence, perplexity, retrieval_score):
    """Run the AI reliability agents on a simulated model interaction.

    A mock response is generated for *prompt*, wrapped in an ``AIEvent``
    together with the supplied model metadata and scores, and analyzed by the
    hallucination and memory-drift agents in turn.

    Returns:
        A JSON string with ``hallucination_detection``,
        ``memory_drift_detection`` and ``response`` keys on success, otherwise
        an object with ``error`` and ``traceback`` keys.
    """
    try:
        # Simulate a response (in a real app, call an actual model)
        response = f"Mock response to: {prompt}"

        # Create AIEvent. Latency figures are random placeholders because no
        # real model call is made.
        event = AIEvent(
            # NOTE(review): utcnow() yields a naive datetime and is deprecated
            # since Python 3.12; kept because it is not visible here whether
            # AIEvent consumers expect naive timestamps.
            timestamp=datetime.utcnow(),
            component=component,
            service_mesh="ai",
            latency_p99=random.uniform(100, 500),
            error_rate=0.0,
            throughput=1,
            cpu_util=None,
            memory_util=None,
            model_name=model_name,
            model_version=model_version,
            prompt=prompt,
            response=response,
            response_length=len(response),
            confidence=confidence,
            perplexity=perplexity,
            retrieval_scores=[retrieval_score],
            user_feedback=None,
            latency_ms=random.uniform(200, 800),
        )

        # Run agents
        hallu_result = await hallucination_detective.analyze(event)
        drift_result = await memory_drift_diagnostician.analyze(event)

        # Combine results
        result = {
            "hallucination_detection": hallu_result,
            "memory_drift_detection": drift_result,
            "response": response,
        }
        return json.dumps(result, indent=2)
    except Exception as e:
        return _failure_payload("AI analysis error", e)


def sync_infrastructure(*args):
    """Blocking wrapper: run ``analyze_infrastructure`` to completion."""
    return asyncio.run(analyze_infrastructure(*args))


def sync_ai(*args):
    """Blocking wrapper: run ``analyze_ai`` to completion."""
    return asyncio.run(analyze_ai(*args))


# Build the Gradio interface with tabs
with gr.Blocks(title="ARF v4 – Reliability Lab", theme="soft") as demo:
    gr.Markdown("# 🧠 Agentic Reliability Framework v4\n**Infrastructure & AI Reliability**")

    with gr.Tabs():
        with gr.TabItem("Infrastructure"):
            gr.Markdown("Enter telemetry to analyze infrastructure incidents.")
            with gr.Row():
                with gr.Column():
                    component = gr.Dropdown(
                        choices=["api-service", "auth-service", "payment-service", "database", "cache-service"],
                        value="api-service",
                        label="Component"
                    )
                    latency = gr.Slider(10, 1000, value=100, label="Latency P99 (ms)")
                    error_rate = gr.Slider(0, 0.5, value=0.02, step=0.001, label="Error Rate")
                    throughput = gr.Number(value=1000, label="Throughput (req/s)")
                    cpu_util = gr.Slider(0, 1, value=0.4, label="CPU Utilization")
                    memory_util = gr.Slider(0, 1, value=0.3, label="Memory Utilization")
                    infra_submit = gr.Button("Analyze Infrastructure", variant="primary")
                with gr.Column():
                    infra_output = gr.JSON(label="Analysis Result")
            infra_submit.click(
                fn=sync_infrastructure,
                inputs=[component, latency, error_rate, throughput, cpu_util, memory_util],
                outputs=infra_output
            )

        with gr.TabItem("AI Reliability"):
            gr.Markdown("Simulate an AI query to detect hallucinations and memory drift.")
            with gr.Row():
                with gr.Column():
                    ai_component = gr.Dropdown(
                        choices=["chat", "code", "summary"],
                        label="Task Type",
                        value="chat"
                    )
                    prompt = gr.Textbox(label="Prompt", value="What is the capital of France?")
                    model_name = gr.Dropdown(["gpt-3.5", "gpt-4", "claude"], label="Model", value="gpt-4")
                    model_version = gr.Textbox(value="v1", label="Version")
                    confidence = gr.Slider(0, 1, value=0.95, label="Model Confidence")
                    perplexity = gr.Slider(0, 50, value=5, label="Perplexity")
                    retrieval_score = gr.Slider(0, 1, value=0.8, label="Retrieval Score")
                    ai_submit = gr.Button("Analyze AI", variant="primary")
                with gr.Column():
                    ai_output = gr.JSON(label="Analysis Result")
            ai_submit.click(
                fn=sync_ai,
                inputs=[ai_component, prompt, model_name, model_version, confidence, perplexity, retrieval_score],
                outputs=ai_output
            )

    gr.Markdown("""
    ---
    [📖 Tutorial](https://github.com/petter2025us/agentic-reliability-framework/blob/main/TUTORIAL.md) | [🐙 GitHub](https://github.com/petter2025us/agentic-reliability-framework) | [💼 Enterprise](mailto:petter2025us@outlook.com)
    """)

if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)