| |
| import logging |
| from datetime import datetime, timezone |
|
|
| from fastapi import FastAPI, HTTPException |
| from fastapi.middleware.cors import CORSMiddleware |
| import gradio as gr |
|
|
| |
| from agentic_reliability_framework.core.governance.risk_engine import RiskEngine |
| from agentic_reliability_framework.runtime.memory import create_faiss_index, RAGGraphMemory |
| from agentic_reliability_framework.runtime.memory.constants import MemoryConstants |
|
|
# Configure logging once at import time; this module logs through `logger`.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Browser origins that are allowed to call this API cross-origin.
_ALLOWED_ORIGINS = ["https://arf-frontend-sandy.vercel.app"]

app = FastAPI(title="ARF v4 API with Memory")
app.add_middleware(
    CORSMiddleware,
    allow_origins=_ALLOWED_ORIGINS,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Module-level singletons shared by the HTTP endpoints and the dashboard
# callbacks defined further down in this file.
risk_engine = RiskEngine()

# The memory store is built on top of an index created with the dimension
# declared in MemoryConstants.VECTOR_DIM.
_vector_dim = MemoryConstants.VECTOR_DIM
faiss_index = create_faiss_index(dim=_vector_dim)
memory = RAGGraphMemory(faiss_index)
|
|
| |
| |
| |
@app.get("/")
async def root():
    """Return the service identity together with memory statistics.

    ``memory_stats`` is the literal string ``"empty"`` while the memory store
    reports no historical data; otherwise it is the value returned by
    ``memory.get_graph_stats()``.
    """
    if memory.has_historical_data():
        stats = memory.get_graph_stats()
    else:
        stats = "empty"

    return {
        "service": "ARF OSS API",
        "version": "4.0.0",
        "status": "operational",
        "memory_stats": stats,
    }
|
|
@app.get("/health")
async def health():
    """Return a static health payload containing the status and API version."""
    payload = {"status": "ok", "version": "4.0.0"}
    return payload
|
|
@app.get("/api/v1/get_risk")
async def get_risk():
    """Compute a risk snapshot through the supported RiskEngine.calculate_risk() API.

    This avoids calling the removed get_current_risk() method.

    Returns:
        A dict with ``system_risk`` (the numeric risk), ``status``
        (``"critical"`` when the risk exceeds 0.8, otherwise ``"normal"``)
        and ``details`` (the full snapshot dict).

    Raises:
        HTTPException: status 500 carrying the error text when the snapshot
            cannot be computed.
    """
    try:
        score = _calculate_demo_risk()
        return {
            "system_risk": score["risk"],
            "status": "critical" if score["risk"] > 0.8 else "normal",
            "details": score,
        }
    except Exception as e:
        # Record the traceback server-side (as get_risk_snapshot does) before
        # translating the failure into an HTTP error; chain the cause so the
        # original exception is not lost.
        logger.exception("Failed to compute risk")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
|
@app.post("/api/v1/incident")
async def store_incident(event_data: dict, analysis: dict):
    """Persist an incident and its analysis in the memory store.

    Both parameters are dict-typed, so FastAPI reads them from the JSON request
    body (expected under the keys ``event_data`` and ``analysis``).

    Args:
        event_data: Incident payload, forwarded unchanged to
            ``memory.store_incident``.
        analysis: Analysis payload, forwarded unchanged to
            ``memory.store_incident``.

    Returns:
        ``{"incident_id": ...}`` with the identifier returned by the memory store.

    Raises:
        HTTPException: status 500 carrying the error text when storing fails.
    """
    try:
        incident_id = memory.store_incident(event_data, analysis)
    except Exception as e:
        # Keep the traceback in the server log and chain the cause instead of
        # silently collapsing the failure into a bare 500.
        logger.exception("Failed to store incident")
        raise HTTPException(status_code=500, detail=str(e)) from e
    return {"incident_id": incident_id}
|
|
@app.get("/api/v1/memory/similar")
async def find_similar_incidents(action: str, k: int = 5):
    """Look up stored incidents similar to a user-supplied action.

    A placeholder event with neutral metrics is built around ``action`` and
    passed to ``memory.find_similar`` together with ``{"action": action}``.

    Args:
        action: Action text to search for.
        k: Number of neighbours requested from the memory store (default 5).

    Returns:
        ``{"similar": [...], "count": n}`` where each entry exposes the node's
        incident_id, component, severity, timestamp, metrics, agent_analysis
        and its ``similarity_score`` metadata (0.0 when absent).

    Raises:
        HTTPException: status 500 carrying the error text when the search or
            the result conversion fails, matching the other endpoints.
    """

    class DummyEvent:
        """Placeholder event: neutral metrics plus the queried action."""

        def __init__(self, action: str):
            self.component = "user_action"
            self.latency_p99 = 0.0
            self.error_rate = 0.0
            self.throughput = 0
            self.cpu_util = 0.0
            self.memory_util = 0.0
            # NOTE(review): naive local time, unlike the UTC-aware timestamps
            # used by the snapshot helpers - confirm what the memory backend
            # expects before switching to datetime.now(timezone.utc).
            self.timestamp = datetime.now()
            self.severity = "low"
            self.action = action

    try:
        similar = memory.find_similar(DummyEvent(action), {"action": action}, k=k)
        results = [
            {
                "incident_id": node.incident_id,
                "component": node.component,
                "severity": node.severity,
                "timestamp": node.timestamp,
                "metrics": node.metrics,
                "agent_analysis": node.agent_analysis,
                "similarity_score": node.metadata.get("similarity_score", 0.0),
            }
            for node in similar
        ]
    except Exception as e:
        # Previously any backend failure escaped unlogged; log it and map it
        # to a 500 exactly like the sibling endpoints do.
        logger.exception("Failed to search similar incidents")
        raise HTTPException(status_code=500, detail=str(e)) from e

    return {"similar": results, "count": len(results)}
|
|
@app.get("/api/v1/memory/stats")
async def memory_stats():
    """Return the graph statistics exactly as reported by the memory store."""
    stats = memory.get_graph_stats()
    return stats
|
|
| |
| |
| |
|
|
| class _DemoIntent: |
| """ |
| Minimal intent object for demo-only risk snapshots. |
| RiskEngine.categorize_intent() will fall back to DEFAULT for this object. |
| """ |
| environment = "dev" |
| deployment_target = "dev" |
| service_name = "demo" |
|
|
def _calculate_demo_risk():
    """Build a demo risk snapshot via RiskEngine.calculate_risk().

    Uses the supported calculate_risk() API rather than the removed
    get_current_risk() method. The engine is called with a fresh _DemoIntent,
    no cost estimate and an empty list of policy violations.

    Returns:
        A dict with ``risk`` (float), ``status`` (``"critical"`` above 0.8,
        else ``"normal"``), ``explanation`` and ``contributions``.
    """
    outcome = risk_engine.calculate_risk(
        intent=_DemoIntent(),
        cost_estimate=None,
        policy_violations=[],
    )
    risk_value, explanation, contributions = outcome

    risk = float(risk_value)
    if risk_value > 0.8:
        level = "critical"
    else:
        level = "normal"

    return {
        "risk": risk,
        "status": level,
        "explanation": explanation,
        "contributions": contributions,
    }
|
|
def get_risk_snapshot():
    """Return the demo risk snapshot stamped with the current UTC time.

    Never raises: on failure the exception is logged and a dict with
    ``status="error"``, the error text and a UTC timestamp is returned.
    """
    try:
        result = _calculate_demo_risk()
        result["timestamp"] = datetime.now(timezone.utc).isoformat()
    except Exception as e:
        logger.exception("Failed to compute risk snapshot")
        return dict(
            status="error",
            error=str(e),
            timestamp=datetime.now(timezone.utc).isoformat(),
        )
    else:
        return result
|
|
def get_health_snapshot():
    """Return a static health payload stamped with the current UTC time.

    The payload holds ``status``, ``version``, ``service`` and an ISO-8601
    ``timestamp``. Should building it fail, a dict with ``status="error"``,
    the error text and a timestamp is returned instead of raising.
    """
    try:
        stamped_at = datetime.now(timezone.utc).isoformat()
        return dict(
            status="ok",
            version="4.0.0",
            service="ARF OSS API",
            timestamp=stamped_at,
        )
    except Exception as e:
        return dict(
            status="error",
            error=str(e),
            timestamp=datetime.now(timezone.utc).isoformat(),
        )
|
|
def get_memory_snapshot():
    """Return memory statistics for the dashboard, stamped with the UTC time.

    The result has ``status="empty"`` with a placeholder message while the
    memory store reports no historical data, and ``status="ok"`` with the
    graph statistics otherwise. Never raises: failures are logged and reported
    as ``status="error"`` with the error text.
    """
    try:
        if not memory.has_historical_data():
            return dict(
                status="empty",
                memory_stats="No historical memory yet.",
                timestamp=datetime.now(timezone.utc).isoformat(),
            )

        graph_stats = memory.get_graph_stats()
        return dict(
            status="ok",
            memory_stats=graph_stats,
            timestamp=datetime.now(timezone.utc).isoformat(),
        )
    except Exception as e:
        logger.exception("Failed to compute memory snapshot")
        return dict(
            status="error",
            error=str(e),
            timestamp=datetime.now(timezone.utc).isoformat(),
        )
|
|
# Status dashboard: three JSON panels, each with its own refresh button.
with gr.Blocks(title="ARF v4 Demo") as demo:
    gr.Markdown("# Agentic Reliability Framework v4")
    gr.Markdown("### Status dashboard")

    with gr.Row():
        health_output = gr.JSON(label="Health")
        risk_output = gr.JSON(label="Current Risk")

    with gr.Row():
        memory_output = gr.JSON(label="Memory Stats")

    with gr.Row():
        refresh_btn = gr.Button("Refresh Risk")
        health_btn = gr.Button("Refresh Health")
        memory_btn = gr.Button("Refresh Memory")

    # Each button re-runs one snapshot helper and writes into its panel.
    for _button, _handler, _panel in (
        (refresh_btn, get_risk_snapshot, risk_output),
        (health_btn, get_health_snapshot, health_output),
        (memory_btn, get_memory_snapshot, memory_output),
    ):
        _button.click(fn=_handler, outputs=_panel)

    # Fill every panel once when the page is first loaded.
    for _handler, _panel in (
        (get_health_snapshot, health_output),
        (get_risk_snapshot, risk_output),
        (get_memory_snapshot, memory_output),
    ):
        demo.load(fn=_handler, outputs=_panel)


# Running this file as a script launches only the Gradio dashboard, listening
# on all network interfaces.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0")
|
|