| import gradio as gr |
| import time |
| import random |
| import requests |
| import numpy as np |
| import pandas as pd |
| import faiss |
| from sentence_transformers import SentenceTransformer |
|
|
| |
# Load the Hugging Face API token from a local ".env" file, if present.
# The original check (`if ".env"`) tested a non-empty string literal — always
# true — so a missing file crashed with FileNotFoundError, and the bare
# open() leaked a file handle. EAFP with a context manager fixes both.
try:
    with open(".env", "r") as env_file:
        HF_TOKEN = env_file.read().strip()
except FileNotFoundError:
    HF_TOKEN = ""
if not HF_TOKEN:
    print("⚠️ No Hugging Face token found. Running in fallback mode (local inference).")


# Base URL of the HF inference router; the model id is appended per request.
API_URL = "https://router.huggingface.co/hf-inference"
|
|
# Sentence-embedding model used to vectorize incident analyses for
# similarity search (downloaded on first run if not cached).
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# Parallel in-memory stores: incident_embeddings[i] is the float32 embedding
# of the event dict stored at incident_texts[i] (appended in process_event).
incident_embeddings = []
incident_texts = []


# Rolling window of recent telemetry events shown in the UI table;
# process_event caps it at 20 entries.
recent_events = []
|
|
| |
def simulate_healing_action(component: str) -> str:
    """Return a randomly chosen mock remediation step for *component*.

    Simulation stub: *component* is not inspected and no real action is
    taken — one of four canned action strings is picked at random.
    """
    return random.choice(
        [
            "Restarted container",
            "Cleared queue backlog",
            "Rebalanced load",
            "No actionable step detected.",
        ]
    )
|
|
def detect_anomaly(latency: float, error_rate: float) -> bool:
    """Flag an event as anomalous.

    The anomaly score is latency * error_rate; it is compared against a
    freshly drawn random threshold in [5, 25) — a simulated stand-in for
    adaptive thresholding.
    """
    return latency * error_rate > random.uniform(5, 25)
|
|
def embed_incident(text: str):
    """Encode *text* with the module-level sentence-transformer.

    Returns a float32 numpy array of shape (1, dim); embeddings are
    normalized so inner product equals cosine similarity.
    """
    encoded = model.encode([text], normalize_embeddings=True)
    return np.asarray(encoded).astype("float32")
|
|
def find_similar_incidents(new_text: str, top_k=3):
    """Return a one-line summary of stored incidents most similar to *new_text*.

    Embeds *new_text*, builds a FAISS inner-product index over all stored
    incident embeddings, and formats up to *top_k* nearest matches.

    Args:
        new_text: Analysis text to compare against stored incidents.
        top_k: Maximum number of matches to report.

    Returns:
        A summary string, or a placeholder when no incidents are stored.
    """
    if not incident_embeddings:
        return "Not enough incidents stored yet."
    new_emb = embed_incident(new_text)
    # Rebuilding the index per call is O(n), which is fine for the <=20
    # events kept by process_event.
    index = faiss.IndexFlatIP(len(new_emb[0]))
    index.add(np.vstack(incident_embeddings))
    scores, ids = index.search(new_emb, top_k)
    # FAISS pads results with id -1 when fewer than top_k vectors exist.
    # The previous guard (`i < len(incident_texts)`) let -1 through, which
    # silently re-reported the *most recent* incident via negative indexing.
    similar = [
        f"Component: {incident_texts[i]['component']} | Latency: {incident_texts[i]['latency']} | ErrorRate: {incident_texts[i]['error_rate']} | Analysis: {incident_texts[i]['analysis'][:60]}..."
        for i in ids[0]
        if 0 <= i < len(incident_texts)
    ]
    return f"Found {len(similar)} similar incidents ({'; '.join(similar)})."
|
|
| |
def process_event(component, latency, error_rate):
    """Ingest one telemetry event end-to-end.

    Runs anomaly detection, requests an AI analysis from the HF inference
    API, simulates a healing action for anomalies, looks up similar past
    incidents, and updates the in-memory event/embedding stores.

    Args:
        component: Name of the service emitting the event.
        latency: Observed latency in ms.
        error_rate: Observed error rate.

    Returns:
        Tuple of (status message, DataFrame of up to the 20 latest events).
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S")
    anomaly = detect_anomaly(latency, error_rate)

    payload = {
        "inputs": f"Component {component} showing latency {latency} and error rate {error_rate}.",
    }

    try:
        # HF_TOKEN is already stripped at load time; no need to re-strip.
        headers = {"Authorization": f"Bearer {HF_TOKEN}"}
        # timeout added: without one, a hung inference endpoint blocked the
        # UI callback indefinitely.
        response = requests.post(
            f"{API_URL}/facebook/bart-large-mnli",
            headers=headers,
            json=payload,
            timeout=30,
        )
        if response.status_code == 200:
            body = response.json()
            # The inference API can return a list (e.g. classification
            # output) rather than a dict; calling .get on a list raised
            # AttributeError that the broad except below silently swallowed.
            if isinstance(body, dict):
                analysis = body.get("generated_text", "No analysis output.")
            else:
                analysis = str(body)
        else:
            analysis = f"Error {response.status_code}: {response.text}"
    except Exception as e:
        # Best-effort: any network/parse failure becomes part of the record.
        analysis = f"Error generating analysis: {str(e)}"

    status = "Anomaly" if anomaly else "Normal"
    healing_action = simulate_healing_action(component) if anomaly else "-"
    # Similarity lookup runs BEFORE this event is stored, so an event
    # never matches itself.
    similar_info = find_similar_incidents(analysis)

    event = {
        "timestamp": timestamp,
        "component": component,
        "latency": latency,
        "error_rate": error_rate,
        "status": status,
        "analysis": analysis,
        "healing_action": f"{healing_action} {similar_info}" if anomaly else f"- {similar_info}",
    }

    # Keep only the 20 most recent events for the UI table.
    recent_events.append(event)
    if len(recent_events) > 20:
        recent_events.pop(0)

    # Persist the embedding and full event for future similarity lookups.
    incident_embeddings.append(embed_incident(analysis))
    incident_texts.append(event)

    return f"✅ Event Processed", pd.DataFrame(recent_events)
|
|
| |
# --- Gradio UI -------------------------------------------------------------
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🧠 Agentic Reliability Framework MVP")
    gr.Markdown("Adaptive anomaly detection + AI-driven self-healing + vector memory")

    # Telemetry inputs. NOTE: the numeric defaults are randomized once at
    # app build time, not per submission.
    with gr.Row():
        component = gr.Textbox(label="Component", value="api-service")
        latency = gr.Number(label="Latency (ms)", value=random.uniform(50, 200))
        error_rate = gr.Number(label="Error Rate", value=random.uniform(0.01, 0.2))

    submit = gr.Button("🚀 Submit Telemetry Event")
    output_text = gr.Textbox(label="Detection Output")
    # Column order mirrors the event dicts built in process_event.
    output_table = gr.Dataframe(headers=["timestamp", "component", "latency", "error_rate", "status", "analysis", "healing_action"], label="Recent Events (Last 20)")

    # Wire the button to the full processing pipeline.
    submit.click(process_event, inputs=[component, latency, error_rate], outputs=[output_text, output_table])


demo.launch()
|
|