File size: 3,128 Bytes
7517eb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import sqlite3
import time
from contextlib import closing
from datetime import datetime

import gradio as gr

# SQLite database file used by every function in this module. The path is
# relative, so it is resolved against the process's current working directory.
DB_PATH = "reliability.db"

# --- Setup database (runs at every start; tables are created only if missing) ---
def init_db():
    """Create the ``telemetry`` and ``alerts`` tables if they do not exist.

    Safe to call repeatedly: both statements use ``CREATE TABLE IF NOT EXISTS``,
    so an already-initialised database is left untouched.

    Returns:
        None.
    """
    # closing() releases the connection even when a statement raises; the
    # inner ``conn`` context commits on success and rolls back on error.
    with closing(sqlite3.connect(DB_PATH)) as conn, conn:
        conn.execute("""
        CREATE TABLE IF NOT EXISTS telemetry (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            timestamp TEXT,
            component TEXT,
            latency REAL,
            error_rate REAL
        )
        """)
        conn.execute("""
        CREATE TABLE IF NOT EXISTS alerts (
            id INTEGER PRIMARY KEY AUTOINCREMENT,
            event_id INTEGER,
            alert_type TEXT,
            threshold REAL,
            timestamp TEXT
        )
        """)

# Runs whenever this module is executed or imported, so the tables exist
# before any of the functions below query them.
init_db()

# --- Core functions ---
def log_event(component, latency, error_rate):
    """Record one telemetry sample and report whether it looks anomalous.

    Args:
        component: Name of the component the sample belongs to.
        latency: Observed latency in milliseconds.
        error_rate: Observed error rate.

    Returns:
        A human-readable status string: a validation message when either
        metric is missing, otherwise whatever ``detect_anomaly()`` returns.
    """
    # A metric may arrive as None (e.g. an emptied numeric field). Reject it
    # here rather than storing a NULL that cannot be compared to a threshold.
    if latency is None or error_rate is None:
        return "❌ Latency and error rate are both required."

    # closing() releases the connection even if the INSERT raises; the inner
    # ``conn`` context commits on success and rolls back on error.
    with closing(sqlite3.connect(DB_PATH)) as conn, conn:
        conn.execute(
            "INSERT INTO telemetry (timestamp, component, latency, error_rate) VALUES (?, ?, ?, ?)",
            (datetime.now().isoformat(), component, latency, error_rate),
        )

    # NOTE: detect_anomaly() re-reads the newest telemetry row instead of being
    # handed this one, so it relies on no other insert landing in between.
    return detect_anomaly()

def detect_anomaly(threshold_latency=200, threshold_error=0.3):
    """Check the most recent telemetry row against the given thresholds.

    Args:
        threshold_latency: Latency above which the row counts as anomalous.
        threshold_error: Error rate above which the row counts as anomalous.

    Returns:
        A warning message when either metric exceeds its threshold (an alert
        row is saved as a side effect), otherwise the "no anomaly" message.
        The latter is also returned when the telemetry table is empty.
    """
    # closing() releases the connection even if the query raises.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        # Columns are named explicitly so the unpacking below does not depend
        # on the table's physical column order.
        row = conn.execute(
            "SELECT id, timestamp, component, latency, error_rate"
            " FROM telemetry ORDER BY id DESC LIMIT 1"
        ).fetchone()
    if row is None:
        return "✅ No anomaly detected."

    event_id, _timestamp, component, latency, error_rate = row

    # A NULL metric has no value to compare, so it cannot breach a threshold
    # (comparing None with a number would raise TypeError).
    latency_breached = latency is not None and latency > threshold_latency
    error_breached = error_rate is not None and error_rate > threshold_error
    if not (latency_breached or error_breached):
        return "✅ No anomaly detected."

    # Store the limit that was actually exceeded in the alert's threshold
    # field; when both metrics breach, the latency threshold is recorded.
    breached_threshold = threshold_latency if latency_breached else threshold_error
    save_alert(event_id, "anomaly", breached_threshold)
    return f"⚠️ Anomaly detected in {component} — latency {latency}ms, error rate {error_rate}"

def save_alert(event_id, alert_type, threshold):
    """Insert one row into the ``alerts`` table.

    Args:
        event_id: Id of the telemetry row the alert refers to.
        alert_type: Short label for the kind of alert (e.g. ``"anomaly"``).
        threshold: Numeric value stored in the alert's ``threshold`` column.

    Returns:
        None. The row is timestamped with the current local time in ISO format.
    """
    # closing() releases the connection even if the INSERT raises; the inner
    # ``conn`` context commits on success and rolls back on error.
    with closing(sqlite3.connect(DB_PATH)) as conn, conn:
        conn.execute(
            "INSERT INTO alerts (event_id, alert_type, threshold, timestamp) VALUES (?, ?, ?, ?)",
            (event_id, alert_type, threshold, datetime.now().isoformat()),
        )

def show_recent_alerts():
    """Return the ten newest alerts as newline-separated text.

    Returns:
        One line per alert, newest first, formatted as
        ``[timestamp] alert_type (threshold: value)``, or ``"No alerts yet."``
        when the alerts table is empty.
    """
    # closing() releases the connection even if the query raises.
    with closing(sqlite3.connect(DB_PATH)) as conn:
        # Select only the displayed columns by name instead of relying on
        # positional indices into ``SELECT *``.
        rows = conn.execute(
            "SELECT timestamp, alert_type, threshold FROM alerts ORDER BY id DESC LIMIT 10"
        ).fetchall()
    if not rows:
        return "No alerts yet."
    return "\n".join(
        f"[{timestamp}] {alert_type} (threshold: {threshold})"
        for timestamp, alert_type, threshold in rows
    )

# --- Gradio UI ---
# Build the UI and wire its two buttons to log_event and show_recent_alerts.
with gr.Blocks() as demo:
    gr.Markdown("# 🧠 Agentic Reliability Framework MVP")
    gr.Markdown("Simulate telemetry events and detect anomalies automatically.")
    
    # Inputs describing one telemetry sample: component name plus two metrics.
    with gr.Row():
        component = gr.Textbox(label="Component", value="api-service")
        latency = gr.Number(label="Latency (ms)", value=150)
        error_rate = gr.Number(label="Error rate", value=0.05)
    btn = gr.Button("Submit Event")
    output = gr.Textbox(label="Detection Output")
    
    # Submitting passes the three inputs to log_event and shows the string it
    # returns in the "Detection Output" box.
    btn.click(fn=log_event, inputs=[component, latency, error_rate], outputs=output)
    
    gr.Markdown("### Recent Alerts")
    alert_box = gr.Textbox(label="", interactive=False)
    refresh_btn = gr.Button("Refresh Alerts")
    # In this file the alert box is only filled when this button is clicked;
    # submitting an event does not refresh it.
    refresh_btn.click(fn=show_recent_alerts, outputs=alert_box)

# Launch the app. This call is not guarded by ``if __name__ == "__main__"``,
# so it also runs when the module is imported.
demo.launch()