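# NOTE: the three items below are page-header residue from the repository
# web viewer (avatar alt text, commit message, commit id), not part of app.py:
#   heramb04's picture / Update app.py / 8726b74 verified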
import logging
import os
import pickle

import gradio as gr
import joblib
import numpy as np
import pandas as pd
# --- 1. Load the Model ---
MODEL_FILE = "server_failure_model.pkl"

_logger = logging.getLogger(__name__)


def load_model(path=MODEL_FILE):
    """Load the serialized failure-prediction model from disk.

    ``joblib.load`` is tried first; if it raises, the file is re-read with
    the standard ``pickle`` module.

    Args:
        path: Location of the serialized model. Defaults to ``MODEL_FILE``.

    Returns:
        The deserialized model object, or ``None`` when ``path`` is not an
        existing regular file.

    Raises:
        Exception: Whatever ``pickle.load`` (or ``open``) raises when the
            pickle fallback fails as well.
    """
    # SECURITY: joblib and pickle both execute code embedded in the file while
    # deserializing it. Only point this at model files from a trusted source.
    if not os.path.isfile(path):
        # isfile (not exists) so a directory with this name is treated as
        # "no model" instead of crashing inside open().
        _logger.warning("Model file %r not found; predictions are disabled.", path)
        return None

    try:
        return joblib.load(path)
    except Exception:
        # Keep the best-effort fallback, but record why joblib failed instead
        # of discarding the error silently.
        _logger.warning(
            "joblib could not load %r; retrying with pickle.", path, exc_info=True
        )

    with open(path, "rb") as model_fh:
        return pickle.load(model_fh)


# Loaded once at import time; stays None when no model file is available.
model = load_model()
# --- 2. The Prediction Function ---
def predict_failure(cpu_current, cpu_sustained, ram_current, temp_current, temp_change):
    """Classify one telemetry snapshot with the module-level ``model``.

    The five inputs are expanded into the nine feature columns passed to the
    model, in this order: ``cpu_percent``, ``ram_percent``, ``cpu_temp``,
    ``gpu_temp``, ``net_recv_bytes``, ``disk_write_bytes``,
    ``cpu_rolling_avg``, ``ram_rolling_avg``, ``cpu_temp_change``.

    Args:
        cpu_current: Current CPU load; used as ``cpu_percent``.
        cpu_sustained: Averaged CPU load; used as ``cpu_rolling_avg``.
        ram_current: RAM usage; used for both ``ram_percent`` and
            ``ram_rolling_avg``.
        temp_current: Current temperature; used as ``cpu_temp`` and, minus
            15.0, as ``gpu_temp``.
        temp_change: Temperature change rate; used as ``cpu_temp_change``.

    Returns:
        A ``(status, probability)`` tuple of strings. ``status`` is
        ``"CRITICAL FAILURE IMMINENT"`` when the predicted class equals 1 and
        ``"SYSTEM NORMAL"`` otherwise; ``probability`` is the class-1
        probability formatted as a percentage with one decimal. When no model
        is loaded, an error message and ``"N/A"`` are returned.
    """
    if model is None:
        # Both outputs feed text boxes, so keep the second value a string too
        # rather than mixing in a float.
        return "Error: Model not found. Please upload .pkl file.", "N/A"

    cpu_temp = float(temp_current)
    ram_percent = float(ram_current)

    # Insertion order of this dict defines the DataFrame column order.
    features = {
        'cpu_percent': float(cpu_current),
        'ram_percent': ram_percent,
        'cpu_temp': cpu_temp,
        'gpu_temp': cpu_temp - 15.0,  # Heuristic: inferred from CPU temperature
        'net_recv_bytes': 1024.0,  # Fixed placeholder (noise feature)
        'disk_write_bytes': 0.0,  # Fixed placeholder (noise feature)
        'cpu_rolling_avg': float(cpu_sustained),
        'ram_rolling_avg': ram_percent,  # Inferred: no separate RAM average input
        'cpu_temp_change': float(temp_change),
    }
    input_df = pd.DataFrame([features])

    # Get Prediction
    pred_class = model.predict(input_df)[0]
    class_probs = model.predict_proba(input_df)[0]

    # predict_proba columns follow model.classes_, so look up the column of
    # class 1 instead of assuming it is column 1. Models without classes_
    # fall back to positional indexing.
    class_labels = list(getattr(model, "classes_", range(len(class_probs))))
    if 1 in class_labels:
        pred_prob = float(class_probs[class_labels.index(1)])
    else:
        # The model never saw a failure class, so it assigns it no probability.
        pred_prob = 0.0

    # Return user-friendly outputs
    status = "CRITICAL FAILURE IMMINENT" if pred_class == 1 else "SYSTEM NORMAL"
    probability = f"{pred_prob * 100:.1f}%"
    return status, probability
# --- 3. The Gradio UI Interface ---
# Gradio renders the whole front end, so no separate HTML file is needed.
# NOTE(review): the original indentation was lost; the nesting below (button in
# the left column, footer note in the right column, click wiring inside the
# Blocks context) is the conventional reading -- confirm against the real file.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("# Server Health Sentinel AI")
    gr.Markdown("### AIOps Failure Prediction System (PoC)")
    gr.Markdown("This AI predicts if a server will overheat (>80°C) in the next 5 minutes based on telemetry trends.")

    with gr.Row():
        # Left column: the five telemetry sliders and the trigger button.
        with gr.Column():
            gr.Markdown("### Live Telemetry Simulation")
            s_cpu = gr.Slider(
                minimum=0, maximum=100, value=10,
                label="Current CPU Load (%)",
            )
            s_cpu_avg = gr.Slider(
                minimum=0, maximum=100, value=10,
                label="Sustained CPU Load (Last 1 min avg) (%)",
            )
            s_ram = gr.Slider(
                minimum=0, maximum=100, value=30,
                label="RAM Usage (%)",
            )
            s_temp = gr.Slider(
                minimum=30, maximum=100, value=50,
                label="Current Temperature (°C)",
            )
            s_change = gr.Slider(
                minimum=-2, maximum=5, value=0, step=0.5,
                label="Temp Change Rate (°C/sec)",
            )
            btn = gr.Button(value="Run Prediction", variant="primary")

        # Right column: prediction results plus a short model description.
        with gr.Column():
            gr.Markdown("### AI Diagnosis")
            out_status = gr.Textbox(label="Status")
            out_prob = gr.Textbox(label="Failure Probability")
            gr.Markdown(
                "\n"
                "**Architecture:** Random forest Classifier\n"
                "**Trained on:** 10,000+ Real-world Linux Telemetry Points\n"
            )

    # Wire the button: slider values go in, in predict_failure's parameter
    # order; its (status, probability) pair fills the two text boxes.
    telemetry_inputs = [s_cpu, s_cpu_avg, s_ram, s_temp, s_change]
    diagnosis_outputs = [out_status, out_prob]
    btn.click(
        fn=predict_failure,
        inputs=telemetry_inputs,
        outputs=diagnosis_outputs,
    )

# Start the web server for the app.
demo.launch()