import logging
import os
import pickle

import gradio as gr
import joblib
import numpy as np
import pandas as pd

_logger = logging.getLogger(__name__)

# --- 1. Load the Model ---
MODEL_FILE = "server_failure_model.pkl"

# Label value that predict_failure treats as "failure".
_FAILURE_CLASS = 1


def load_model():
    """Load the trained classifier from ``MODEL_FILE``.

    joblib is tried first. If it raises, the failure is logged and the file is
    read again with the standard ``pickle`` module.

    Returns:
        The deserialized model object, or ``None`` when ``MODEL_FILE`` does
        not exist.

    Raises:
        Exception: Whatever ``pickle.load`` raises if the fallback fails too.
    """
    if not os.path.exists(MODEL_FILE):
        return None

    # NOTE(security): joblib and pickle both execute arbitrary code while
    # deserializing. Only load model files that come from a trusted source.
    try:
        return joblib.load(MODEL_FILE)
    except Exception:
        # Deliberate best-effort fallback, but keep a record of why joblib
        # failed instead of discarding the error silently.
        _logger.warning(
            "joblib could not load %s; retrying with pickle",
            MODEL_FILE,
            exc_info=True,
        )
        with open(MODEL_FILE, "rb") as f:
            return pickle.load(f)


model = load_model()


# --- 2. The Prediction Function ---
def _failure_probability(clf, features):
    """Return the predicted probability of the failure class for one row.

    The column is looked up through ``clf.classes_`` when that attribute
    contains ``_FAILURE_CLASS``; otherwise column 1 is used. If the chosen
    column does not exist (e.g. the model only has one class), 0.0 is returned.
    """
    row = clf.predict_proba(features)[0]
    labels = list(getattr(clf, "classes_", ()))
    # Column 1 is the fallback so the usual binary [0, 1] case is unchanged.
    column = labels.index(_FAILURE_CLASS) if _FAILURE_CLASS in labels else 1
    if column >= len(row):
        return 0.0
    return float(row[column])


def predict_failure(cpu_current, cpu_sustained, ram_current, temp_current, temp_change):
    """Run the loaded model on one set of telemetry readings.

    Args:
        cpu_current: Current CPU load; sent to the model as ``cpu_percent``.
        cpu_sustained: Sustained CPU load; sent as ``cpu_rolling_avg``.
        ram_current: RAM usage; sent as both ``ram_percent`` and
            ``ram_rolling_avg``.
        temp_current: Current temperature; sent as ``cpu_temp`` and used to
            estimate ``gpu_temp``.
        temp_change: Temperature change rate; sent as ``cpu_temp_change``.

    Returns:
        A ``(status, probability)`` tuple of strings. ``status`` is
        "CRITICAL FAILURE IMMINENT" when the predicted class is 1, otherwise
        "SYSTEM NORMAL". ``probability`` is the failure probability as a
        percentage with one decimal. When no model is loaded, ``status`` is an
        error message and ``probability`` is "N/A".
    """
    if model is None:
        # Both outputs feed text boxes; a numeric 0.0 here would read as a
        # genuine "no risk" prediction, so report that there is no value.
        return "Error: Model not found. Please upload .pkl file.", "N/A"

    cpu_temp = float(temp_current)
    ram_percent = float(ram_current)

    # Construct the 9 features exactly as the model expects
    input_df = pd.DataFrame([{
        'cpu_percent': float(cpu_current),
        'ram_percent': ram_percent,
        'cpu_temp': cpu_temp,
        'gpu_temp': cpu_temp - 15.0,   # Heuristic: inferred from CPU temperature
        'net_recv_bytes': 1024.0,      # Fixed placeholder (noise feature)
        'disk_write_bytes': 0.0,       # Fixed placeholder (noise feature)
        'cpu_rolling_avg': float(cpu_sustained),
        'ram_rolling_avg': ram_percent,  # Inferred: same as the current RAM reading
        'cpu_temp_change': float(temp_change),
    }])

    # Get Prediction
    pred_class = model.predict(input_df)[0]
    pred_prob = _failure_probability(model, input_df)

    # Return user-friendly outputs
    status = "CRITICAL FAILURE IMMINENT" if pred_class == _FAILURE_CLASS else "SYSTEM NORMAL"
    probability = f"{pred_prob * 100:.1f}%"
    return status, probability


# --- 3. The Gradio UI Interface ---
# This replaces your HTML file. Gradio builds the UI for you.
# Dashboard layout: telemetry sliders in the first column, model output in the second.
# NOTE(review): the original indentation was lost, so the nesting of the
# architecture blurb (second column) and of btn.click (Blocks level) is
# reconstructed from convention. Confirm against the deployed layout.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header.
    gr.Markdown("# Server Health Sentinel AI")
    gr.Markdown("### AIOps Failure Prediction System (PoC)")
    gr.Markdown(
        "This AI predicts if a server will overheat (>80°C) in the next 5 minutes "
        "based on telemetry trends."
    )

    with gr.Row():
        # Input side: one slider per argument of predict_failure.
        with gr.Column():
            gr.Markdown("### Live Telemetry Simulation")
            s_cpu = gr.Slider(
                minimum=0, maximum=100, value=10,
                label="Current CPU Load (%)",
            )
            s_cpu_avg = gr.Slider(
                minimum=0, maximum=100, value=10,
                label="Sustained CPU Load (Last 1 min avg) (%)",
            )
            s_ram = gr.Slider(
                minimum=0, maximum=100, value=30,
                label="RAM Usage (%)",
            )
            s_temp = gr.Slider(
                minimum=30, maximum=100, value=50,
                label="Current Temperature (°C)",
            )
            s_change = gr.Slider(
                minimum=-2, maximum=5, value=0, step=0.5,
                label="Temp Change Rate (°C/sec)",
            )
            btn = gr.Button("Run Prediction", variant="primary")

        # Output side: the two strings returned by predict_failure.
        with gr.Column():
            gr.Markdown("### AI Diagnosis")
            out_status = gr.Textbox(label="Status")
            out_prob = gr.Textbox(label="Failure Probability")
            gr.Markdown(
                " **Architecture:** Random forest Classifier "
                "**Trained on:** 10,000+ Real-world Linux Telemetry Points "
            )

    # Wire the button: slider values go in (in predict_failure's argument
    # order), status and probability text come out.
    telemetry_inputs = [s_cpu, s_cpu_avg, s_ram, s_temp, s_change]
    diagnosis_outputs = [out_status, out_prob]
    btn.click(fn=predict_failure, inputs=telemetry_inputs, outputs=diagnosis_outputs)

# Start the web server.
# NOTE(review): this runs on import as well as when executed as a script;
# consider an `if __name__ == "__main__":` guard if the module is ever imported.
demo.launch()