# HuggingFace Spaces page-header residue (not code):
#   Spaces: Sleeping
import os

import gradio as gr
import joblib
import numpy as np
import pandas as pd
# --- 1. Load the Model ---
MODEL_FILE = "server_failure_model.pkl"

def load_model():
    """Return the trained classifier loaded from MODEL_FILE, or None if the
    file does not exist.

    Attempts joblib first (the format the model is expected to be saved in);
    if that load raises for any reason, falls back to a plain pickle load of
    the same file.
    """
    if not os.path.exists(MODEL_FILE):
        return None
    try:
        return joblib.load(MODEL_FILE)
    except Exception:
        # Best-effort fallback: the .pkl may have been written with the
        # plain pickle module rather than joblib.
        import pickle
        with open(MODEL_FILE, 'rb') as fh:
            return pickle.load(fh)

model = load_model()
# --- 2. The Prediction Function ---
def predict_failure(cpu_current, cpu_sustained, ram_current, temp_current, temp_change, clf=None):
    """Predict whether the server is about to overheat.

    Parameters
    ----------
    cpu_current : float
        Instantaneous CPU load (%).
    cpu_sustained : float
        Rolling-average CPU load over the last minute (%).
    ram_current : float
        RAM usage (%).
    temp_current : float
        Current CPU temperature (°C).
    temp_change : float
        Temperature change rate (°C/sec).
    clf : object, optional
        Classifier override exposing ``predict`` / ``predict_proba``.
        Defaults to the module-level ``model`` (allows injection for tests).

    Returns
    -------
    tuple[str, str]
        Human-readable status and the failure probability as ``"NN.N%"``.
    """
    if clf is None:
        clf = model
    if clf is None:
        # Fix: return a formatted-percentage string here too, so both return
        # paths feed the "Failure Probability" Textbox the same type.
        return "Error: Model not found. Please upload .pkl file.", "0.0%"

    # Single-row frame with the 9 features the model was trained on:
    # cpu_percent, ram_percent, cpu_temp, gpu_temp (inferred),
    # net_recv_bytes (noise), disk_write_bytes (noise),
    # cpu_rolling_avg, ram_rolling_avg (inferred), cpu_temp_change.
    input_df = pd.DataFrame([{
        'cpu_percent': float(cpu_current),
        'ram_percent': float(ram_current),
        'cpu_temp': float(temp_current),
        'gpu_temp': float(temp_current) - 15.0,  # Heuristic: GPU ~15°C cooler
        'net_recv_bytes': 1024.0,
        'disk_write_bytes': 0.0,
        'cpu_rolling_avg': float(cpu_sustained),
        'ram_rolling_avg': float(ram_current),
        'cpu_temp_change': float(temp_change)
    }])

    pred_class = clf.predict(input_df)[0]
    # Probability of the positive ("failure") class — assumed at column 1;
    # TODO confirm against clf.classes_ ordering for the shipped model.
    pred_prob = clf.predict_proba(input_df)[0][1]

    status = "CRITICAL FAILURE IMMINENT" if pred_class == 1 else "SYSTEM NORMAL"
    probability = f"{pred_prob * 100:.1f}%"
    return status, probability
# --- 3. The Gradio UI Interface ---
# Gradio builds the whole front-end in Python — no separate HTML file needed.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header / description.
    gr.Markdown("# Server Health Sentinel AI")
    gr.Markdown("### AIOps Failure Prediction System (PoC)")
    gr.Markdown("This AI predicts if a server will overheat (>80°C) in the next 5 minutes based on telemetry trends.")

    with gr.Row():
        # Left column: the five telemetry inputs.
        with gr.Column():
            gr.Markdown("### Live Telemetry Simulation")
            in_cpu = gr.Slider(0, 100, value=10, label="Current CPU Load (%)")
            in_cpu_avg = gr.Slider(0, 100, value=10, label="Sustained CPU Load (Last 1 min avg) (%)")
            in_ram = gr.Slider(0, 100, value=30, label="RAM Usage (%)")
            in_temp = gr.Slider(30, 100, value=50, label="Current Temperature (°C)")
            in_rate = gr.Slider(-2, 5, value=0, step=0.5, label="Temp Change Rate (°C/sec)")
            predict_btn = gr.Button("Run Prediction", variant="primary")

        # Right column: the model's diagnosis.
        with gr.Column():
            gr.Markdown("### AI Diagnosis")
            status_box = gr.Textbox(label="Status")
            prob_box = gr.Textbox(label="Failure Probability")
            gr.Markdown("""
            **Architecture:** Random forest Classifier
            **Trained on:** 10,000+ Real-world Linux Telemetry Points
            """)

    # Wire the button to the prediction function.
    predict_btn.click(
        fn=predict_failure,
        inputs=[in_cpu, in_cpu_avg, in_ram, in_temp, in_rate],
        outputs=[status_box, prob_box],
    )

# Launch the app.
demo.launch()