Spaces:

heramb04
/

server_failure_predictor

Sleeping

App Files Files Community

heramb04 committed on Nov 20, 2025

Commit

08cf9a9

verified ·

1 Parent(s): 51d86f2

Initial Commit

Browse files

Uploading model and app files

Files changed (3) hide show

App.py +96 -0
requirements.txt +6 -0
server_failure_model.pkl +3 -0

App.py ADDED Viewed

	@@ -0,0 +1,96 @@

+import gradio as gr
+import joblib
+import pandas as pd
+import numpy as np
+import os
+# --- 1. Load the Model ---
+MODEL_FILE = "server_failure_model.pkl"
+def load_model():
+    if os.path.exists(MODEL_FILE):
+        try:
+            return joblib.load(MODEL_FILE)
+        except Exception:
+            import pickle
+            with open(MODEL_FILE, 'rb') as f:
+                return pickle.load(f)
+    return None
+model = load_model()
+# --- 2. The Prediction Function ---
+def predict_failure(cpu_current, cpu_sustained, ram_current, temp_current, temp_change):
+    if model is None:
+        return "Error: Model not found. Please upload .pkl file.", 0.0
+    # Construct the 9 features exactly as the model expects
+    # 1. cpu_percent
+    # 2. ram_percent
+    # 3. cpu_temp
+    # 4. gpu_temp (Inferred)
+    # 5. net_recv_bytes (Noise)
+    # 6. disk_write_bytes (Noise)
+    # 7. cpu_rolling_avg
+    # 8. ram_rolling_avg (Inferred)
+    # 9. cpu_temp_change
+    input_df = pd.DataFrame([{
+        'cpu_percent': float(cpu_current),
+        'ram_percent': float(ram_current),
+        'cpu_temp': float(temp_current),
+        'gpu_temp': float(temp_current) - 15.0, # Heuristic
+        'net_recv_bytes': 1024.0,
+        'disk_write_bytes': 0.0,
+        'cpu_rolling_avg': float(cpu_sustained),
+        'ram_rolling_avg': float(ram_current),
+        'cpu_temp_change': float(temp_change)
+    }])
+    # Get Prediction
+    pred_class = model.predict(input_df)[0]
+    pred_prob = model.predict_proba(input_df)[0][1]
+    # Return user-friendly outputs
+    status = "CRITICAL FAILURE IMMINENT" if pred_class == 1 else "SYSTEM NORMAL"
+    probability = f"{pred_prob * 100:.1f}%"
+    return status, probability
+# --- 3. The Gradio UI Interface ---
+# This replaces your HTML file. Gradio builds the UI for you.
+with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("# 🖥️ Server Health Sentinel AI")
+    gr.Markdown("### AIOps Failure Prediction System (PoC)")
+    gr.Markdown("This AI predicts if a server will overheat (>80°C) in the next 5 minutes based on telemetry trends.")
+    with gr.Row():
+        with gr.Column():
+            gr.Markdown("### 🎛️ Live Telemetry Simulation")
+            # The 5 Sliders
+            s_cpu = gr.Slider(0, 100, value=10, label="Current CPU Load (%)")
+            s_cpu_avg = gr.Slider(0, 100, value=10, label="Sustained CPU Load (Last 1 min avg) (%)")
+            s_ram = gr.Slider(0, 100, value=30, label="RAM Usage (%)")
+            s_temp = gr.Slider(30, 100, value=50, label="Current Temperature (°C)")
+            s_change = gr.Slider(-2, 5, value=0, step=0.5, label="Temp Change Rate (°C/sec)")
+            btn = gr.Button("Run Prediction", variant="primary")
+        with gr.Column():
+            gr.Markdown("### 🧠 AI Diagnosis")
+            out_status = gr.Textbox(label="Status")
+            out_prob = gr.Textbox(label="Failure Probability")
+            gr.Markdown("""
+            **Architecture:** Random forest Classifier
+            **Trained on:** 10,000+ Real-world Linux Telemetry Points
+            """)
+    # Connect the button to the function
+    btn.click(fn=predict_failure,
+              inputs=[s_cpu, s_cpu_avg, s_ram, s_temp, s_change],
+              outputs=[out_status, out_prob])
+# Launch
+demo.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+flask
+pandas
+numpy
+scikit-learn
+gunicorn
+gradio

server_failure_model.pkl ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:7c6876a5cc41dfc0d4ed24cfdf26bf133f610a9ea1ef7166d4a0f9c27e77ffe4
+size 1463449