heramb04 committed on
Commit
08cf9a9
·
verified ·
1 Parent(s): 51d86f2

Initial Commit

Browse files

Uploading model and app files

Files changed (3) hide show
  1. App.py +96 -0
  2. requirements.txt +6 -0
  3. server_failure_model.pkl +3 -0
App.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import joblib
3
+ import pandas as pd
4
+ import numpy as np
5
+ import os
6
+
7
# --- 1. Load the Model ---
MODEL_FILE = "server_failure_model.pkl"


def load_model(path=MODEL_FILE):
    """Load the serialized failure-prediction model, or return None.

    ``joblib.load`` is tried first; if it raises, the file is retried with
    plain ``pickle``. Both failures are logged. A file that exists but
    cannot be deserialized (for example a Git LFS pointer that was never
    resolved to the real binary) yields ``None`` instead of an exception,
    so the module can still be imported and the caller's ``model is None``
    check decides what to show.

    Args:
        path: File to load. Defaults to ``MODEL_FILE``.

    Returns:
        The deserialized object, or ``None`` when the file is missing or
        neither loader can read it.
    """
    import logging
    import pickle

    log = logging.getLogger(__name__)

    if not os.path.exists(path):
        log.warning("Model file %r not found", path)
        return None

    # Broad catch on purpose: a damaged or incompatible file can raise many
    # unrelated exception types, and any of them should trigger the fallback.
    try:
        return joblib.load(path)
    except Exception:
        log.warning(
            "joblib could not load %r; retrying with pickle", path, exc_info=True
        )

    # SECURITY: unpickling executes arbitrary code embedded in the file.
    # Only load model files that come from a trusted source.
    try:
        with open(path, "rb") as f:
            return pickle.load(f)
    except Exception:
        log.exception("pickle could not load %r either; continuing without a model", path)
        return None


model = load_model()
21
+
22
# --- 2. The Prediction Function ---
def predict_failure(cpu_current, cpu_sustained, ram_current, temp_current, temp_change):
    """Classify one telemetry reading and report the failure probability.

    Only five values are supplied by the caller; the remaining four model
    features are filled in here (see the inline comments).

    Args:
        cpu_current: Current CPU load, used as ``cpu_percent``.
        cpu_sustained: Averaged CPU load, used as ``cpu_rolling_avg``.
        ram_current: RAM usage, used for both ``ram_percent`` and
            ``ram_rolling_avg``.
        temp_current: Current temperature, used as ``cpu_temp`` and to
            derive ``gpu_temp``.
        temp_change: Temperature change rate, used as ``cpu_temp_change``.

    Returns:
        A ``(status, probability)`` tuple. ``status`` is
        ``"CRITICAL FAILURE IMMINENT"`` when the predicted class equals 1,
        otherwise ``"SYSTEM NORMAL"``; ``probability`` is the class-1
        probability formatted as a percentage string. When no model is
        loaded, an error message and ``0.0`` are returned instead.
    """
    if model is None:
        return "Error: Model not found. Please upload .pkl file.", 0.0

    cpu_temp = float(temp_current)
    ram_percent = float(ram_current)

    # Build the 9 feature columns in this fixed order.
    input_df = pd.DataFrame([{
        'cpu_percent': float(cpu_current),
        'ram_percent': ram_percent,
        'cpu_temp': cpu_temp,
        'gpu_temp': cpu_temp - 15.0,       # Heuristic: inferred from the CPU temperature
        'net_recv_bytes': 1024.0,          # Fixed placeholder (treated as noise)
        'disk_write_bytes': 0.0,           # Fixed placeholder (treated as noise)
        'cpu_rolling_avg': float(cpu_sustained),
        'ram_rolling_avg': ram_percent,    # Inferred: no separate RAM average is supplied
        'cpu_temp_change': float(temp_change),
    }])

    pred_class = model.predict(input_df)[0]
    proba_row = model.predict_proba(input_df)[0]

    # scikit-learn orders predict_proba columns like ``model.classes_``, so
    # look the failure class (label 1) up instead of assuming column 1.
    # A model fitted on a single class has only one column, and indexing
    # [1] on it would raise IndexError.
    labels = getattr(model, "classes_", None)
    labels = list(range(len(proba_row))) if labels is None else list(labels)
    pred_prob = proba_row[labels.index(1)] if 1 in labels else 0.0

    # Return user-friendly outputs
    status = "CRITICAL FAILURE IMMINENT" if pred_class == 1 else "SYSTEM NORMAL"
    probability = f"{pred_prob * 100:.1f}%"

    return status, probability
59
+
60
# --- 3. The Gradio UI Interface ---
# The whole front end is declared with Gradio components; no separate HTML
# file is needed.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    # Page header
    gr.Markdown("# 🖥️ Server Health Sentinel AI")
    gr.Markdown("### AIOps Failure Prediction System (PoC)")
    gr.Markdown("This AI predicts if a server will overheat (>80°C) in the next 5 minutes based on telemetry trends.")

    with gr.Row():
        # Left column: telemetry inputs and the trigger button
        with gr.Column():
            gr.Markdown("### 🎛️ Live Telemetry Simulation")

            s_cpu = gr.Slider(minimum=0, maximum=100, value=10,
                              label="Current CPU Load (%)")
            s_cpu_avg = gr.Slider(minimum=0, maximum=100, value=10,
                                  label="Sustained CPU Load (Last 1 min avg) (%)")
            s_ram = gr.Slider(minimum=0, maximum=100, value=30,
                              label="RAM Usage (%)")
            s_temp = gr.Slider(minimum=30, maximum=100, value=50,
                               label="Current Temperature (°C)")
            s_change = gr.Slider(minimum=-2, maximum=5, value=0, step=0.5,
                                 label="Temp Change Rate (°C/sec)")

            btn = gr.Button("Run Prediction", variant="primary")

        # Right column: prediction results and model summary
        with gr.Column():
            gr.Markdown("### 🧠 AI Diagnosis")
            out_status = gr.Textbox(label="Status")
            out_prob = gr.Textbox(label="Failure Probability")

            gr.Markdown("""
            **Architecture:** Random forest Classifier
            **Trained on:** 10,000+ Real-world Linux Telemetry Points
            """)

    # Slider order must match predict_failure's positional parameters.
    telemetry_inputs = [s_cpu, s_cpu_avg, s_ram, s_temp, s_change]
    diagnosis_outputs = [out_status, out_prob]
    btn.click(predict_failure, inputs=telemetry_inputs, outputs=diagnosis_outputs)

# Start the web server
demo.launch()
requirements.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ flask
2
+ pandas
3
+ numpy
4
+ scikit-learn
5
+ gunicorn
6
+ gradio
server_failure_model.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c6876a5cc41dfc0d4ed24cfdf26bf133f610a9ea1ef7166d4a0f9c27e77ffe4
3
+ size 1463449