Create app.py
Browse files
app.py
ADDED
|
@@ -0,0 +1,247 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import os, tempfile
|
| 2 |
+
import numpy as np
|
| 3 |
+
import pandas as pd
|
| 4 |
+
import matplotlib.pyplot as plt
|
| 5 |
+
from pandas.api.types import is_datetime64_any_dtype as is_datetime
|
| 6 |
+
from sklearn.model_selection import train_test_split
|
| 7 |
+
from sklearn.metrics import mean_absolute_error, r2_score
|
| 8 |
+
from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
|
| 9 |
+
import gradio as gr
|
| 10 |
+
|
| 11 |
+
# ---------- Helpers ----------
|
| 12 |
+
def infer_target_column(df: pd.DataFrame):
    """Return the name of the energy/power target column present in *df*.

    Candidate names are checked in priority order; the first match wins.

    Raises:
        ValueError: if none of the recognised target columns is present.
    """
    candidates = ("power_usage_kwh", "energy_kwh", "power_kwh", "energy")
    hit = next((name for name in candidates if name in df.columns), None)
    if hit is not None:
        return hit
    raise ValueError("Target column not found. Expected one of: "
                     "['power_usage_kwh','energy_kwh','power_kwh','energy'].")
|
| 18 |
+
|
| 19 |
+
def ensure_datetime_naive(df: pd.DataFrame, tz_target: str = "Asia/Dubai"):
    """Normalise the 'timestamp' column to tz-naive datetimes in *tz_target* local time.

    Frames without a 'timestamp' column are returned unchanged (same object).
    Unparseable entries become NaT because parsing uses errors='coerce'.
    """
    if "timestamp" not in df.columns:
        return df

    # Parse everything as UTC first so mixed/absent offsets are handled uniformly.
    parsed = pd.to_datetime(df["timestamp"], errors="coerce", utc=True)
    try:
        parsed = parsed.dt.tz_convert(tz_target).dt.tz_localize(None)
    except Exception:
        # Fall back to just stripping tz info; if even that fails, keep as parsed.
        try:
            parsed = parsed.dt.tz_localize(None)
        except Exception:
            pass

    out = df.copy()
    out["timestamp"] = parsed
    return out
|
| 34 |
+
|
| 35 |
+
def feature_engineer(df: pd.DataFrame) -> pd.DataFrame:
    """Build the model feature set from the raw frame.

    Steps, in order: timestamp normalisation, light numeric imputation,
    calendar + cyclical time features, domain interaction features, and
    short/daily rolling statistics of the target column.
    """
    out = df.copy()
    out = ensure_datetime_naive(out, tz_target="Asia/Dubai")

    # Light numeric imputation: forward-fill then back-fill numeric columns.
    numeric_cols = out.select_dtypes(include=[np.number]).columns
    out[numeric_cols] = out[numeric_cols].ffill().bfill()

    # Calendar + cyclical time features (zero-filled when no usable timestamp).
    if "timestamp" in out.columns and is_datetime(out["timestamp"]):
        ts = out["timestamp"].dt
        out["hour"] = ts.hour
        out["dayofweek"] = ts.dayofweek
        out["is_weekend"] = (out["dayofweek"] >= 5).astype(int)
        out["month"] = ts.month
        out["dayofyear"] = ts.dayofyear
        out["hour_sin"] = np.sin(2 * np.pi * out["hour"] / 24)
        out["hour_cos"] = np.cos(2 * np.pi * out["hour"] / 24)
        out["dow_sin"] = np.sin(2 * np.pi * out["dayofweek"] / 7)
        out["dow_cos"] = np.cos(2 * np.pi * out["dayofweek"] / 7)
    else:
        placeholder_cols = ["hour", "dayofweek", "is_weekend", "month", "dayofyear",
                            "hour_sin", "hour_cos", "dow_sin", "dow_cos"]
        for name in placeholder_cols:
            if name not in out.columns:
                out[name] = 0

    # Domain features — only created when their source columns exist.
    target = infer_target_column(out)
    if "cooling_eff_pct" in out.columns:
        out["cooling_ineff_pct"] = 100 - out["cooling_eff_pct"]
    if "server_load_pct" in out.columns:
        # Floor the divisor at 1 to avoid divide-by-zero on idle servers.
        out["energy_per_load"] = out[target] / np.maximum(out["server_load_pct"], 1)
    if "ambient_temp_c" in out.columns and "server_load_pct" in out.columns:
        out["temp_load_interaction"] = out["ambient_temp_c"] * out["server_load_pct"]

    # Lag and rolling statistics of the target itself (3-step and 24-step windows).
    out["target_lag1"] = out[target].shift(1)
    out["target_roll3"] = out[target].rolling(3, min_periods=1).mean()
    out["target_roll24"] = out[target].rolling(24, min_periods=1).mean()

    # Fill the NaNs introduced by shifting/rolling at the edges.
    return out.ffill().bfill()
|
| 76 |
+
|
| 77 |
+
def get_model(name: str):
    """Instantiate the regressor selected in the UI (fixed seed for reproducibility)."""
    if name == "Gradient Boosting":
        return GradientBoostingRegressor(random_state=42)
    return RandomForestRegressor(n_estimators=300, random_state=42)
|
| 80 |
+
|
| 81 |
+
def feature_target_split(df: pd.DataFrame):
    """Split *df* into (X, y, target_name).

    X contains only numeric columns, excluding the target and 'timestamp';
    y is the target column cast to float.
    """
    target = infer_target_column(df)
    to_drop = [c for c in (target, "timestamp") if c in df.columns]
    X = df.drop(columns=to_drop, errors="ignore").select_dtypes(include=[np.number]).copy()
    y = df[target].astype(float)
    return X, y, target
|
| 87 |
+
|
| 88 |
+
# ---------- Core pipeline ----------
|
| 89 |
+
def run_pipeline(file_path, model_name):
    """End-to-end pipeline behind the Run button.

    Loads the uploaded CSV, engineers features, trains the selected model,
    evaluates on a holdout split, and builds all dashboard artifacts.

    Parameters
    ----------
    file_path : str | None
        Path of the uploaded CSV (Gradio File component with type="filepath").
    model_name : str
        "Gradient Boosting" or "Random Forest".

    Returns
    -------
    tuple
        Exactly NINE values, one per wired Gradio output:
        (title, explainer markdown, KPI markdown, preview dataframe,
         time-series figure, scatter figure, residual figure,
         feature-importance figure, downloadable CSV path).
        Error paths put the message in the explainer slot and None elsewhere.
    """
    title = "β‘ AI-Driven Data Center Energy Optimization Dashboard"

    try:
        if not file_path:
            # Bug fix: this must be a 9-tuple (one value per Gradio output);
            # the previous 8-tuple crashed the click handler.
            return (title, "Please upload a CSV file.", None, None, None, None, None, None, None)

        df_raw = pd.read_csv(file_path)
        df = feature_engineer(df_raw)

        # Guardrail: tiny datasets cannot support a meaningful train/test split.
        if len(df) < 10:
            # Bug fix: 9-tuple here as well (was 8).
            return (title, "Not enough rows to train a model (need >= 10).", None, None, None, None, None, None, None)

        X, y, y_col = feature_target_split(df)

        # Split, train, predict (smaller holdout fraction for small datasets).
        test_size = 0.25 if len(df) >= 25 else 0.2
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=42
        )
        model = get_model(model_name)
        model.fit(X_train, y_train)

        y_pred_all = model.predict(X)        # predictions over the full dataset
        y_pred_test = model.predict(X_test)  # holdout-only predictions

        # Holdout metrics + simple averages for the KPI panel.
        mae = mean_absolute_error(y_test, y_pred_test)
        r2 = r2_score(y_test, y_pred_test)
        avg_actual = float(np.mean(y))
        avg_pred = float(np.mean(y_pred_all))

        # ------ Visualizations ------
        ts_plot = None
        if "timestamp" in df.columns and is_datetime(df["timestamp"]):
            # Re-predict on time-sorted rows so the line plot is monotonic in x.
            plot_df = df.copy().sort_values("timestamp")
            Xp = plot_df.drop(columns=[c for c in [y_col, "timestamp"] if c in plot_df.columns], errors="ignore")
            Xp = Xp.select_dtypes(include=[np.number]).copy()
            yp = model.predict(Xp)
            ts_plot = plt.figure(figsize=(9, 3.6))
            plt.plot(plot_df["timestamp"], plot_df[y_col], label="Actual")
            plt.plot(plot_df["timestamp"], yp, label="Predicted")
            plt.title("Time Series: Actual vs Predicted")
            plt.xlabel("Time"); plt.ylabel(y_col)
            plt.legend(); plt.tight_layout()

        # Holdout scatter with the ideal y = x reference line.
        sc_plot = plt.figure(figsize=(4.6, 3.8))
        plt.scatter(y_test, y_pred_test, alpha=0.6)
        mn = min(y_test.min(), y_pred_test.min()); mx = max(y_test.max(), y_pred_test.max())
        plt.plot([mn, mx], [mn, mx], linestyle="--")
        plt.title("Holdout: Actual vs Predicted")
        plt.xlabel("Actual"); plt.ylabel("Predicted")
        plt.tight_layout()

        # Residual distribution on the holdout split.
        res = y_test - y_pred_test
        resid_plot = plt.figure(figsize=(4.6, 3.6))
        plt.hist(res, bins=30)
        plt.title("Holdout Residuals (Actual β Predicted)")
        plt.xlabel("Residual"); plt.ylabel("Count")
        plt.tight_layout()

        # Feature importances (both supported tree ensembles expose this attr).
        fi_plot = None
        if hasattr(model, "feature_importances_"):
            importances = model.feature_importances_
            fi = (pd.DataFrame({"feature": X.columns, "importance": importances})
                  .sort_values("importance", ascending=False).head(12))
            fi_plot = plt.figure(figsize=(6.2, 3.8))
            plt.barh(fi["feature"][::-1], fi["importance"][::-1])
            plt.title("Top Feature Importances")
            plt.tight_layout()

        # Save predictions for download (delete=False so Gradio can serve it).
        out_df = df.copy()
        out_df[f"{y_col}_pred"] = y_pred_all
        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
        out_df.to_csv(tmp.name, index=False)

        # --------- Copy text (explainer + KPIs) ---------
        explainer = (
            "### π§ What this app does\n"
            "This AI-driven dashboard learns the relationship between **server load**, **ambient temperature**, "
            "**cooling efficiency**, and time features to **predict power usage**. "
            "Use it to quantify drivers of energy consumption, monitor deviations, and surface optimization levers.\n\n"
            "### π Why it matters\n"
            "- Reduces **OPEX** by forecasting and optimizing energy usage\n"
            "- Identifies high-impact drivers (feature importance)\n"
            "- Enables proactive actions (e.g., workload shaping, cooling set-point tuning)\n\n"
            "### βοΈ How it works (high-level)\n"
            "1) Cleans and engineers features (diurnal/weekly cycles, rolling stats, domain signals)\n"
            "2) Trains a tree ensemble (Gradient Boosting or Random Forest)\n"
            "3) Evaluates on a holdout split and produces predictions for the entire dataset\n"
            "4) Visualizes time series, accuracy scatter, residuals, and top feature importance\n"
        )

        kpis = (
            f"**Model:** {model_name}\n\n"
            f"**Target:** {y_col}\n"
            f"**Avg {y_col} (actual):** {avg_actual:,.2f}\n"
            f"**Avg {y_col} (predicted):** {avg_pred:,.2f}\n"
            f"**Rows:** {len(df):,}\n\n"
            f"**Holdout MAE:** {mae:,.2f} | **RΒ²:** {r2:,.3f}"
        )

        # Sample preview table
        preview = out_df.head(10)

        return (
            title,
            explainer,
            kpis,
            preview,
            ts_plot,
            sc_plot,
            resid_plot,
            fi_plot,
            tmp.name
        )

    except Exception as e:
        # Surface the failure in the UI instead of crashing the worker.
        err = f"β **Error:** {type(e).__name__}: {e}"
        return (title, err, None, None, None, None, None, None, None)
|
| 210 |
+
|
| 211 |
+
# ---------- Gradio UI ----------
|
| 212 |
+
# Close any servers left over from a previous run (avoids port conflicts when
# re-running in notebooks/Colab). Uses the existing `gr` alias — the previous
# redundant `import gradio` is removed.
gr.close_all()

with gr.Blocks(title="AI-Driven Data Center Energy Optimization") as demo:
    gr.Markdown("## β‘ AI-Driven Data Center Energy Optimization Dashboard")

    # Inputs: CSV upload + model choice.
    with gr.Row():
        fpath = gr.File(label="π Upload Dataset (CSV)", file_types=[".csv"], type="filepath")
        model_name = gr.Dropdown(
            choices=["Gradient Boosting", "Random Forest"],
            value="Gradient Boosting",
            label="π Select Model"
        )

    run_btn = gr.Button("βΆοΈ Run")

    # Text + table outputs.
    title_out = gr.Markdown()
    explainer_out = gr.Markdown()
    kpi_out = gr.Markdown()
    table_out = gr.Dataframe(label="π Sample (+ Predictions)", wrap=True, row_count=("fixed", 10))

    gr.Markdown("### π Visual Insights")
    ts_plot = gr.Plot(label="Time Series: Actual vs Predicted")
    sc_plot = gr.Plot(label="Holdout: Actual vs Predicted")
    resid_plot = gr.Plot(label="Residuals (Histogram)")
    fi_plot = gr.Plot(label="Top Feature Importances")

    dl = gr.File(label="π₯ Download Data (+ Predictions)")

    # Nine outputs — run_pipeline returns one value per component, in order.
    run_btn.click(
        fn=run_pipeline,
        inputs=[fpath, model_name],
        outputs=[title_out, explainer_out, kpi_out, table_out, ts_plot, sc_plot, resid_plot, fi_plot, dl]
    )

# Entry guard so importing this module doesn't start a server.
if __name__ == "__main__":
    demo.launch(share=True)
|