Spaces:

decodingdatascience
/

AIEnergyOptimizationDashboard

Running

App Files Files Community

decodingdatascience commited on Oct 24, 2025

Commit

74a8924

verified ·

1 Parent(s): dade342

Create app.py

Browse files

Files changed (1) hide show

app.py +247 -0

app.py ADDED Viewed

	@@ -0,0 +1,247 @@

+import os, tempfile
+import numpy as np
+import pandas as pd
+import matplotlib.pyplot as plt
+from pandas.api.types import is_datetime64_any_dtype as is_datetime
+from sklearn.model_selection import train_test_split
+from sklearn.metrics import mean_absolute_error, r2_score
+from sklearn.ensemble import GradientBoostingRegressor, RandomForestRegressor
+import gradio as gr
+# ---------- Helpers ----------
+def infer_target_column(df: pd.DataFrame):
+    for c in ["power_usage_kwh", "energy_kwh", "power_kwh", "energy"]:
+        if c in df.columns:
+            return c
+    raise ValueError("Target column not found. Expected one of: "
+                     "['power_usage_kwh','energy_kwh','power_kwh','energy'].")
+def ensure_datetime_naive(df: pd.DataFrame, tz_target: str = "Asia/Dubai"):
+    if "timestamp" not in df.columns:
+        return df
+    # Parse robustly with UTC, then convert to target tz and drop tz
+    ts = pd.to_datetime(df["timestamp"], errors="coerce", utc=True)
+    try:
+        ts = ts.dt.tz_convert(tz_target).dt.tz_localize(None)
+    except Exception:
+        try:
+            ts = ts.dt.tz_localize(None)
+        except Exception:
+            pass
+    df = df.copy()
+    df["timestamp"] = ts
+    return df
+def feature_engineer(df: pd.DataFrame) -> pd.DataFrame:
+    df = df.copy()
+    df = ensure_datetime_naive(df, tz_target="Asia/Dubai")
+    # Light numeric imputation
+    num_cols = df.select_dtypes(include=[np.number]).columns
+    df[num_cols] = df[num_cols].ffill().bfill()
+    # Time features
+    if "timestamp" in df.columns and is_datetime(df["timestamp"]):
+        df["hour"]       = df["timestamp"].dt.hour
+        df["dayofweek"]  = df["timestamp"].dt.dayofweek
+        df["is_weekend"] = (df["dayofweek"] >= 5).astype(int)
+        df["month"]      = df["timestamp"].dt.month
+        df["dayofyear"]  = df["timestamp"].dt.dayofyear
+        df["hour_sin"]   = np.sin(2*np.pi*df["hour"]/24)
+        df["hour_cos"]   = np.cos(2*np.pi*df["hour"]/24)
+        df["dow_sin"]    = np.sin(2*np.pi*df["dayofweek"]/7)
+        df["dow_cos"]    = np.cos(2*np.pi*df["dayofweek"]/7)
+    else:
+        for c in ["hour","dayofweek","is_weekend","month","dayofyear","hour_sin","hour_cos","dow_sin","dow_cos"]:
+            if c not in df.columns:
+                df[c] = 0
+    # Domain features
+    tgt = infer_target_column(df)
+    if "cooling_eff_pct" in df.columns:
+        df["cooling_ineff_pct"] = 100 - df["cooling_eff_pct"]
+    if "server_load_pct" in df.columns:
+        df["energy_per_load"] = df[tgt] / np.maximum(df["server_load_pct"], 1)
+    if "ambient_temp_c" in df.columns and "server_load_pct" in df.columns:
+        df["temp_load_interaction"] = df["ambient_temp_c"] * df["server_load_pct"]
+    # Target lags/rollings
+    df["target_lag1"]   = df[tgt].shift(1)
+    df["target_roll3"]  = df[tgt].rolling(3,  min_periods=1).mean()
+    df["target_roll24"] = df[tgt].rolling(24, min_periods=1).mean()
+    # Fill NaNs from shifts
+    df = df.ffill().bfill()
+    return df
+def get_model(name: str):
+    return GradientBoostingRegressor(random_state=42) if name == "Gradient Boosting" \
+        else RandomForestRegressor(n_estimators=300, random_state=42)
+def feature_target_split(df: pd.DataFrame):
+    y_col = infer_target_column(df)
+    X = df.drop(columns=[c for c in [y_col, "timestamp"] if c in df.columns], errors="ignore")
+    X = X.select_dtypes(include=[np.number]).copy()
+    y = df[y_col].astype(float)
+    return X, y, y_col
+# ---------- Core pipeline ----------
+def run_pipeline(file_path, model_name):
+    title = "⚡ AI-Driven Data Center Energy Optimization Dashboard"
+    try:
+        if not file_path:
+            return (title, "Please upload a CSV file.", None, None, None, None, None, None)
+        df_raw = pd.read_csv(file_path)
+        df = feature_engineer(df_raw)
+        # Guardrail
+        if len(df) < 10:
+            return (title, "Not enough rows to train a model (need >= 10).", None, None, None, None, None, None)
+        X, y, y_col = feature_target_split(df)
+        # Split, train, predict
+        test_size = 0.25 if len(df) >= 25 else 0.2
+        X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=test_size, random_state=42
+        )
+        model = get_model(model_name)
+        model.fit(X_train, y_train)
+        y_pred_all  = model.predict(X)
+        y_pred_test = model.predict(X_test)
+        mae = mean_absolute_error(y_test, y_pred_test)
+        r2  = r2_score(y_test, y_pred_test)
+        avg_actual = float(np.mean(y))
+        avg_pred   = float(np.mean(y_pred_all))
+        # ------ Visualizations ------
+        ts_plot = None
+        if "timestamp" in df.columns and is_datetime(df["timestamp"]):
+            plot_df = df.copy().sort_values("timestamp")
+            Xp = plot_df.drop(columns=[c for c in [y_col, "timestamp"] if c in plot_df.columns], errors="ignore")
+            Xp = Xp.select_dtypes(include=[np.number]).copy()
+            yp = model.predict(Xp)
+            ts_plot = plt.figure(figsize=(9, 3.6))
+            plt.plot(plot_df["timestamp"], plot_df[y_col], label="Actual")
+            plt.plot(plot_df["timestamp"], yp, label="Predicted")
+            plt.title("Time Series: Actual vs Predicted")
+            plt.xlabel("Time"); plt.ylabel(y_col)
+            plt.legend(); plt.tight_layout()
+        sc_plot = plt.figure(figsize=(4.6, 3.8))
+        plt.scatter(y_test, y_pred_test, alpha=0.6)
+        mn = min(y_test.min(), y_pred_test.min()); mx = max(y_test.max(), y_pred_test.max())
+        plt.plot([mn, mx], [mn, mx], linestyle="--")
+        plt.title("Holdout: Actual vs Predicted")
+        plt.xlabel("Actual"); plt.ylabel("Predicted")
+        plt.tight_layout()
+        res = y_test - y_pred_test
+        resid_plot = plt.figure(figsize=(4.6, 3.6))
+        plt.hist(res, bins=30)
+        plt.title("Holdout Residuals (Actual − Predicted)")
+        plt.xlabel("Residual"); plt.ylabel("Count")
+        plt.tight_layout()
+        fi_plot = None
+        if hasattr(model, "feature_importances_"):
+            importances = model.feature_importances_
+            fi = (pd.DataFrame({"feature": X.columns, "importance": importances})
+                  .sort_values("importance", ascending=False).head(12))
+            fi_plot = plt.figure(figsize=(6.2, 3.8))
+            plt.barh(fi["feature"][::-1], fi["importance"][::-1])
+            plt.title("Top Feature Importances")
+            plt.tight_layout()
+        # Save predictions for download
+        out_df = df.copy()
+        out_df[f"{y_col}_pred"] = y_pred_all
+        tmp = tempfile.NamedTemporaryFile(delete=False, suffix=".csv")
+        out_df.to_csv(tmp.name, index=False)
+        # --------- Copy text (explainer + KPIs) ---------
+        explainer = (
+            "### 🧠 What this app does\n"
+            "This AI-driven dashboard learns the relationship between **server load**, **ambient temperature**, "
+            "**cooling efficiency**, and time features to **predict power usage**. "
+            "Use it to quantify drivers of energy consumption, monitor deviations, and surface optimization levers.\n\n"
+            "### 🔎 Why it matters\n"
+            "- Reduces **OPEX** by forecasting and optimizing energy usage\n"
+            "- Identifies high-impact drivers (feature importance)\n"
+            "- Enables proactive actions (e.g., workload shaping, cooling set-point tuning)\n\n"
+            "### ⚙️ How it works (high-level)\n"
+            "1) Cleans and engineers features (diurnal/weekly cycles, rolling stats, domain signals)\n"
+            "2) Trains a tree ensemble (Gradient Boosting or Random Forest)\n"
+            "3) Evaluates on a holdout split and produces predictions for the entire dataset\n"
+            "4) Visualizes time series, accuracy scatter, residuals, and top feature importance\n"
+        )
+        kpis = (
+            f"**Model:** {model_name}\n\n"
+            f"**Target:** {y_col}\n"
+            f"**Avg {y_col} (actual):** {avg_actual:,.2f}\n"
+            f"**Avg {y_col} (predicted):** {avg_pred:,.2f}\n"
+            f"**Rows:** {len(df):,}\n\n"
+            f"**Holdout MAE:** {mae:,.2f}   |   **R²:** {r2:,.3f}"
+        )
+        # Sample preview table
+        preview = out_df.head(10)
+        return (
+            title,
+            explainer,
+            kpis,
+            preview,
+            ts_plot,
+            sc_plot,
+            resid_plot,
+            fi_plot,
+            tmp.name
+        )
+    except Exception as e:
+        err = f"❌ **Error:** {type(e).__name__}: {e}"
+        return (title, err, None, None, None, None, None, None, None)
+# ---------- Gradio UI ----------
+import gradio
+gradio.close_all()  # avoid port conflicts in Colab
+with gr.Blocks(title="AI-Driven Data Center Energy Optimization") as demo:
+    gr.Markdown("## ⚡ AI-Driven Data Center Energy Optimization Dashboard")
+    with gr.Row():
+        fpath = gr.File(label="📁 Upload Dataset (CSV)", file_types=[".csv"], type="filepath")
+        model_name = gr.Dropdown(
+            choices=["Gradient Boosting", "Random Forest"],
+            value="Gradient Boosting",
+            label="🔍 Select Model"
+        )
+    run_btn = gr.Button("▶️ Run")
+    title_out = gr.Markdown()
+    explainer_out = gr.Markdown()
+    kpi_out = gr.Markdown()
+    table_out = gr.Dataframe(label="📋 Sample (+ Predictions)", wrap=True, row_count=("fixed", 10))
+    gr.Markdown("### 📈 Visual Insights")
+    ts_plot = gr.Plot(label="Time Series: Actual vs Predicted")
+    sc_plot = gr.Plot(label="Holdout: Actual vs Predicted")
+    resid_plot = gr.Plot(label="Residuals (Histogram)")
+    fi_plot = gr.Plot(label="Top Feature Importances")
+    dl = gr.File(label="📥 Download Data (+ Predictions)")
+    run_btn.click(
+        fn=run_pipeline,
+        inputs=[fpath, model_name],
+        outputs=[title_out, explainer_out, kpi_out, table_out, ts_plot, sc_plot, resid_plot, fi_plot, dl]
+    )
+demo.launch(share=True)