Spaces:

LeonardoMdSA
/

ML-Inference-Service-with-Drift-Detection

Sleeping

App Files Files Community

LeonardoMdSA committed on Jan 14

Commit

79b961c

1 Parent(s): 8e3bbfe

Cron job update

Browse files

Files changed (8) hide show

app/api/background_drift.py +63 -0
app/api/dashboard_data.py +24 -0
app/api/routes.py +20 -1
app/main.py +20 -20
app/templates/dashboard.html +39 -0
data/production/predictions_log.csv +0 -0
reports/evidently/drift_report.html +0 -0
reports/evidently/drift_report.json +42 -0

app/api/background_drift.py ADDED Viewed

	@@ -0,0 +1,63 @@

+# app/api/background_drift.py
+import asyncio
+import pandas as pd
+import os
+import json
+import numpy as np
+from app.monitoring.drift import run_drift_check
+from app.inference.predictor import Predictor
+predictor = Predictor()
+REFERENCE_PATH = "models/v1/reference_data.csv"
+CURRENT_DATA_PATH = "data/production/predictions_log.csv"
+DASHBOARD_JSON = "reports/evidently/drift_report.json"
+# Ensure folder exists and JSON file exists at startup
+os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
+if not os.path.exists(DASHBOARD_JSON):
+    with open(DASHBOARD_JSON, "w") as f:
+        json.dump({"n_rows": 0, "results": [], "drift": [{"column": feat, "score": 0.0} for feat in predictor.features]}, f, indent=2)
+async def drift_loop(interval_seconds: int = 30):
+    """
+    Continuously run drift checks and update dashboard JSON.
+    """
+    while True:
+        try:
+            current_df = pd.read_csv(CURRENT_DATA_PATH)
+            reference_df = pd.read_csv(REFERENCE_PATH)
+            _, drift_dict = run_drift_check(
+                current_df[predictor.features],
+                reference_df[predictor.features],
+                "v1"
+            )
+            # Ensure numeric safe drift values
+            drift_for_chart = []
+            for col, score in drift_dict.items():
+                try:
+                    val = float(score)
+                    if not np.isfinite(val):
+                        val = 0.0
+                except Exception:
+                    val = 0.0
+                drift_for_chart.append({"column": col, "score": val})
+            dashboard_payload = {
+                "n_rows": len(current_df),
+                "results": [],  # predictions not included in background loop
+                "drift": drift_for_chart
+            }
+            # Atomic write to avoid read/write collision
+            tmp_path = DASHBOARD_JSON + ".tmp"
+            with open(tmp_path, "w") as f:
+                json.dump(dashboard_payload, f, indent=2)
+            os.replace(tmp_path, DASHBOARD_JSON)
+        except Exception as e:
+            print("Drift loop error:", e)
+        await asyncio.sleep(interval_seconds)

app/api/dashboard_data.py ADDED Viewed

	@@ -0,0 +1,24 @@

+# app/api/dashboard_data.py
+from fastapi import APIRouter
+from fastapi.responses import JSONResponse
+import json
+import os
+router = APIRouter()
+DATA_FILE = "reports/evidently/drift_report.json"  # we will write drift info here
+@router.get("/dashboard/data")
+def get_dashboard_data():
+    """
+    Return the latest drift and prediction summary for the frontend dashboard.
+    """
+    if os.path.exists(DATA_FILE):
+        try:
+            with open(DATA_FILE, "r") as f:
+                data = json.load(f)
+            return JSONResponse({"status": "ok", "data": data})
+        except Exception as e:
+            return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
+    else:
+        return JSONResponse({"status": "error", "message": "No data available"}, status_code=404)

app/api/routes.py CHANGED Viewed

@@ -10,6 +10,7 @@ from app.monitoring.governance import run_governance_checks
 import pandas as pd
 import numpy as np
 import os
 templates = Jinja2Templates(directory="app/templates")
@@ -70,7 +71,25 @@ async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = Fil
     # ---- Background full drift check ----
     background_tasks.add_task(run_drift_check, df[predictor.features], reference_df[predictor.features], "v1")
     return JSONResponse({"n_rows": len(results), "results": results, "drift": drift_for_chart})

 import pandas as pd
 import numpy as np
+import json
 import os
 templates = Jinja2Templates(directory="app/templates")
     # ---- Background full drift check ----
     background_tasks.add_task(run_drift_check, df[predictor.features], reference_df[predictor.features], "v1")
+    DASHBOARD_JSON = "reports/evidently/drift_report.json"
+    # After computing drift_for_chart
+    dashboard_payload = {
+        "n_rows": len(results),
+        "results": results,
+        "drift": drift_for_chart
+    }
+    # Write JSON for dashboard frontend
+    os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
+    # atomic write to avoid read/write collision
+    import tempfile
+    tmp_path = DASHBOARD_JSON + ".tmp"
+    with open(tmp_path, "w") as f:
+        json.dump(dashboard_payload, f, indent=2)
+    os.replace(tmp_path, DASHBOARD_JSON)
     return JSONResponse({"n_rows": len(results), "results": results, "drift": drift_for_chart})

app/main.py CHANGED Viewed

@@ -1,29 +1,29 @@
-# FastAPI entrypoint
 from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
 from app.api.routes import router
 from app.core.logging import init_db
-app = FastAPI(title="ML Inference Service")
-# Init DB
-init_db()
-# Mount static FIRST
-app.mount(
-    "/static",
-    StaticFiles(directory="app/static"),
-    name="static"
-)
-# Mount reports
-app.mount(
-    "/reports",
-    StaticFiles(directory="reports"),
-    name="reports"
-)
-# Include API routes
 app.include_router(router)

+# app/main.py (no other changes)
 from fastapi import FastAPI
 from fastapi.staticfiles import StaticFiles
+import asyncio
+from contextlib import asynccontextmanager
 from app.api.routes import router
+from app.api.dashboard_data import router as dashboard_data_router
 from app.core.logging import init_db
+from app.api.background_drift import drift_loop
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    init_db()
+    task = asyncio.create_task(drift_loop(interval_seconds=10))
+    yield
+    task.cancel()
+    try:
+        await task
+    except asyncio.CancelledError:
+        pass
+app = FastAPI(title="ML Inference Service", lifespan=lifespan)
+app.mount("/static", StaticFiles(directory="app/static"), name="static")
+app.mount("/reports", StaticFiles(directory="reports"), name="reports")
 app.include_router(router)
+app.include_router(dashboard_data_router)

app/templates/dashboard.html CHANGED Viewed

@@ -61,5 +61,44 @@ document.getElementById("upload-form").addEventListener("submit", async (e) => {
     }
 });
 </script>
 </body>
 </html>

     }
 });
 </script>
+<div id="drift-chart"></div>
+<script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
+<script>
+async function fetchDashboardData() {
+    try {
+        const resp = await fetch("/dashboard/data");
+        const json = await resp.json();
+        if(json.status === "ok") {
+            const driftData = json.data.drift;
+            const columns = driftData.map(d => d.column);
+            const scores = driftData.map(d => d.score);
+            const trace = {
+                x: columns,
+                y: scores,
+                type: 'bar',
+                marker: {color: 'orange'}
+            };
+            const layout = {
+                title: 'Drift Scores by Column',
+                yaxis: {title: 'Score'},
+                xaxis: {title: 'Column'}
+            };
+            Plotly.newPlot('drift-chart', [trace], layout);
+        } else {
+            console.warn("Dashboard data not available:", json.message);
+        }
+    } catch(err) {
+        console.error("Failed to fetch dashboard data:", err);
+    }
+}
+// Fetch every 10 seconds
+fetchDashboardData();
+setInterval(fetchDashboardData, 10000);
+</script>
 </body>
 </html>

data/production/predictions_log.csv CHANGED Viewed

The diff for this file is too large to render. See raw diff

reports/evidently/drift_report.html CHANGED Viewed

The diff for this file is too large to render. See raw diff

reports/evidently/drift_report.json ADDED Viewed

	@@ -0,0 +1,42 @@

+{
+  "n_rows": 9000,
+  "results": [],
+  "drift": [
+    {
+      "column": "dataset",
+      "score": 0.25
+    },
+    {
+      "column": "age",
+      "score": 0.02779295163704811
+    },
+    {
+      "column": "bill_amt_aug",
+      "score": 0.05813603880053332
+    },
+    {
+      "column": "bill_amt_sep",
+      "score": 0.05990164393199121
+    },
+    {
+      "column": "credit_limit",
+      "score": 0.10919756738678622
+    },
+    {
+      "column": "pay_amt_aug",
+      "score": 0.03387021661566681
+    },
+    {
+      "column": "pay_amt_sep",
+      "score": 0.03218554602837802
+    },
+    {
+      "column": "pay_delay_aug",
+      "score": 0.08452784641205481
+    },
+    {
+      "column": "pay_delay_sep",
+      "score": 0.10437369569624884
+    }
+  ]
+}