LeonardoMdSA commited on
Commit
79b961c
·
1 Parent(s): 8e3bbfe

Cron job update

Browse files
app/api/background_drift.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/api/background_drift.py
2
+ import asyncio
3
+ import pandas as pd
4
+ import os
5
+ import json
6
+ import numpy as np
7
+
8
+ from app.monitoring.drift import run_drift_check
9
+ from app.inference.predictor import Predictor
10
+
11
+ predictor = Predictor()
12
+ REFERENCE_PATH = "models/v1/reference_data.csv"
13
+ CURRENT_DATA_PATH = "data/production/predictions_log.csv"
14
+ DASHBOARD_JSON = "reports/evidently/drift_report.json"
15
+
16
+ # Ensure folder exists and JSON file exists at startup
17
+ os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
18
+ if not os.path.exists(DASHBOARD_JSON):
19
+ with open(DASHBOARD_JSON, "w") as f:
20
+ json.dump({"n_rows": 0, "results": [], "drift": [{"column": feat, "score": 0.0} for feat in predictor.features]}, f, indent=2)
21
+
22
+ async def drift_loop(interval_seconds: int = 30):
23
+ """
24
+ Continuously run drift checks and update dashboard JSON.
25
+ """
26
+ while True:
27
+ try:
28
+ current_df = pd.read_csv(CURRENT_DATA_PATH)
29
+ reference_df = pd.read_csv(REFERENCE_PATH)
30
+
31
+ _, drift_dict = run_drift_check(
32
+ current_df[predictor.features],
33
+ reference_df[predictor.features],
34
+ "v1"
35
+ )
36
+
37
+ # Ensure numeric safe drift values
38
+ drift_for_chart = []
39
+ for col, score in drift_dict.items():
40
+ try:
41
+ val = float(score)
42
+ if not np.isfinite(val):
43
+ val = 0.0
44
+ except Exception:
45
+ val = 0.0
46
+ drift_for_chart.append({"column": col, "score": val})
47
+
48
+ dashboard_payload = {
49
+ "n_rows": len(current_df),
50
+ "results": [], # predictions not included in background loop
51
+ "drift": drift_for_chart
52
+ }
53
+
54
+ # Atomic write to avoid read/write collision
55
+ tmp_path = DASHBOARD_JSON + ".tmp"
56
+ with open(tmp_path, "w") as f:
57
+ json.dump(dashboard_payload, f, indent=2)
58
+ os.replace(tmp_path, DASHBOARD_JSON)
59
+
60
+ except Exception as e:
61
+ print("Drift loop error:", e)
62
+
63
+ await asyncio.sleep(interval_seconds)
app/api/dashboard_data.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # app/api/dashboard_data.py
2
+ from fastapi import APIRouter
3
+ from fastapi.responses import JSONResponse
4
+ import json
5
+ import os
6
+
7
+ router = APIRouter()
8
+
9
+ DATA_FILE = "reports/evidently/drift_report.json" # we will write drift info here
10
+
11
+ @router.get("/dashboard/data")
12
+ def get_dashboard_data():
13
+ """
14
+ Return the latest drift and prediction summary for the frontend dashboard.
15
+ """
16
+ if os.path.exists(DATA_FILE):
17
+ try:
18
+ with open(DATA_FILE, "r") as f:
19
+ data = json.load(f)
20
+ return JSONResponse({"status": "ok", "data": data})
21
+ except Exception as e:
22
+ return JSONResponse({"status": "error", "message": str(e)}, status_code=500)
23
+ else:
24
+ return JSONResponse({"status": "error", "message": "No data available"}, status_code=404)
app/api/routes.py CHANGED
@@ -10,6 +10,7 @@ from app.monitoring.governance import run_governance_checks
10
 
11
  import pandas as pd
12
  import numpy as np
 
13
  import os
14
 
15
  templates = Jinja2Templates(directory="app/templates")
@@ -70,7 +71,25 @@ async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = Fil
70
 
71
  # ---- Background full drift check ----
72
  background_tasks.add_task(run_drift_check, df[predictor.features], reference_df[predictor.features], "v1")
73
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  return JSONResponse({"n_rows": len(results), "results": results, "drift": drift_for_chart})
75
 
76
 
 
10
 
11
  import pandas as pd
12
  import numpy as np
13
+ import json
14
  import os
15
 
16
  templates = Jinja2Templates(directory="app/templates")
 
71
 
72
  # ---- Background full drift check ----
73
  background_tasks.add_task(run_drift_check, df[predictor.features], reference_df[predictor.features], "v1")
74
+ DASHBOARD_JSON = "reports/evidently/drift_report.json"
75
+
76
+ # After computing drift_for_chart
77
+ dashboard_payload = {
78
+ "n_rows": len(results),
79
+ "results": results,
80
+ "drift": drift_for_chart
81
+ }
82
+
83
+ # Write JSON for dashboard frontend
84
+ os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
85
+ # atomic write to avoid read/write collision
86
+ import tempfile
87
+ tmp_path = DASHBOARD_JSON + ".tmp"
88
+ with open(tmp_path, "w") as f:
89
+ json.dump(dashboard_payload, f, indent=2)
90
+ os.replace(tmp_path, DASHBOARD_JSON)
91
+
92
+
93
  return JSONResponse({"n_rows": len(results), "results": results, "drift": drift_for_chart})
94
 
95
 
app/main.py CHANGED
@@ -1,29 +1,29 @@
1
- # FastAPI entrypoint
2
-
3
  from fastapi import FastAPI
4
  from fastapi.staticfiles import StaticFiles
 
 
 
5
  from app.api.routes import router
 
6
  from app.core.logging import init_db
 
7
 
8
- app = FastAPI(title="ML Inference Service")
 
 
 
 
 
 
 
 
 
9
 
10
- # Init DB
11
- init_db()
12
 
13
- # Mount static FIRST
14
- app.mount(
15
- "/static",
16
- StaticFiles(directory="app/static"),
17
- name="static"
18
- )
19
 
20
- # Mount reports
21
- app.mount(
22
- "/reports",
23
- StaticFiles(directory="reports"),
24
- name="reports"
25
- )
26
-
27
- # Include API routes
28
  app.include_router(router)
29
-
 
1
+ # app/main.py (no other changes)
 
2
  from fastapi import FastAPI
3
  from fastapi.staticfiles import StaticFiles
4
+ import asyncio
5
+ from contextlib import asynccontextmanager
6
+
7
  from app.api.routes import router
8
+ from app.api.dashboard_data import router as dashboard_data_router
9
  from app.core.logging import init_db
10
+ from app.api.background_drift import drift_loop
11
 
12
+ @asynccontextmanager
13
+ async def lifespan(app: FastAPI):
14
+ init_db()
15
+ task = asyncio.create_task(drift_loop(interval_seconds=10))
16
+ yield
17
+ task.cancel()
18
+ try:
19
+ await task
20
+ except asyncio.CancelledError:
21
+ pass
22
 
23
+ app = FastAPI(title="ML Inference Service", lifespan=lifespan)
 
24
 
25
+ app.mount("/static", StaticFiles(directory="app/static"), name="static")
26
+ app.mount("/reports", StaticFiles(directory="reports"), name="reports")
 
 
 
 
27
 
 
 
 
 
 
 
 
 
28
  app.include_router(router)
29
+ app.include_router(dashboard_data_router)
app/templates/dashboard.html CHANGED
@@ -61,5 +61,44 @@ document.getElementById("upload-form").addEventListener("submit", async (e) => {
61
  }
62
  });
63
  </script>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  </body>
65
  </html>
 
61
  }
62
  });
63
  </script>
64
+ <div id="drift-chart"></div>
65
+
66
+ <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
67
+ <script>
68
+ async function fetchDashboardData() {
69
+ try {
70
+ const resp = await fetch("/dashboard/data");
71
+ const json = await resp.json();
72
+ if(json.status === "ok") {
73
+ const driftData = json.data.drift;
74
+ const columns = driftData.map(d => d.column);
75
+ const scores = driftData.map(d => d.score);
76
+
77
+ const trace = {
78
+ x: columns,
79
+ y: scores,
80
+ type: 'bar',
81
+ marker: {color: 'orange'}
82
+ };
83
+
84
+ const layout = {
85
+ title: 'Drift Scores by Column',
86
+ yaxis: {title: 'Score'},
87
+ xaxis: {title: 'Column'}
88
+ };
89
+
90
+ Plotly.newPlot('drift-chart', [trace], layout);
91
+ } else {
92
+ console.warn("Dashboard data not available:", json.message);
93
+ }
94
+ } catch(err) {
95
+ console.error("Failed to fetch dashboard data:", err);
96
+ }
97
+ }
98
+
99
+ // Fetch every 10 seconds
100
+ fetchDashboardData();
101
+ setInterval(fetchDashboardData, 10000);
102
+ </script>
103
  </body>
104
  </html>
data/production/predictions_log.csv CHANGED
The diff for this file is too large to render. See raw diff
 
reports/evidently/drift_report.html CHANGED
The diff for this file is too large to render. See raw diff
 
reports/evidently/drift_report.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "n_rows": 9000,
3
+ "results": [],
4
+ "drift": [
5
+ {
6
+ "column": "dataset",
7
+ "score": 0.25
8
+ },
9
+ {
10
+ "column": "age",
11
+ "score": 0.02779295163704811
12
+ },
13
+ {
14
+ "column": "bill_amt_aug",
15
+ "score": 0.05813603880053332
16
+ },
17
+ {
18
+ "column": "bill_amt_sep",
19
+ "score": 0.05990164393199121
20
+ },
21
+ {
22
+ "column": "credit_limit",
23
+ "score": 0.10919756738678622
24
+ },
25
+ {
26
+ "column": "pay_amt_aug",
27
+ "score": 0.03387021661566681
28
+ },
29
+ {
30
+ "column": "pay_amt_sep",
31
+ "score": 0.03218554602837802
32
+ },
33
+ {
34
+ "column": "pay_delay_aug",
35
+ "score": 0.08452784641205481
36
+ },
37
+ {
38
+ "column": "pay_delay_sep",
39
+ "score": 0.10437369569624884
40
+ }
41
+ ]
42
+ }