LeonardoMdSA committed on
Commit
61ee9c4
·
1 Parent(s): 79b961c

auto drift

Browse files
app/api/background_drift.py CHANGED
@@ -3,55 +3,69 @@ import asyncio
3
  import pandas as pd
4
  import os
5
  import json
6
- import numpy as np
7
 
8
  from app.monitoring.drift import run_drift_check
9
  from app.inference.predictor import Predictor
10
 
11
  predictor = Predictor()
 
12
  REFERENCE_PATH = "models/v1/reference_data.csv"
13
- CURRENT_DATA_PATH = "data/production/predictions_log.csv"
14
  DASHBOARD_JSON = "reports/evidently/drift_report.json"
15
 
16
- # Ensure folder exists and JSON file exists at startup
 
 
17
  os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
18
- if not os.path.exists(DASHBOARD_JSON):
19
- with open(DASHBOARD_JSON, "w") as f:
20
- json.dump({"n_rows": 0, "results": [], "drift": [{"column": feat, "score": 0.0} for feat in predictor.features]}, f, indent=2)
21
 
22
- async def drift_loop(interval_seconds: int = 30):
 
23
  """
24
- Continuously run drift checks and update dashboard JSON.
25
  """
26
  while True:
27
  try:
28
- current_df = pd.read_csv(CURRENT_DATA_PATH)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  reference_df = pd.read_csv(REFERENCE_PATH)
30
 
31
  _, drift_dict = run_drift_check(
32
- current_df[predictor.features],
33
  reference_df[predictor.features],
34
- "v1"
35
  )
36
 
37
- # Ensure numeric safe drift values
38
- drift_for_chart = []
39
- for col, score in drift_dict.items():
40
- try:
41
- val = float(score)
42
- if not np.isfinite(val):
43
- val = 0.0
44
- except Exception:
45
- val = 0.0
46
- drift_for_chart.append({"column": col, "score": val})
47
-
48
  dashboard_payload = {
49
- "n_rows": len(current_df),
50
- "results": [], # predictions not included in background loop
51
- "drift": drift_for_chart
 
 
 
52
  }
53
 
54
- # Atomic write to avoid read/write collision
55
  tmp_path = DASHBOARD_JSON + ".tmp"
56
  with open(tmp_path, "w") as f:
57
  json.dump(dashboard_payload, f, indent=2)
 
3
  import pandas as pd
4
  import os
5
  import json
 
6
 
7
  from app.monitoring.drift import run_drift_check
8
  from app.inference.predictor import Predictor
9
 
10
  predictor = Predictor()
11
+
12
  REFERENCE_PATH = "models/v1/reference_data.csv"
13
+ PROD_LOG_PATH = "data/production/predictions_log.csv"
14
  DASHBOARD_JSON = "reports/evidently/drift_report.json"
15
 
16
+ # Retention policy (VERY IMPORTANT for HF Spaces)
17
+ MAX_ROWS = 5000 # rolling window
18
+
19
  os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
 
 
 
20
 
21
+
22
+ async def drift_loop(interval_seconds: int = 10):
23
  """
24
+ Continuously compute drift from production inference data.
25
  """
26
  while True:
27
  try:
28
+ if not os.path.exists(PROD_LOG_PATH):
29
+ await asyncio.sleep(interval_seconds)
30
+ continue
31
+
32
+ prod_df = pd.read_csv(PROD_LOG_PATH)
33
+
34
+ # ---- Retention window (prevents infinite growth) ----
35
+ if len(prod_df) > MAX_ROWS:
36
+ prod_df = prod_df.tail(MAX_ROWS)
37
+ prod_df.to_csv(PROD_LOG_PATH, index=False)
38
+
39
+ # ---- Keep only rows with all required features ----
40
+ missing_features = set(predictor.features) - set(prod_df.columns)
41
+ if missing_features:
42
+ print(f"Skipping drift check, missing features: {missing_features}")
43
+ await asyncio.sleep(interval_seconds)
44
+ continue
45
+
46
+ prod_df = prod_df.dropna(subset=predictor.features)
47
+ if prod_df.empty:
48
+ await asyncio.sleep(interval_seconds)
49
+ continue
50
+
51
  reference_df = pd.read_csv(REFERENCE_PATH)
52
 
53
  _, drift_dict = run_drift_check(
54
+ prod_df[predictor.features],
55
  reference_df[predictor.features],
56
+ model_version="v1",
57
  )
58
 
 
 
 
 
 
 
 
 
 
 
 
59
  dashboard_payload = {
60
+ "n_rows": len(prod_df),
61
+ "results": [],
62
+ "drift": [
63
+ {"column": col, "score": float(score)}
64
+ for col, score in drift_dict.items()
65
+ ],
66
  }
67
 
68
+ # Atomic write (prevents frontend race conditions)
69
  tmp_path = DASHBOARD_JSON + ".tmp"
70
  with open(tmp_path, "w") as f:
71
  json.dump(dashboard_payload, f, indent=2)
app/api/routes.py CHANGED
@@ -60,6 +60,7 @@ async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = Fil
60
  df_log = df.copy()
61
  df_log["prediction"] = preds
62
  df_log["probability"] = probas
 
63
  df_log["model_version"] = predictor.model_version
64
  df_log["timestamp"] = pd.Timestamp.utcnow()
65
 
 
60
  df_log = df.copy()
61
  df_log["prediction"] = preds
62
  df_log["probability"] = probas
63
+ df_log["risk_level"] = ["High" if p >= 0.75 else "Medium" if p >= 0.5 else "Low" for p in probas]
64
  df_log["model_version"] = predictor.model_version
65
  df_log["timestamp"] = pd.Timestamp.utcnow()
66
 
app/templates/dashboard.html CHANGED
@@ -14,65 +14,34 @@
14
  <button type="submit">Run Prediction</button>
15
  </form>
16
 
17
- <h2>Predictions</h2>
18
  <div id="predictions"></div>
19
 
20
  <h2>Drift Metrics</h2>
21
  <div id="drift-chart"></div>
22
 
23
- <script>
24
- async function fetchResults(csvFile) {
25
- const formData = new FormData();
26
- formData.append("file", csvFile);
27
-
28
- const response = await fetch("/predict", { method: "POST", body: formData });
29
- const data = await response.json();
30
-
31
- document.getElementById("predictions").innerHTML =
32
- `<pre>${JSON.stringify(data.results, null, 2)}</pre>`;
33
-
34
- const driftContainer = document.getElementById("drift-chart");
35
- driftContainer.innerHTML = "";
36
-
37
- if (Array.isArray(data.drift)) {
38
- const cols = data.drift.map(d => d.column);
39
- const scores = data.drift.map(d => {
40
- let val = Number(d.score);
41
- if (!Number.isFinite(val)) val = 0;
42
- return val;
43
- });
44
-
45
- Plotly.newPlot(driftContainer, [{
46
- x: cols,
47
- y: scores,
48
- type: "bar"
49
- }]);
50
- } else {
51
- driftContainer.innerHTML =
52
- "<p>Drift report scheduled. Open the Evidently HTML report.</p>";
53
- }
54
- }
55
-
56
- document.getElementById("upload-form").addEventListener("submit", async (e) => {
57
- e.preventDefault();
58
- const fileInput = e.target.file.files[0];
59
- if (fileInput) {
60
- await fetchResults(fileInput);
61
- }
62
- });
63
- </script>
64
- <div id="drift-chart"></div>
65
-
66
- <script src="https://cdn.plot.ly/plotly-latest.min.js"></script>
67
  <script>
68
  async function fetchDashboardData() {
69
  try {
70
  const resp = await fetch("/dashboard/data");
71
  const json = await resp.json();
 
72
  if(json.status === "ok") {
73
- const driftData = json.data.drift;
 
 
 
 
 
 
 
 
 
 
 
 
74
  const columns = driftData.map(d => d.column);
75
- const scores = driftData.map(d => d.score);
76
 
77
  const trace = {
78
  x: columns,
@@ -87,10 +56,12 @@ async function fetchDashboardData() {
87
  xaxis: {title: 'Column'}
88
  };
89
 
90
- Plotly.newPlot('drift-chart', [trace], layout);
 
91
  } else {
92
  console.warn("Dashboard data not available:", json.message);
93
  }
 
94
  } catch(err) {
95
  console.error("Failed to fetch dashboard data:", err);
96
  }
@@ -99,6 +70,21 @@ async function fetchDashboardData() {
99
  // Fetch every 10 seconds
100
  fetchDashboardData();
101
  setInterval(fetchDashboardData, 10000);
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  </script>
103
  </body>
104
  </html>
 
14
  <button type="submit">Run Prediction</button>
15
  </form>
16
 
17
+ <h2>Recent Predictions</h2>
18
  <div id="predictions"></div>
19
 
20
  <h2>Drift Metrics</h2>
21
  <div id="drift-chart"></div>
22
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  <script>
24
  async function fetchDashboardData() {
25
  try {
26
  const resp = await fetch("/dashboard/data");
27
  const json = await resp.json();
28
+
29
  if(json.status === "ok") {
30
+ const data = json.data;
31
+
32
+ // Update predictions table
33
+ const predDiv = document.getElementById("predictions");
34
+ if(Array.isArray(data.results) && data.results.length > 0){
35
+ predDiv.innerHTML = "<pre>" + JSON.stringify(data.results, null, 2) + "</pre>";
36
+ } else {
37
+ predDiv.innerHTML = "<p>No recent predictions.</p>";
38
+ }
39
+
40
+ // Update drift chart
41
+ const driftContainer = document.getElementById("drift-chart");
42
+ const driftData = data.drift || [];
43
  const columns = driftData.map(d => d.column);
44
+ const scores = driftData.map(d => Number(d.score));
45
 
46
  const trace = {
47
  x: columns,
 
56
  xaxis: {title: 'Column'}
57
  };
58
 
59
+ Plotly.newPlot(driftContainer, [trace], layout);
60
+
61
  } else {
62
  console.warn("Dashboard data not available:", json.message);
63
  }
64
+
65
  } catch(err) {
66
  console.error("Failed to fetch dashboard data:", err);
67
  }
 
70
  // Fetch every 10 seconds
71
  fetchDashboardData();
72
  setInterval(fetchDashboardData, 10000);
73
+
74
+ // File upload handler
75
+ document.getElementById("upload-form").addEventListener("submit", async (e) => {
76
+ e.preventDefault();
77
+ const fileInput = e.target.file.files[0];
78
+ if (fileInput) {
79
+ const formData = new FormData();
80
+ formData.append("file", fileInput);
81
+
82
+ const response = await fetch("/predict", { method: "POST", body: formData });
83
+ const data = await response.json();
84
+ document.getElementById("predictions").innerHTML =
85
+ `<pre>${JSON.stringify(data.results, null, 2)}</pre>`;
86
+ }
87
+ });
88
  </script>
89
  </body>
90
  </html>
data/production/predictions_log.csv CHANGED
The diff for this file is too large to render. See raw diff
 
reports/evidently/drift_report.html CHANGED
The diff for this file is too large to render. See raw diff
 
reports/evidently/drift_report.json CHANGED
The diff for this file is too large to render. See raw diff