LeonardoMdSA commited on
Commit
ce96dae
Β·
1 Parent(s): b1725f1

working on the daemon

Browse files
README.md CHANGED
@@ -26,61 +26,64 @@ uvicorn app.main:app --reload
26
  # Repo Structure
27
 
28
  ml-inference-drift-service/
29
- β”‚
30
- β”œβ”€β”€ app/
31
- β”‚ β”œβ”€β”€ main.py # FastAPI entrypoint
32
- β”‚ β”œβ”€β”€ api/
33
- β”‚ β”‚ β”œβ”€β”€ routes.py # /predict, /health, /dashboard
34
- β”‚ β”‚ └── schemas.py # Pydantic input/output schemas
35
- β”‚ β”‚
36
- β”‚ β”œβ”€β”€ core/
37
- β”‚ β”‚ β”œβ”€β”€ config.py # env vars, paths, thresholds
38
- β”‚ β”‚ β”œβ”€β”€ logging.py # SQLite + file logging
39
- β”‚ β”‚ └── model_registry.py # model loading/versioning
40
- β”‚ β”‚
41
- β”‚ β”œβ”€β”€ inference/
42
- β”‚ β”‚ β”œβ”€β”€ predictor.py # model.predict wrapper
43
- β”‚ β”‚ └── preprocessing.py # feature handling
44
- β”‚ β”‚
45
- β”‚ β”œβ”€β”€ monitoring/
46
- β”‚ β”‚ β”œβ”€β”€ drift.py # Evidently logic
47
- β”‚ β”‚ β”œβ”€β”€ metrics.py # feature stats extraction
48
- β”‚ β”‚ └── alerts.py # threshold evaluation
49
- β”‚ β”‚
50
- β”‚ β”œβ”€β”€ db/
51
- β”‚ β”‚ β”œβ”€β”€ session.py # SQLite connection
52
- β”‚ β”‚ └── models.py # ORM-style tables (optional)
53
- β”‚ β”‚
54
- β”‚ β”œβ”€β”€ templates/
55
- β”‚ β”‚ └── dashboard.html # Evidently embed + metrics
56
- β”‚ β”‚
57
- β”‚ └── static/
58
- β”‚ └── styles.css
59
- β”‚
60
- β”œβ”€β”€ models/
61
- β”‚ β”œβ”€β”€ v1/
62
- β”‚ β”‚ β”œβ”€β”€ model.pkl
63
- β”‚ β”‚ └── reference_data.csv
64
- β”‚ └── v2/
65
- β”‚ └── ...
66
- β”‚
67
- β”œβ”€β”€ scripts/
68
- β”‚ β”œβ”€β”€ train.py # offline training
69
- β”‚ β”œβ”€β”€ evaluate.py # offline evaluation
70
- β”‚ └── run_drift_check.py # batch drift job
71
- β”‚
72
- β”œβ”€β”€ reports/
73
- β”‚ └── evidently/
74
- β”‚ └── drift_report.html
75
- β”‚
76
- β”œβ”€β”€ tests/
77
- β”‚ β”œβ”€β”€ test_api.py
78
- β”‚ β”œβ”€β”€ test_drift.py
79
- β”‚ └── test_schemas.py
80
- β”‚
81
- β”œβ”€β”€ Dockerfile
82
- β”œβ”€β”€ Dockerfile.hf # HF Spaces–compatible
83
- β”œβ”€β”€ requirements.txt
84
- β”œβ”€β”€ requirements-dev.txt
85
- β”œβ”€β”€ README.md
86
- └── .env.example
 
 
 
 
26
  # Repo Structure
27
 
28
  ml-inference-drift-service/
29
+ Dockerfile
30
+ LICENSE
31
+ README.md
32
+ requirements-dev.txt
33
+ requirements.txt
34
+ app/
35
+ main.py
36
+ api/
37
+ background_drift.py
38
+ dashboard_data.py
39
+ routes.py
40
+ schemas.py
41
+ traffic_daemon.py
42
+ core/
43
+ config.py
44
+ logging.py
45
+ templates.py
46
+ inference/
47
+ predictor.py
48
+ monitoring/
49
+ data_loader.py
50
+ drift.py
51
+ governance.py
52
+ static/
53
+ styles.css
54
+ templates/
55
+ dashboard.html
56
+ utils/
57
+ alerts.py
58
+ data/
59
+ processed/
60
+ credit_default_clean.csv
61
+ current_data.csv
62
+ production/
63
+ predictions_log.csv
64
+ raw/
65
+ credit_default.csv
66
+ database/
67
+ logs/
68
+ models/
69
+ v1/
70
+ features.json
71
+ reference_data.csv
72
+ v2/
73
+ reports/
74
+ evidently/
75
+ drift_report.html
76
+ drift_report.json
77
+ scripts/
78
+ prepare_data.py
79
+ simulate_inference.py
80
+ train.py
81
+ tests/
82
+ conftest.py
83
+ integration/
84
+ test_api.py
85
+ test_governance.json
86
+ test_governance.py
87
+ test_run_drift.py
88
+ unit/
89
+ test_schemas.py
app/api/background_drift.py CHANGED
@@ -13,7 +13,7 @@ REFERENCE_PATH = "models/v1/reference_data.csv"
13
  PROD_LOG_PATH = "data/production/predictions_log.csv"
14
  DASHBOARD_JSON = "reports/evidently/drift_report.json"
15
 
16
- MAX_ROWS = 5000 # rolling window
17
  os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
18
 
19
  async def drift_loop(interval_seconds: int = 10):
@@ -25,15 +25,12 @@ async def drift_loop(interval_seconds: int = 10):
25
 
26
  prod_df = pd.read_csv(PROD_LOG_PATH)
27
 
28
- # Retention window
29
  if len(prod_df) > MAX_ROWS:
30
  prod_df = prod_df.tail(MAX_ROWS)
31
  prod_df.to_csv(PROD_LOG_PATH, index=False)
32
 
33
- # Keep only rows with all required features
34
- missing_features = set(predictor.features) - set(prod_df.columns)
35
- if missing_features:
36
- print(f"Skipping drift check, missing features: {missing_features}")
37
  await asyncio.sleep(interval_seconds)
38
  continue
39
 
@@ -44,26 +41,34 @@ async def drift_loop(interval_seconds: int = 10):
44
 
45
  reference_df = pd.read_csv(REFERENCE_PATH)
46
 
47
- # ---- Run drift on features only ----
48
  _, drift_dict = run_drift_check(
49
  prod_df[predictor.features],
50
  reference_df[predictor.features],
51
  model_version="v1"
52
  )
53
 
 
 
 
 
 
 
 
 
 
54
  dashboard_payload = {
55
  "n_rows": len(prod_df),
56
- "results": [],
57
  "drift": [
58
  {"column": col, "score": float(score)}
59
  for col, score in drift_dict.items()
60
  ],
61
  }
62
 
63
- tmp_path = DASHBOARD_JSON + ".tmp"
64
- with open(tmp_path, "w") as f:
65
  json.dump(dashboard_payload, f, indent=2)
66
- os.replace(tmp_path, DASHBOARD_JSON)
67
 
68
  except Exception as e:
69
  print("Drift loop error:", e)
 
13
  PROD_LOG_PATH = "data/production/predictions_log.csv"
14
  DASHBOARD_JSON = "reports/evidently/drift_report.json"
15
 
16
+ MAX_ROWS = 5000
17
  os.makedirs(os.path.dirname(DASHBOARD_JSON), exist_ok=True)
18
 
19
  async def drift_loop(interval_seconds: int = 10):
 
25
 
26
  prod_df = pd.read_csv(PROD_LOG_PATH)
27
 
 
28
  if len(prod_df) > MAX_ROWS:
29
  prod_df = prod_df.tail(MAX_ROWS)
30
  prod_df.to_csv(PROD_LOG_PATH, index=False)
31
 
32
+ missing = set(predictor.features) - set(prod_df.columns)
33
+ if missing:
 
 
34
  await asyncio.sleep(interval_seconds)
35
  continue
36
 
 
41
 
42
  reference_df = pd.read_csv(REFERENCE_PATH)
43
 
 
44
  _, drift_dict = run_drift_check(
45
  prod_df[predictor.features],
46
  reference_df[predictor.features],
47
  model_version="v1"
48
  )
49
 
50
+ # ---- RECENT PREDICTIONS FIX ----
51
+ recent_results = []
52
+ if "prediction" in prod_df.columns:
53
+ recent_results = (
54
+ prod_df[["prediction"]]
55
+ .tail(10)
56
+ .to_dict(orient="records")
57
+ )
58
+
59
  dashboard_payload = {
60
  "n_rows": len(prod_df),
61
+ "results": recent_results,
62
  "drift": [
63
  {"column": col, "score": float(score)}
64
  for col, score in drift_dict.items()
65
  ],
66
  }
67
 
68
+ tmp = DASHBOARD_JSON + ".tmp"
69
+ with open(tmp, "w") as f:
70
  json.dump(dashboard_payload, f, indent=2)
71
+ os.replace(tmp, DASHBOARD_JSON)
72
 
73
  except Exception as e:
74
  print("Drift loop error:", e)
app/api/traffic_daemon.py CHANGED
@@ -2,8 +2,8 @@
2
  import asyncio
3
  import pandas as pd
4
  import random
5
- import requests
6
  import os
 
7
 
8
  API_URL = "http://localhost:8000/predict"
9
  SOURCE_DATA = "data/processed/current_data.csv"
@@ -12,7 +12,7 @@ MIN_SLEEP = 2
12
  MAX_SLEEP = 8
13
  MIN_BATCH = 1
14
  MAX_BATCH = 5
15
- STARTUP_DELAY = 10 # allow server startup
16
 
17
  async def traffic_loop():
18
  await asyncio.sleep(STARTUP_DELAY)
@@ -24,23 +24,22 @@ async def traffic_loop():
24
  df = pd.read_csv(SOURCE_DATA)
25
  print("Traffic daemon started.")
26
 
27
- while True:
28
- try:
29
- batch_size = random.randint(MIN_BATCH, MAX_BATCH)
30
- sample = df.sample(batch_size)
31
- csv_bytes = sample.to_csv(index=False).encode("utf-8")
 
32
 
33
- # ---- Increased timeout to avoid ReadTimeout ----
34
- response = requests.post(
35
- API_URL,
36
- files={"file": ("sample.csv", csv_bytes, "text/csv")},
37
- timeout=60,
38
- )
39
 
40
- if response.status_code != 200:
41
- print("Traffic daemon warning:", response.status_code)
42
 
43
- except Exception as e:
44
- print("Traffic daemon error:", e)
45
 
46
- await asyncio.sleep(random.uniform(MIN_SLEEP, MAX_SLEEP))
 
2
  import asyncio
3
  import pandas as pd
4
  import random
 
5
  import os
6
+ import httpx
7
 
8
  API_URL = "http://localhost:8000/predict"
9
  SOURCE_DATA = "data/processed/current_data.csv"
 
12
  MAX_SLEEP = 8
13
  MIN_BATCH = 1
14
  MAX_BATCH = 5
15
+ STARTUP_DELAY = 7
16
 
17
  async def traffic_loop():
18
  await asyncio.sleep(STARTUP_DELAY)
 
24
  df = pd.read_csv(SOURCE_DATA)
25
  print("Traffic daemon started.")
26
 
27
+ async with httpx.AsyncClient(timeout=60.0) as client:
28
+ while True:
29
+ try:
30
+ batch_size = random.randint(MIN_BATCH, MAX_BATCH)
31
+ sample = df.sample(batch_size)
32
+ csv_bytes = sample.to_csv(index=False).encode("utf-8")
33
 
34
+ resp = await client.post(
35
+ API_URL,
36
+ files={"file": ("sample.csv", csv_bytes, "text/csv")}
37
+ )
 
 
38
 
39
+ if resp.status_code != 200:
40
+ print("Traffic daemon warning:", resp.status_code)
41
 
42
+ except Exception as e:
43
+ print("Traffic daemon error:", e)
44
 
45
+ await asyncio.sleep(random.uniform(MIN_SLEEP, MAX_SLEEP))
app/monitoring/governance.py CHANGED
@@ -84,8 +84,11 @@ class Governance:
84
  # Log and send alerts
85
  for alert in alerts:
86
  self.log_alert(alert, model_version)
 
87
  send_email_alert(alert)
88
  send_slack_alert(alert)
 
 
89
 
90
  return alerts
91
 
 
84
  # Log and send alerts
85
  for alert in alerts:
86
  self.log_alert(alert, model_version)
87
+ try:
88
  send_email_alert(alert)
89
  send_slack_alert(alert)
90
+ except Exception:
91
+ pass
92
 
93
  return alerts
94
 
data/production/predictions_log.csv DELETED
@@ -1,9 +0,0 @@
1
- credit_limit,age,pay_delay_sep,pay_delay_aug,bill_amt_sep,bill_amt_aug,pay_amt_sep,pay_amt_aug,target,model_prediction,model_probability,model_risk_level,model_version,timestamp
2
- 70000.0,48,0,0,20744.0,22093.0,2000.0,2000.0,0,0,0.2563046708563498,Low,v1,2026-01-14 19:33:08.201187+00:00
3
- 390000.0,42,0,0,310075.0,184647.0,10021.0,4000.0,0,0,0.07238720996682343,Low,v1,2026-01-14 19:33:08.201187+00:00
4
- 230000.0,50,-2,-2,2789.0,2942.0,2942.0,2520.0,0,0,0.06061841289885345,Low,v1,2026-01-14 19:33:08.201187+00:00
5
- 40000.0,47,2,2,52358.0,54892.0,4000.0,0.0,1,1,0.608097803732095,Medium,v1,2026-01-14 19:35:59.632246+00:00
6
- 140000.0,41,0,0,130138.0,132726.0,4756.0,4912.0,1,0,0.19300589506044843,Low,v1,2026-01-14 19:35:59.632246+00:00
7
- 50000.0,26,-1,-1,5052.0,0.0,0.0,0.0,0,0,0.12125611010883464,Low,v1,2026-01-14 19:37:40.778834+00:00
8
- 400000.0,42,-1,-1,44198.0,10132.0,10132.0,16932.0,0,0,0.06632404342712281,Low,v1,2026-01-14 19:39:17.748870+00:00
9
- 280000.0,45,-2,-2,26573.0,17597.0,18388.0,22302.0,0,0,0.036231471756518835,Low,v1,2026-01-14 19:39:17.748870+00:00
 
 
 
 
 
 
 
 
 
 
reports/evidently/drift_report.html CHANGED
The diff for this file is too large to render. See raw diff
 
reports/evidently/drift_report.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "n_rows": 8,
3
  "results": [],
4
  "drift": [
5
  {
@@ -8,35 +8,35 @@
8
  },
9
  {
10
  "column": "age",
11
- "score": 0.8679104867707123
12
  },
13
  {
14
  "column": "bill_amt_aug",
15
- "score": 0.23085843753803348
16
  },
17
  {
18
  "column": "bill_amt_sep",
19
- "score": 0.37457217443848057
20
  },
21
  {
22
  "column": "credit_limit",
23
- "score": 0.36873847560130574
24
  },
25
  {
26
  "column": "pay_amt_aug",
27
- "score": 0.20876449446182804
28
  },
29
  {
30
  "column": "pay_amt_sep",
31
- "score": 0.27529913358402724
32
  },
33
  {
34
  "column": "pay_delay_aug",
35
- "score": 0.32956762578626103
36
  },
37
  {
38
  "column": "pay_delay_sep",
39
- "score": 0.4885655407858251
40
  }
41
  ]
42
  }
 
1
  {
2
+ "n_rows": 20,
3
  "results": [],
4
  "drift": [
5
  {
 
8
  },
9
  {
10
  "column": "age",
11
+ "score": 0.47511682619708956
12
  },
13
  {
14
  "column": "bill_amt_aug",
15
+ "score": 0.18806444456731744
16
  },
17
  {
18
  "column": "bill_amt_sep",
19
+ "score": 0.24552730613716447
20
  },
21
  {
22
  "column": "credit_limit",
23
+ "score": 0.36053044124390277
24
  },
25
  {
26
  "column": "pay_amt_aug",
27
+ "score": 0.17193500059085817
28
  },
29
  {
30
  "column": "pay_amt_sep",
31
+ "score": 0.6302425864834968
32
  },
33
  {
34
  "column": "pay_delay_aug",
35
+ "score": 0.2880354597906329
36
  },
37
  {
38
  "column": "pay_delay_sep",
39
+ "score": 0.37250685167671616
40
  }
41
  ]
42
  }