LeonardoMdSA committed on
Commit
88260af
·
1 Parent(s): 2c53c82

Removed 0.01 drift on frontend

Browse files
app/api/routes.py CHANGED
@@ -1,6 +1,4 @@
1
  # app/api/routes.py
2
- # /predict, /health, /dashboard, /monitoring/run
3
-
4
  from fastapi import APIRouter, BackgroundTasks, UploadFile, File, Request, HTTPException
5
  from fastapi.responses import JSONResponse
6
  from fastapi.templating import Jinja2Templates
@@ -11,7 +9,7 @@ from app.monitoring.drift import run_drift_check
11
  from app.monitoring.governance import run_governance_checks
12
 
13
  import pandas as pd
14
- import numpy as np # needed for handling list/nested drift scores
15
 
16
  templates = Jinja2Templates(directory="app/templates")
17
 
@@ -21,83 +19,45 @@ predictor = Predictor()
21
 
22
  # CSV upload & prediction
23
  @router.post("/predict")
24
- async def predict_file(
25
- background_tasks: BackgroundTasks,
26
- file: UploadFile = File(...)
27
- ):
28
  df = pd.read_csv(file.file)
29
 
30
  # ---- STRICT MODE: schema enforcement ----
31
  missing = set(predictor.features) - set(df.columns)
32
  if missing:
33
- raise HTTPException(
34
- status_code=400,
35
- detail=f"Invalid schema. Missing required columns: {sorted(missing)}"
36
- )
37
 
38
  # ---- Model inference ----
39
  preds, probas = predictor.predict(df)
40
-
41
  results = []
42
  for i, (pred, proba) in enumerate(zip(preds, probas)):
43
  results.append({
44
  "row": i,
45
  "probability": round(float(proba), 4),
46
  "prediction": "Default" if pred == 1 else "No Default",
47
- "risk_level": (
48
- "High" if proba >= 0.75 else
49
- "Medium" if proba >= 0.5 else
50
- "Low"
51
- )
52
  })
53
 
54
  # ---- Drift: run once immediately to return chart data ----
55
  reference_df = pd.read_csv("models/v1/reference_data.csv")
56
 
57
- # run_drift_check returns (report_path, drift_dict)
58
  _, drift_dict = run_drift_check(df[predictor.features], reference_df[predictor.features], "v1")
59
 
60
- # Handle float, list/array, or nested dict drift scores safely
61
  drift_for_chart = []
62
  for col, score in drift_dict.items():
63
- score_value = 0.01 # default minimal value
64
- if isinstance(score, dict):
65
- numeric_values = []
66
-
67
- def extract_numbers(d):
68
- for v in d.values():
69
- if isinstance(v, (int, float)):
70
- numeric_values.append(v)
71
- elif isinstance(v, dict):
72
- extract_numbers(v)
73
- elif isinstance(v, (list, np.ndarray)):
74
- numeric_values.extend([float(x) for x in v if isinstance(x, (int, float))])
75
-
76
- extract_numbers(score)
77
- if numeric_values:
78
- score_value = float(np.mean(numeric_values))
79
- elif isinstance(score, (list, np.ndarray)):
80
- score_value = float(np.mean([s for s in score if isinstance(s, (int, float))]))
81
- elif isinstance(score, (int, float)):
82
  score_value = float(score)
83
-
84
- # ensure finite number
85
- score_value = float(np.nan_to_num(score_value, nan=0.01, posinf=1.0, neginf=0.01))
86
- drift_for_chart.append({"column": col, "score": max(score_value, 0.01)})
87
 
88
  # Schedule full drift in background as before
89
- background_tasks.add_task(
90
- run_drift_check,
91
- df[predictor.features],
92
- reference_df[predictor.features],
93
- "v1"
94
- )
95
 
96
- return JSONResponse({
97
- "n_rows": len(results),
98
- "results": results,
99
- "drift": drift_for_chart
100
- })
101
 
102
 
103
  # Health
@@ -111,10 +71,7 @@ def health():
111
  def run_drift():
112
  current_df = load_production_data()
113
  report_path = run_drift_check(current_df)
114
- return {
115
- "status": "drift_check_completed",
116
- "report_path": report_path
117
- }
118
 
119
 
120
  # Monitoring pipeline
@@ -123,28 +80,13 @@ def monitoring_run(background_tasks: BackgroundTasks, model_version: str = "v1")
123
  current_data = pd.read_csv("data/processed/current_data.csv")
124
  reference_data = pd.read_csv("data/processed/credit_default_clean.csv")
125
 
126
- background_tasks.add_task(
127
- run_drift_check,
128
- current_data[predictor.features],
129
- reference_data[predictor.features],
130
- model_version
131
- )
132
- background_tasks.add_task(
133
- run_governance_checks,
134
- current_data,
135
- model_version=model_version
136
- )
137
 
138
- return {
139
- "status": "monitoring triggered",
140
- "model_version": model_version
141
- }
142
 
143
 
144
  # Dashboard
145
  @router.get("/dashboard")
146
  def dashboard(request: Request):
147
- return templates.TemplateResponse(
148
- "dashboard.html",
149
- {"request": request}
150
- )
 
1
  # app/api/routes.py
 
 
2
  from fastapi import APIRouter, BackgroundTasks, UploadFile, File, Request, HTTPException
3
  from fastapi.responses import JSONResponse
4
  from fastapi.templating import Jinja2Templates
 
9
  from app.monitoring.governance import run_governance_checks
10
 
11
  import pandas as pd
12
+ import numpy as np # for numeric handling
13
 
14
  templates = Jinja2Templates(directory="app/templates")
15
 
 
19
 
20
# CSV upload & prediction
@router.post("/predict")
async def predict_file(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
    """Score an uploaded CSV of applicants and return predictions plus drift data.

    Parameters:
        background_tasks: FastAPI background task queue; used to schedule the
            full drift report without delaying the response.
        file: uploaded CSV whose columns must include every model feature.

    Raises:
        HTTPException(400) when required feature columns are missing.

    Returns:
        JSONResponse with row count, per-row predictions, and per-column drift
        scores (floored at 0.01 so the dashboard chart always shows a bar).
    """
    df = pd.read_csv(file.file)

    # ---- STRICT MODE: schema enforcement ----
    missing = set(predictor.features) - set(df.columns)
    if missing:
        raise HTTPException(
            status_code=400,
            detail=f"Invalid schema. Missing required columns: {sorted(missing)}",
        )

    # ---- Model inference ----
    preds, probas = predictor.predict(df)
    results = []
    for i, (pred, proba) in enumerate(zip(preds, probas)):
        results.append({
            "row": i,
            "probability": round(float(proba), 4),
            "prediction": "Default" if pred == 1 else "No Default",
            "risk_level": "High" if proba >= 0.75 else "Medium" if proba >= 0.5 else "Low",
        })

    # ---- Drift: run once immediately to return chart data ----
    reference_df = pd.read_csv("models/v1/reference_data.csv")
    # run_drift_check returns (alerts, drift_scores) — only scores are charted here.
    _, drift_dict = run_drift_check(df[predictor.features], reference_df[predictor.features], "v1")

    drift_for_chart = []
    for col, score in drift_dict.items():
        # float() raises only TypeError/ValueError on bad input; catching
        # exactly those (instead of a blanket `except Exception`) avoids
        # masking unrelated bugs while still tolerating missing/odd scores.
        try:
            score_value = float(score)
        except (TypeError, ValueError):
            score_value = 0.01
        # Guard against NaN/inf leaking into the JSON payload: NaN survives
        # max() comparisons, so it must be filtered explicitly.
        if not np.isfinite(score_value):
            score_value = 0.01
        drift_for_chart.append({"column": col, "score": max(score_value, 0.01)})

    # Schedule the full drift report in the background as before.
    background_tasks.add_task(run_drift_check, df[predictor.features], reference_df[predictor.features], "v1")

    return JSONResponse({"n_rows": len(results), "results": results, "drift": drift_for_chart})
 
 
 
 
61
 
62
 
63
  # Health
 
71
def run_drift():
    # Manually trigger a drift check against current production data.
    current_df = load_production_data()
    # NOTE(review): the drift module's run_drift_check elsewhere takes
    # (current_data, reference_data, model_version) and returns an
    # (alerts, drift_scores) tuple — confirm this single-argument call
    # and its use of the return value as a report path.
    report_path = run_drift_check(current_df)
    return {"status": "drift_check_completed", "report_path": report_path}
 
 
 
75
 
76
 
77
# Monitoring pipeline
# NOTE(review): the route decorator for this endpoint is not visible in this
# diff view — confirm it against the full file.
def monitoring_run(background_tasks: BackgroundTasks, model_version: str = "v1"):
    # Schedule drift + governance checks in the background so the HTTP
    # response returns immediately instead of waiting on report generation.
    current_data = pd.read_csv("data/processed/current_data.csv")
    reference_data = pd.read_csv("data/processed/credit_default_clean.csv")

    # Drift check compares only the model's feature columns.
    background_tasks.add_task(run_drift_check, current_data[predictor.features], reference_data[predictor.features], model_version)
    # Governance checks receive the full current dataset.
    background_tasks.add_task(run_governance_checks, current_data, model_version=model_version)

    return {"status": "monitoring triggered", "model_version": model_version}
 
 
 
87
 
88
 
89
# Dashboard
@router.get("/dashboard")
def dashboard(request: Request):
    """Serve the monitoring dashboard HTML page."""
    # Jinja2 templates require the request object in the rendering context.
    context = {"request": request}
    return templates.TemplateResponse("dashboard.html", context)
 
 
 
app/inference/predictor.py CHANGED
@@ -1,5 +1,4 @@
1
  # model.predict wrapper
2
-
3
  import json
4
  import joblib
5
  import numpy as np
 
1
  # model.predict wrapper
 
2
  import json
3
  import joblib
4
  import numpy as np
app/monitoring/drift.py CHANGED
@@ -1,5 +1,4 @@
1
  # Evidently logic
2
- # app/monitoring/drift.py
3
  import os
4
  import pandas as pd
5
  from evidently.report import Report
@@ -32,10 +31,15 @@ def run_drift_check(current_data: pd.DataFrame, reference_data: pd.DataFrame, mo
32
  report.run(current_data=current_data, reference_data=reference_data)
33
  report.save_html(REPORT_PATH)
34
 
35
- # report.as_dict() returns a dict; in newer Evidently versions it can be a list
36
- report_metrics = report.as_dict() if hasattr(report, "as_dict") else list(report)
 
 
 
 
 
37
 
38
- # Run governance checks
39
- alerts = governance.check_metrics(report_metrics, model_version=model_version)
40
 
41
- return alerts, report_metrics
 
1
  # Evidently logic
 
2
  import os
3
  import pandas as pd
4
  from evidently.report import Report
 
31
  report.run(current_data=current_data, reference_data=reference_data)
32
  report.save_html(REPORT_PATH)
33
 
34
+ # Extract numeric drift scores per column
35
+ report_dict = report.as_dict() if hasattr(report, "as_dict") else {}
36
+ drift_scores = {}
37
+ for metric in report_dict.get("metrics", []):
38
+ if metric["metric"] == "DataDriftMetric":
39
+ for col_name, col_data in metric["result"].get("dataset_drift", {}).items():
40
+ drift_scores[col_name] = col_data.get("drift_score", 0.0)
41
 
42
+ # Run governance checks (keeps existing alerts)
43
+ alerts = governance.check_metrics(report_dict, model_version=model_version)
44
 
45
+ return alerts, drift_scores
reports/evidently/drift_report.html CHANGED
The diff for this file is too large to render. See raw diff