Gutema-1990 committed on
Commit
2330821
·
1 Parent(s): 4e5e31b

new types of explainability added

Browse files
Files changed (1) hide show
  1. api/app.py +121 -197
api/app.py CHANGED
@@ -1,99 +1,84 @@
1
  from __future__ import annotations
2
 
3
  import json
4
- import os
5
  from pathlib import Path
6
  from typing import Any, Dict, List, Optional
7
 
8
  import joblib
9
  import numpy as np
10
  import pandas as pd
11
- import xgboost as xgb
12
  from fastapi import FastAPI, HTTPException
13
- from huggingface_hub import hf_hub_download
14
  from pydantic import BaseModel, Field
 
 
 
 
15
 
16
  # Compatibility shim for pickles created with newer sklearn that include _RemainderColsList
17
  import sklearn.compose._column_transformer as _ct # type: ignore
18
-
19
  if not hasattr(_ct, "_RemainderColsList"):
20
  class _RemainderColsList(list): # type: ignore
21
  pass
22
-
23
  _ct._RemainderColsList = _RemainderColsList
24
 
25
-
26
- # -----------------------------
27
- # Paths & configuration
28
- # -----------------------------
29
  ROOT = Path(__file__).resolve().parents[1]
30
  MODEL_DIR = Path(__file__).resolve().parent / "model"
31
-
32
  BOOSTER_PATH = MODEL_DIR / "xgboost_booster.json"
33
  META_PATH = MODEL_DIR / "explain_meta.json"
34
 
35
  HF_MODEL_REPO = os.getenv("HF_MODEL_REPO", "Gutema/frankscore-model-artifact")
36
  HF_MODEL_REVISION = os.getenv("HF_MODEL_REVISION", "main")
37
- HF_MODEL_FILENAME = os.getenv("HF_MODEL_FILENAME", "xgboost_pipeline.pkl")
38
-
39
 
40
- def download_pipeline_artifact() -> Path:
41
- """
42
- Download the .pkl artifact from Hugging Face Hub (cached locally).
43
- """
44
- try:
45
- p = hf_hub_download(
46
  repo_id=HF_MODEL_REPO,
47
- filename=HF_MODEL_FILENAME,
48
  revision=HF_MODEL_REVISION,
49
  )
50
- return Path(p)
51
- except Exception as e:
52
- raise RuntimeError(
53
- f"Failed to download model artifact from HF repo={HF_MODEL_REPO} "
54
- f"revision={HF_MODEL_REVISION} filename={HF_MODEL_FILENAME}: {e}"
55
- ) from e
56
-
57
-
58
- def require_local_file(p: Path, label: str) -> None:
59
- if not p.exists():
60
- raise FileNotFoundError(f"{label} missing at {p}")
61
-
62
-
63
- # -----------------------------
64
- # Load meta (local JSON)
65
- # -----------------------------
66
- require_local_file(META_PATH, "Explainability meta")
67
- require_local_file(BOOSTER_PATH, "Booster file")
68
- META: Dict[str, Any] = json.loads(META_PATH.read_text())
69
-
 
 
 
 
 
 
 
 
70
 
71
- # -----------------------------
72
- # Feature groups (unchanged)
73
- # -----------------------------
 
74
  RAW_FEATURE_SET = set((META.get("raw_num_cols") or []) + (META.get("raw_cat_cols") or []))
75
-
76
  FEATURE_GROUPS = {
77
- "Borrowing History & Maturity": [
78
- "account_age_days",
79
- "avg_past_amount",
80
- "avg_past_daily_burden",
81
- "avg_time_bw_loans",
82
- "borrower_history_strength",
83
- "days_since_last_loan",
84
- "loan_frequency_per_year",
85
- "num_previous_loans",
86
- "std_past_amount",
87
- "std_past_daily_burden",
88
- "trend_in_amount",
89
- "trend_in_burden",
90
- ],
91
- "Repayment Speed & Delinquency": [
92
  "num_previous_defaults",
93
  "past_default_rate",
94
  "repayment_consistency",
 
95
  ],
96
- "Current Loan Size, Pricing & Burden": [
97
  "Total_Amount",
98
  "Total_Amount_to_Repay",
99
  "amount_bucket",
@@ -102,84 +87,43 @@ FEATURE_GROUPS = {
102
  "duration",
103
  "duration_bucket",
104
  "interest_rate",
105
- ],
106
- "Affordability & Risk Ratios": [
107
  "amount_ratio",
108
  "burden_ratio",
109
- "repayment_intensity",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
110
  ],
111
- "Seasonality & Timing": [
 
112
  "days_to_local_festival",
113
  "days_to_salary_day",
114
  "month",
115
  "quarter",
116
  "week_of_year",
117
  ],
118
- "Operational, Referral & Lender Signals": [
119
- "lender_exposure_ratio",
120
- "lender_id",
121
- "lender_risk_profile",
122
- ],
123
- "Time-based Trends & Volatility": [
124
- "latest_amount_ma3",
125
- ],
126
  }
127
-
128
  FEATURE_GROUP_LOOKUP: Dict[str, str] = {}
129
  for group, variables in FEATURE_GROUPS.items():
130
  for var in variables:
131
  FEATURE_GROUP_LOOKUP[var] = group
132
 
133
-
134
- # -----------------------------
135
- # FastAPI app
136
- # -----------------------------
137
  app = FastAPI(title="FrankScore", version="1.0.0")
138
 
139
 
140
- # Globals populated at startup
141
- PIPELINE = None
142
- PREPROCESS = None
143
- EXPECTED_FEATURES: List[str] = []
144
- NUM_FEATURES: List[str] = []
145
- CAT_FEATURES: List[str] = []
146
- PRE_FEATURE_NAMES: List[str] = []
147
-
148
-
149
- @app.on_event("startup")
150
- def _startup() -> None:
151
- """
152
- Download + load pipeline on startup (safer than import-time).
153
- """
154
- global PIPELINE, PREPROCESS, EXPECTED_FEATURES, NUM_FEATURES, CAT_FEATURES, PRE_FEATURE_NAMES
155
-
156
- model_path = download_pipeline_artifact()
157
- PIPELINE = joblib.load(model_path)
158
-
159
- EXPECTED_FEATURES = list(getattr(PIPELINE, "feature_names_in_", []))
160
-
161
- PREPROCESS = PIPELINE.named_steps.get("preprocess") if hasattr(PIPELINE, "named_steps") else None
162
- if PREPROCESS is None:
163
- raise RuntimeError("Pipeline missing 'preprocess' step; cannot infer columns.")
164
-
165
- if not EXPECTED_FEATURES:
166
- EXPECTED_FEATURES = list(getattr(PREPROCESS, "feature_names_in_", []))
167
- if not EXPECTED_FEATURES:
168
- raise RuntimeError("Unable to determine expected feature names from the pipeline.")
169
-
170
- _col_map = {name: cols for name, _, cols in getattr(PREPROCESS, "transformers_", [])}
171
- NUM_FEATURES = list(_col_map.get("num", []))
172
- CAT_FEATURES = list(_col_map.get("cat", []))
173
-
174
- # From meta if present; fallback to preprocess get_feature_names_out
175
- PRE_FEATURE_NAMES = META.get("pre_feature_names") or list(
176
- getattr(PREPROCESS, "get_feature_names_out", lambda: [])()
177
- )
178
-
179
-
180
- # -----------------------------
181
- # Schemas
182
- # -----------------------------
183
  class PredictionRequest(BaseModel):
184
  records: List[Dict[str, Any]] = Field(..., description="List of borrower feature dictionaries")
185
 
@@ -209,6 +153,9 @@ class FeatureContribution(BaseModel):
209
  class GroupContribution(BaseModel):
210
  group: str
211
  total_shap_value: float
 
 
 
212
  features: List[FeatureContribution]
213
 
214
 
@@ -233,32 +180,18 @@ class PredictExplainResponse(BaseModel):
233
  results: List[PredictExplainItem]
234
 
235
 
236
- # -----------------------------
237
- # Helpers
238
- # -----------------------------
239
- def _require_loaded() -> None:
240
- if PIPELINE is None or PREPROCESS is None:
241
- raise HTTPException(status_code=503, detail="Model not loaded yet. Please retry.")
242
-
243
-
244
  def prepare_frame(records: List[Dict[str, Any]]) -> pd.DataFrame:
245
- _require_loaded()
246
-
247
  if not records:
248
  raise HTTPException(status_code=400, detail="No records provided.")
249
  df = pd.DataFrame(records)
250
-
251
  for col in EXPECTED_FEATURES:
252
  if col not in df.columns:
253
  df[col] = np.nan
254
-
255
  df = df[EXPECTED_FEATURES]
256
-
257
  if NUM_FEATURES:
258
  df[NUM_FEATURES] = df[NUM_FEATURES].apply(pd.to_numeric, errors="coerce")
259
  if CAT_FEATURES:
260
  df[CAT_FEATURES] = df[CAT_FEATURES].astype("object")
261
-
262
  return df
263
 
264
 
@@ -293,11 +226,57 @@ def _base_feature_name(name: str) -> str:
293
  return base
294
 
295
 
296
- def get_booster() -> xgb.Booster:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
297
  if not hasattr(get_booster, "_booster"):
298
  booster = xgb.Booster()
299
  booster.load_model(str(BOOSTER_PATH))
300
-
301
  base_score = booster.attr("base_score")
302
  if base_score:
303
  try:
@@ -310,36 +289,22 @@ def get_booster() -> xgb.Booster:
310
  cleaned_val = "0.5"
311
  booster.set_param({"base_score": cleaned_val})
312
  booster.set_attr(base_score=cleaned_val)
313
-
314
  get_booster._booster = booster
315
  return get_booster._booster
316
 
317
 
318
- # -----------------------------
319
- # Endpoints
320
- # -----------------------------
321
- @app.get("/health")
322
- def health() -> Dict[str, str]:
323
- # Do not crash health if model isn't loaded yet
324
- return {
325
- "status": "ok",
326
- "hf_repo": HF_MODEL_REPO,
327
- "hf_revision": HF_MODEL_REVISION,
328
- "hf_filename": HF_MODEL_FILENAME,
329
- "meta_path": str(META_PATH),
330
- "booster_path": str(BOOSTER_PATH),
331
- "loaded": str(PIPELINE is not None),
332
- }
333
-
334
-
335
  @app.post("/predict", response_model=PredictionResponse)
336
  def predict(req: PredictionRequest) -> PredictionResponse:
337
- _require_loaded()
338
  frame = prepare_frame(req.records)
339
  probas = PIPELINE.predict_proba(frame)[:, 1]
340
  return PredictionResponse(probabilities=probas.tolist())
341
 
342
 
 
 
 
 
 
343
  @app.post("/score", response_model=ScoreResponse)
344
  def score(req: ScoreRequest) -> ScoreResponse:
345
  if not req.probabilities:
@@ -351,51 +316,33 @@ def score(req: ScoreRequest) -> ScoreResponse:
351
 
352
  @app.post("/explain", response_model=ExplainResponse)
353
  def explain(req: ExplainRequest) -> ExplainResponse:
354
- _require_loaded()
355
  if not req.records:
356
  raise HTTPException(status_code=400, detail="No records provided.")
357
-
358
  frame = prepare_frame(req.records)
359
  probas = PIPELINE.predict_proba(frame)[:, 1]
360
-
361
  booster = get_booster()
362
  X_proc = PREPROCESS.transform(frame)
363
-
364
  feat_names = np.array(PRE_FEATURE_NAMES) if PRE_FEATURE_NAMES else np.array([f"f{i}" for i in range(X_proc.shape[1])])
365
  sanitized_names = [_sanitize_feature_name(n) for n in feat_names]
366
-
367
  dmat = xgb.DMatrix(X_proc, feature_names=sanitized_names)
368
  contribs = booster.predict(dmat, pred_contribs=True)
369
-
370
  if contribs.shape[1] != X_proc.shape[1] + 1:
371
  raise HTTPException(status_code=500, detail="Unexpected contribution shape from booster.")
372
-
373
  base_vals = contribs[:, -1]
374
  feat_contribs = contribs[:, :-1]
375
-
376
  explanations: List[ExplainItem] = []
377
  for i in range(feat_contribs.shape[0]):
378
  row_vals = feat_contribs[i]
379
-
380
  group_totals: Dict[str, float] = {}
381
  group_details: Dict[str, List[FeatureContribution]] = {}
382
-
383
  for name, val in zip(feat_names, row_vals):
384
  base = _base_feature_name(str(name))
385
  group = FEATURE_GROUP_LOOKUP.get(base, "Other")
386
-
387
  group_totals[group] = group_totals.get(group, 0.0) + float(val)
388
  group_details.setdefault(group, []).append(
389
  FeatureContribution(feature=str(name), shap_value=float(val))
390
  )
391
-
392
- group_contribs: List[GroupContribution] = []
393
- for grp, total in sorted(group_totals.items(), key=lambda kv: abs(kv[1]), reverse=True):
394
- feats = sorted(group_details.get(grp, []), key=lambda fc: abs(fc.shap_value), reverse=True)
395
- if req.top_k:
396
- feats = feats[: req.top_k]
397
- group_contribs.append(GroupContribution(group=grp, total_shap_value=total, features=feats))
398
-
399
  explanations.append(
400
  ExplainItem(
401
  probability=float(probas[i]),
@@ -403,59 +350,39 @@ def explain(req: ExplainRequest) -> ExplainResponse:
403
  group_contributions=group_contribs,
404
  )
405
  )
406
-
407
  return ExplainResponse(explanations=explanations)
408
 
409
 
410
  @app.post("/predict_explain", response_model=PredictExplainResponse)
411
  def predict_explain(req: ExplainRequest) -> PredictExplainResponse:
412
- _require_loaded()
413
  if not req.records:
414
  raise HTTPException(status_code=400, detail="No records provided.")
415
-
416
  frame = prepare_frame(req.records)
417
  probas = PIPELINE.predict_proba(frame)[:, 1]
418
-
419
  booster = get_booster()
420
  X_proc = PREPROCESS.transform(frame)
421
-
422
  feat_names = np.array(PRE_FEATURE_NAMES) if PRE_FEATURE_NAMES else np.array([f"f{i}" for i in range(X_proc.shape[1])])
423
  sanitized_names = [_sanitize_feature_name(n) for n in feat_names]
424
-
425
  dmat = xgb.DMatrix(X_proc, feature_names=sanitized_names)
426
  contribs = booster.predict(dmat, pred_contribs=True)
427
-
428
  if contribs.shape[1] != X_proc.shape[1] + 1:
429
  raise HTTPException(status_code=500, detail="Unexpected contribution shape from booster.")
430
-
431
  base_vals = contribs[:, -1]
432
  feat_contribs = contribs[:, :-1]
433
-
434
  items: List[PredictExplainItem] = []
435
  for i in range(feat_contribs.shape[0]):
436
  row_vals = feat_contribs[i]
437
-
438
  group_totals: Dict[str, float] = {}
439
  group_details: Dict[str, List[FeatureContribution]] = {}
440
-
441
  for name, val in zip(feat_names, row_vals):
442
  base = _base_feature_name(str(name))
443
  group = FEATURE_GROUP_LOOKUP.get(base, "Other")
444
-
445
  group_totals[group] = group_totals.get(group, 0.0) + float(val)
446
  group_details.setdefault(group, []).append(
447
  FeatureContribution(feature=str(name), shap_value=float(val))
448
  )
449
-
450
- group_contribs: List[GroupContribution] = []
451
- for grp, total in sorted(group_totals.items(), key=lambda kv: abs(kv[1]), reverse=True):
452
- feats = sorted(group_details.get(grp, []), key=lambda fc: abs(fc.shap_value), reverse=True)
453
- if req.top_k:
454
- feats = feats[: req.top_k]
455
- group_contribs.append(GroupContribution(group=grp, total_shap_value=total, features=feats))
456
-
457
  score_val = int(round(float(pd_to_score(np.array([probas[i]]))[0])))
458
-
459
  items.append(
460
  PredictExplainItem(
461
  probability=float(probas[i]),
@@ -464,7 +391,4 @@ def predict_explain(req: ExplainRequest) -> PredictExplainResponse:
464
  group_contributions=group_contribs,
465
  )
466
  )
467
-
468
  return PredictExplainResponse(results=items)
469
-
470
- # DEPLOY_MARKER: 2026-01-24_1
 
1
  from __future__ import annotations
2
 
3
  import json
 
4
  from pathlib import Path
5
  from typing import Any, Dict, List, Optional
6
 
7
  import joblib
8
  import numpy as np
9
  import pandas as pd
 
10
  from fastapi import FastAPI, HTTPException
 
11
  from pydantic import BaseModel, Field
12
+ import xgboost as xgb
13
+
14
+ import os
15
+ from huggingface_hub import hf_hub_download
16
 
17
  # Compatibility shim for pickles created with newer sklearn that include _RemainderColsList
18
  import sklearn.compose._column_transformer as _ct # type: ignore
 
# Register a stand-in class only when the installed scikit-learn does not
# already provide one, so pickles that reference the name can be resolved.
# NOTE(review): the stand-in derives from ``list``; confirm that this matches
# how the upstream class stores its items before relying on its contents.
if not hasattr(_ct, "_RemainderColsList"):
    _RemainderColsList = type("_RemainderColsList", (list,), {})  # type: ignore
    setattr(_ct, "_RemainderColsList", _RemainderColsList)
23
 
 
 
 
 
# Filesystem layout: ROOT is the parent of this file's directory; the booster
# and explainability metadata live in a "model" directory next to this file.
ROOT = Path(__file__).resolve().parents[1]
MODEL_DIR = Path(__file__).resolve().parent / "model"
BOOSTER_PATH = MODEL_DIR / "xgboost_booster.json"
META_PATH = MODEL_DIR / "explain_meta.json"

# Hugging Face Hub coordinates of the pickled pipeline (overridable via env).
HF_MODEL_REPO = os.getenv("HF_MODEL_REPO", "Gutema/frankscore-model-artifact")
HF_MODEL_REVISION = os.getenv("HF_MODEL_REVISION", "main")

# The pipeline pickle is fetched at import time; any failure is re-raised as a
# RuntimeError that names the repo, with the original exception chained.
try:
    MODEL_PATH = Path(
        hf_hub_download(
            repo_id=HF_MODEL_REPO,
            filename="xgboost_pipeline.pkl",
            revision=HF_MODEL_REVISION,
        )
    )
except Exception as e:
    raise RuntimeError(f"Failed to download model artifact from HF repo={HF_MODEL_REPO}: {e}") from e

# Fail fast when a required artifact is absent. Each path is checked exactly
# once, in the same precedence order as before (meta, booster, model).
if not META_PATH.exists():
    raise FileNotFoundError(f"Explainability meta missing at {META_PATH}")
if not BOOSTER_PATH.exists():
    raise FileNotFoundError(f"Booster file missing at {BOOSTER_PATH}")
if not MODEL_PATH.exists():
    raise FileNotFoundError(f"Model file missing at {MODEL_PATH}")

# SECURITY: joblib.load unpickles the downloaded file, which can execute
# arbitrary code. Only point HF_MODEL_REPO / HF_MODEL_REVISION at a trusted
# source (NOTE(review): consider pinning the revision to a commit hash).
PIPELINE = joblib.load(MODEL_PATH)
# JSON is read as UTF-8 explicitly instead of relying on the locale default.
META = json.loads(META_PATH.read_text(encoding="utf-8"))

# Raw input columns: prefer the pipeline's own record, then fall back to the
# "preprocess" step's record; give up if neither is available.
EXPECTED_FEATURES = list(getattr(PIPELINE, "feature_names_in_", []))
PREPROCESS = PIPELINE.named_steps.get("preprocess") if hasattr(PIPELINE, "named_steps") else None
if PREPROCESS is None:
    raise RuntimeError("Pipeline missing 'preprocess' step; cannot infer columns.")

if not EXPECTED_FEATURES:
    EXPECTED_FEATURES = list(getattr(PREPROCESS, "feature_names_in_", []))
if not EXPECTED_FEATURES:
    raise RuntimeError("Unable to determine expected feature names from the pipeline.")

# Column lists of the fitted transformers registered under "num" and "cat";
# either list is empty when no transformer with that name exists.
_col_map = {name: cols for name, _, cols in getattr(PREPROCESS, "transformers_", [])}
NUM_FEATURES = list(_col_map.get("num", []))
CAT_FEATURES = list(_col_map.get("cat", []))
# Post-preprocessing feature names: metadata wins; otherwise ask the
# preprocessor (an empty list if it has no get_feature_names_out).
PRE_FEATURE_NAMES = META.get("pre_feature_names") or list(
    getattr(PREPROCESS, "get_feature_names_out", lambda: [])()
)
RAW_FEATURE_SET = set((META.get("raw_num_cols") or []) + (META.get("raw_cat_cols") or []))
 
74
  FEATURE_GROUPS = {
75
+ "Repayment Activity": [
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  "num_previous_defaults",
77
  "past_default_rate",
78
  "repayment_consistency",
79
+ "repayment_intensity",
80
  ],
81
+ "Loan Amount & Burden": [
82
  "Total_Amount",
83
  "Total_Amount_to_Repay",
84
  "amount_bucket",
 
87
  "duration",
88
  "duration_bucket",
89
  "interest_rate",
 
 
90
  "amount_ratio",
91
  "burden_ratio",
92
+ "lender_exposure_ratio",
93
+ ],
94
+ "Borrowing History": [
95
+ "account_age_days",
96
+ "avg_past_amount",
97
+ "avg_past_daily_burden",
98
+ "avg_time_bw_loans",
99
+ "borrower_history_strength",
100
+ "days_since_last_loan",
101
+ "loan_frequency_per_year",
102
+ "num_previous_loans",
103
+ "std_past_amount",
104
+ "std_past_daily_burden",
105
+ "trend_in_amount",
106
+ "trend_in_burden",
107
+ "lender_id",
108
+ "lender_risk_profile",
109
  ],
110
+ "Spending & Transactions": [
111
+ "latest_amount_ma3",
112
  "days_to_local_festival",
113
  "days_to_salary_day",
114
  "month",
115
  "quarter",
116
  "week_of_year",
117
  ],
 
 
 
 
 
 
 
 
118
  }
 
# Reverse index: raw feature name -> name of the group that lists it.
# If a feature were listed in several groups, the last listing wins.
FEATURE_GROUP_LOOKUP: Dict[str, str] = {
    member: group_name
    for group_name, members in FEATURE_GROUPS.items()
    for member in members
}
123
 
 
 
 
 
124
  app = FastAPI(title="FrankScore", version="1.0.0")
125
 
126
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
127
  class PredictionRequest(BaseModel):
128
  records: List[Dict[str, Any]] = Field(..., description="List of borrower feature dictionaries")
129
 
 
class GroupContribution(BaseModel):
    """Aggregated contribution of one feature group for a single record."""

    # Name of the feature group.
    group: str
    # Signed sum of the contribution values of the group's features.
    total_shap_value: float
    # Share of this group's absolute total among all groups' absolute totals,
    # expressed on a 0-100 scale.
    percentage: float
    # Textual sign of total_shap_value (see _direction_for_value).
    direction: str
    # Textual band derived from percentage (see _label_for_percentage).
    label: str
    # Per-feature contributions belonging to this group.
    features: List[FeatureContribution]
160
 
161
 
 
180
  results: List[PredictExplainItem]
181
 
182
 
 
 
 
 
 
 
 
 
def prepare_frame(records: List[Dict[str, Any]]) -> pd.DataFrame:
    """Build the model input frame from raw request records.

    The result has exactly the columns in ``EXPECTED_FEATURES``, in that
    order: columns missing from the request are filled with NaN and unknown
    columns are dropped. Columns in ``NUM_FEATURES`` are coerced to numeric
    (unparseable values become NaN) and columns in ``CAT_FEATURES`` are cast
    to ``object``.

    Args:
        records: One feature dictionary per borrower.

    Returns:
        A new DataFrame with one row per record.

    Raises:
        HTTPException: 400 when ``records`` is empty.
    """
    if not records:
        raise HTTPException(status_code=400, detail="No records provided.")

    # One reindex call adds the missing columns (as NaN), drops the extras and
    # fixes the column order. It returns a fresh frame, so the assignments
    # below do not write into a slice of another frame.
    df = pd.DataFrame(records).reindex(columns=EXPECTED_FEATURES)

    if NUM_FEATURES:
        df[NUM_FEATURES] = df[NUM_FEATURES].apply(pd.to_numeric, errors="coerce")
    if CAT_FEATURES:
        df[CAT_FEATURES] = df[CAT_FEATURES].astype("object")
    return df
196
 
197
 
 
226
  return base
227
 
228
 
# Default (minimum percentage, label) bands, ordered from highest to lowest.
_PERCENTAGE_LABEL_BANDS = (
    (30, "Exceptional"),
    (20, "Very Good"),
    (10, "Good"),
    (5, "Bad"),
)


def _label_for_percentage(
    pct: float,
    bands: tuple = _PERCENTAGE_LABEL_BANDS,
    fallback: str = "Very Bad",
) -> str:
    """Map a percentage share to a textual band.

    Args:
        pct: Percentage value to classify.
        bands: ``(minimum, label)`` pairs, highest minimum first. The first
            pair whose minimum is ``<= pct`` supplies the label. Defaults to
            the 30 / 20 / 10 / 5 bands.
        fallback: Label returned when ``pct`` is below every minimum. A NaN
            ``pct`` also lands here because every comparison is false.

    Returns:
        The label of the matching band, or ``fallback``.
    """
    # NOTE(review): the label depends only on the size of the share, not on
    # whether the group raises or reduces risk; confirm that is intended.
    for minimum, label in bands:
        if pct >= minimum:
            return label
    return fallback
239
+
240
+
def _direction_for_value(val: float, tolerance: float = 0.0) -> str:
    """Describe the sign of a contribution total as text.

    Args:
        val: Signed contribution value.
        tolerance: Non-negative half-width of the band around zero that is
            reported as ``"neutral"``. The default of 0.0 keeps the strict
            sign test, so only an exact zero (or NaN) is neutral.

    Returns:
        ``"raises risk"`` when ``val`` is above the tolerance,
        ``"reduces risk"`` when it is below the negated tolerance, and
        ``"neutral"`` otherwise.

    Raises:
        ValueError: If ``tolerance`` is negative.
    """
    if tolerance < 0:
        raise ValueError("tolerance must be non-negative")
    if val > tolerance:
        return "raises risk"
    if val < -tolerance:
        return "reduces risk"
    # Reached for values inside the band and for NaN (all comparisons false).
    return "neutral"
247
+
248
+
def _build_group_contribs(
    group_totals: Dict[str, float],
    group_details: Dict[str, List[FeatureContribution]],
    top_k: Optional[int],
) -> List[GroupContribution]:
    """Turn per-group totals and feature lists into response objects.

    Groups are ordered by descending absolute total. Within each group the
    features are ordered by descending absolute ``shap_value`` and optionally
    truncated. Each group's percentage is its absolute total divided by the
    sum of all groups' absolute totals, times 100.

    Args:
        group_totals: Signed contribution total per group name.
        group_details: Feature contributions per group name; a group missing
            here gets an empty feature list.
        top_k: Maximum number of features kept per group. ``None`` and ``0``
            keep every feature. A negative value is also treated as "no
            limit" instead of silently dropping features from the end of the
            ranked list.

    Returns:
        One ``GroupContribution`` per entry of ``group_totals``.
    """
    total_magnitude = sum(abs(v) for v in group_totals.values())
    # Slicing with None keeps the whole list, so one expression covers both
    # the limited and the unlimited case.
    limit = top_k if top_k is not None and top_k > 0 else None

    ranked_groups = sorted(group_totals.items(), key=lambda kv: abs(kv[1]), reverse=True)
    group_contribs: List[GroupContribution] = []
    for grp, total in ranked_groups:
        feats = sorted(
            group_details.get(grp, []),
            key=lambda fc: abs(fc.shap_value),
            reverse=True,
        )[:limit]
        # When every total is zero there is nothing to share out: report 0%.
        pct = abs(total) / total_magnitude * 100 if total_magnitude else 0.0
        group_contribs.append(
            GroupContribution(
                group=grp,
                total_shap_value=total,
                percentage=pct,
                direction=_direction_for_value(total),
                label=_label_for_percentage(pct),
                features=feats,
            )
        )
    return group_contribs
274
+
275
+
276
+ def get_booster():
277
  if not hasattr(get_booster, "_booster"):
278
  booster = xgb.Booster()
279
  booster.load_model(str(BOOSTER_PATH))
 
280
  base_score = booster.attr("base_score")
281
  if base_score:
282
  try:
 
289
  cleaned_val = "0.5"
290
  booster.set_param({"base_score": cleaned_val})
291
  booster.set_attr(base_score=cleaned_val)
 
292
  get_booster._booster = booster
293
  return get_booster._booster
294
 
295
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
@app.post("/predict", response_model=PredictionResponse)
def predict(req: PredictionRequest) -> PredictionResponse:
    """Score the submitted records with the loaded pipeline.

    The records are normalised by ``prepare_frame`` and the response carries
    column 1 of the pipeline's ``predict_proba`` output, one value per record.
    """
    features = prepare_frame(req.records)
    proba_matrix = PIPELINE.predict_proba(features)
    second_column = proba_matrix[:, 1]
    return PredictionResponse(probabilities=second_column.tolist())
301
 
302
 
@app.get("/health")
def health() -> Dict[str, str]:
    """Report a fixed "ok" status together with the resolved model file path."""
    payload: Dict[str, str] = {
        "status": "ok",
        "model_path": str(MODEL_PATH),
    }
    return payload
306
+
307
+
308
  @app.post("/score", response_model=ScoreResponse)
309
  def score(req: ScoreRequest) -> ScoreResponse:
310
  if not req.probabilities:
 
316
 
317
  @app.post("/explain", response_model=ExplainResponse)
318
  def explain(req: ExplainRequest) -> ExplainResponse:
 
319
  if not req.records:
320
  raise HTTPException(status_code=400, detail="No records provided.")
 
321
  frame = prepare_frame(req.records)
322
  probas = PIPELINE.predict_proba(frame)[:, 1]
 
323
  booster = get_booster()
324
  X_proc = PREPROCESS.transform(frame)
 
325
  feat_names = np.array(PRE_FEATURE_NAMES) if PRE_FEATURE_NAMES else np.array([f"f{i}" for i in range(X_proc.shape[1])])
326
  sanitized_names = [_sanitize_feature_name(n) for n in feat_names]
 
327
  dmat = xgb.DMatrix(X_proc, feature_names=sanitized_names)
328
  contribs = booster.predict(dmat, pred_contribs=True)
 
329
  if contribs.shape[1] != X_proc.shape[1] + 1:
330
  raise HTTPException(status_code=500, detail="Unexpected contribution shape from booster.")
 
331
  base_vals = contribs[:, -1]
332
  feat_contribs = contribs[:, :-1]
 
333
  explanations: List[ExplainItem] = []
334
  for i in range(feat_contribs.shape[0]):
335
  row_vals = feat_contribs[i]
 
336
  group_totals: Dict[str, float] = {}
337
  group_details: Dict[str, List[FeatureContribution]] = {}
 
338
  for name, val in zip(feat_names, row_vals):
339
  base = _base_feature_name(str(name))
340
  group = FEATURE_GROUP_LOOKUP.get(base, "Other")
 
341
  group_totals[group] = group_totals.get(group, 0.0) + float(val)
342
  group_details.setdefault(group, []).append(
343
  FeatureContribution(feature=str(name), shap_value=float(val))
344
  )
345
+ group_contribs = _build_group_contribs(group_totals, group_details, req.top_k)
 
 
 
 
 
 
 
346
  explanations.append(
347
  ExplainItem(
348
  probability=float(probas[i]),
 
350
  group_contributions=group_contribs,
351
  )
352
  )
 
353
  return ExplainResponse(explanations=explanations)
354
 
355
 
356
  @app.post("/predict_explain", response_model=PredictExplainResponse)
357
  def predict_explain(req: ExplainRequest) -> PredictExplainResponse:
 
358
  if not req.records:
359
  raise HTTPException(status_code=400, detail="No records provided.")
 
360
  frame = prepare_frame(req.records)
361
  probas = PIPELINE.predict_proba(frame)[:, 1]
 
362
  booster = get_booster()
363
  X_proc = PREPROCESS.transform(frame)
 
364
  feat_names = np.array(PRE_FEATURE_NAMES) if PRE_FEATURE_NAMES else np.array([f"f{i}" for i in range(X_proc.shape[1])])
365
  sanitized_names = [_sanitize_feature_name(n) for n in feat_names]
 
366
  dmat = xgb.DMatrix(X_proc, feature_names=sanitized_names)
367
  contribs = booster.predict(dmat, pred_contribs=True)
 
368
  if contribs.shape[1] != X_proc.shape[1] + 1:
369
  raise HTTPException(status_code=500, detail="Unexpected contribution shape from booster.")
 
370
  base_vals = contribs[:, -1]
371
  feat_contribs = contribs[:, :-1]
 
372
  items: List[PredictExplainItem] = []
373
  for i in range(feat_contribs.shape[0]):
374
  row_vals = feat_contribs[i]
 
375
  group_totals: Dict[str, float] = {}
376
  group_details: Dict[str, List[FeatureContribution]] = {}
 
377
  for name, val in zip(feat_names, row_vals):
378
  base = _base_feature_name(str(name))
379
  group = FEATURE_GROUP_LOOKUP.get(base, "Other")
 
380
  group_totals[group] = group_totals.get(group, 0.0) + float(val)
381
  group_details.setdefault(group, []).append(
382
  FeatureContribution(feature=str(name), shap_value=float(val))
383
  )
384
+ group_contribs = _build_group_contribs(group_totals, group_details, req.top_k)
 
 
 
 
 
 
 
385
  score_val = int(round(float(pd_to_score(np.array([probas[i]]))[0])))
 
386
  items.append(
387
  PredictExplainItem(
388
  probability=float(probas[i]),
 
391
  group_contributions=group_contribs,
392
  )
393
  )
 
394
  return PredictExplainResponse(results=items)