Gutema-1990 committed on
Commit
5223365
·
1 Parent(s): ac6d643

Added and refined the model download path

Browse files
Files changed (1) hide show
  1. api/app.py +157 -49
api/app.py CHANGED
@@ -1,76 +1,78 @@
1
  from __future__ import annotations
2
 
3
  import json
 
4
  from pathlib import Path
5
  from typing import Any, Dict, List, Optional
6
 
7
  import joblib
8
  import numpy as np
9
  import pandas as pd
10
- from fastapi import FastAPI, HTTPException
11
- from pydantic import BaseModel, Field
12
  import xgboost as xgb
13
-
14
- import os
15
  from huggingface_hub import hf_hub_download
 
16
 
17
  # Compatibility shim for pickles created with newer sklearn that include _RemainderColsList
18
  import sklearn.compose._column_transformer as _ct # type: ignore
 
19
  if not hasattr(_ct, "_RemainderColsList"):
20
  class _RemainderColsList(list): # type: ignore
21
  pass
 
22
  _ct._RemainderColsList = _RemainderColsList
23
 
 
 
 
 
24
  ROOT = Path(__file__).resolve().parents[1]
25
  MODEL_DIR = Path(__file__).resolve().parent / "model"
26
- # MODEL_PATH = MODEL_DIR / "xgboost_pipeline.pkl"
27
  BOOSTER_PATH = MODEL_DIR / "xgboost_booster.json"
28
  META_PATH = MODEL_DIR / "explain_meta.json"
29
 
30
  HF_MODEL_REPO = os.getenv("HF_MODEL_REPO", "Gutema/frankscore-model-artifact")
31
  HF_MODEL_REVISION = os.getenv("HF_MODEL_REVISION", "main")
 
 
32
 
33
- try:
34
- MODEL_PATH = Path(
35
- hf_hub_download(
 
 
 
36
  repo_id=HF_MODEL_REPO,
37
- filename="xgboost_pipeline.pkl",
38
  revision=HF_MODEL_REVISION,
39
  )
40
- )
41
- except Exception as e:
42
- raise RuntimeError(f"Failed to download model artifact from HF repo={HF_MODEL_REPO}: {e}") from e
43
-
44
- if not META_PATH.exists():
45
- raise FileNotFoundError(f"Explainability meta missing at {META_PATH}")
46
- if not BOOSTER_PATH.exists():
47
- raise FileNotFoundError(f"Booster file missing at {BOOSTER_PATH}")
48
-
49
- if not MODEL_PATH.exists():
50
- raise FileNotFoundError(f"Model file missing at {MODEL_PATH}")
51
- if not META_PATH.exists():
52
- raise FileNotFoundError(f"Explainability meta missing at {META_PATH}")
53
- if not BOOSTER_PATH.exists():
54
- raise FileNotFoundError(f"Booster file missing at {BOOSTER_PATH}")
55
-
56
- PIPELINE = joblib.load(MODEL_PATH)
57
- META = json.loads(META_PATH.read_text())
58
-
59
- EXPECTED_FEATURES = list(getattr(PIPELINE, "feature_names_in_", []))
60
- PREPROCESS = PIPELINE.named_steps.get("preprocess") if hasattr(PIPELINE, "named_steps") else None
61
- if PREPROCESS is None:
62
- raise RuntimeError("Pipeline missing 'preprocess' step; cannot infer columns.")
63
-
64
- if not EXPECTED_FEATURES:
65
- EXPECTED_FEATURES = list(getattr(PREPROCESS, "feature_names_in_", []))
66
- if not EXPECTED_FEATURES:
67
- raise RuntimeError("Unable to determine expected feature names from the pipeline.")
68
 
69
- _col_map = {name: cols for name, _, cols in getattr(PREPROCESS, "transformers_", [])}
70
- NUM_FEATURES = list(_col_map.get("num", []))
71
- CAT_FEATURES = list(_col_map.get("cat", []))
72
- PRE_FEATURE_NAMES = META.get("pre_feature_names") or list(getattr(PREPROCESS, "get_feature_names_out", lambda: [])())
 
 
 
 
 
 
 
 
 
 
 
 
73
  RAW_FEATURE_SET = set((META.get("raw_num_cols") or []) + (META.get("raw_cat_cols") or []))
 
74
  FEATURE_GROUPS = {
75
  "Borrowing History & Maturity": [
76
  "account_age_days",
@@ -122,14 +124,62 @@ FEATURE_GROUPS = {
122
  "latest_amount_ma3",
123
  ],
124
  }
 
125
  FEATURE_GROUP_LOOKUP: Dict[str, str] = {}
126
  for group, variables in FEATURE_GROUPS.items():
127
  for var in variables:
128
  FEATURE_GROUP_LOOKUP[var] = group
129
 
 
 
 
 
130
  app = FastAPI(title="FrankScore", version="1.0.0")
131
 
132
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
133
  class PredictionRequest(BaseModel):
134
  records: List[Dict[str, Any]] = Field(..., description="List of borrower feature dictionaries")
135
 
@@ -183,18 +233,32 @@ class PredictExplainResponse(BaseModel):
183
  results: List[PredictExplainItem]
184
 
185
 
 
 
 
 
 
 
 
 
186
  def prepare_frame(records: List[Dict[str, Any]]) -> pd.DataFrame:
 
 
187
  if not records:
188
  raise HTTPException(status_code=400, detail="No records provided.")
189
  df = pd.DataFrame(records)
 
190
  for col in EXPECTED_FEATURES:
191
  if col not in df.columns:
192
  df[col] = np.nan
 
193
  df = df[EXPECTED_FEATURES]
 
194
  if NUM_FEATURES:
195
  df[NUM_FEATURES] = df[NUM_FEATURES].apply(pd.to_numeric, errors="coerce")
196
  if CAT_FEATURES:
197
  df[CAT_FEATURES] = df[CAT_FEATURES].astype("object")
 
198
  return df
199
 
200
 
@@ -229,10 +293,11 @@ def _base_feature_name(name: str) -> str:
229
  return base
230
 
231
 
232
- def get_booster():
233
  if not hasattr(get_booster, "_booster"):
234
  booster = xgb.Booster()
235
  booster.load_model(str(BOOSTER_PATH))
 
236
  base_score = booster.attr("base_score")
237
  if base_score:
238
  try:
@@ -245,22 +310,36 @@ def get_booster():
245
  cleaned_val = "0.5"
246
  booster.set_param({"base_score": cleaned_val})
247
  booster.set_attr(base_score=cleaned_val)
 
248
  get_booster._booster = booster
249
  return get_booster._booster
250
 
251
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
252
  @app.post("/predict", response_model=PredictionResponse)
253
  def predict(req: PredictionRequest) -> PredictionResponse:
 
254
  frame = prepare_frame(req.records)
255
  probas = PIPELINE.predict_proba(frame)[:, 1]
256
  return PredictionResponse(probabilities=probas.tolist())
257
 
258
 
259
- @app.get("/health")
260
- def health() -> Dict[str, str]:
261
- return {"status": "ok", "model_path": str(MODEL_PATH)}
262
-
263
-
264
  @app.post("/score", response_model=ScoreResponse)
265
  def score(req: ScoreRequest) -> ScoreResponse:
266
  if not req.probabilities:
@@ -272,38 +351,51 @@ def score(req: ScoreRequest) -> ScoreResponse:
272
 
273
  @app.post("/explain", response_model=ExplainResponse)
274
  def explain(req: ExplainRequest) -> ExplainResponse:
 
275
  if not req.records:
276
  raise HTTPException(status_code=400, detail="No records provided.")
 
277
  frame = prepare_frame(req.records)
278
  probas = PIPELINE.predict_proba(frame)[:, 1]
 
279
  booster = get_booster()
280
  X_proc = PREPROCESS.transform(frame)
 
281
  feat_names = np.array(PRE_FEATURE_NAMES) if PRE_FEATURE_NAMES else np.array([f"f{i}" for i in range(X_proc.shape[1])])
282
  sanitized_names = [_sanitize_feature_name(n) for n in feat_names]
 
283
  dmat = xgb.DMatrix(X_proc, feature_names=sanitized_names)
284
  contribs = booster.predict(dmat, pred_contribs=True)
 
285
  if contribs.shape[1] != X_proc.shape[1] + 1:
286
  raise HTTPException(status_code=500, detail="Unexpected contribution shape from booster.")
 
287
  base_vals = contribs[:, -1]
288
  feat_contribs = contribs[:, :-1]
 
289
  explanations: List[ExplainItem] = []
290
  for i in range(feat_contribs.shape[0]):
291
  row_vals = feat_contribs[i]
 
292
  group_totals: Dict[str, float] = {}
293
  group_details: Dict[str, List[FeatureContribution]] = {}
 
294
  for name, val in zip(feat_names, row_vals):
295
  base = _base_feature_name(str(name))
296
  group = FEATURE_GROUP_LOOKUP.get(base, "Other")
 
297
  group_totals[group] = group_totals.get(group, 0.0) + float(val)
298
  group_details.setdefault(group, []).append(
299
  FeatureContribution(feature=str(name), shap_value=float(val))
300
  )
 
301
  group_contribs: List[GroupContribution] = []
302
  for grp, total in sorted(group_totals.items(), key=lambda kv: abs(kv[1]), reverse=True):
303
  feats = sorted(group_details.get(grp, []), key=lambda fc: abs(fc.shap_value), reverse=True)
304
  if req.top_k:
305
- feats = feats[:req.top_k]
306
  group_contribs.append(GroupContribution(group=grp, total_shap_value=total, features=feats))
 
307
  explanations.append(
308
  ExplainItem(
309
  probability=float(probas[i]),
@@ -311,44 +403,59 @@ def explain(req: ExplainRequest) -> ExplainResponse:
311
  group_contributions=group_contribs,
312
  )
313
  )
 
314
  return ExplainResponse(explanations=explanations)
315
 
316
 
317
  @app.post("/predict_explain", response_model=PredictExplainResponse)
318
  def predict_explain(req: ExplainRequest) -> PredictExplainResponse:
 
319
  if not req.records:
320
  raise HTTPException(status_code=400, detail="No records provided.")
 
321
  frame = prepare_frame(req.records)
322
  probas = PIPELINE.predict_proba(frame)[:, 1]
 
323
  booster = get_booster()
324
  X_proc = PREPROCESS.transform(frame)
 
325
  feat_names = np.array(PRE_FEATURE_NAMES) if PRE_FEATURE_NAMES else np.array([f"f{i}" for i in range(X_proc.shape[1])])
326
  sanitized_names = [_sanitize_feature_name(n) for n in feat_names]
 
327
  dmat = xgb.DMatrix(X_proc, feature_names=sanitized_names)
328
  contribs = booster.predict(dmat, pred_contribs=True)
 
329
  if contribs.shape[1] != X_proc.shape[1] + 1:
330
  raise HTTPException(status_code=500, detail="Unexpected contribution shape from booster.")
 
331
  base_vals = contribs[:, -1]
332
  feat_contribs = contribs[:, :-1]
 
333
  items: List[PredictExplainItem] = []
334
  for i in range(feat_contribs.shape[0]):
335
  row_vals = feat_contribs[i]
 
336
  group_totals: Dict[str, float] = {}
337
  group_details: Dict[str, List[FeatureContribution]] = {}
 
338
  for name, val in zip(feat_names, row_vals):
339
  base = _base_feature_name(str(name))
340
  group = FEATURE_GROUP_LOOKUP.get(base, "Other")
 
341
  group_totals[group] = group_totals.get(group, 0.0) + float(val)
342
  group_details.setdefault(group, []).append(
343
  FeatureContribution(feature=str(name), shap_value=float(val))
344
  )
 
345
  group_contribs: List[GroupContribution] = []
346
  for grp, total in sorted(group_totals.items(), key=lambda kv: abs(kv[1]), reverse=True):
347
  feats = sorted(group_details.get(grp, []), key=lambda fc: abs(fc.shap_value), reverse=True)
348
  if req.top_k:
349
- feats = feats[:req.top_k]
350
  group_contribs.append(GroupContribution(group=grp, total_shap_value=total, features=feats))
 
351
  score_val = int(round(float(pd_to_score(np.array([probas[i]]))[0])))
 
352
  items.append(
353
  PredictExplainItem(
354
  probability=float(probas[i]),
@@ -357,4 +464,5 @@ def predict_explain(req: ExplainRequest) -> PredictExplainResponse:
357
  group_contributions=group_contribs,
358
  )
359
  )
 
360
  return PredictExplainResponse(results=items)
 
1
  from __future__ import annotations
2
 
3
  import json
4
+ import os
5
  from pathlib import Path
6
  from typing import Any, Dict, List, Optional
7
 
8
  import joblib
9
  import numpy as np
10
  import pandas as pd
 
 
11
  import xgboost as xgb
12
+ from fastapi import FastAPI, HTTPException
 
13
  from huggingface_hub import hf_hub_download
14
+ from pydantic import BaseModel, Field
15
 
16
  # Compatibility shim for pickles created with newer sklearn that include _RemainderColsList
17
  import sklearn.compose._column_transformer as _ct # type: ignore
18
+
19
  if not hasattr(_ct, "_RemainderColsList"):
20
  class _RemainderColsList(list): # type: ignore
21
  pass
22
+
23
  _ct._RemainderColsList = _RemainderColsList
24
 
25
+
26
# -----------------------------
# Paths & configuration
# -----------------------------
# NOTE(review): ROOT looks unused in this module — confirm before removing.
ROOT = Path(__file__).resolve().parents[1]
# Local artifacts live next to this module under api/model/.
MODEL_DIR = Path(__file__).resolve().parent / "model"

# Checked-in artifacts: raw XGBoost booster (for SHAP contributions) and
# explainability metadata (feature names/groups produced at training time).
BOOSTER_PATH = MODEL_DIR / "xgboost_booster.json"
META_PATH = MODEL_DIR / "explain_meta.json"

# Hugging Face Hub coordinates of the pickled sklearn pipeline; each value is
# overridable via an environment variable of the same name.
HF_MODEL_REPO = os.getenv("HF_MODEL_REPO", "Gutema/frankscore-model-artifact")
HF_MODEL_REVISION = os.getenv("HF_MODEL_REVISION", "main")
HF_MODEL_FILENAME = os.getenv("HF_MODEL_FILENAME", "xgboost_pipeline.pkl")
38
+
39
 
40
def download_pipeline_artifact() -> Path:
    """Download the pickled pipeline artifact from the Hugging Face Hub.

    ``hf_hub_download`` caches the file locally, so repeated calls after the
    first download are cheap.

    Returns:
        Path to the locally cached ``.pkl`` file.

    Raises:
        RuntimeError: if the download fails, with the repo/revision/filename
            in the message and the original exception chained as the cause.
    """
    try:
        # Keep the try body minimal: only the network call can raise here.
        local_path = hf_hub_download(
            repo_id=HF_MODEL_REPO,
            filename=HF_MODEL_FILENAME,
            revision=HF_MODEL_REVISION,
        )
    except Exception as e:
        raise RuntimeError(
            f"Failed to download model artifact from HF repo={HF_MODEL_REPO} "
            f"revision={HF_MODEL_REVISION} filename={HF_MODEL_FILENAME}: {e}"
        ) from e
    return Path(local_path)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
def require_local_file(p: Path, label: str) -> None:
    """Raise FileNotFoundError unless *p* exists on disk; *label* names the artifact."""
    if p.exists():
        return
    raise FileNotFoundError(f"{label} missing at {p}")
61
+
62
+
63
# -----------------------------
# Load meta (local JSON)
# -----------------------------
# Fail fast at import time if the checked-in artifacts are missing; the
# endpoints cannot function without them.
require_local_file(META_PATH, "Explainability meta")
require_local_file(BOOSTER_PATH, "Booster file")
# Explainability metadata produced at training time (feature names, raw
# column lists, etc.), consumed by the explain endpoints and startup hook.
META: Dict[str, Any] = json.loads(META_PATH.read_text())
69
+
70
+
71
+ # -----------------------------
72
+ # Feature groups (unchanged)
73
+ # -----------------------------
74
  RAW_FEATURE_SET = set((META.get("raw_num_cols") or []) + (META.get("raw_cat_cols") or []))
75
+
76
  FEATURE_GROUPS = {
77
  "Borrowing History & Maturity": [
78
  "account_age_days",
 
124
  "latest_amount_ma3",
125
  ],
126
  }
127
+
128
# Invert FEATURE_GROUPS: map each variable name to its group label.
# (If a variable appeared in multiple groups, the last group would win —
# same behavior as the equivalent nested-loop assignment.)
FEATURE_GROUP_LOOKUP: Dict[str, str] = {
    var: group
    for group, variables in FEATURE_GROUPS.items()
    for var in variables
}
132
 
133
+
134
+ # -----------------------------
135
+ # FastAPI app
136
+ # -----------------------------
137
  app = FastAPI(title="FrankScore", version="1.0.0")
138
 
139
 
140
# Globals populated at startup by _startup(); None/empty until the model loads.
PIPELINE = None  # full sklearn Pipeline loaded from the HF artifact
PREPROCESS = None  # the pipeline's "preprocess" step (column transformer)
EXPECTED_FEATURES: List[str] = []  # raw input columns the pipeline expects
NUM_FEATURES: List[str] = []  # numeric columns (coerced in prepare_frame)
CAT_FEATURES: List[str] = []  # categorical columns (cast to object dtype)
PRE_FEATURE_NAMES: List[str] = []  # post-preprocessing feature names for SHAP output
147
+
148
+
149
@app.on_event("startup")
def _startup() -> None:
    """Download and load the model pipeline when the app starts.

    Doing this at startup rather than import time keeps module import free of
    network side effects and lets /health respond before the model is ready.
    Populates the module-level PIPELINE/PREPROCESS/feature-list globals.
    """
    global PIPELINE, PREPROCESS, EXPECTED_FEATURES, NUM_FEATURES, CAT_FEATURES, PRE_FEATURE_NAMES

    model_path = download_pipeline_artifact()
    PIPELINE = joblib.load(model_path)

    EXPECTED_FEATURES = list(getattr(PIPELINE, "feature_names_in_", []))

    # The explain endpoints transform frames through this step directly.
    PREPROCESS = PIPELINE.named_steps.get("preprocess") if hasattr(PIPELINE, "named_steps") else None
    if PREPROCESS is None:
        raise RuntimeError("Pipeline missing 'preprocess' step; cannot infer columns.")

    # Fallback: infer the expected raw columns from the preprocess step itself.
    if not EXPECTED_FEATURES:
        EXPECTED_FEATURES = list(getattr(PREPROCESS, "feature_names_in_", []))
    if not EXPECTED_FEATURES:
        raise RuntimeError("Unable to determine expected feature names from the pipeline.")

    # Split raw columns by transformer name ("num"/"cat") for dtype coercion.
    _col_map = {name: cols for name, _, cols in getattr(PREPROCESS, "transformers_", [])}
    NUM_FEATURES = list(_col_map.get("num", []))
    CAT_FEATURES = list(_col_map.get("cat", []))

    # From meta if present; fallback to preprocess get_feature_names_out
    PRE_FEATURE_NAMES = META.get("pre_feature_names") or list(
        getattr(PREPROCESS, "get_feature_names_out", lambda: [])()
    )
178
+
179
+
180
+ # -----------------------------
181
+ # Schemas
182
+ # -----------------------------
183
  class PredictionRequest(BaseModel):
184
  records: List[Dict[str, Any]] = Field(..., description="List of borrower feature dictionaries")
185
 
 
233
  results: List[PredictExplainItem]
234
 
235
 
236
+ # -----------------------------
237
+ # Helpers
238
+ # -----------------------------
239
def _require_loaded() -> None:
    """Guard: respond 503 until the startup hook has populated the model globals."""
    ready = PIPELINE is not None and PREPROCESS is not None
    if not ready:
        raise HTTPException(status_code=503, detail="Model not loaded yet. Please retry.")
242
+
243
+
244
def prepare_frame(records: List[Dict[str, Any]]) -> pd.DataFrame:
    """Build a model-ready DataFrame from raw request records.

    Missing expected columns are added as NaN, unexpected columns are
    dropped, column order is normalized to EXPECTED_FEATURES, and dtypes
    are coerced (numeric columns via ``pd.to_numeric`` with NaN on failure,
    categorical columns to ``object``).

    Raises:
        HTTPException: 503 if the model is not loaded yet, 400 if *records*
            is empty.
    """
    _require_loaded()

    if not records:
        raise HTTPException(status_code=400, detail="No records provided.")
    df = pd.DataFrame(records)

    # reindex adds missing columns as NaN, drops extras, and fixes column
    # order in a single pass — equivalent to the per-column NaN insertion
    # loop followed by reselection, without column-by-column insertion.
    df = df.reindex(columns=EXPECTED_FEATURES)

    if NUM_FEATURES:
        df[NUM_FEATURES] = df[NUM_FEATURES].apply(pd.to_numeric, errors="coerce")
    if CAT_FEATURES:
        df[CAT_FEATURES] = df[CAT_FEATURES].astype("object")

    return df
264
 
 
293
  return base
294
 
295
 
296
+ def get_booster() -> xgb.Booster:
297
  if not hasattr(get_booster, "_booster"):
298
  booster = xgb.Booster()
299
  booster.load_model(str(BOOSTER_PATH))
300
+
301
  base_score = booster.attr("base_score")
302
  if base_score:
303
  try:
 
310
  cleaned_val = "0.5"
311
  booster.set_param({"base_score": cleaned_val})
312
  booster.set_attr(base_score=cleaned_val)
313
+
314
  get_booster._booster = booster
315
  return get_booster._booster
316
 
317
 
318
+ # -----------------------------
319
+ # Endpoints
320
+ # -----------------------------
321
@app.get("/health")
def health() -> Dict[str, str]:
    """Liveness probe: always 200; reports HF config and model-load status."""
    # Deliberately does NOT call _require_loaded(): health must succeed
    # even before the startup hook finishes loading the pipeline.
    info: Dict[str, str] = {"status": "ok"}
    info["hf_repo"] = HF_MODEL_REPO
    info["hf_revision"] = HF_MODEL_REVISION
    info["hf_filename"] = HF_MODEL_FILENAME
    info["meta_path"] = str(META_PATH)
    info["booster_path"] = str(BOOSTER_PATH)
    info["loaded"] = str(PIPELINE is not None)
    return info
333
+
334
+
335
@app.post("/predict", response_model=PredictionResponse)
def predict(req: PredictionRequest) -> PredictionResponse:
    """Return the positive-class default probability for each input record."""
    _require_loaded()
    prepared = prepare_frame(req.records)
    positive_probs = PIPELINE.predict_proba(prepared)[:, 1]
    return PredictionResponse(probabilities=positive_probs.tolist())
341
 
342
 
 
 
 
 
 
343
  @app.post("/score", response_model=ScoreResponse)
344
  def score(req: ScoreRequest) -> ScoreResponse:
345
  if not req.probabilities:
 
351
 
352
  @app.post("/explain", response_model=ExplainResponse)
353
  def explain(req: ExplainRequest) -> ExplainResponse:
354
+ _require_loaded()
355
  if not req.records:
356
  raise HTTPException(status_code=400, detail="No records provided.")
357
+
358
  frame = prepare_frame(req.records)
359
  probas = PIPELINE.predict_proba(frame)[:, 1]
360
+
361
  booster = get_booster()
362
  X_proc = PREPROCESS.transform(frame)
363
+
364
  feat_names = np.array(PRE_FEATURE_NAMES) if PRE_FEATURE_NAMES else np.array([f"f{i}" for i in range(X_proc.shape[1])])
365
  sanitized_names = [_sanitize_feature_name(n) for n in feat_names]
366
+
367
  dmat = xgb.DMatrix(X_proc, feature_names=sanitized_names)
368
  contribs = booster.predict(dmat, pred_contribs=True)
369
+
370
  if contribs.shape[1] != X_proc.shape[1] + 1:
371
  raise HTTPException(status_code=500, detail="Unexpected contribution shape from booster.")
372
+
373
  base_vals = contribs[:, -1]
374
  feat_contribs = contribs[:, :-1]
375
+
376
  explanations: List[ExplainItem] = []
377
  for i in range(feat_contribs.shape[0]):
378
  row_vals = feat_contribs[i]
379
+
380
  group_totals: Dict[str, float] = {}
381
  group_details: Dict[str, List[FeatureContribution]] = {}
382
+
383
  for name, val in zip(feat_names, row_vals):
384
  base = _base_feature_name(str(name))
385
  group = FEATURE_GROUP_LOOKUP.get(base, "Other")
386
+
387
  group_totals[group] = group_totals.get(group, 0.0) + float(val)
388
  group_details.setdefault(group, []).append(
389
  FeatureContribution(feature=str(name), shap_value=float(val))
390
  )
391
+
392
  group_contribs: List[GroupContribution] = []
393
  for grp, total in sorted(group_totals.items(), key=lambda kv: abs(kv[1]), reverse=True):
394
  feats = sorted(group_details.get(grp, []), key=lambda fc: abs(fc.shap_value), reverse=True)
395
  if req.top_k:
396
+ feats = feats[: req.top_k]
397
  group_contribs.append(GroupContribution(group=grp, total_shap_value=total, features=feats))
398
+
399
  explanations.append(
400
  ExplainItem(
401
  probability=float(probas[i]),
 
403
  group_contributions=group_contribs,
404
  )
405
  )
406
+
407
  return ExplainResponse(explanations=explanations)
408
 
409
 
410
  @app.post("/predict_explain", response_model=PredictExplainResponse)
411
  def predict_explain(req: ExplainRequest) -> PredictExplainResponse:
412
+ _require_loaded()
413
  if not req.records:
414
  raise HTTPException(status_code=400, detail="No records provided.")
415
+
416
  frame = prepare_frame(req.records)
417
  probas = PIPELINE.predict_proba(frame)[:, 1]
418
+
419
  booster = get_booster()
420
  X_proc = PREPROCESS.transform(frame)
421
+
422
  feat_names = np.array(PRE_FEATURE_NAMES) if PRE_FEATURE_NAMES else np.array([f"f{i}" for i in range(X_proc.shape[1])])
423
  sanitized_names = [_sanitize_feature_name(n) for n in feat_names]
424
+
425
  dmat = xgb.DMatrix(X_proc, feature_names=sanitized_names)
426
  contribs = booster.predict(dmat, pred_contribs=True)
427
+
428
  if contribs.shape[1] != X_proc.shape[1] + 1:
429
  raise HTTPException(status_code=500, detail="Unexpected contribution shape from booster.")
430
+
431
  base_vals = contribs[:, -1]
432
  feat_contribs = contribs[:, :-1]
433
+
434
  items: List[PredictExplainItem] = []
435
  for i in range(feat_contribs.shape[0]):
436
  row_vals = feat_contribs[i]
437
+
438
  group_totals: Dict[str, float] = {}
439
  group_details: Dict[str, List[FeatureContribution]] = {}
440
+
441
  for name, val in zip(feat_names, row_vals):
442
  base = _base_feature_name(str(name))
443
  group = FEATURE_GROUP_LOOKUP.get(base, "Other")
444
+
445
  group_totals[group] = group_totals.get(group, 0.0) + float(val)
446
  group_details.setdefault(group, []).append(
447
  FeatureContribution(feature=str(name), shap_value=float(val))
448
  )
449
+
450
  group_contribs: List[GroupContribution] = []
451
  for grp, total in sorted(group_totals.items(), key=lambda kv: abs(kv[1]), reverse=True):
452
  feats = sorted(group_details.get(grp, []), key=lambda fc: abs(fc.shap_value), reverse=True)
453
  if req.top_k:
454
+ feats = feats[: req.top_k]
455
  group_contribs.append(GroupContribution(group=grp, total_shap_value=total, features=feats))
456
+
457
  score_val = int(round(float(pd_to_score(np.array([probas[i]]))[0])))
458
+
459
  items.append(
460
  PredictExplainItem(
461
  probability=float(probas[i]),
 
464
  group_contributions=group_contribs,
465
  )
466
  )
467
+
468
  return PredictExplainResponse(results=items)