marcilioduarte committed on
Commit
3c9829c
·
1 Parent(s): 7755c53

Add automatic model rebuild fallback for incompatible artifacts

Browse files
Files changed (1) hide show
  1. src/credit_risk/app_support.py +50 -7
src/credit_risk/app_support.py CHANGED
@@ -13,8 +13,17 @@ import pandas as pd
13
  import plotly.express as px
14
  import plotly.graph_objects as go
15
  from sklearn.metrics import confusion_matrix
 
16
 
17
- from credit_risk.config import MODEL_DIR, REPORTS_DIR, SELECTED_FEATURES
 
 
 
 
 
 
 
 
18
 
19
 
20
  @dataclass
@@ -33,16 +42,50 @@ def _load_model() -> Any:
33
  legacy_pickle_path = MODEL_DIR / "model.pickle"
34
 
35
  if joblib_path.exists():
36
- return joblib.load(joblib_path)
 
 
 
37
 
38
  if legacy_pickle_path.exists():
39
- with legacy_pickle_path.open("rb") as file:
40
- return pickle.load(file)
41
-
42
- raise FileNotFoundError(
43
- "No model artifact found. Run `python scripts/train_model.py` first."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
44
  )
45
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
 
47
  def _load_metrics() -> dict[str, float]:
48
  """Load cached metrics, or return an empty dict when not available."""
 
13
  import plotly.express as px
14
  import plotly.graph_objects as go
15
  from sklearn.metrics import confusion_matrix
16
+ from sklearn.model_selection import train_test_split
17
 
18
+ from credit_risk.config import (
19
+ DATA_PROCESSED_DIR,
20
+ DATA_RAW_PATH,
21
+ MODEL_DIR,
22
+ REPORTS_DIR,
23
+ SELECTED_FEATURES,
24
+ )
25
+ from credit_risk.features import build_training_frame
26
+ from credit_risk.modeling import evaluate_model, save_metrics, save_model, train_model
27
 
28
 
29
  @dataclass
 
42
  legacy_pickle_path = MODEL_DIR / "model.pickle"
43
 
44
  if joblib_path.exists():
45
+ try:
46
+ return joblib.load(joblib_path)
47
+ except Exception:
48
+ pass
49
 
50
  if legacy_pickle_path.exists():
51
+ try:
52
+ with legacy_pickle_path.open("rb") as file:
53
+ return pickle.load(file)
54
+ except Exception:
55
+ pass
56
+
57
+ return _retrain_and_persist_artifacts()
58
+
59
+
60
+ def _retrain_and_persist_artifacts() -> Any:
61
+ """Rebuild model artifacts when serialized files are missing/incompatible."""
62
+ raw_df = pd.read_csv(DATA_RAW_PATH)
63
+ features, target = build_training_frame(raw_df)
64
+
65
+ x_train, x_test, y_train, y_test = train_test_split(
66
+ features,
67
+ target,
68
+ test_size=0.3,
69
+ random_state=42,
70
+ stratify=target,
71
  )
72
 
73
+ model = train_model(x_train=x_train, y_train=y_train, random_state=42)
74
+ metrics, y_hat = evaluate_model(model=model, x_test=x_test, y_test=y_test)
75
+
76
+ DATA_PROCESSED_DIR.mkdir(parents=True, exist_ok=True)
77
+ x_train.to_parquet(DATA_PROCESSED_DIR / "x_train.parquet", index=False)
78
+ x_test.to_parquet(DATA_PROCESSED_DIR / "x_test.parquet", index=False)
79
+ y_train.to_frame(name="target").to_parquet(DATA_PROCESSED_DIR / "y_train.parquet", index=False)
80
+ y_test.to_frame(name="target").to_parquet(DATA_PROCESSED_DIR / "y_test.parquet", index=False)
81
+ y_hat.to_frame(name="prediction").to_parquet(DATA_PROCESSED_DIR / "yhat.parquet", index=False)
82
+
83
+ save_model(model=model, model_path=MODEL_DIR / "model.joblib")
84
+ with (MODEL_DIR / "model.pickle").open("wb") as file:
85
+ pickle.dump(model, file)
86
+ save_metrics(metrics=metrics, path=REPORTS_DIR / "metrics.json")
87
+ return model
88
+
89
 
90
  def _load_metrics() -> dict[str, float]:
91
  """Load cached metrics, or return an empty dict when not available."""