Spaces:
Sleeping
Sleeping
| from __future__ import annotations | |
| from pathlib import Path | |
| from typing import Any, Dict | |
| from shutil import copyfile | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| from huggingface_hub import hf_hub_download | |
# Hugging Face Hub repository id passed to hf_hub_download when fetching artifacts.
REPO_ID = "Fola-lad/loan-artifacts"
# Local directory (relative to the current working directory) that holds the
# copied artifact files.
ARTIFACT_DIR = Path("artifacts")
# Runs at import time; exist_ok=True makes repeated imports/restarts harmless.
ARTIFACT_DIR.mkdir(exist_ok=True)
def _get_artifact(filename: str) -> Path:
    """Return the local path of an artifact, downloading it only when needed.

    Args:
        filename: Path of the file inside the ``REPO_ID`` Hub repository. It
            is also used as the path relative to ``ARTIFACT_DIR``.

    Returns:
        Path to the artifact inside ``ARTIFACT_DIR``.

    Raises:
        Whatever ``hf_hub_download`` or the file-system calls raise when the
        artifact is not available locally and cannot be fetched or copied.
    """
    dst = ARTIFACT_DIR / filename
    # An existing local copy was never refreshed by the original code either,
    # so contacting the Hub in that case is wasted work (and breaks offline use).
    if dst.exists():
        return dst

    downloaded = hf_hub_download(repo_id=REPO_ID, filename=filename)

    # Supports filenames that contain sub-directories, and recreates the
    # directory if it was removed after import.
    dst.parent.mkdir(parents=True, exist_ok=True)

    # Copy to a temporary sibling and rename, so an interrupted copy can never
    # leave a truncated file that a later call would mistake for a valid one.
    partial = dst.with_name(dst.name + ".part")
    copyfile(downloaded, partial)
    partial.replace(dst)
    return dst
# Load every serialized pipeline piece once, at import time, so the prediction
# helpers below can use them as module-level globals.
# NOTE(security): joblib.load is pickle-based and can execute arbitrary code
# while loading. These files come from a remote Hub repository, so only load
# them if that repository is trusted (consider pinning a revision).
missing_value_handler = joblib.load(_get_artifact("missing_value_handler.joblib"))
preprocessor = joblib.load(_get_artifact("preprocessor.joblib"))
model = joblib.load(_get_artifact("loan_model.joblib"))
label_encoder = joblib.load(_get_artifact("label_encoder.joblib"))
# Column labels applied, in this order, to the output of the missing-value
# handler (see _prepare_features).
CLEANED_FEATURE_COLS = [
    "Gender", "Married", "Dependents", "Education", "Self_Employed", "Property_Area",
    # The five columns below are coerced to numeric by _normalize_input.
    "ApplicantIncome", "CoapplicantIncome", "LoanAmount", "Loan_Amount_Term", "Credit_History",
]

# Raw input layout: the identifier column followed by every feature column.
EXPECTED_INPUT_COLS = ["Loan_ID", *CLEANED_FEATURE_COLS]
def _safe_log(series: pd.Series) -> np.ndarray:
    """Return the natural log of *series*, mapping unusable values to 0.0.

    Values that are non-numeric, missing, zero or negative are replaced by 1.0
    before the logarithm is taken, so they come out as ``log(1) == 0.0``.

    Args:
        series: Values to transform; anything ``pd.to_numeric`` cannot parse
            is treated as missing.

    Returns:
        A float NumPy array with one entry per element of *series*.
    """
    numeric = pd.to_numeric(series, errors="coerce")
    values = numeric.fillna(0).to_numpy(dtype=float)
    # Anything that is not strictly positive has no real logarithm; use 1.0.
    return np.log(np.where(values > 0, values, 1.0))
def feature_engineering(df: pd.DataFrame) -> pd.DataFrame:
    """Derive the log-scaled features and drop the columns they replace.

    The input frame is not modified. ``Dependents`` is converted to a number
    (with ``"3+"`` read as 3), ``LoanAmount_Log`` and ``Total_Income_Log`` are
    added, and the raw income/loan-amount columns plus ``Loan_ID`` are removed
    when present.

    Args:
        df: Frame containing at least ``Dependents``, ``ApplicantIncome``,
            ``CoapplicantIncome`` and ``LoanAmount``.

    Returns:
        A new frame with the engineered columns appended.
    """
    out = df.copy()

    out["Dependents"] = pd.to_numeric(
        out["Dependents"].replace("3+", "3"), errors="coerce"
    )
    for income_col in ("ApplicantIncome", "CoapplicantIncome"):
        out[income_col] = pd.to_numeric(out[income_col], errors="coerce")

    # The combined income is only needed for its log, so it stays a local
    # rather than becoming a column that is dropped again below.
    combined_income = out["ApplicantIncome"] + out["CoapplicantIncome"]
    out["LoanAmount_Log"] = _safe_log(out["LoanAmount"])
    out["Total_Income_Log"] = _safe_log(combined_income)

    obsolete = [
        "ApplicantIncome",
        "CoapplicantIncome",
        "LoanAmount",
        "Total_Income",
        "Loan_ID",
    ]
    return out.drop(columns=obsolete, errors="ignore")
def _normalize_input(df: pd.DataFrame) -> pd.DataFrame:
    """Return a copy of *df* laid out as ``EXPECTED_INPUT_COLS``.

    Expected columns that are absent are added filled with NaN, extra columns
    are discarded, and the numeric fields are coerced to numbers (unparseable
    values become NaN). The input frame is left untouched.

    Args:
        df: Raw input rows, with any subset of the expected columns.

    Returns:
        A new frame whose columns are exactly ``EXPECTED_INPUT_COLS``, in order.
    """
    absent = {col: np.nan for col in EXPECTED_INPUT_COLS if col not in df.columns}
    # assign() works on a copy, and the column selection both reorders the
    # columns and drops any that are not expected.
    frame = df.assign(**absent)[EXPECTED_INPUT_COLS]

    numeric_cols = (
        "ApplicantIncome",
        "CoapplicantIncome",
        "LoanAmount",
        "Loan_Amount_Term",
        "Credit_History",
    )
    for col in numeric_cols:
        frame[col] = pd.to_numeric(frame[col], errors="coerce")
    return frame
def _prepare_features(raw_df: pd.DataFrame):
    """Run raw input rows through the full pre-model pipeline.

    Steps: normalize the column layout, apply the loaded missing-value handler,
    run ``feature_engineering``, then apply the loaded preprocessor.

    Args:
        raw_df: Raw input rows; missing expected columns are tolerated.

    Returns:
        Whatever ``preprocessor.transform`` returns, ready to pass to the model.
    """
    normalized = _normalize_input(raw_df)

    # NOTE(review): the handler receives all EXPECTED_INPUT_COLS (Loan_ID
    # included) but its output is labelled with the 11 CLEANED_FEATURE_COLS, so
    # it presumably drops Loan_ID itself - confirm against the fitted artifact.
    imputed = pd.DataFrame(
        missing_value_handler.transform(normalized),
        columns=CLEANED_FEATURE_COLS,
        index=normalized.index,
    )

    engineered = feature_engineering(
        pd.concat([normalized[["Loan_ID"]], imputed], axis=1)
    )
    return preprocessor.transform(engineered)
def predict_one(payload: Dict[str, Any]) -> Dict[str, Any]:
    """Predict the loan status for a single application.

    Args:
        payload: Mapping of input column name to value for one application.

    Returns:
        A dict with ``"Loan_Status"`` (the decoded label as a string) and
        ``"confidence"`` (the largest class probability as a float).
    """
    features = _prepare_features(pd.DataFrame([payload]))

    encoded = model.predict(features)
    class_probs = model.predict_proba(features)[0]
    decoded = label_encoder.inverse_transform(encoded)[0]

    return {
        "Loan_Status": str(decoded),
        "confidence": float(np.max(class_probs)),
    }
def predict_batch(df: pd.DataFrame) -> pd.DataFrame:
    """Predict the loan status for every row of *df*.

    Args:
        df: Raw input rows, one application per row. It is not modified.

    Returns:
        A copy of *df* with two extra columns: ``Loan_Status`` (decoded label)
        and ``confidence`` (largest class probability per row). A frame with
        zero rows yields an empty copy that still carries both columns.
    """
    out = df.copy()

    # A zero-row batch (e.g. a header-only upload) has nothing to predict.
    # Return early instead of passing it to the transformers and model, which
    # are expected to reject zero-sample input if they are scikit-learn
    # estimators.
    if len(df) == 0:
        out["Loan_Status"] = pd.Series(index=out.index, dtype=object)
        out["confidence"] = pd.Series(index=out.index, dtype=float)
        return out

    X = _prepare_features(df)
    preds = model.predict(X)
    confs = model.predict_proba(X).max(axis=1)

    # Both results are arrays, so they are assigned by position rather than by
    # index label.
    out["Loan_Status"] = label_encoder.inverse_transform(preds)
    out["confidence"] = confs.astype(float)
    return out