Spaces:

Fola-lad
/

Predictor-app

Sleeping

App Files Files Community

Predictor-app / predictor.py

Fola-lad

Add app + predictor with HF artifact download

d5d57ea about 2 months ago

raw

history blame contribute delete

3.69 kB

	from __future__ import annotations

	from pathlib import Path
	from typing import Any, Dict
	from shutil import copyfile

	import joblib
	import numpy as np
	import pandas as pd
	from huggingface_hub import hf_hub_download

	# Hugging Face Hub repository that _get_artifact passes to hf_hub_download
	# as repo_id when fetching the joblib artifacts.
	REPO_ID = "Fola-lad/loan-artifacts"

	# Local directory that artifacts are copied into. The path is relative, so
	# it resolves against the current working directory; it is created at
	# import time if it does not exist yet.
	ARTIFACT_DIR = Path("artifacts")
	ARTIFACT_DIR.mkdir(exist_ok=True)

	def _get_artifact(filename: str) -> Path:
	    """Return a local path to ``filename``, fetching it from the Hub if needed.

	    The artifact is looked up in ``ARTIFACT_DIR`` first. Only when no local
	    copy exists is it downloaded from ``REPO_ID`` via ``hf_hub_download`` and
	    copied into ``ARTIFACT_DIR``.

	    Args:
	        filename: Name of the file inside the Hub repository; also used as
	            the path relative to ``ARTIFACT_DIR``.

	    Returns:
	        Path of the copy inside ``ARTIFACT_DIR``.
	    """
	    dst = ARTIFACT_DIR / filename
	    if dst.exists():
	        # An existing local copy is never overwritten, so contacting the Hub
	        # here would only add a network dependency with no effect.
	        return dst

	    downloaded = hf_hub_download(repo_id=REPO_ID, filename=filename)

	    # ``filename`` may contain sub-directories of the repository.
	    dst.parent.mkdir(parents=True, exist_ok=True)

	    # Copy to a sibling temp name and rename afterwards so an interrupted
	    # copy cannot leave a truncated file that later passes the exists() check.
	    partial = dst.with_name(dst.name + ".part")
	    copyfile(downloaded, partial)
	    partial.replace(dst)
	    return dst

	# Load every fitted pipeline stage once, at import time. The generator is
	# consumed left to right by the tuple unpacking, so artifacts are fetched and
	# deserialized in the order listed.
	missing_value_handler, preprocessor, model, label_encoder = (
	    joblib.load(_get_artifact(artifact_name))
	    for artifact_name in (
	        "missing_value_handler.joblib",
	        "preprocessor.joblib",
	        "loan_model.joblib",
	        "label_encoder.joblib",
	    )
	)

	# Column names given to the output of missing_value_handler.transform in
	# _prepare_features. The order matters because the names are applied
	# positionally to the transformed array.
	CLEANED_FEATURE_COLS = [
	    "Gender", "Married", "Dependents", "Education", "Self_Employed", "Property_Area",
	    "ApplicantIncome", "CoapplicantIncome", "LoanAmount", "Loan_Amount_Term", "Credit_History",
	]

	# Columns every raw input frame is normalized to: the identifier column
	# followed by the feature columns.
	EXPECTED_INPUT_COLS = ["Loan_ID", *CLEANED_FEATURE_COLS]

	def _safe_log(series: pd.Series) -> np.ndarray:
	    """Return the natural log of ``series`` as a float array.

	    Entries that are non-numeric, missing, zero or negative produce ``0.0``
	    (the log of 1) instead of NaN or -inf.
	    """
	    numeric = pd.to_numeric(series, errors="coerce")
	    values = numeric.fillna(0).to_numpy(dtype=float)

	    # Start from zeros and fill in the log only where it is defined.
	    logged = np.zeros_like(values)
	    positive = values > 0
	    logged[positive] = np.log(values[positive])
	    return logged

	def feature_engineering(df: pd.DataFrame) -> pd.DataFrame:
	    """Build the engineered feature columns from a loan frame.

	    Works on a copy of ``df``. ``Dependents`` is made numeric (``"3+"`` is
	    read as 3), the two income columns are coerced to numbers, and
	    ``LoanAmount_Log`` / ``Total_Income_Log`` are appended. The raw income,
	    loan amount and ``Loan_ID`` columns are removed from the result.

	    Returns:
	        A new frame; the input is left unmodified.
	    """
	    out = df.copy()

	    out["Dependents"] = pd.to_numeric(
	        out["Dependents"].replace("3+", "3"), errors="coerce"
	    )

	    for income_col in ("ApplicantIncome", "CoapplicantIncome"):
	        out[income_col] = pd.to_numeric(out[income_col], errors="coerce")
	    # NaN in either income makes the sum NaN, which _safe_log maps to 0.0.
	    combined_income = out["ApplicantIncome"] + out["CoapplicantIncome"]

	    out["LoanAmount_Log"] = _safe_log(out["LoanAmount"])
	    out["Total_Income_Log"] = _safe_log(combined_income)

	    # errors="ignore" tolerates frames that lack some of these columns
	    # (for example one without Loan_ID).
	    raw_columns = [
	        "ApplicantIncome",
	        "CoapplicantIncome",
	        "LoanAmount",
	        "Total_Income",
	        "Loan_ID",
	    ]
	    return out.drop(columns=raw_columns, errors="ignore")

	def _normalize_input(df: pd.DataFrame) -> pd.DataFrame:
	    """Coerce an arbitrary input frame to the expected column layout.

	    The result has exactly the columns in ``EXPECTED_INPUT_COLS``, in that
	    order. Columns absent from ``df`` are added filled with NaN; columns not
	    in the expected list are dropped. The five numeric columns are converted
	    with ``pd.to_numeric``, turning unparseable values into NaN.

	    Returns:
	        A new frame; ``df`` itself is not modified.
	    """
	    # reindex adds missing columns as NaN, drops extras and fixes the order in
	    # one step. Unlike ``df[cols]`` it returns a frame that is not flagged as
	    # a slice of the input, so the assignments below do not raise
	    # SettingWithCopyWarning.
	    normalized = df.reindex(columns=EXPECTED_INPUT_COLS)

	    numeric_columns = (
	        "ApplicantIncome",
	        "CoapplicantIncome",
	        "LoanAmount",
	        "Loan_Amount_Term",
	        "Credit_History",
	    )
	    for column in numeric_columns:
	        normalized[column] = pd.to_numeric(normalized[column], errors="coerce")

	    return normalized

	def _prepare_features(raw_df: pd.DataFrame):
	    """Turn raw loan rows into the matrix handed to the model.

	    Steps: normalize the column layout, run ``missing_value_handler``,
	    re-attach ``Loan_ID``, apply ``feature_engineering`` and finally
	    ``preprocessor.transform``.

	    Returns:
	        Whatever ``preprocessor.transform`` returns for the engineered frame.
	    """
	    normalized = _normalize_input(raw_df)

	    # The handler's output is labelled positionally with CLEANED_FEATURE_COLS.
	    # NOTE(review): this assumes it emits exactly those columns in that order
	    # (i.e. without Loan_ID) - confirm against the fitted artifact.
	    imputed = pd.DataFrame(
	        missing_value_handler.transform(normalized),
	        columns=CLEANED_FEATURE_COLS,
	        index=normalized.index,
	    )

	    with_id = pd.concat([normalized[["Loan_ID"]], imputed], axis=1)
	    return preprocessor.transform(feature_engineering(with_id))

	def predict_one(payload: Dict[str, Any]) -> Dict[str, Any]:
	    """Predict the loan status for a single application.

	    Args:
	        payload: Mapping of input column name to value; it becomes a one-row
	            frame.

	    Returns:
	        A dict with ``"Loan_Status"`` (the decoded label as a string) and
	        ``"confidence"`` (the largest value in the row returned by
	        ``predict_proba``, as a float).
	    """
	    features = _prepare_features(pd.DataFrame([payload]))

	    predicted = model.predict(features)
	    class_probabilities = model.predict_proba(features)[0]
	    decoded = label_encoder.inverse_transform(predicted)[0]

	    result = {}
	    result["Loan_Status"] = str(decoded)
	    result["confidence"] = float(np.max(class_probabilities))
	    return result

	def predict_batch(df: pd.DataFrame) -> pd.DataFrame:
	    """Predict the loan status for every row of ``df``.

	    Args:
	        df: Frame of raw loan applications, one per row.

	    Returns:
	        A copy of ``df`` with two extra columns: ``"Loan_Status"`` (decoded
	        labels) and ``"confidence"`` (row-wise maximum of ``predict_proba``
	        as float). A frame with zero rows yields a zero-row result carrying
	        both columns.
	    """
	    out = df.copy()

	    if len(df) == 0:
	        # Nothing to score: return the empty result directly instead of
	        # sending zero samples through the fitted pipeline.
	        # NOTE(review): estimators commonly reject zero-sample input; confirm
	        # how the loaded artifacts behaved here before this guard.
	        out["Loan_Status"] = pd.Series(index=out.index, dtype=object)
	        out["confidence"] = pd.Series(index=out.index, dtype=float)
	        return out

	    X = _prepare_features(df)

	    preds = model.predict(X)
	    confs = model.predict_proba(X).max(axis=1)

	    out["Loan_Status"] = label_encoder.inverse_transform(preds)
	    out["confidence"] = confs.astype(float)
	    return out