# NOTE(review): removed web-scrape residue ("Spaces: / Sleeping / Sleeping",
# a Hugging Face Spaces status banner) that was not part of this module.
import os

import joblib
import numpy as np
import pandas as pd

from feature_builder import prepare_input_features
| # -------------------------------- | |
| # PATHS | |
| # -------------------------------- | |
| BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | |
| # Ensure these match the new XGBoost models you trained | |
| CLASSIFIER_PATH = os.path.join(BASE_DIR, "models", "emi_classifier_final.pkl") | |
| REGRESSOR_PATH = os.path.join(BASE_DIR, "models", "emi_model_optimized.pkl") | |
| LABEL_ENCODER_PATH = os.path.join(BASE_DIR, "models", "label_encoder.pkl") | |
| # -------------------------------- | |
| # LOAD MODELS (ONCE) | |
| # -------------------------------- | |
| classifier = joblib.load(CLASSIFIER_PATH) | |
| regressor = joblib.load(REGRESSOR_PATH) | |
| # We need the label encoder to know which index corresponds to "Eligible" | |
| label_encoder = joblib.load(LABEL_ENCODER_PATH) | |
| # -------------------------------- | |
| # PREDICTION FUNCTION | |
| # -------------------------------- | |
| def predict_emi(raw_input: dict): | |
| """ | |
| Returns: | |
| eligibility_label (str): Eligible | High Risk | Not Eligible | |
| max_emi (float): Predicted maximum EMI | |
| """ | |
| # 1. FEATURE ENGINEERING | |
| input_df = prepare_input_features(raw_input) | |
| # 2. PROBABILITY-BASED CLASSIFICATION (The Fix) | |
| # Get probabilities for all classes | |
| probs = classifier.predict_proba(input_df)[0] | |
| # Create a dictionary mapping Class Name -> Probability | |
| # Example: {'Eligible': 0.38, 'Not_Eligible': 0.62} | |
| prob_map = { | |
| label_encoder.inverse_transform([i])[0]: prob | |
| for i, prob in enumerate(probs) | |
| } | |
| # 3. THRESHOLD LOGIC (Anti-Bias) | |
| # Because of the 4.5:1 imbalance, the model is "shy" to predict Eligible. | |
| # We lower the requirement to 35% to give good candidates a fair chance. | |
| eligible_prob = prob_map.get("Eligible", 0) | |
| high_risk_prob = prob_map.get("High_Risk", 0) | |
| if eligible_prob > 0.35: | |
| ml_label = "Eligible" | |
| elif high_risk_prob > 0.40: | |
| ml_label = "High Risk" | |
| else: | |
| ml_label = "Not Eligible" | |
| # 4. REGRESSION PREDICTION (Amount) | |
| max_emi = float(regressor.predict(input_df)[0]) | |
| max_emi = max(max_emi, 0.0) # Safety clamp | |
| # 5. HARD BUSINESS RULES (The "Banker's Veto") | |
| # These rules override the ML if the risk is objectively too high. | |
| credit_score = input_df["credit_score"].iloc[0] | |
| dti = input_df.get("debt_to_income", pd.Series([0])).iloc[0] | |
| # Relaxed but safe rejection criteria | |
| is_hard_reject = ( | |
| credit_score < 400 or | |
| dti > 0.85 | |
| ) | |
| # 6. FINAL ELIGIBILITY LOGIC | |
| if is_hard_reject: | |
| eligibility_label = "Not Eligible" | |
| max_emi = 0.0 | |
| else: | |
| eligibility_label = ml_label | |
| # If the label is "Not Eligible", we force Max EMI to 0 for consistency | |
| if eligibility_label == "Not Eligible": | |
| max_emi = 0.0 | |
| return eligibility_label, round(max_emi, 2) |