import os
import joblib
import pandas as pd
import numpy as np
from feature_builder import prepare_input_features
# --------------------------------
# PATHS
# --------------------------------
# Resolve all artifact paths relative to this file so the script works
# regardless of the current working directory.
BASE_DIR = os.path.dirname(os.path.abspath(__file__))
# Ensure these match the new XGBoost models you trained
CLASSIFIER_PATH = os.path.join(BASE_DIR, "models", "emi_classifier_final.pkl")
REGRESSOR_PATH = os.path.join(BASE_DIR, "models", "emi_model_optimized.pkl")
LABEL_ENCODER_PATH = os.path.join(BASE_DIR, "models", "label_encoder.pkl")
# --------------------------------
# LOAD MODELS (ONCE)
# --------------------------------
# Loaded at import time so every call to predict_emi() reuses the same
# in-memory models instead of deserializing the pickles per request.
# NOTE(review): joblib.load on untrusted pickle files executes arbitrary
# code — only ship models you trained yourself.
classifier = joblib.load(CLASSIFIER_PATH)
regressor = joblib.load(REGRESSOR_PATH)
# We need the label encoder to know which index corresponds to "Eligible"
label_encoder = joblib.load(LABEL_ENCODER_PATH)
# --------------------------------
# PREDICTION FUNCTION
# --------------------------------
def predict_emi(raw_input: dict):
    """Predict EMI eligibility and the maximum affordable EMI for one applicant.

    Args:
        raw_input: Raw applicant fields; converted into the model feature
            frame by ``prepare_input_features`` (schema defined there).

    Returns:
        tuple[str, float]: ``(eligibility_label, max_emi)`` where the label
        is one of ``"Eligible"``, ``"High Risk"``, ``"Not Eligible"`` and
        ``max_emi`` is rounded to 2 decimals (forced to 0.0 when the final
        label is "Not Eligible").
    """
    # 1. FEATURE ENGINEERING
    input_df = prepare_input_features(raw_input)

    # 2. PROBABILITY-BASED CLASSIFICATION
    # label_encoder.classes_ is ordered by encoded index, so zipping it with
    # the probability vector yields the same class-name -> probability map as
    # calling inverse_transform once per class, without the per-class overhead.
    # Example: {'Eligible': 0.38, 'Not_Eligible': 0.62}
    probs = classifier.predict_proba(input_df)[0]
    prob_map = dict(zip(label_encoder.classes_, probs))

    # 3. THRESHOLD LOGIC (Anti-Bias)
    # Because of the 4.5:1 imbalance, the model is "shy" to predict Eligible.
    # We lower the requirement to 35% to give good candidates a fair chance.
    if prob_map.get("Eligible", 0) > 0.35:
        ml_label = "Eligible"
    elif prob_map.get("High_Risk", 0) > 0.40:
        ml_label = "High Risk"
    else:
        ml_label = "Not Eligible"

    # 4. HARD BUSINESS RULES (The "Banker's Veto")
    # These rules override the ML if the risk is objectively too high.
    credit_score = input_df["credit_score"].iloc[0]
    # debt_to_income may be absent from the engineered features; default to 0.
    dti = input_df.get("debt_to_income", pd.Series([0])).iloc[0]
    # Relaxed but safe rejection criteria
    is_hard_reject = credit_score < 400 or dti > 0.85

    # 5. FINAL ELIGIBILITY LOGIC
    eligibility_label = "Not Eligible" if is_hard_reject else ml_label

    # 6. REGRESSION PREDICTION (Amount)
    # Any "Not Eligible" outcome forces Max EMI to 0, so skip the regressor
    # entirely on that path — its prediction would be discarded anyway.
    if eligibility_label == "Not Eligible":
        max_emi = 0.0
    else:
        max_emi = max(float(regressor.predict(input_df)[0]), 0.0)  # safety clamp

    return eligibility_label, round(max_emi, 2)