# NOTE(review): removed web-scrape residue ("Spaces: / Sleeping / Sleeping",
# a Hugging Face Spaces status banner) that was not part of this module.
import os

import joblib
import numpy as np
import pandas as pd

from feature_builder import prepare_input_features
| # -------------------------------- | |
| # PATHS | |
| # -------------------------------- | |
| BASE_DIR = os.path.dirname(os.path.abspath(__file__)) | |
| # Ensure these match the new XGBoost models you trained | |
| CLASSIFIER_PATH = os.path.join(BASE_DIR, "models", "emi_classifier_final.pkl") | |
| REGRESSOR_PATH = os.path.join(BASE_DIR, "models", "emi_model_optimized.pkl") | |
| LABEL_ENCODER_PATH = os.path.join(BASE_DIR, "models", "label_encoder.pkl") | |
| # -------------------------------- | |
| # LOAD MODELS (ONCE) | |
| # -------------------------------- | |
| classifier = joblib.load(CLASSIFIER_PATH) | |
| regressor = joblib.load(REGRESSOR_PATH) | |
| # We need the label encoder to know which index corresponds to "Eligible" | |
| label_encoder = joblib.load(LABEL_ENCODER_PATH) | |
| # -------------------------------- | |
| # PREDICTION FUNCTION | |
| # -------------------------------- | |
| def predict_emi(raw_input: dict): | |
| """ | |
| Returns: | |
| eligibility_label (str): Eligible | High Risk | Not Eligible | |
| max_emi (float): Predicted maximum EMI | |
| """ | |
| # 1. FEATURE ENGINEERING | |
| input_df = prepare_input_features(raw_input) | |
| # 2. PROBABILITY-BASED CLASSIFICATION (The Fix) | |
| # Get probabilities for all classes | |
| probs = classifier.predict_proba(input_df)[0] | |
| # Create a dictionary mapping Class Name -> Probability | |
| # Example: {'Eligible': 0.38, 'Not_Eligible': 0.62} | |
| prob_map = { | |
| label_encoder.inverse_transform([i])[0]: prob | |
| for i, prob in enumerate(probs) | |
| } | |
| # 3. THRESHOLD LOGIC (Anti-Bias) | |
| # Because of the 4.5:1 imbalance, the model is "shy" to predict Eligible. | |
| # We lower the requirement to 35% to give good candidates a fair chance. | |
| eligible_prob = prob_map.get("Eligible", 0) | |
| high_risk_prob = prob_map.get("High_Risk", 0) | |
| if eligible_prob > 0.35: | |
| ml_label = "Eligible" | |
| elif high_risk_prob > 0.40: | |
| ml_label = "High Risk" | |
| else: | |
| ml_label = "Not Eligible" | |
| # 4. REGRESSION PREDICTION (Amount) | |
| max_emi = float(regressor.predict(input_df)[0]) | |
| max_emi = max(max_emi, 0.0) # Safety clamp | |
| # 5. HARD BUSINESS RULES (The "Banker's Veto") | |
| # These rules override the ML if the risk is objectively too high. | |
| credit_score = input_df["credit_score"].iloc[0] | |
| dti = input_df.get("debt_to_income", pd.Series([0])).iloc[0] | |
| # Relaxed but safe rejection criteria | |
| is_hard_reject = ( | |
| credit_score < 400 or | |
| dti > 0.85 | |
| ) | |
| # 6. FINAL ELIGIBILITY LOGIC | |
| if is_hard_reject: | |
| eligibility_label = "Not Eligible" | |
| max_emi = 0.0 | |
| else: | |
| eligibility_label = ml_label | |
| # If the label is "Not Eligible", we force Max EMI to 0 for consistency | |
| if eligibility_label == "Not Eligible": | |
| max_emi = 0.0 | |
| return eligibility_label, round(max_emi, 2) |