Spaces:
Sleeping
Sleeping
| import pickle | |
| import gradio as gr | |
| import joblib | |
| import numpy as np | |
| import pandas as pd | |
| from tensorflow.keras.models import load_model | |
# Load the trained models and the fitted preprocessing objects once at start-up.
# NOTE: pickle/joblib deserialisation can execute arbitrary code, so these
# artifacts must only ever come from a trusted source.
log_reg = joblib.load("models/logistic_regression_model.pkl")
# Open the pickle through a context manager so the file handle is closed
# deterministically instead of being left to the garbage collector.
with open("models/xgboost_model.pkl", "rb") as xgb_file:
    xgb = pickle.load(xgb_file)
ffnn = load_model("models/ffnn_model.keras")
scaler = joblib.load("models/standard_scaler.pkl")
# Presumably a fitted Box-Cox power transformer (going by the file name).
pt = joblib.load("models/boxcox_transformer.pkl")
def _numeric(default, explanation):
    """Describe a free-form numeric input field."""
    return {"type": "numeric", "default": default, "explanation": explanation}


def _categorical(default, categories, explanation):
    """Describe a dropdown field; *default* is an index into *categories*."""
    return {
        "type": "categorical",
        "default": default,
        "categories": categories,
        "explanation": explanation,
    }


def _derived(explanation):
    """Describe a field that is computed from other fields, not typed in."""
    return {"type": "derived", "explanation": explanation}


# Master feature definition. Key order matters: it is the column order of the
# row that is later assembled from this mapping and passed to the scaler.
features = {
    "Age": _numeric(56.0, "The age of the borrower in years."),
    "Income": _numeric(85994.0, "The annual income of the borrower in USD."),
    "LoanAmount": _numeric(50587.0, "The amount of money being borrowed in USD."),
    "CreditScore": _numeric(
        520.0, "Credit score indicating borrower creditworthiness."
    ),
    "MonthsEmployed": _numeric(
        80.0, "Months the borrower has been employed at current job."
    ),
    "NumCreditLines": _numeric(
        4.0, "Number of active credit lines the borrower has."
    ),
    "InterestRate": _numeric(15.23, "Interest rate for the loan as a percentage."),
    "LoanTerm": _numeric(36.0, "Duration of the loan in months."),
    "DTIRatio": _derived("Debt-to-Income ratio (total debt / annual income)."),
    "Education": _categorical(
        0.0,
        ["Bachelor's", "High School", "Master's", "PhD"],
        "Highest education level attained by the borrower.",
    ),
    "EmploymentType": _categorical(
        0.0,
        ["Full-time", "Part-time", "Self-employed", "Unemployed"],
        "Borrower's employment status.",
    ),
    "MaritalStatus": _categorical(
        0.0,
        ["Divorced", "Married", "Single"],
        "Borrower's marital status.",
    ),
    "HasMortgage": _categorical(
        1.0,
        ["No", "Yes"],
        "Whether the borrower currently has a mortgage.",
    ),
    "HasDependents": _categorical(
        1.0,
        ["No", "Yes"],
        "Whether the borrower has dependents.",
    ),
    "LoanPurpose": _categorical(
        4.0,
        ["Auto", "Business", "Education", "Home", "Other"],
        "The reason for taking out the loan.",
    ),
    "HasCoSigner": _categorical(
        1.0,
        ["No", "Yes"],
        "Whether there is a co-signer on the loan.",
    ),
    "AffRatio": _derived("LoanAmount divided by Income, a financial ratio."),
    "TotalInterest": _derived("Total interest paid: InterestRate * LoanTerm."),
    "Debt": _numeric(37837.36, "Total debt."),
    "AvgBorrowed": _derived("Average borrowed amount per credit line."),
}
# Build one Gradio widget per user-editable feature, in feature order.
# `component_refs` keeps every widget addressable by feature name so other
# components can use it as an input.
input_components = []
component_refs = {}
for name, meta in features.items():
    kind = meta["type"]
    if kind == "categorical":
        choices = meta["categories"]
        # The stored default is an index into the category list.
        widget = gr.Dropdown(
            label=name,
            choices=choices,
            value=choices[int(meta["default"])],
            info=meta["explanation"],
        )
    elif kind == "numeric":
        widget = gr.Number(
            label=name,
            value=meta["default"],
            info=meta["explanation"],
        )
    else:
        # Any other type (e.g. "derived") gets no editable widget here.
        continue
    input_components.append(widget)
    component_refs[name] = widget
def _ratio_or_zero(numerator, denominator):
    """Return numerator / denominator rounded to 5 places, or 0 if undefined.

    A cleared Number field arrives as ``None``; treating it like a zero
    denominator keeps the read-only preview from raising ``TypeError``.
    """
    if numerator is None or not denominator:
        return 0
    return round(numerator / denominator, 5)


def _product_or_zero(left, right):
    """Return left * right rounded to 5 places, or 0 if either is missing."""
    if left is None or right is None:
        return 0
    return round(left * right, 5)


# Derived components: read-only previews whose value is a function of other
# widgets, listed via `inputs`. They are appended after all editable widgets.
input_components += [
    gr.Number(
        label="AffRatio",
        interactive=False,
        info=features["AffRatio"]["explanation"],
        value=_ratio_or_zero,
        inputs=[component_refs["LoanAmount"], component_refs["Income"]],
    ),
    gr.Number(
        label="TotalInterest",
        interactive=False,
        info=features["TotalInterest"]["explanation"],
        value=_product_or_zero,
        inputs=[component_refs["InterestRate"], component_refs["LoanTerm"]],
    ),
    gr.Number(
        label="DTIRatio",
        interactive=False,
        info=features["DTIRatio"]["explanation"],
        value=_ratio_or_zero,
        inputs=[component_refs["Debt"], component_refs["Income"]],
    ),
    gr.Number(
        label="AvgBorrowed",
        interactive=False,
        info=features["AvgBorrowed"]["explanation"],
        value=_ratio_or_zero,
        inputs=[component_refs["LoanAmount"], component_refs["NumCreditLines"]],
    ),
]
# Inference logic
def _safe_ratio(numerator, denominator):
    """Return numerator / denominator rounded to 5 places, or 0 if it is 0."""
    return round(numerator / denominator, 5) if denominator else 0


def _read_user_inputs(inputs):
    """Map positional widget values onto the non-derived feature names.

    Categorical values are converted to their index in the feature's category
    list. Raises ``gr.Error`` when a field is empty or holds an unknown choice.
    """
    editable = [
        (name, meta) for name, meta in features.items() if meta["type"] != "derived"
    ]
    input_map = {}
    for position, (name, meta) in enumerate(editable):
        val = inputs[position]
        if val is None:
            # A cleared field arrives as None and would otherwise surface
            # later as an opaque TypeError/ValueError.
            raise gr.Error(f"Please provide a value for '{name}'.")
        if meta["type"] == "categorical":
            try:
                val = meta["categories"].index(val)
            except ValueError:
                raise gr.Error(
                    f"'{val}' is not a valid choice for '{name}'."
                ) from None
        input_map[name] = val
    return input_map


def _add_derived_features(input_map):
    """Compute the derived features in place and power-transform four columns."""
    income = input_map["Income"]
    loan = input_map["LoanAmount"]
    input_map["AffRatio"] = _safe_ratio(loan, income)
    input_map["TotalInterest"] = round(
        input_map["InterestRate"] * input_map["LoanTerm"], 5
    )
    # DTIRatio must use the raw Debt: Debt itself is overwritten with its
    # transformed value further down.
    input_map["DTIRatio"] = _safe_ratio(input_map["Debt"], income)
    input_map["AvgBorrowed"] = _safe_ratio(loan, input_map["NumCreditLines"])

    derived_cols = ["AffRatio", "AvgBorrowed", "TotalInterest", "Debt"]
    derived_values_df = pd.DataFrame(
        [[input_map[col] for col in derived_cols]],
        columns=derived_cols,
    )
    # NOTE(review): if `pt` is a Box-Cox transform it presumably requires
    # strictly positive values, in which case the 0 fallbacks above (zero
    # income / zero credit lines) would be rejected here - confirm.
    (
        input_map["AffRatio"],
        input_map["AvgBorrowed"],
        input_map["TotalInterest"],
        input_map["Debt"],
    ) = pt.transform(derived_values_df).flatten()


def predict_default(*inputs):
    """Predict loan-default probability with all three loaded models.

    Args:
        *inputs: Widget values in the order of the non-derived entries of
            ``features``. Any further trailing values (the read-only derived
            previews) are ignored; derived features are recomputed here.

    Returns:
        A tuple ``(bar_data, label_md, explanation_md)``: a DataFrame with
        columns "Model" and "Default Probability", a markdown string with each
        model's verdict, and a static markdown explanation.

    Raises:
        gr.Error: If a field is empty or a dropdown value is not a known
            category.
    """
    input_map = _read_user_inputs(inputs)
    _add_derived_features(input_map)

    # Assemble the row in the master feature order before scaling.
    column_order = list(features)
    input_df = pd.DataFrame(
        [[input_map[name] for name in column_order]], columns=column_order
    )
    scaled = scaler.transform(input_df)

    # Get probabilities
    # NOTE(review): XGBoost receives the unscaled frame while the other two
    # models receive the scaled one - presumably it was trained on unscaled
    # features; confirm against the training code.
    probs = {
        "Logistic Regression": float(log_reg.predict_proba(scaled)[0][1]),
        "XGBoost": float(xgb.predict_proba(input_df.values)[0][1]),
        "FFNN": float(ffnn.predict(scaled, verbose=0)[0][0]),
    }

    # Binary labels using 0.5 threshold
    labels = {
        model: "Default" if p >= 0.5 else "No Default" for model, p in probs.items()
    }

    # Create markdown summary for labels
    label_md = "\n".join(
        f"## {model}: *{label}*\n" for model, label in labels.items()
    )

    # Explanatory text for the user
    explanation_md = (
        "### Prediction Explanation\n"
        "Each model predicts the probability that the borrower will default on their loan.\n"
        "- Probabilities closer to 1 indicate higher risk of default.\n"
        "- A threshold of 0.5 is used to classify 'Default' vs 'No Default'.\n"
        "- Consider the results from all models to get a comprehensive view.\n"
        "\n"
        "Please use this information as guidance and not a final decision."
    )

    # Bar-chart data: one row per model.
    bar_data = pd.DataFrame(
        {
            "Model": list(probs.keys()),
            "Default Probability": list(probs.values()),
        }
    )
    return bar_data, label_md, explanation_md
# Output widgets: a bar chart of the per-model probabilities followed by two
# markdown panels (per-model verdicts and the explanation text).
output_bar = gr.BarPlot(
    x="Model",
    y="Default Probability",
    label="Model Default Probabilities",
    height=250,
)
output_labels = gr.Markdown()
output_explanation = gr.Markdown()

# Wire inputs, prediction function and outputs into one interface; flagging
# is switched off.
demo = gr.Interface(
    title="Loan Default Risk Predictor",
    description="Enter borrower info and see the default risk prediction from 3 models.",
    fn=predict_default,
    inputs=input_components,
    outputs=[output_bar, output_labels, output_explanation],
    flagging_mode="never",
)

# Started unconditionally when this module is executed.
demo.launch()