import marimo

# Version string recorded alongside the notebook, and the app object that
# every @app.cell function below registers itself with.
__generated_with = "0.14.16"
app = marimo.App()
| |
|
| |
|
@app.cell
def _():
    # Shared imports; joblib, mo and pd are handed to the other cells.
    import warnings

    import joblib
    import marimo as mo
    import pandas as pd

    # Ignore warnings whose message matches the feature-names text below.
    warnings.filterwarnings(
        action="ignore",
        message="X does not have valid feature names",
    )
    return joblib, mo, pd
| |
|
| |
|
@app.cell
def _(mo):
    # Page title, centered. The final expression is the cell's output.
    _title = mo.md("# 🏦 Home Credit Default Risk Prediction")
    mo.center(_title)
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Line-break spacer.
    mo.Html("<br>")
    return
| |
|
| |
|
@app.cell
def _(joblib, mo):
    # Load the serialized model from disk. The load runs inside marimo's
    # redirect_stdout context manager.
    _model_path = "./model/lgbm_model.joblib"
    with mo.redirect_stdout():
        loaded_pipeline = joblib.load(_model_path)
    return (loaded_pipeline,)
| |
|
| |
|
@app.cell
def _():
    # Baseline value for every model input feature. The prediction cell
    # copies this mapping and overlays the values submitted through the form.
    # Key order is kept exactly as before because it determines the column
    # order of the DataFrame built from it.
    _numeric_defaults = {
        "SK_ID_CURR": 277659.5,
        "CNT_CHILDREN": 0.0,
        "AMT_INCOME_TOTAL": 147150.0,
        "AMT_CREDIT": 512997.75,
        "AMT_ANNUITY": 24885.0,
        "AMT_GOODS_PRICE": 450000.0,
        "REGION_POPULATION_RELATIVE": 0.01885,
        "DAYS_BIRTH": -15743.5,
        "DAYS_EMPLOYED": -1219.0,
        "DAYS_REGISTRATION": -4492.0,
        "DAYS_ID_PUBLISH": -3254.0,
        "OWN_CAR_AGE": 9.0,
        "FLAG_MOBIL": 1.0,
        "FLAG_EMP_PHONE": 1.0,
        "FLAG_WORK_PHONE": 0.0,
        "FLAG_CONT_MOBILE": 1.0,
        "FLAG_PHONE": 0.0,
        "FLAG_EMAIL": 0.0,
        "CNT_FAM_MEMBERS": 2.0,
        "REGION_RATING_CLIENT": 2.0,
        "REGION_RATING_CLIENT_W_CITY": 2.0,
        "HOUR_APPR_PROCESS_START": 12.0,
        "REG_REGION_NOT_LIVE_REGION": 0.0,
        "REG_REGION_NOT_WORK_REGION": 0.0,
        "LIVE_REGION_NOT_WORK_REGION": 0.0,
        "REG_CITY_NOT_LIVE_CITY": 0.0,
        "REG_CITY_NOT_WORK_CITY": 0.0,
        "LIVE_CITY_NOT_WORK_CITY": 0.0,
        "EXT_SOURCE_1": 0.5068839442599388,
        "EXT_SOURCE_2": 0.5662837032261614,
        "EXT_SOURCE_3": 0.5370699579791587,
        # Building statistics, *_AVG variants.
        "APARTMENTS_AVG": 0.0876,
        "BASEMENTAREA_AVG": 0.0764,
        "YEARS_BEGINEXPLUATATION_AVG": 0.9816,
        "YEARS_BUILD_AVG": 0.7552,
        "COMMONAREA_AVG": 0.0211,
        "ELEVATORS_AVG": 0.0,
        "ENTRANCES_AVG": 0.1379,
        "FLOORSMAX_AVG": 0.1667,
        "FLOORSMIN_AVG": 0.2083,
        "LANDAREA_AVG": 0.0483,
        "LIVINGAPARTMENTS_AVG": 0.0756,
        "LIVINGAREA_AVG": 0.0746,
        "NONLIVINGAPARTMENTS_AVG": 0.0,
        "NONLIVINGAREA_AVG": 0.0035,
        # Building statistics, *_MODE variants.
        "APARTMENTS_MODE": 0.084,
        "BASEMENTAREA_MODE": 0.0748,
        "YEARS_BEGINEXPLUATATION_MODE": 0.9816,
        "YEARS_BUILD_MODE": 0.7648,
        "COMMONAREA_MODE": 0.0191,
        "ELEVATORS_MODE": 0.0,
        "ENTRANCES_MODE": 0.1379,
        "FLOORSMAX_MODE": 0.1667,
        "FLOORSMIN_MODE": 0.2083,
        "LANDAREA_MODE": 0.0459,
        "LIVINGAPARTMENTS_MODE": 0.0771,
        "LIVINGAREA_MODE": 0.0731,
        "NONLIVINGAPARTMENTS_MODE": 0.0,
        "NONLIVINGAREA_MODE": 0.0011,
        # Building statistics, *_MEDI variants.
        "APARTMENTS_MEDI": 0.0864,
        "BASEMENTAREA_MEDI": 0.0761,
        "YEARS_BEGINEXPLUATATION_MEDI": 0.9816,
        "YEARS_BUILD_MEDI": 0.7585,
        "COMMONAREA_MEDI": 0.0209,
        "ELEVATORS_MEDI": 0.0,
        "ENTRANCES_MEDI": 0.1379,
        "FLOORSMAX_MEDI": 0.1667,
        "FLOORSMIN_MEDI": 0.2083,
        "LANDAREA_MEDI": 0.0488,
        "LIVINGAPARTMENTS_MEDI": 0.0765,
        "LIVINGAREA_MEDI": 0.0749,
        "NONLIVINGAPARTMENTS_MEDI": 0.0,
        "NONLIVINGAREA_MEDI": 0.003,
        "TOTALAREA_MODE": 0.0687,
        "OBS_30_CNT_SOCIAL_CIRCLE": 0.0,
        "DEF_30_CNT_SOCIAL_CIRCLE": 0.0,
        "OBS_60_CNT_SOCIAL_CIRCLE": 0.0,
        "DEF_60_CNT_SOCIAL_CIRCLE": 0.0,
        "DAYS_LAST_PHONE_CHANGE": -755.0,
        # FLAG_DOCUMENT_2 .. FLAG_DOCUMENT_21 in ascending order; only
        # FLAG_DOCUMENT_3 defaults to 1.0, every other flag is 0.0.
        **{
            f"FLAG_DOCUMENT_{_n}": (1.0 if _n == 3 else 0.0)
            for _n in range(2, 22)
        },
        "AMT_REQ_CREDIT_BUREAU_HOUR": 0.0,
        "AMT_REQ_CREDIT_BUREAU_DAY": 0.0,
        "AMT_REQ_CREDIT_BUREAU_WEEK": 0.0,
        "AMT_REQ_CREDIT_BUREAU_MON": 0.0,
        "AMT_REQ_CREDIT_BUREAU_QRT": 0.0,
        "AMT_REQ_CREDIT_BUREAU_YEAR": 1.0,
    }

    _categorical_defaults = {
        "NAME_CONTRACT_TYPE": "Cash loans",
        "CODE_GENDER": "F",
        "FLAG_OWN_CAR": "N",
        "FLAG_OWN_REALTY": "Y",
        "NAME_TYPE_SUITE": "Unaccompanied",
        "NAME_INCOME_TYPE": "Working",
        "NAME_EDUCATION_TYPE": "Secondary / secondary special",
        "NAME_FAMILY_STATUS": "Married",
        "NAME_HOUSING_TYPE": "House / apartment",
        "OCCUPATION_TYPE": "Laborers",
        "WEEKDAY_APPR_PROCESS_START": "TUESDAY",
        "ORGANIZATION_TYPE": "Business Entity Type 3",
        "FONDKAPREMONT_MODE": "reg oper account",
        "HOUSETYPE_MODE": "block of flats",
        "WALLSMATERIAL_MODE": "Panel",
        "EMERGENCYSTATE_MODE": "No",
    }

    # Numeric entries first, then categorical, matching the original order.
    default_values = {**_numeric_defaults, **_categorical_defaults}
    return (default_values,)
| |
|
| |
|
@app.cell
def _(mo):
    # One slider per user-adjustable feature. Each slider's label is the
    # feature name, which is also its key in `features_widgets`.
    def _make_slider(name, **options):
        # Cell-private helper: build a slider labelled with `name`.
        return mo.ui.slider(label=name, **options)

    EXT_SOURCE_3 = _make_slider(
        "EXT_SOURCE_3", start=0.00, stop=0.90, step=0.01, value=0.5
    )
    EXT_SOURCE_2 = _make_slider(
        "EXT_SOURCE_2", start=0.00, stop=0.86, step=0.01, value=0.5
    )
    # No explicit step: the slider's default step applies.
    DAYS_BIRTH = _make_slider(
        "DAYS_BIRTH", start=-25229, stop=-7673, value=-15743
    )
    EXT_SOURCE_1 = _make_slider(
        "EXT_SOURCE_1", start=0.01, stop=0.97, step=0.01, value=0.5
    )
    AMT_ANNUITY = _make_slider(
        "AMT_ANNUITY", start=1980, stop=258025, step=100, value=24885
    )
    AMT_CREDIT = _make_slider(
        "AMT_CREDIT", start=45000, stop=4050000, step=50000, value=512997
    )
    # No explicit step for the three DAYS_* sliders below either.
    DAYS_EMPLOYED = _make_slider(
        "DAYS_EMPLOYED", start=-17583, stop=365243, value=-1219
    )
    DAYS_ID_PUBLISH = _make_slider(
        "DAYS_ID_PUBLISH", start=-7197, stop=0, value=-3254
    )
    DAYS_REGISTRATION = _make_slider(
        "DAYS_REGISTRATION", start=-24672, stop=0, value=-4492
    )
    SK_ID_CURR = _make_slider(
        "SK_ID_CURR", start=100003, stop=456253, step=100, value=277659
    )

    # Keys match the {placeholders} in the form template of the next cell.
    features_widgets = dict(
        EXT_SOURCE_3=EXT_SOURCE_3,
        EXT_SOURCE_2=EXT_SOURCE_2,
        DAYS_BIRTH=DAYS_BIRTH,
        EXT_SOURCE_1=EXT_SOURCE_1,
        AMT_ANNUITY=AMT_ANNUITY,
        AMT_CREDIT=AMT_CREDIT,
        DAYS_EMPLOYED=DAYS_EMPLOYED,
        DAYS_ID_PUBLISH=DAYS_ID_PUBLISH,
        DAYS_REGISTRATION=DAYS_REGISTRATION,
        SK_ID_CURR=SK_ID_CURR,
    )
    return (features_widgets,)
| |
|
| |
|
@app.cell
def _(features_widgets, mo):
    # Markdown template whose {placeholders} are filled with the widgets of
    # the same name from `features_widgets`.
    _template = mo.md("""
        ###Fill in the Client Profile to see the prediction

        {EXT_SOURCE_3} {EXT_SOURCE_2}
        {DAYS_BIRTH} {EXT_SOURCE_1}
        {AMT_ANNUITY} {AMT_CREDIT}
        {DAYS_EMPLOYED} {DAYS_ID_PUBLISH}
        {DAYS_REGISTRATION} {SK_ID_CURR}
        """)

    # Bind the widgets into the template, then wrap the result in a bordered
    # form that also has a clear button.
    _batched = _template.batch(**features_widgets)
    sliders_form = _batched.form(show_clear_button=True, bordered=True)
    return (sliders_form,)
| |
|
| |
|
@app.cell
def _(default_values, loaded_pipeline, mo, pd, sliders_form):
    # Score the submitted client profile with the loaded pipeline.
    #
    # `probability` stays None while the form has no value; the downstream
    # cell checks for None and falls back to placeholder display values.
    probability = None

    # Hint rendered while the form has no value. A marimo cell only renders
    # its final top-level expression, so the hint is held in a variable and
    # emitted at the end of the cell; previously it was a bare expression
    # inside the `else` branch and was silently discarded.
    _hint = mo.md("Fill in the form and click **Submit** to get a prediction.")

    if sliders_form.value is not None:
        # Start from the per-feature defaults and overlay the submitted
        # form values on top of them.
        prediction_data = default_values.copy()
        prediction_data.update(sliders_form.value)

        # Single-row frame: one client profile per prediction.
        predict_df = pd.DataFrame([prediction_data])

        # Second column of predict_proba, first (only) row. The result cell
        # presents this number as the chance of default.
        probability = loaded_pipeline.predict_proba(predict_df)[:, 1][0]

        # A prediction exists, so there is nothing to prompt for.
        _hint = None

    # Cell output: the hint while waiting for a submission, otherwise None.
    _hint
    return (probability,)
| |
|
| |
|
@app.cell
def _(probability):
    # Turn the raw probability into the values shown on the result card:
    # a percentage, a risk label, and the direction passed to the stat card.
    if probability is None:
        # No prediction available: fall back to fixed placeholder values.
        prob_percent, risk, direction = 70.12, "High Risk", "decrease"
    else:
        prob_percent = round(probability * 100, 2)

        # Thresholds: below 0.34 is low, below 0.67 is medium, otherwise high.
        if probability < 0.34:
            risk, direction = "Low Risk", "increase"
        elif probability < 0.67:
            risk, direction = "Medium Risk", None
        else:
            risk, direction = "High Risk", "decrease"
    return direction, prob_percent, risk
| |
|
| |
|
@app.cell
def _(direction, mo, prob_percent, risk):
    # Build the two stat cards: the headline probability and a plain-language
    # interpretation of it. They are laid out by a later cell.
    interpretation_text = f"""This means there is a {prob_percent}% chance the client will default on their loan.
    Risk level is categorized as {risk}, which can help guide loan approval decisions.
    """

    result_stat = mo.stat(
        value=f"{prob_percent}%",
        label="⚖️ Probability of Payment Difficulties",
        caption=risk,
        direction=direction,
        bordered=True,
    )

    # The interpretation card has an empty value; all of its text is carried
    # by the caption.
    interpretation_stat = mo.stat(
        value="",
        label="💡 Interpretation",
        caption=interpretation_text,
        bordered=True,
    )
    return interpretation_stat, result_stat
| |
|
| |
|
@app.cell
def _(mo):
    # Section heading for the interactive predictor.
    mo.md("## 🔮 Credit Risk Predictor — Try It Yourself!")
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Horizontal rule followed by a line break.
    mo.Html("<hr><br>")
    return
| |
|
| |
|
@app.cell
def _(interpretation_stat, mo, result_stat):
    # Show the result card and the interpretation card side by side with
    # equal widths, wrapped in a single-row vertical stack.
    _cards_row = mo.hstack(
        items=[result_stat, interpretation_stat],
        widths="equal",
        gap=1,
    )
    mo.vstack(items=[_cards_row], gap=1, heights="equal")
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Line-break spacer.
    mo.Html("<br>")
    return
| |
|
| |
|
@app.cell
def _(sliders_form):
    # Render the client-profile form; as the final expression it becomes
    # this cell's output.
    sliders_form
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Footnote explaining how features not exposed in the form are filled in.
    _footnote = r"""
        <small>_(*) Predictions are based on the top 10 most important features. Remaining features are assigned default values (median for numeric, mode for categorical)._</small>

        """
    mo.md(_footnote)
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Line-break spacer.
    mo.Html("<br>")
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Section heading for the model comparison.
    mo.md("## 🚀 Model Selection")
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Horizontal rule followed by a line break.
    mo.Html("<hr><br>")
    return
| |
|
| |
|
@app.cell
def _(mo):
    # One stat card per candidate model. Each value string carries two
    # scores (the table further down labels 💪🏻 as train and 📝 as test);
    # the caption summarises how the model behaved.
    lg_stat = mo.stat(
        value="💪🏻 68.7% 📝 68.5%",
        label="Logistic Regression",
        caption="Scores are consistent across train and test, indicating no overfitting. However, the overall AUC is low, suggesting underfitting — the model is too simple to capture complex patterns.",
        direction="decrease",
        bordered=True,
    )

    rfc_stat = mo.stat(
        value="💪🏻 100% 📝 70.7%",
        label="Random Forest Classifier",
        caption="Perfect training AUC indicates severe overfitting — the model memorized the training set. While the test score is better than Logistic Regression, the gap is too large for good generalization.",
        direction="decrease",
        bordered=True,
    )

    rfo_stat = mo.stat(
        value="💪🏻 82% 📝 73.1%",
        label="Random Forest with Randomized Search",
        caption="Hyperparameter tuning greatly reduced overfitting. The smaller train–test gap and improved test AUC show better generalization and a strong performance.",
        direction="increase",
        bordered=True,
    )

    lgbm_stat = mo.stat(
        value="💪🏻 85.2% 📝 75.1%",
        label="LightGBM",
        caption="Best overall performance. Small train–test gap and highest test AUC indicate a well-balanced model with strong generalization.",
        direction="increase",
        bordered=True,
    )

    # Two-by-two grid: the two weaker models on top, the two stronger below.
    _top_row = mo.hstack(items=[lg_stat, rfc_stat], widths="equal", gap=1)
    _bottom_row = mo.hstack(items=[rfo_stat, lgbm_stat], widths="equal", gap=1)
    mo.vstack(
        items=[_top_row, _bottom_row],
        gap=1,
        heights="equal",
        align="center",
        justify="center",
    )
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Line-break spacer.
    mo.Html("<br>")
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Lead-in sentence for the score table that follows.
    _intro = "Based on a comparison of all the models _(using AUC ROC metric)_, the final model selection is clear:"
    mo.md(_intro)
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Line-break spacer.
    mo.Html("<br>")
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Centered markdown table of train/test scores per model; the LightGBM
    # row is in bold.
    _scores_table = mo.md(r"""
        | Model | 💪🏻 Train Score | 📝 Test Score |
        | :--- | :---: | :---: |
        | Logistic Regression | 0.687 | 0.685 |
        | Random Forest Classifier | 1.000 | 0.707 |
        | Randomized Search (Tuned RF) | 0.820 | 0.731 |
        | **LightGBM** | **0.852** | **0.751** |
        """)
    mo.center(_scores_table)
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Line-break spacer.
    mo.Html("<br>")
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Bullet-point summary of the model comparison.
    _summary = r"""
        * The **Logistic Regression** model performed poorly due to underfitting.
        * The base **Random Forest** model, while better, suffered from severe overfitting.
        * The tuned **Random Forest** model was a significant improvement and a strong contender, achieving a solid `test_score`.
        * However, the **LightGBM** model ultimately demonstrated the best performance, achieving the highest **ROC AUC test score of 0.751**. This indicates that it is the most robust and accurate model for predicting loan repayment risk on unseen data.
        """
    mo.md(_summary)
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Info callout linking to the full walkthrough notebook.
    _callout_body = mo.md(
        """💡 **Want to explore the process in detail?**

        See the full 👉 [Jupyter notebook](https://huggingface.co/spaces/iBrokeTheCode/Home_Credit_Default_Risk_Prediction/blob/main/tutorial_app.ipynb) 👈️ for an end-to-end walkthrough, including Exploratory Data Analysis, preprocessing, model training, evaluation, model selection, and saving the final model."""
    )
    mo.callout(value=_callout_body, kind="info")
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Line break, horizontal rule, line break: separates the footer.
    mo.Html("<br><hr><br>")
    return
| |
|
| |
|
@app.cell
def _(mo):
    # Centered footer with the author's profile links.
    _links = "**Connect with me:** 💼 [Linkedin](https://www.linkedin.com/in/alex-turpo/) • 🐱 [GitHub](https://github.com/iBrokeTheCode) • 🤗 [Hugging Face](https://huggingface.co/iBrokeTheCode)"
    mo.center(mo.md(_links))
    return
| |
|
| |
|
# Run the app only when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
| |
|