Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import pandas as pd | |
| import numpy as np | |
| import joblib | |
| from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier | |
| from sklearn.impute import SimpleImputer | |
| from sklearn.preprocessing import OneHotEncoder | |
| from sklearn.compose import ColumnTransformer | |
| from sklearn.pipeline import Pipeline | |
| from sklearn.base import BaseEstimator, TransformerMixin | |
| # ---------------------------- | |
| # Custom Numeric Imputer | |
| # ---------------------------- | |
class CustomImputer(TransformerMixin, BaseEstimator):
    """Impute missing numeric loan features from statistics learned in ``fit``.

    ``fit`` records group means and global means of the training frame in
    ``self.group_means``; ``transform`` fills missing values from those
    stored statistics only, so the result for a row does not depend on which
    other rows happen to be in the batch being transformed.

    Both methods expect a pandas DataFrame containing the columns
    ``loan_type``, ``term``, ``age``, ``rate_of_interest``,
    ``Interest_rate_spread``, ``Upfront_charges``, ``property_value``,
    ``dtir1``, ``income``, ``LTV`` and ``loan_amount``.

    The attribute name ``group_means`` and its keys are kept unchanged so
    that instances fitted and pickled with the earlier implementation still
    work with this ``transform``.
    """

    # Columns filled from the (loan_type, term) mean first and from the
    # loan_type mean second.
    _RATE_COLS = ('rate_of_interest', 'Interest_rate_spread', 'Upfront_charges')

    def fit(self, X, y=None):
        """Compute and store the means used for imputation.

        Args:
            X: Training DataFrame (see the class docstring for the columns).
            y: Ignored; accepted for scikit-learn API compatibility.

        Returns:
            self
        """
        by_loan_term = X.groupby(['loan_type', 'term'])
        by_loan = X.groupby(['loan_type'])
        self.group_means = {
            'rate_of_interest': by_loan_term['rate_of_interest'].mean(),
            'Interest_rate_spread': by_loan_term['Interest_rate_spread'].mean(),
            'Upfront_charges': by_loan_term['Upfront_charges'].mean(),
            'rate_of_interest_loan': by_loan['rate_of_interest'].mean(),
            'Interest_rate_spread_loan': by_loan['Interest_rate_spread'].mean(),
            'Upfront_charges_loan': by_loan['Upfront_charges'].mean(),
            'income_by_age': X.groupby(['age'])['income'].mean(),
            'property_value_mean': X['property_value'].mean(),
            'dtir1_mean': X['dtir1'].mean(),
            'income_mean': X['income'].mean(),
        }
        return self

    def _fitted_mean(self, X, keys, stat):
        """Return, for every row of ``X``, the fitted mean of its group.

        Args:
            X: DataFrame being transformed.
            keys: List of key columns identifying the group of each row.
            stat: Key into ``self.group_means`` holding the fitted means.

        Returns:
            A NumPy array with one value per row of ``X`` (same order); NaN
            where the row's group was not seen during ``fit`` or a key is
            missing.
        """
        means = self.group_means[stat].rename('_fitted_mean')
        # Left join keeps one output row per input row, in input order,
        # because the fitted group index is unique.
        return X[keys].join(means, on=keys)['_fitted_mean'].to_numpy()

    def transform(self, X):
        """Return a copy of ``X`` with missing numeric values imputed.

        Args:
            X: DataFrame to impute (see the class docstring for the columns).

        Returns:
            A new DataFrame; ``X`` itself is not modified.
        """
        X = X.copy()

        # Rate-like columns: (loan_type, term) mean, then loan_type mean.
        # Values that are still missing after both lookups stay NaN.
        for col in self._RATE_COLS:
            for keys, stat in (
                (['loan_type', 'term'], col),
                (['loan_type'], col + '_loan'),
            ):
                fallback = self._fitted_mean(X, keys, stat)
                # Positional fill: only rows that are currently NaN change.
                X[col] = X[col].where(X[col].notna(), fallback)
            X[col] = X[col].round(3 if col == 'rate_of_interest' else 4)

        # Impute property_value and dtir1 with the global training means.
        X['property_value'] = X['property_value'].fillna(self.group_means['property_value_mean'])
        X['property_value'] = np.round(X['property_value'], -3)  # nearest 1000
        X['dtir1'] = X['dtir1'].fillna(self.group_means['dtir1_mean']).round(0)

        # Income: mean of the row's age group, then the global training mean.
        age_mean = self._fitted_mean(X, ['age'], 'income_by_age')
        X['income'] = X['income'].where(X['income'].notna(), age_mean)
        X['income'] = X['income'].fillna(self.group_means['income_mean'])
        X['income'] = np.round(X['income'], -2)  # nearest 100

        # LTV: derive from loan_amount and the (already imputed) property_value.
        X['LTV'] = X['LTV'].fillna(X['loan_amount'] / X['property_value'] * 100).round(8)
        return X
| # ---------------------------- | |
| # Custom Categorical Cleaner | |
| # ---------------------------- | |
class CustomCleaner(BaseEstimator, TransformerMixin):
    """Stateless cleaning step for the categorical part of the data.

    Optionally drops columns, corrects the ``'Indriect'`` spelling in
    ``Security_Type`` and turns placeholder strings in the configured
    categorical columns into ``np.nan``.
    """

    def __init__(self, drop_cols=None, missing_placeholders=None, cat_cols=None):
        """Store the cleaning configuration.

        Args:
            drop_cols: Columns to remove in ``transform``; nothing is dropped
                when this is empty or None.
            missing_placeholders: Values treated as missing. Defaults to
                ``['', 'NA', 'nan', 'NaN']`` when None.
            cat_cols: Columns in which placeholders are replaced by NaN.
        """
        if missing_placeholders is None:
            missing_placeholders = ['', 'NA', 'nan', 'NaN']
        self.drop_cols = drop_cols
        self.missing_placeholders = missing_placeholders
        self.cat_cols = cat_cols

    def fit(self, X, y=None):
        """Do nothing; this transformer learns no state. Returns self."""
        return self

    def transform(self, X):
        """Return a cleaned copy of ``X``; the input frame is not modified."""
        cleaned = X.copy()

        if self.drop_cols:
            cleaned = cleaned.drop(columns=self.drop_cols)

        # Normalise the misspelled category value.
        if 'Security_Type' in cleaned.columns:
            cleaned['Security_Type'] = cleaned['Security_Type'].replace({'Indriect': 'Indirect'})

        # Configured columns that are absent from the frame are skipped.
        for name in self.cat_cols or ():
            if name not in cleaned.columns:
                continue
            cleaned[name] = cleaned[name].replace(self.missing_placeholders, np.nan)

        return cleaned
| # ---------------------------- | |
| # Load models and preprocessing pipelines | |
| # ---------------------------- | |
# Load every serialized artifact once at import time, in a fixed order:
# the two classifiers first, then the three preprocessing stages (numeric
# imputer pipeline, custom cleaning transformer, categorical preprocessing).
# NOTE(review): presumably these pickles reference CustomImputer and
# CustomCleaner, which would be why the classes are defined before this
# point — confirm before moving this code.
(
    gb_loaded,
    rf_loaded,
    num_pipeline,
    custom_cleaner,
    cat_preprocessing,
) = [
    joblib.load(artifact_path)
    for artifact_path in (
        'gradient_boosting_model.pkl',
        'random_forest_model.pkl',
        'num_pipeline.pkl',
        'custom_cleaner.pkl',
        'cat_preprocessing.pkl',
    )
]
| # ---------------------------- | |
| # Predefined CSV file options | |
| # ---------------------------- | |
# Dropdown label -> path of the bundled CSV file. Insertion order is the
# order in which the labels are offered in the UI.
csv_files = dict(
    (
        ("Default 1", "Default_1.csv"),
        ("Default 2", "Default_2.csv"),
        ("Non Default", "Non_default.csv"),
    )
)
| # ---------------------------- | |
| # Prediction function | |
| # ---------------------------- | |
def predict_csv_from_dropdown(file_choice, model_choice):
    """Score one of the predefined CSV files with the selected model.

    Args:
        file_choice: Label of the CSV file; must be a key of ``csv_files``.
        model_choice: ``"Random Forest"`` selects the random forest model;
            any other value selects the gradient boosting model.

    Returns:
        A DataFrame with one row per scored input row and two columns:
        ``Prediction`` (``'Non-default'`` for class 0, ``'Default'``
        otherwise) and ``Confidence`` (the highest class probability the
        model reports for that row). Rows whose ``term`` is missing are not
        scored, so the result can be shorter than the CSV file.

    Raises:
        gr.Error: If ``file_choice`` is not one of the known labels.
    """
    # An unknown label (for example when nothing was selected) would
    # otherwise surface as a bare KeyError; show a readable message instead.
    if file_choice not in csv_files:
        raise gr.Error("Please select one of the available CSV files.")

    df = pd.read_csv(csv_files[file_choice])

    # Keep only rows with a known 'term'.
    df_cleaned = df[df['term'].notnull()].copy()

    # The label column, if present, must not be fed to the model.
    if 'target' in df_cleaned.columns:
        df_cleaned = df_cleaned.drop(columns=['target'])

    # Nothing left to score: return an empty table with the usual columns
    # rather than failing inside the preprocessing steps or the model.
    if df_cleaned.empty:
        return pd.DataFrame({'Prediction': [], 'Confidence': []})

    # Numeric imputation -> custom cleaning -> categorical preprocessing.
    X_num = num_pipeline.transform(df_cleaned)
    X_cleaned = custom_cleaner.transform(X_num)
    X_processed = cat_preprocessing.transform(X_cleaned)

    model = rf_loaded if model_choice == "Random Forest" else gb_loaded

    preds = model.predict(X_processed)
    # Confidence is the largest class probability of each row.
    probs = model.predict_proba(X_processed).max(axis=1)

    # Convert class ids to readable labels.
    labels = ['Non-default' if c == 0 else 'Default' for c in preds]
    return pd.DataFrame({
        'Prediction': labels,
        'Confidence': probs,
    })
| # ---------------------------- | |
| # Gradio Interface | |
| # ---------------------------- | |
# Two dropdowns (CSV file, model) feed predict_csv_from_dropdown; its result
# is shown as a two-column table.
iface = gr.Interface(
    fn=predict_csv_from_dropdown,
    inputs=[
        gr.Dropdown(label="Select CSV File", choices=list(csv_files)),
        gr.Dropdown(
            label="Select Model",
            choices=["Random Forest", "Gradient Boosting"],
        ),
    ],
    outputs=gr.Dataframe(headers=["Prediction", "Confidence"]),
    title="Loan Default Prediction",
    description=(
        "Select a CSV file and model to predict whether the applicant "
        "will Default (1) or Non-default (0) the loan."
    ),
)

# Start the web app only when this file is run as a script.
if __name__ == "__main__":
    iface.launch()