lkchew committed on
Commit
73ec2e0
·
verified ·
1 Parent(s): 80f2d4a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +108 -10
app.py CHANGED
@@ -2,30 +2,126 @@ import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
  import joblib
 
 
 
 
 
 
5
 
6
- # ... keep your existing import statements, CustomImputer, CustomCleaner, and model loading ...
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
 
8
- # Define a mapping of dropdown options to file paths
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  csv_files = {
10
  "Default 1": "Default_1.csv",
11
  "Default 2": "Default_2.csv",
12
  "Non Default": "Non_default.csv"
13
  }
14
 
 
 
 
15
  def predict_csv_from_dropdown(file_choice, model_choice):
16
  # Read CSV based on dropdown choice
17
  file_path = csv_files[file_choice]
18
  df = pd.read_csv(file_path)
19
 
20
  # Filter rows with 'term' not null
21
- mask = df['term'].notnull()
22
- df_cleaned = df[mask].copy()
23
 
24
- # If target column exists in CSV, drop it
25
  if 'target' in df_cleaned.columns:
26
  df_cleaned = df_cleaned.drop(columns=['target'])
27
 
28
- # Apply numeric imputation
29
  X_num = num_pipeline.transform(df_cleaned)
30
 
31
  # Custom cleaning
@@ -34,17 +130,16 @@ def predict_csv_from_dropdown(file_choice, model_choice):
34
  # Categorical preprocessing
35
  X_processed = cat_preprocessing.transform(X_cleaned)
36
 
37
- # Choose model based on dropdown
38
  model = rf_loaded if model_choice == "Random Forest" else gb_loaded
39
 
40
  # Predict
41
  preds = model.predict(X_processed)
42
  probs = model.predict_proba(X_processed).max(axis=1)
43
 
44
- # Convert classes to readable labels
45
  labels = ['Non-default' if c == 0 else 'Default' for c in preds]
46
 
47
- # Combine results
48
  results = pd.DataFrame({
49
  'Prediction': labels,
50
  'Confidence': probs
@@ -52,7 +147,9 @@ def predict_csv_from_dropdown(file_choice, model_choice):
52
 
53
  return results
54
 
55
- # Gradio interface
 
 
56
  iface = gr.Interface(
57
  fn=predict_csv_from_dropdown,
58
  inputs=[
@@ -67,3 +164,4 @@ iface = gr.Interface(
67
  if __name__ == "__main__":
68
  iface.launch()
69
 
 
 
2
  import pandas as pd
3
  import numpy as np
4
  import joblib
5
+ from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
6
+ from sklearn.impute import SimpleImputer
7
+ from sklearn.preprocessing import OneHotEncoder
8
+ from sklearn.compose import ColumnTransformer
9
+ from sklearn.pipeline import Pipeline
10
+ from sklearn.base import BaseEstimator, TransformerMixin
11
 
12
+ # ----------------------------
13
+ # Custom Numeric Imputer
14
+ # ----------------------------
15
+ class CustomImputer(TransformerMixin):
16
+ def fit(self, X, y=None):
17
+ # Precompute group means for imputations
18
+ self.group_means = {
19
+ 'rate_of_interest': X.groupby(['loan_type', 'term'])['rate_of_interest'].mean(),
20
+ 'Interest_rate_spread': X.groupby(['loan_type', 'term'])['Interest_rate_spread'].mean(),
21
+ 'Upfront_charges': X.groupby(['loan_type', 'term'])['Upfront_charges'].mean(),
22
+ 'rate_of_interest_loan': X.groupby(['loan_type'])['rate_of_interest'].mean(),
23
+ 'Interest_rate_spread_loan': X.groupby(['loan_type'])['Interest_rate_spread'].mean(),
24
+ 'Upfront_charges_loan': X.groupby(['loan_type'])['Upfront_charges'].mean(),
25
+ 'income_by_age': X.groupby(['age'])['income'].mean(),
26
+ 'property_value_mean': X['property_value'].mean(),
27
+ 'dtir1_mean': X['dtir1'].mean(),
28
+ 'income_mean': X['income'].mean(),
29
+ }
30
+ return self
31
 
32
+ def transform(self, X):
33
+ X = X.copy()
34
+
35
+ # Impute numerical features using group-based means
36
+ for col in ['rate_of_interest', 'Interest_rate_spread', 'Upfront_charges']:
37
+ X[col] = X.groupby(['loan_type', 'term'])[col].transform(lambda x: x.fillna(x.mean())).round(3 if col == 'rate_of_interest' else 4)
38
+
39
+ for col in ['rate_of_interest', 'Interest_rate_spread', 'Upfront_charges']:
40
+ loan_mean = self.group_means[col + '_loan']
41
+ X[col] = X.apply(
42
+ lambda row: row[col] if pd.notnull(row[col]) else loan_mean.get(row['loan_type'], np.nan),
43
+ axis=1
44
+ )
45
+ X[col] = X[col].round(3 if col == 'rate_of_interest' else 4)
46
+
47
+ # Impute property_value and dtir1
48
+ X['property_value'] = X['property_value'].fillna(self.group_means['property_value_mean'])
49
+ X['property_value'] = np.round(X['property_value'], -3)
50
+
51
+ X['dtir1'] = X['dtir1'].fillna(self.group_means['dtir1_mean']).round(0)
52
+
53
+ # Income
54
+ X['income'] = X.groupby(['age'])['income'].transform(lambda x: x.fillna(x.mean()))
55
+ X['income'] = X['income'].fillna(self.group_means['income_mean'])
56
+ X['income'] = np.round(X['income'], -2)
57
+
58
+ # LTV
59
+ X['LTV'] = X['LTV'].fillna(X['loan_amount'] / X['property_value'] * 100).round(8)
60
+
61
+ return X
62
+
63
+ # ----------------------------
64
+ # Custom Categorical Cleaner
65
+ # ----------------------------
66
+ class CustomCleaner(BaseEstimator, TransformerMixin):
67
+ def __init__(self, drop_cols=None, missing_placeholders=None, cat_cols=None):
68
+ self.drop_cols = drop_cols
69
+ self.missing_placeholders = missing_placeholders if missing_placeholders is not None else ['', 'NA', 'nan', 'NaN']
70
+ self.cat_cols = cat_cols
71
+
72
+ def fit(self, X, y=None):
73
+ return self
74
+
75
+ def transform(self, X):
76
+ X = X.copy()
77
+
78
+ if self.drop_cols:
79
+ X = X.drop(self.drop_cols, axis=1)
80
+
81
+ if 'Security_Type' in X.columns:
82
+ X['Security_Type'] = X['Security_Type'].replace({'Indriect': 'Indirect'})
83
+
84
+ if self.cat_cols:
85
+ for col in self.cat_cols:
86
+ if col in X.columns:
87
+ X[col] = X[col].replace(self.missing_placeholders, np.nan)
88
+
89
+ return X
90
+
91
+ # ----------------------------
92
+ # Load models and preprocessing pipelines
93
+ # ----------------------------
94
+ gb_loaded = joblib.load('gradient_boosting_model.pkl')
95
+ rf_loaded = joblib.load("random_forest_model.pkl")
96
+ num_pipeline = joblib.load('num_pipeline.pkl') # numeric imputer pipeline
97
+ custom_cleaner = joblib.load('custom_cleaner.pkl') # custom cleaning transformer
98
+ cat_preprocessing = joblib.load('cat_preprocessing.pkl') # categorical preprocessing
99
+
100
+ # ----------------------------
101
+ # Predefined CSV file options
102
+ # ----------------------------
103
  csv_files = {
104
  "Default 1": "Default_1.csv",
105
  "Default 2": "Default_2.csv",
106
  "Non Default": "Non_default.csv"
107
  }
108
 
109
+ # ----------------------------
110
+ # Prediction function
111
+ # ----------------------------
112
  def predict_csv_from_dropdown(file_choice, model_choice):
113
  # Read CSV based on dropdown choice
114
  file_path = csv_files[file_choice]
115
  df = pd.read_csv(file_path)
116
 
117
  # Filter rows with 'term' not null
118
+ df_cleaned = df[df['term'].notnull()].copy()
 
119
 
120
+ # Drop target if exists
121
  if 'target' in df_cleaned.columns:
122
  df_cleaned = df_cleaned.drop(columns=['target'])
123
 
124
+ # Numeric preprocessing
125
  X_num = num_pipeline.transform(df_cleaned)
126
 
127
  # Custom cleaning
 
130
  # Categorical preprocessing
131
  X_processed = cat_preprocessing.transform(X_cleaned)
132
 
133
+ # Select model
134
  model = rf_loaded if model_choice == "Random Forest" else gb_loaded
135
 
136
  # Predict
137
  preds = model.predict(X_processed)
138
  probs = model.predict_proba(X_processed).max(axis=1)
139
 
140
+ # Convert to readable labels
141
  labels = ['Non-default' if c == 0 else 'Default' for c in preds]
142
 
 
143
  results = pd.DataFrame({
144
  'Prediction': labels,
145
  'Confidence': probs
 
147
 
148
  return results
149
 
150
+ # ----------------------------
151
+ # Gradio Interface
152
+ # ----------------------------
153
  iface = gr.Interface(
154
  fn=predict_csv_from_dropdown,
155
  inputs=[
 
164
  if __name__ == "__main__":
165
  iface.launch()
166
 
167
+