# ITI105_Project / app.py
# Author: lkchew — last change "Update app.py" (commit 73ec2e0, verified)
import gradio as gr
import pandas as pd
import numpy as np
import joblib
from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.base import BaseEstimator, TransformerMixin
# ----------------------------
# Custom Numeric Imputer
# ----------------------------
class CustomImputer(BaseEstimator, TransformerMixin):
    """Group-aware imputer for the numeric loan columns.

    ``fit`` memorises per-group means from the training frame and
    ``transform`` fills missing values from those *fitted* statistics,
    so the imputer behaves the same for batch scoring and for
    single-row inference.

    Fixes vs. the previous version:
    * ``transform`` used to recompute group means on the transform batch
      (``X.groupby(...).transform(...)``), which leaks statistics and
      leaves NaNs untouched when a group in the batch is all-NaN
      (e.g. a single-row request).
    * ``income_by_age`` was computed in ``fit`` but never used; it is now
      the primary fill for ``income``.
    * Inherits ``BaseEstimator`` for consistency with ``CustomCleaner``.
    """

    def fit(self, X, y=None):
        """Learn the group / global means used later by ``transform``.

        Parameters
        ----------
        X : pandas.DataFrame
            Must contain loan_type, term, age and the numeric columns
            referenced below.
        y : ignored
            Present for sklearn API compatibility.
        """
        self.group_means = {
            # Primary fill: mean per (loan_type, term) pair.
            'rate_of_interest': X.groupby(['loan_type', 'term'])['rate_of_interest'].mean(),
            'Interest_rate_spread': X.groupby(['loan_type', 'term'])['Interest_rate_spread'].mean(),
            'Upfront_charges': X.groupby(['loan_type', 'term'])['Upfront_charges'].mean(),
            # Fallback fill: mean per loan_type only (for unseen pairs).
            'rate_of_interest_loan': X.groupby(['loan_type'])['rate_of_interest'].mean(),
            'Interest_rate_spread_loan': X.groupby(['loan_type'])['Interest_rate_spread'].mean(),
            'Upfront_charges_loan': X.groupby(['loan_type'])['Upfront_charges'].mean(),
            'income_by_age': X.groupby(['age'])['income'].mean(),
            'property_value_mean': X['property_value'].mean(),
            'dtir1_mean': X['dtir1'].mean(),
            'income_mean': X['income'].mean(),
        }
        return self

    def transform(self, X):
        """Return a copy of ``X`` with numeric NaNs imputed from fitted means."""
        X = X.copy()
        # Interest-rate style columns: fitted (loan_type, term) mean first,
        # then the fitted loan_type-only mean for unseen pairs.
        for col in ['rate_of_interest', 'Interest_rate_spread', 'Upfront_charges']:
            decimals = 3 if col == 'rate_of_interest' else 4
            pair_idx = pd.MultiIndex.from_arrays([X['loan_type'], X['term']])
            pair_fill = pd.Series(
                self.group_means[col].reindex(pair_idx).to_numpy(),
                index=X.index,
            )
            loan_fill = X['loan_type'].map(self.group_means[col + '_loan'])
            X[col] = X[col].fillna(pair_fill).fillna(loan_fill).round(decimals)
        # property_value / dtir1: fitted global training means.
        X['property_value'] = X['property_value'].fillna(self.group_means['property_value_mean'])
        X['property_value'] = np.round(X['property_value'], -3)  # nearest thousand
        X['dtir1'] = X['dtir1'].fillna(self.group_means['dtir1_mean']).round(0)
        # income: fitted per-age-band mean, then the fitted global mean.
        X['income'] = X['income'].fillna(X['age'].map(self.group_means['income_by_age']))
        X['income'] = X['income'].fillna(self.group_means['income_mean'])
        X['income'] = np.round(X['income'], -2)  # nearest hundred
        # LTV: derive from loan_amount / property_value where missing.
        X['LTV'] = X['LTV'].fillna(X['loan_amount'] / X['property_value'] * 100).round(8)
        return X
# ----------------------------
# Custom Categorical Cleaner
# ----------------------------
class CustomCleaner(BaseEstimator, TransformerMixin):
    """Stateless cleaning step for the categorical side of the data.

    Drops configured columns, repairs the known 'Indriect' typo in
    ``Security_Type``, and converts placeholder strings (``''``, ``'NA'``,
    ``'nan'``, ``'NaN'`` by default) in the categorical columns to NaN.
    """

    def __init__(self, drop_cols=None, missing_placeholders=None, cat_cols=None):
        self.drop_cols = drop_cols
        if missing_placeholders is None:
            missing_placeholders = ['', 'NA', 'nan', 'NaN']
        self.missing_placeholders = missing_placeholders
        self.cat_cols = cat_cols

    def fit(self, X, y=None):
        """No statistics to learn; returns self for sklearn compatibility."""
        return self

    def transform(self, X):
        """Return a cleaned copy of ``X``; the input frame is not mutated."""
        cleaned = X.copy()
        if self.drop_cols:
            cleaned = cleaned.drop(self.drop_cols, axis=1)
        # Known data-entry typo in the raw dataset.
        if 'Security_Type' in cleaned.columns:
            cleaned['Security_Type'] = cleaned['Security_Type'].replace({'Indriect': 'Indirect'})
        # Normalise placeholder strings to real NaN in the categorical columns.
        for col in (self.cat_cols or []):
            if col in cleaned.columns:
                cleaned[col] = cleaned[col].replace(self.missing_placeholders, np.nan)
        return cleaned
# ----------------------------
# Load models and preprocessing pipelines
# ----------------------------
# NOTE(review): unpickling these artifacts requires CustomImputer /
# CustomCleaner to be defined in this module and to match the class
# versions used when the .pkl files were written — keep them in sync.
gb_loaded = joblib.load('gradient_boosting_model.pkl')
rf_loaded = joblib.load("random_forest_model.pkl")
num_pipeline = joblib.load('num_pipeline.pkl')  # numeric imputer pipeline
custom_cleaner = joblib.load('custom_cleaner.pkl')  # custom cleaning transformer
cat_preprocessing = joblib.load('cat_preprocessing.pkl')  # categorical preprocessing
# ----------------------------
# Predefined CSV file options
# ----------------------------
# UI label -> bundled sample CSV path; keys populate the file dropdown below.
csv_files = {
    "Default 1": "Default_1.csv",
    "Default 2": "Default_2.csv",
    "Non Default": "Non_default.csv"
}
# ----------------------------
# Prediction function
# ----------------------------
def predict_csv_from_dropdown(file_choice, model_choice):
    """Score one of the bundled sample CSVs with the selected model.

    Parameters
    ----------
    file_choice : str
        Key into ``csv_files`` (chosen via the first dropdown).
    model_choice : str
        "Random Forest" selects ``rf_loaded``; anything else falls back to
        the gradient-boosting model.

    Returns
    -------
    pandas.DataFrame
        One row per scored input row, with 'Prediction' ('Default' /
        'Non-default') and 'Confidence' (max class probability).
    """
    frame = pd.read_csv(csv_files[file_choice])
    # Rows without a loan term cannot be grouped by (loan_type, term)
    # during imputation, so they are excluded from scoring.
    frame = frame.loc[frame['term'].notnull()].copy()
    # The sample files may still carry the label column; strip it.
    if 'target' in frame.columns:
        frame = frame.drop(columns=['target'])
    # Numeric imputation -> categorical cleaning -> categorical encoding.
    features = cat_preprocessing.transform(
        custom_cleaner.transform(num_pipeline.transform(frame))
    )
    model = rf_loaded if model_choice == "Random Forest" else gb_loaded
    predictions = model.predict(features)
    confidences = model.predict_proba(features).max(axis=1)
    labels = ['Non-default' if pred == 0 else 'Default' for pred in predictions]
    return pd.DataFrame({
        'Prediction': labels,
        'Confidence': confidences
    })
# ----------------------------
# Gradio Interface
# ----------------------------
# Two dropdowns (sample CSV + model) feed predict_csv_from_dropdown;
# the returned DataFrame is rendered as a two-column table.
iface = gr.Interface(
    fn=predict_csv_from_dropdown,
    inputs=[
        gr.Dropdown(choices=list(csv_files.keys()), label="Select CSV File"),
        gr.Dropdown(choices=["Random Forest", "Gradient Boosting"], label="Select Model")
    ],
    outputs=gr.Dataframe(headers=["Prediction", "Confidence"]),
    title="Loan Default Prediction",
    description="Select a CSV file and model to predict whether the applicant will Default (1) or Non-default (0) the loan."
)

# Launch the app only when run as a script (not when imported, e.g. by the
# Hugging Face Spaces runtime, which calls launch itself).
if __name__ == "__main__":
    iface.launch()