# Loadapproval / app.py
# Hugging Face Space file header (uploaded by eaglelandsonce, "Create app.py",
# commit c2f8de8 verified) — preserved here as a comment so the file parses.
import gradio as gr
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.metrics import (
classification_report,
accuracy_score,
precision_score,
recall_score,
f1_score,
)
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
# Fallback dataset loaded when the user does not upload a CSV in the UI.
DEFAULT_DATA_PATH = "/mnt/data/Loan_Delinquent_Analysis_Dataset.csv"
# Default label column for the lab dataset; overridable via the UI textbox.
TARGET_COL_DEFAULT = "Delinquency_Status"
def _build_model(model_name: str):
    """Instantiate the scikit-learn estimator matching a dropdown label.

    Args:
        model_name: Display label from the model dropdown (None tolerated).

    Returns:
        An unfitted estimator configured as in the original lab.

    Raises:
        ValueError: If the label is not one of the known choices.
    """
    # Map each UI label to a zero-argument factory for its estimator.
    factories = {
        "Logistic Regression": lambda: LogisticRegression(max_iter=2000),
        "Decision Tree": lambda: DecisionTreeClassifier(random_state=1),
        "Random Forest": lambda: RandomForestClassifier(random_state=1, n_estimators=200),
        "K-Nearest Neighbors (KNN)": KNeighborsClassifier,
        "Support Vector Machine (SVM)": SVC,
    }
    key = (model_name or "").strip()
    if key not in factories:
        raise ValueError(f"Unknown model selection: {model_name}")
    return factories[key]()
def train_from_csv(
    file_obj,
    model_name: str,
    target_col: str,
    test_size: float,
    random_state: int,
):
    """Train the selected classifier on a CSV and report its metrics.

    Args:
        file_obj: Value from gr.File — depending on the Gradio version this is
            either a tempfile-like object with a ``.name`` path or a plain
            filepath string; ``None`` falls back to DEFAULT_DATA_PATH.
        model_name: Dropdown label understood by ``_build_model``.
        target_col: Name of the label column in the CSV.
        test_size: Fraction of rows held out for testing.
        random_state: Seed for the train/test split.

    Returns:
        Tuple of (metrics_df, report, details): a one-row DataFrame of
        accuracy/precision/recall/F1, the text classification report, and a
        human-readable run summary.

    Raises:
        gr.Error: If ``target_col`` is not a column of the loaded CSV.
    """
    # Load CSV. Fix: newer Gradio versions pass a plain filepath string
    # instead of an object with .name, so accept either.
    if file_obj is None:
        path = DEFAULT_DATA_PATH
        source = f"Loaded default dataset from: {DEFAULT_DATA_PATH}"
    else:
        path = getattr(file_obj, "name", file_obj)
        source = f"Loaded uploaded dataset: {path}"
    df = pd.read_csv(path)
    if target_col not in df.columns:
        raise gr.Error(
            f"Target column '{target_col}' not found. Available columns: {list(df.columns)}"
        )
    # Basic cleanup: rows without a label cannot be used for supervised training.
    df = df.dropna(subset=[target_col]).copy()
    # Split features/target
    X = df.drop(columns=[target_col])
    y = df[target_col]
    # Identify column types: anything non-numeric gets one-hot encoded.
    cat_cols = X.select_dtypes(include=["object", "category", "bool"]).columns.tolist()
    num_cols = [c for c in X.columns if c not in cat_cols]
    # Preprocess: one-hot for categoricals (unknown test categories ignored),
    # passthrough numeric; scaling happens in the pipeline below.
    preprocess = ColumnTransformer(
        transformers=[
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
            ("num", "passthrough", num_cols),
        ],
        remainder="drop",
        sparse_threshold=0.3,
    )
    model = _build_model(model_name)
    # Scale for consistency across models (esp. LR/SVM/KNN).
    # with_mean=False keeps the (possibly sparse) one-hot output sparse.
    pipe = Pipeline(
        steps=[
            ("preprocess", preprocess),
            ("scaler", StandardScaler(with_mean=False)),
            ("model", model),
        ]
    )
    # Split. Fix: stratify on the label so class proportions survive the split
    # (delinquency data is typically imbalanced); fall back to an unstratified
    # split when a class is too rare to stratify.
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X,
            y,
            test_size=float(test_size),
            random_state=int(random_state),
            stratify=y,
        )
    except ValueError:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=float(test_size), random_state=int(random_state)
        )
    # Train + predict
    pipe.fit(X_train, y_train)
    y_pred = pipe.predict(X_test)
    # Metrics (weighted averaging to match the common lab pattern)
    train_acc = pipe.score(X_train, y_train)
    test_acc = accuracy_score(y_test, y_pred)
    precision = precision_score(y_test, y_pred, average="weighted", zero_division=0)
    recall = recall_score(y_test, y_pred, average="weighted", zero_division=0)
    f1 = f1_score(y_test, y_pred, average="weighted", zero_division=0)
    report = classification_report(y_test, y_pred, digits=4, zero_division=0)
    metrics_df = pd.DataFrame(
        [{
            "Algorithm": model_name,
            "Training_Accuracy": train_acc,
            "Testing_Accuracy": test_acc,
            "Precision_weighted": precision,
            "Recall_weighted": recall,
            "F1_weighted": f1,
        }]
    )
    details = (
        f"{source}\n"
        f"Rows: {len(df):,} | Features: {X.shape[1]:,} | Target: '{target_col}'\n"
        f"Train size: {len(X_train):,} | Test size: {len(X_test):,}\n"
        f"Categorical cols: {len(cat_cols)} | Numeric cols: {len(num_cols)}"
    )
    return metrics_df, report, details
def build_demo():
    """Assemble and return the Gradio Blocks UI for the trainer app."""
    # Dropdown labels must match the keys _build_model understands.
    model_choices = [
        "Logistic Regression",
        "Decision Tree",
        "Random Forest",
        "K-Nearest Neighbors (KNN)",
        "Support Vector Machine (SVM)",
    ]
    with gr.Blocks(title="Loan Delinquency Model Trainer") as demo:
        gr.Markdown(
            "## Loan Delinquency Model Trainer\n"
            "Drag-and-drop a **CSV**, choose a **model**, train, and review **Precision/Recall/F1** and the **classification report**."
        )
        # Row 1: data source + model choice
        with gr.Row():
            csv_upload = gr.File(label="Upload CSV (drag & drop)", file_types=[".csv"])
            model_dd = gr.Dropdown(
                label="Select Model",
                choices=model_choices,
                value="Logistic Regression",
            )
        # Row 2: training hyper-parameters
        with gr.Row():
            target_box = gr.Textbox(label="Target Column", value=TARGET_COL_DEFAULT)
            split_slider = gr.Slider(
                label="Test Size", minimum=0.1, maximum=0.5, value=0.3, step=0.05
            )
            seed_box = gr.Number(label="Random State", value=1, precision=0)
        run_button = gr.Button("Train Model", variant="primary")
        # Output panels
        with gr.Row():
            metrics_table = gr.Dataframe(label="Model Performance (lab metrics)", wrap=True)
        with gr.Row():
            report_box = gr.Textbox(label="Classification Report", lines=14)
        with gr.Row():
            details_box = gr.Textbox(label="Run Details", lines=5)
        # Wire the button to the training function.
        run_button.click(
            fn=train_from_csv,
            inputs=[csv_upload, model_dd, target_box, split_slider, seed_box],
            outputs=[metrics_table, report_box, details_box],
        )
        gr.Markdown(
            "**Note:** If you do not upload a file, the app will attempt to load the default dataset path:\n"
            f"`{DEFAULT_DATA_PATH}`"
        )
    return demo
if __name__ == "__main__":
    # Build the UI and start the Gradio server when run as a script.
    build_demo().launch()