# anyonehomep1mane
# Initial Changes
# d7e53e8
import gradio as gr
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings(action="ignore")
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import (
StandardScaler,
OneHotEncoder,
LabelEncoder,
)
from sklearn.metrics import (
mean_absolute_error,
mean_squared_error,
r2_score,
accuracy_score,
precision_score,
recall_score,
f1_score,
roc_auc_score,
)
# ======================
# Models
# ======================
from sklearn.linear_model import (
LinearRegression,
LogisticRegression,
Perceptron,
)
from sklearn.neighbors import (
KNeighborsClassifier,
KNeighborsRegressor,
)
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import (
DecisionTreeClassifier,
DecisionTreeRegressor,
)
from sklearn.svm import SVC, SVR
from sklearn.neural_network import (
MLPClassifier,
MLPRegressor,
)
from sklearn.utils.multiclass import type_of_target
# ======================
# Model Registry
# ======================
# Regression estimators offered in the UI, keyed by the display name shown in
# the "Model" dropdown. Insertion order is the order the dropdown lists them.
REGRESSION_MODELS = {
    "Linear Regression": LinearRegression(),
    "KNN Regressor": KNeighborsRegressor(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "SVR": SVR(),
    # Explicit iteration cap for the neural-network regressor.
    "MLP Regressor": MLPRegressor(max_iter=1000),
}
# Classification estimators offered in the UI, keyed by the display name shown
# in the "Model" dropdown. Insertion order is the order the dropdown lists them.
CLASSIFICATION_MODELS = {
    # Explicit iteration caps for the iterative solvers.
    "Logistic Regression": LogisticRegression(max_iter=500),
    "KNN Classifier": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Perceptron": Perceptron(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    # probability=True so the SVC offers predict_proba, which train_model
    # looks for before computing ROC-AUC.
    "SVM Classifier": SVC(probability=True),
    "MLP Classifier": MLPClassifier(max_iter=1000),
}
# ======================
# UI Helpers
# ======================
def update_models(task_type):
    """Build the Gradio update that repopulates the model dropdown.

    Args:
        task_type: The selected task. ``"Regression"`` picks the regression
            registry; any other value picks the classification registry.

    Returns:
        A ``gr.update`` carrying the registry's display names as choices and
        clearing the current selection (``value=None``).
    """
    registry = (
        REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
    )
    return gr.update(choices=list(registry), value=None)
def preview_csv(file):
    """Load the uploaded CSV so it can be shown in the preview table.

    Args:
        file: ``None`` when nothing is uploaded, a filesystem path given as a
            string, or an object exposing the path through a ``name``
            attribute (such as a temporary-file wrapper).

    Returns:
        ``None`` when ``file`` is ``None``; otherwise the parsed
        ``pandas.DataFrame``.
    """
    if file is None:
        return None
    # Accept a bare path string as well as a wrapper object: a plain ``str``
    # has no ``name`` attribute, so it is used as the path directly.
    csv_path = file if isinstance(file, str) else file.name
    return pd.read_csv(csv_path)
def detect_target_type(y, max_classes=20):
    """Guess whether a target column describes a classification or regression task.

    Rules, applied in order:

    1. Any non-numeric column (text, categorical, ...) is a classification
       target, regardless of how many distinct values it holds.
    2. A float column containing at least one fractional value is a
       regression target, even when it has few distinct values. Continuous
       labels cannot be fitted by a classifier, so a small regression
       dataset must not be mistaken for a classification one.
    3. Any remaining numeric column is a classification target when it has
       at most ``max_classes`` distinct values, otherwise a regression
       target.

    Args:
        y: ``pandas.Series`` holding the target values.
        max_classes: Largest number of distinct values a numeric column may
            have and still be treated as class labels.

    Returns:
        ``"Classification"`` or ``"Regression"``.
    """
    if not pd.api.types.is_numeric_dtype(y):
        return "Classification"

    if pd.api.types.is_float_dtype(y):
        # Missing values are ignored so that NaN alone never forces the
        # "fractional" verdict on an otherwise integer-valued column.
        observed = y.dropna()
        if not (observed % 1 == 0).all():
            return "Regression"

    # Numeric, integer-valued: low cardinality is read as class labels.
    if y.nunique() <= max_classes:
        return "Classification"
    return "Regression"
def auto_set_task(file):
    """Pick the task type that matches the uploaded CSV's target column.

    The target is taken to be the last column of the file.

    Args:
        file: ``None`` when nothing is uploaded, a filesystem path given as a
            string, or an object exposing the path through a ``name``
            attribute.

    Returns:
        ``"Regression"`` when no file is given; otherwise whatever
        ``detect_target_type`` reports for the last column.
    """
    if file is None:
        return "Regression"
    # Accept a bare path string as well as a wrapper object exposing ``.name``.
    csv_path = file if isinstance(file, str) else file.name
    target = pd.read_csv(csv_path).iloc[:, -1]
    return detect_target_type(target)
# ======================
# Core Training Logic
# ======================
def _error_table(message):
    """Wrap *message* in the single-column "Error" table shown for failures."""
    return pd.DataFrame({"Error": [message]})


def _roc_auc(pipeline, X_test, y_test):
    """Return the ROC-AUC of a fitted classifier pipeline, or None if undefined.

    The binary/multiclass decision is based on the number of probability
    columns the model produces (one per class seen during training), not on
    the classes that happen to be present in the test split.
    """
    probs = pipeline.predict_proba(X_test)
    n_classes = probs.shape[1]
    if n_classes < 2:
        return None
    try:
        if n_classes == 2:
            score = roc_auc_score(y_test, probs[:, 1])
        else:
            score = roc_auc_score(
                y_test,
                probs,
                multi_class="ovr",
                average="weighted",
                # Tell the metric which class each probability column refers
                # to, so a test split lacking a class is not misread.
                labels=pipeline.named_steps["model"].classes_,
            )
    except ValueError:
        # Raised when the score is undefined, e.g. a class is absent from
        # the test split; the metric is then simply omitted.
        return None
    # NOTE(review): some scikit-learn versions may report an undefined score
    # as NaN instead of raising - treated the same way here.
    return None if np.isnan(score) else score


def train_model(file, task_type, model_name):
    """Train the selected model on the uploaded CSV and tabulate test metrics.

    The last CSV column is the target and every other column is a feature.
    Numeric features are standardised, all remaining features are one-hot
    encoded, and the data is split 80/20 with a fixed seed before fitting.

    Args:
        file: ``None``, a path string, or an object exposing the CSV path
            through a ``name`` attribute.
        task_type: ``"Regression"`` or ``"Classification"``.
        model_name: Display name of a model in the registry that matches
            ``task_type``.

    Returns:
        A ``pandas.DataFrame``. On success it has ``Metric`` / ``Value``
        columns; when the request cannot be served (no file, too few
        columns, task mismatch, no valid model selected) it has a single
        ``Error`` column holding the explanation.
    """
    # Imported here so this function is self-contained; scikit-learn is
    # already a dependency of this module.
    from sklearn.base import clone

    if file is None:
        return _error_table("Please upload a CSV file before training.")

    # Accept a bare path string as well as a wrapper object exposing ``.name``.
    csv_path = file if isinstance(file, str) else file.name
    df = pd.read_csv(csv_path)
    if df.shape[1] < 2:
        return _error_table(
            "CSV must contain at least one feature column and a target column."
        )

    # Target = last column
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    # ---------- Mismatch validation ----------
    detected_task = detect_target_type(y)
    if task_type != detected_task:
        return _error_table(
            f"Dataset target detected as {detected_task}, "
            f"but {task_type} model selected."
        )

    # ---------- Model selection ----------
    registry = (
        REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
    )
    if model_name not in registry:
        # The dropdown selection is cleared whenever the task type changes,
        # so a missing or stale model name is a normal situation.
        return _error_table(f"Please select a {task_type} model before training.")
    # Fit a fresh copy: the registry holds one shared instance per model, and
    # fitting it in place would leak fitted state between requests.
    model = clone(registry[model_name])

    # ---------- Automatic label encoding ----------
    if task_type == "Classification" and y.dtype == "object":
        y = LabelEncoder().fit_transform(y)

    # ---------- Feature preprocessing ----------
    # Every feature lands in exactly one group. Anything left unlisted would
    # be discarded by ColumnTransformer (its default is remainder="drop").
    num_cols = X.select_dtypes(include="number").columns
    cat_cols = X.select_dtypes(exclude="number").columns
    preprocessor = ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), num_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ],
        # Always hand the estimator a dense matrix; not every registered
        # model accepts sparse input.
        sparse_threshold=0,
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    pipeline = Pipeline(
        steps=[
            ("preprocessing", preprocessor),
            ("model", model),
        ]
    )
    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    # ---------- Metrics ----------
    if task_type == "Regression":
        mse = mean_squared_error(y_test, preds)
        metrics = {
            "MAE": mean_absolute_error(y_test, preds),
            "MSE": mse,
            "RMSE": np.sqrt(mse),
            "R²": r2_score(y_test, preds),
        }
    else:
        metrics = {
            "Accuracy": accuracy_score(y_test, preds),
            "Precision": precision_score(y_test, preds, average="weighted"),
            "Recall": recall_score(y_test, preds, average="weighted"),
            "F1 Score": f1_score(y_test, preds, average="weighted"),
        }
        # ROC-AUC needs class probabilities, which not every model offers.
        if hasattr(pipeline.named_steps["model"], "predict_proba"):
            roc_auc = _roc_auc(pipeline, X_test, y_test)
            if roc_auc is not None:
                metrics["ROC-AUC"] = roc_auc

    # ---------- Metric table ----------
    return pd.DataFrame(list(metrics.items()), columns=["Metric", "Value"])
# ======================
# Gradio UI
# ======================
# Components are created top to bottom inside the Blocks context; the event
# wiring follows once every component it refers to exists.
with gr.Blocks() as app:
    # ---- Header ----
    gr.Markdown("## Supervised Learning Model Trainer")
    gr.Markdown(
        "• Upload CSV\n"
        "• Last column is target\n"
        "• Automatic preprocessing & metrics"
    )

    # ---- Inputs and outputs ----
    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
    csv_preview = gr.Dataframe(label="CSV Preview", interactive=False)
    task_type = gr.Dropdown(
        choices=["Regression", "Classification"],
        label="Task Type",
        value="Regression",
    )
    # Choices are filled in by update_models (on page load and on task change).
    model_name = gr.Dropdown(label="Model")
    output = gr.Dataframe(label="Evaluation Metrics")
    run_btn = gr.Button("Train & Evaluate")

    # ---- Events ----
    # A new upload refreshes the preview and re-detects the task type.
    file_input.change(fn=preview_csv, inputs=file_input, outputs=csv_preview)
    file_input.change(fn=auto_set_task, inputs=file_input, outputs=task_type)

    # The model list always mirrors the current task type.
    task_type.change(fn=update_models, inputs=task_type, outputs=model_name)
    app.load(fn=update_models, inputs=task_type, outputs=model_name)

    run_btn.click(
        fn=train_model,
        inputs=[file_input, task_type, model_name],
        outputs=output,
    )

app.launch()