Spaces:

HF-Pawan
/

Supervised-Learning-Model-Trainer

Running

Supervised-Learning-Model-Trainer / version_2_app.py

anyonehomep1mane

Initial Changes

d7e53e8 19 days ago

6.93 kB

	import gradio as gr
	import pandas as pd
	import numpy as np
	import warnings
	warnings.filterwarnings(action="ignore")

	from sklearn.model_selection import train_test_split
	from sklearn.pipeline import Pipeline
	from sklearn.compose import ColumnTransformer
	from sklearn.preprocessing import (
	StandardScaler,
	OneHotEncoder,
	LabelEncoder,
	)
	from sklearn.metrics import (
	mean_absolute_error,
	mean_squared_error,
	r2_score,
	accuracy_score,
	precision_score,
	recall_score,
	f1_score,
	roc_auc_score,
	)

	# ======================
	# Models
	# ======================
	from sklearn.linear_model import (
	LinearRegression,
	LogisticRegression,
	Perceptron,
	)
	from sklearn.neighbors import (
	KNeighborsClassifier,
	KNeighborsRegressor,
	)
	from sklearn.naive_bayes import GaussianNB
	from sklearn.tree import (
	DecisionTreeClassifier,
	DecisionTreeRegressor,
	)
	from sklearn.svm import SVC, SVR
	from sklearn.neural_network import (
	MLPClassifier,
	MLPRegressor,
	)

	from sklearn.utils.multiclass import type_of_target

	# ======================
	# Model Registry
	# ======================
	# Display name -> estimator used when the task type is "Regression".
	# The estimators are instantiated once, when this module is loaded, so every
	# lookup of a given name returns the same object; callers that must not share
	# fitted state between runs should copy the estimator before fitting it.
	REGRESSION_MODELS = {
	    "Linear Regression": LinearRegression(),
	    "KNN Regressor": KNeighborsRegressor(),
	    "Decision Tree Regressor": DecisionTreeRegressor(),
	    "SVR": SVR(),
	    "MLP Regressor": MLPRegressor(max_iter=1000),
	}

	# Display name -> estimator used for any task type other than "Regression".
	# The same shared-instance caveat applies as for REGRESSION_MODELS.
	CLASSIFICATION_MODELS = {
	    "Logistic Regression": LogisticRegression(max_iter=500),
	    "KNN Classifier": KNeighborsClassifier(),
	    "Naive Bayes": GaussianNB(),
	    "Perceptron": Perceptron(),
	    "Decision Tree Classifier": DecisionTreeClassifier(),
	    # probability=True is set so the SVC exposes predict_proba.
	    "SVM Classifier": SVC(probability=True),
	    "MLP Classifier": MLPClassifier(max_iter=1000),
	}

	# ======================
	# UI Helpers
	# ======================
	def update_models(task_type):
	    """Return a dropdown update listing the models for *task_type*.

	    "Regression" selects the regression registry; every other value falls
	    back to the classification registry. The update also clears the current
	    selection (``value=None``).
	    """
	    registry = (
	        REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
	    )
	    return gr.update(choices=list(registry), value=None)

	def preview_csv(file):
	    """Load the uploaded CSV into a DataFrame for the preview table.

	    Args:
	        file: The uploaded file. Either an object exposing the on-disk path
	            as ``.name``, a path string, or an ``os.PathLike``; ``None``
	            when nothing is uploaded.

	    Returns:
	        The parsed ``pandas.DataFrame``, or ``None`` when *file* is ``None``.
	    """
	    if file is None:
	        return None

	    # Path-like inputs are used directly: ``pathlib.Path.name`` is only the
	    # basename, so blindly reading ``.name`` would lose the directory.
	    if isinstance(file, str) or hasattr(file, "__fspath__"):
	        csv_path = file
	    else:
	        csv_path = file.name
	    return pd.read_csv(csv_path)


	def detect_target_type(y, max_classes=20):
	    """Guess whether the target column calls for classification or regression.

	    Args:
	        y: ``pandas.Series`` holding the target column.
	        max_classes: Largest number of distinct values a numeric target may
	            have and still be treated as class labels. Defaults to 20.

	    Returns:
	        ``"Classification"`` or ``"Regression"``.
	    """
	    # Any non-numeric target (object, category, pandas string dtype, ...) can
	    # only be a set of class labels.
	    if y.dtype.name == "category" or not pd.api.types.is_numeric_dtype(y):
	        return "Classification"

	    # Floats with a fractional part are continuous measurements, which
	    # scikit-learn classifiers refuse as labels, so they are never treated as
	    # classes no matter how few distinct values there are.
	    if pd.api.types.is_float_dtype(y):
	        observed = y.dropna()
	        if not (observed % 1 == 0).all():
	            return "Regression"

	    # Numeric but low cardinality -> classification
	    if y.nunique() <= max_classes:
	        return "Classification"

	    return "Regression"


	def auto_set_task(file):
	    """Pick the task type that matches the uploaded CSV's target column.

	    The target is taken to be the last column of the CSV.

	    Args:
	        file: The uploaded file. Either an object exposing the on-disk path
	            as ``.name``, a path string, or an ``os.PathLike``; ``None``
	            when nothing is uploaded.

	    Returns:
	        ``"Regression"`` when *file* is ``None``; otherwise whatever
	        ``detect_target_type`` reports for the last column.
	    """
	    if file is None:
	        return "Regression"

	    # Path-like inputs are used directly: ``pathlib.Path.name`` is only the
	    # basename, so blindly reading ``.name`` would lose the directory.
	    if isinstance(file, str) or hasattr(file, "__fspath__"):
	        csv_path = file
	    else:
	        csv_path = file.name

	    target = pd.read_csv(csv_path).iloc[:, -1]
	    return detect_target_type(target)


	# ======================
	# Core Training Logic
	# ======================
	def _error_table(message):
	    """Wrap *message* in the single-column "Error" table shown to the user."""
	    return pd.DataFrame({"Error": [message]})


	def _build_preprocessor(X):
	    """Build the transformer that scales numeric columns and one-hot encodes the rest."""
	    num_cols = X.select_dtypes(include="number").columns
	    # Every non-numeric column (text, category, bool, ...) is encoded instead
	    # of being silently discarded by the transformer's default remainder.
	    cat_cols = X.select_dtypes(exclude="number").columns
	    return ColumnTransformer(
	        transformers=[
	            ("num", StandardScaler(), num_cols),
	            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
	        ],
	        # Always emit a dense matrix: GaussianNB cannot consume sparse input.
	        sparse_threshold=0,
	    )


	def _regression_metrics(y_true, preds):
	    """Compute MAE, MSE, RMSE and R² for regression predictions."""
	    mse = mean_squared_error(y_true, preds)
	    return {
	        "MAE": mean_absolute_error(y_true, preds),
	        "MSE": mse,
	        "RMSE": np.sqrt(mse),
	        "R²": r2_score(y_true, preds),
	    }


	def _classification_metrics(pipeline, X_test, y_test, preds):
	    """Compute weighted classification metrics, plus ROC-AUC when it is defined."""
	    metrics = {
	        "Accuracy": accuracy_score(y_test, preds),
	        "Precision": precision_score(
	            y_test, preds, average="weighted", zero_division=0
	        ),
	        "Recall": recall_score(
	            y_test, preds, average="weighted", zero_division=0
	        ),
	        "F1 Score": f1_score(y_test, preds, average="weighted", zero_division=0),
	    }

	    model = pipeline.named_steps["model"]
	    if not hasattr(model, "predict_proba"):
	        return metrics

	    probs = pipeline.predict_proba(X_test)
	    try:
	        # Branch on the number of classes the model was trained on, not on
	        # what happens to be present in the test split, so the probability
	        # columns always line up with the labels being scored.
	        if probs.shape[1] == 2:
	            metrics["ROC-AUC"] = roc_auc_score(y_test, probs[:, 1])
	        elif probs.shape[1] > 2:
	            metrics["ROC-AUC"] = roc_auc_score(
	                y_test,
	                probs,
	                multi_class="ovr",
	                average="weighted",
	                labels=model.classes_,
	            )
	    except ValueError:
	        # ROC-AUC is undefined when the test split is missing a class (or
	        # holds a class unseen in training); leave the metric out rather
	        # than fail the whole evaluation.
	        pass
	    return metrics


	def train_model(file, task_type, model_name):
	    """Train the selected model on the uploaded CSV and report test metrics.

	    The last CSV column is the target and all other columns are features.
	    The data is split 80/20 (``random_state=42``), numeric features are
	    standardised, the remaining features are one-hot encoded, and the model
	    is evaluated on the held-out 20 %.

	    Args:
	        file: The uploaded CSV. Either an object exposing the on-disk path as
	            ``.name``, a path string, or an ``os.PathLike``.
	        task_type: ``"Regression"`` or ``"Classification"``.
	        model_name: Key into the registry matching *task_type*.

	    Returns:
	        A ``pandas.DataFrame`` with ``Metric``/``Value`` columns on success,
	        or a single-column ``Error`` table when the request cannot be
	        served (no file, no model, unknown model, no feature columns, or a
	        task type that does not match the detected target type).
	    """
	    if file is None:
	        return _error_table("Please upload a CSV file before training.")
	    if not model_name:
	        return _error_table("Please select a model before training.")

	    # Path-like inputs are used directly: ``pathlib.Path.name`` is only the
	    # basename, so blindly reading ``.name`` would lose the directory.
	    if isinstance(file, str) or hasattr(file, "__fspath__"):
	        csv_path = file
	    else:
	        csv_path = file.name
	    df = pd.read_csv(csv_path)

	    # Target = last column
	    X = df.iloc[:, :-1]
	    y = df.iloc[:, -1]

	    if X.shape[1] == 0:
	        return _error_table(
	            "The CSV needs at least one feature column besides the target."
	        )

	    # Mismatch validation
	    detected_task = detect_target_type(y)
	    if task_type != detected_task:
	        return _error_table(
	            f"Dataset target detected as {detected_task}, "
	            f"but {task_type} model selected."
	        )

	    # ---------- Model selection ----------
	    registry = (
	        REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
	    )
	    if model_name not in registry:
	        return _error_table(
	            f"Model '{model_name}' is not available for {task_type}."
	        )

	    # The registry holds shared module-level instances; fit an unfitted copy
	    # so one run never mutates the estimator another run is using.
	    from sklearn.base import clone

	    model = clone(registry[model_name])

	    # ---------- Automatic label encoding ----------
	    if task_type == "Classification" and not pd.api.types.is_numeric_dtype(y):
	        y = LabelEncoder().fit_transform(y)

	    X_train, X_test, y_train, y_test = train_test_split(
	        X, y, test_size=0.2, random_state=42
	    )

	    pipeline = Pipeline(
	        steps=[
	            ("preprocessing", _build_preprocessor(X)),
	            ("model", model),
	        ]
	    )
	    pipeline.fit(X_train, y_train)
	    preds = pipeline.predict(X_test)

	    # ---------- Metrics ----------
	    if task_type == "Regression":
	        metrics = _regression_metrics(y_test, preds)
	    else:
	        metrics = _classification_metrics(pipeline, X_test, y_test, preds)

	    # ---------- Metric table ----------
	    return pd.DataFrame(metrics.items(), columns=["Metric", "Value"])

	# ======================
	# Gradio UI
	# ======================
	with gr.Blocks() as app:
	    # ---------- Header ----------
	    gr.Markdown("## Supervised Learning Model Trainer")
	    gr.Markdown(
	        "• Upload CSV\n"
	        "• Last column is target\n"
	        "• Automatic preprocessing & metrics"
	    )

	    # ---------- Components ----------
	    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
	    csv_preview = gr.Dataframe(label="CSV Preview", interactive=False)
	    task_type = gr.Dropdown(
	        choices=["Regression", "Classification"],
	        value="Regression",
	        label="Task Type",
	    )
	    model_name = gr.Dropdown(label="Model")
	    output = gr.Dataframe(label="Evaluation Metrics")
	    run_btn = gr.Button("Train & Evaluate")

	    # ---------- Event wiring ----------
	    # A new upload refreshes the preview table and the task type.
	    file_input.change(fn=preview_csv, inputs=file_input, outputs=csv_preview)
	    file_input.change(fn=auto_set_task, inputs=file_input, outputs=task_type)

	    # The model list follows the task type, both on change and on page load.
	    task_type.change(fn=update_models, inputs=task_type, outputs=model_name)
	    app.load(fn=update_models, inputs=task_type, outputs=model_name)

	    # Training reads the file, task type and model, and fills the metrics table.
	    run_btn.click(
	        fn=train_model,
	        inputs=[file_input, task_type, model_name],
	        outputs=output,
	    )

	app.launch()