"""Gradio app: upload a CSV, pick a supervised model, train, and view metrics.

Conventions: the LAST column of the uploaded CSV is the target; all other
columns are features. Numeric features are standardized, categorical features
are one-hot encoded, and string/categorical classification targets are
label-encoded automatically.
"""

import warnings

import gradio as gr
import numpy as np
import pandas as pd

warnings.filterwarnings(action="ignore")

from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import (
    StandardScaler,
    OneHotEncoder,
    LabelEncoder,
)
from sklearn.metrics import (
    mean_absolute_error,
    mean_squared_error,
    r2_score,
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
)

# ======================
# Models
# ======================
from sklearn.linear_model import (
    LinearRegression,
    LogisticRegression,
    Perceptron,
)
from sklearn.neighbors import (
    KNeighborsClassifier,
    KNeighborsRegressor,
)
from sklearn.naive_bayes import GaussianNB
from sklearn.tree import (
    DecisionTreeClassifier,
    DecisionTreeRegressor,
)
from sklearn.svm import SVC, SVR
from sklearn.neural_network import (
    MLPClassifier,
    MLPRegressor,
)
from sklearn.utils.multiclass import type_of_target

# ======================
# Model Registry
# ======================
REGRESSION_MODELS = {
    "Linear Regression": LinearRegression(),
    "KNN Regressor": KNeighborsRegressor(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "SVR": SVR(),
    "MLP Regressor": MLPRegressor(max_iter=1000),
}

CLASSIFICATION_MODELS = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    "KNN Classifier": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Perceptron": Perceptron(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    # probability=True so predict_proba exists and ROC-AUC can be computed.
    "SVM Classifier": SVC(probability=True),
    "MLP Classifier": MLPClassifier(max_iter=1000),
}


# ======================
# UI Helpers
# ======================
def update_models(task_type):
    """Refresh the model dropdown choices to match the selected task type."""
    if task_type == "Regression":
        return gr.update(choices=list(REGRESSION_MODELS.keys()), value=None)
    else:
        return gr.update(choices=list(CLASSIFICATION_MODELS.keys()), value=None)


def preview_csv(file):
    """Return the uploaded CSV as a DataFrame for preview, or None if absent."""
    if file is None:
        return None
    return pd.read_csv(file.name)


def detect_target_type(y):
    """Heuristically classify the target series as 'Classification' or 'Regression'.

    String/categorical targets, and numeric targets with low cardinality
    (<= 20 distinct values), are treated as classification.
    """
    # Categorical target
    if y.dtype == "object" or y.dtype.name == "category":
        return "Classification"
    # Numeric but low cardinality → classification
    if y.nunique() <= 20:
        return "Classification"
    return "Regression"


def auto_set_task(file):
    """Auto-select the task type from the uploaded file's last column."""
    if file is None:
        return "Regression"
    df = pd.read_csv(file.name)
    y = df.iloc[:, -1]
    return detect_target_type(y)


# ======================
# Core Training Logic
# ======================
def train_model(file, task_type, model_name):
    """Train the chosen model on the uploaded CSV and return a metrics table.

    Parameters
    ----------
    file : gradio file object or None — uploaded CSV (last column = target).
    task_type : str — "Regression" or "Classification".
    model_name : str or None — key into the matching model registry.

    Returns
    -------
    pd.DataFrame — either a Metric/Value table or a single-column error table.
    """
    # Guard against missing inputs instead of raising a raw traceback in the UI.
    if file is None:
        return pd.DataFrame({"Error": ["No CSV file uploaded."]})
    if not model_name:
        return pd.DataFrame({"Error": ["No model selected."]})

    df = pd.read_csv(file.name)

    # Target = last column
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    detected_task = detect_target_type(y)

    # 🚫 Mismatch validation
    if task_type != detected_task:
        return pd.DataFrame(
            {
                "Error": [
                    f"Dataset target detected as {detected_task}, "
                    f"but {task_type} model selected."
                ]
            }
        )

    # ---------- Automatic label encoding ----------
    # Encode both object and category dtypes, mirroring detect_target_type.
    if task_type == "Classification" and (
        y.dtype == "object" or y.dtype.name == "category"
    ):
        y = LabelEncoder().fit_transform(y)

    # ---------- Feature preprocessing ----------
    # np.number covers every numeric dtype (int32/float32/etc.), not just
    # int64/float64, so no numeric feature column is silently dropped.
    num_cols = X.select_dtypes(include=[np.number]).columns
    cat_cols = X.select_dtypes(include=["object", "category"]).columns

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), num_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ]
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # ---------- Model selection ----------
    model = (
        REGRESSION_MODELS[model_name]
        if task_type == "Regression"
        else CLASSIFICATION_MODELS[model_name]
    )

    pipeline = Pipeline(
        steps=[
            ("preprocessing", preprocessor),
            ("model", model),
        ]
    )

    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    # ---------- Metrics ----------
    if task_type == "Regression":
        mse = mean_squared_error(y_test, preds)  # computed once, reused for RMSE
        metrics = {
            "MAE": mean_absolute_error(y_test, preds),
            "MSE": mse,
            "RMSE": np.sqrt(mse),
            "R²": r2_score(y_test, preds),
        }
    else:
        metrics = {
            "Accuracy": accuracy_score(y_test, preds),
            "Precision": precision_score(y_test, preds, average="weighted"),
            "Recall": recall_score(y_test, preds, average="weighted"),
            "F1 Score": f1_score(y_test, preds, average="weighted"),
        }

        # ROC-AUC (safe handling): only when the model exposes predict_proba.
        if hasattr(pipeline.named_steps["model"], "predict_proba"):
            probs = pipeline.predict_proba(X_test)
            target_type = type_of_target(y_test)

            # Binary classification
            if target_type == "binary":
                roc_auc = roc_auc_score(y_test, probs[:, 1])
                metrics["ROC-AUC"] = roc_auc

            # Multiclass classification
            elif target_type == "multiclass":
                roc_auc = roc_auc_score(
                    y_test,
                    probs,
                    multi_class="ovr",
                    average="weighted",
                )
                metrics["ROC-AUC"] = roc_auc

    # ---------- Metric table ----------
    result_df = pd.DataFrame(
        metrics.items(), columns=["Metric", "Value"]
    )
    return result_df


# ======================
# Gradio UI
# ======================
with gr.Blocks() as app:
    gr.Markdown("## Supervised Learning Model Trainer")
    gr.Markdown(
        "• Upload CSV\n"
        "• Last column is target\n"
        "• Automatic preprocessing & metrics"
    )

    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
    csv_preview = gr.Dataframe(
        label="CSV Preview",
        interactive=False,
    )

    task_type = gr.Dropdown(
        ["Regression", "Classification"], label="Task Type", value="Regression"
    )
    model_name = gr.Dropdown(label="Model")
    output = gr.Dataframe(label="Evaluation Metrics")
    run_btn = gr.Button("Train & Evaluate")

    file_input.change(
        preview_csv,
        inputs=file_input,
        outputs=csv_preview,
    )
    file_input.change(
        auto_set_task,
        inputs=file_input,
        outputs=task_type,
    )
    task_type.change(
        update_models, inputs=task_type, outputs=model_name
    )
    # Populate the model dropdown on first page load for the default task type.
    app.load(
        update_models,
        inputs=task_type,
        outputs=model_name,
    )
    run_btn.click(
        train_model,
        inputs=[file_input, task_type, model_name],
        outputs=output,
    )

app.launch()