| | import gradio as gr |
| | import pandas as pd |
| | import numpy as np |
| | import warnings |
| | warnings.filterwarnings(action="ignore") |
| |
|
| | from sklearn.model_selection import train_test_split |
| | from sklearn.pipeline import Pipeline |
| | from sklearn.compose import ColumnTransformer |
| | from sklearn.preprocessing import ( |
| | StandardScaler, |
| | OneHotEncoder, |
| | LabelEncoder, |
| | ) |
| | from sklearn.metrics import ( |
| | mean_absolute_error, |
| | mean_squared_error, |
| | r2_score, |
| | accuracy_score, |
| | precision_score, |
| | recall_score, |
| | f1_score, |
| | roc_auc_score, |
| | ) |
| |
|
| | |
| | |
| | |
| | from sklearn.linear_model import ( |
| | LinearRegression, |
| | LogisticRegression, |
| | Perceptron, |
| | ) |
| | from sklearn.neighbors import ( |
| | KNeighborsClassifier, |
| | KNeighborsRegressor, |
| | ) |
| | from sklearn.naive_bayes import GaussianNB |
| | from sklearn.tree import ( |
| | DecisionTreeClassifier, |
| | DecisionTreeRegressor, |
| | ) |
| | from sklearn.svm import SVC, SVR |
| | from sklearn.neural_network import ( |
| | MLPClassifier, |
| | MLPRegressor, |
| | ) |
| |
|
| | from sklearn.utils.multiclass import type_of_target |
| |
|
| | |
| | |
| | |
# Registry of regression estimators, keyed by the label offered in the model
# dropdown. Estimators with a random component are seeded with the same value
# as the train/test split (42) so that repeated runs on the same CSV report
# the same metrics.
REGRESSION_MODELS = {
    "Linear Regression": LinearRegression(),
    "KNN Regressor": KNeighborsRegressor(),
    "Decision Tree Regressor": DecisionTreeRegressor(random_state=42),
    "SVR": SVR(),
    "MLP Regressor": MLPRegressor(max_iter=1000, random_state=42),
}
| |
|
# Registry of classification estimators, keyed by the label offered in the
# model dropdown. Estimators with a random component are seeded with the same
# value as the train/test split (42) so that repeated runs on the same CSV
# report the same metrics.
CLASSIFICATION_MODELS = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    "KNN Classifier": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Perceptron": Perceptron(),
    "Decision Tree Classifier": DecisionTreeClassifier(random_state=42),
    # probability=True enables predict_proba, which the ROC-AUC metric needs;
    # the internal probability calibration is randomized, hence the seed.
    "SVM Classifier": SVC(probability=True, random_state=42),
    "MLP Classifier": MLPClassifier(max_iter=1000, random_state=42),
}
| |
|
| | |
| | |
| | |
def update_models(task_type):
    """Build the dropdown update listing the models for ``task_type``.

    Args:
        task_type: ``"Regression"`` selects the regression registry; any
            other value selects the classification registry.

    Returns:
        A ``gr.update`` payload carrying the model labels as choices and
        clearing the current selection.
    """
    registry = (
        REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
    )
    return gr.update(choices=list(registry), value=None)
| |
|
def preview_csv(file):
    """Load the uploaded CSV so it can be shown in the preview table.

    Args:
        file: ``None`` when nothing is uploaded, a path string, or an object
            exposing the path through a ``name`` attribute.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when ``file`` is ``None``.
    """
    if file is None:
        return None
    # The upload may arrive as a plain path string or as a wrapper object
    # carrying the path in ``.name``; accept both.
    path = file if isinstance(file, str) else file.name
    return pd.read_csv(path)
| |
|
| |
|
def detect_target_type(y, max_classes=20):
    """Guess whether target column ``y`` is a classification or regression target.

    Args:
        y: ``pandas.Series`` holding the target values.
        max_classes: Largest number of distinct values for which a numeric
            target is still treated as a set of class labels.

    Returns:
        ``"Classification"`` or ``"Regression"``.
    """
    dtype = y.dtype

    # Text-like and categorical targets are labels no matter how many distinct
    # values they hold. Checking by kind also covers the dedicated pandas
    # string dtype, which does not compare equal to "object".
    is_label_like = (
        pd.api.types.is_object_dtype(dtype)
        or isinstance(dtype, pd.CategoricalDtype)
        or pd.api.types.is_string_dtype(dtype)
    )
    if is_label_like:
        return "Classification"

    # Floats with a fractional part are continuous measurements even when
    # only a few distinct values occur (e.g. a small dataset); classifiers
    # would reject them as labels.
    if pd.api.types.is_float_dtype(dtype):
        observed = y.dropna()
        if not observed.eq(observed.round()).all():
            return "Regression"

    if y.nunique() <= max_classes:
        return "Classification"

    return "Regression"
| |
|
| |
|
def auto_set_task(file):
    """Pick the task type that matches the last column of the uploaded CSV.

    Args:
        file: ``None`` when nothing is uploaded, a path string, or an object
            exposing the path through a ``name`` attribute.

    Returns:
        ``"Classification"`` or ``"Regression"``. ``"Regression"`` is returned
        when no file is given or the file contains no data.
    """
    default_task = "Regression"
    if file is None:
        return default_task

    # The upload may arrive as a plain path string or as a wrapper object
    # carrying the path in ``.name``; accept both.
    path = file if isinstance(file, str) else file.name
    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # Nothing to inspect in an empty file; keep the default selection
        # instead of failing inside the UI callback.
        return default_task

    return detect_target_type(df.iloc[:, -1])
| |
|
| |
|
| | |
| | |
| | |
def _error_frame(message):
    """Wrap ``message`` in the single-column frame used to report errors."""
    return pd.DataFrame({"Error": [message]})


def _build_preprocessor(X):
    """Scale the numeric columns of ``X`` and one-hot encode all the others."""
    # Select by kind rather than by exact dtype name so that columns such as
    # int32, float32, bool or string are routed to a transformer instead of
    # being left out of both lists.
    num_cols = X.select_dtypes(include="number").columns
    cat_cols = X.select_dtypes(exclude="number").columns
    return ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), num_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ]
    )


def _regression_metrics(y_true, y_pred):
    """Compute MAE, MSE, RMSE and R² for regression predictions."""
    mse = mean_squared_error(y_true, y_pred)
    return {
        "MAE": mean_absolute_error(y_true, y_pred),
        "MSE": mse,
        "RMSE": np.sqrt(mse),
        "R²": r2_score(y_true, y_pred),
    }


def _classification_metrics(pipeline, X_test, y_test, preds):
    """Compute weighted classification metrics, plus ROC-AUC when available."""
    metrics = {
        "Accuracy": accuracy_score(y_test, preds),
        "Precision": precision_score(y_test, preds, average="weighted"),
        "Recall": recall_score(y_test, preds, average="weighted"),
        "F1 Score": f1_score(y_test, preds, average="weighted"),
    }

    estimator = pipeline.named_steps["model"]
    if not hasattr(estimator, "predict_proba"):
        return metrics

    probs = pipeline.predict_proba(X_test)
    # Decide binary vs. multiclass from the classes the model was fitted on:
    # the probability columns follow those, not the labels that happen to be
    # present in the test split.
    classes = estimator.classes_
    try:
        if len(classes) == 2:
            metrics["ROC-AUC"] = roc_auc_score(y_test, probs[:, 1])
        elif len(classes) > 2:
            metrics["ROC-AUC"] = roc_auc_score(
                y_test,
                probs,
                multi_class="ovr",
                average="weighted",
                labels=classes,
            )
    except ValueError:
        # ROC-AUC is undefined when a class is missing from the test split;
        # report it as NaN rather than discarding the other metrics.
        metrics["ROC-AUC"] = np.nan
    return metrics


def train_model(file, task_type, model_name):
    """Train the selected model on the uploaded CSV and report test metrics.

    The last CSV column is the target and all preceding columns are features.
    A fixed 80/20 train/test split (seed 42) is used.

    Args:
        file: ``None`` when nothing is uploaded, a path string, or an object
            exposing the path through a ``name`` attribute.
        task_type: ``"Regression"`` or ``"Classification"``.
        model_name: Key into ``REGRESSION_MODELS`` or ``CLASSIFICATION_MODELS``.

    Returns:
        A ``pandas.DataFrame`` with ``Metric`` and ``Value`` columns on
        success, or a one-row frame with a single ``Error`` column when the
        inputs are missing or inconsistent.
    """
    if file is None:
        return _error_frame("Please upload a CSV file first.")
    # The model dropdown is cleared on every task change, so it can be empty.
    if not model_name:
        return _error_frame("Please select a model first.")

    path = file if isinstance(file, str) else file.name
    df = pd.read_csv(path)
    if df.shape[1] < 2:
        return _error_frame(
            "CSV must contain at least one feature column and a target column."
        )

    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    detected_task = detect_target_type(y)
    if task_type != detected_task:
        return _error_frame(
            f"Dataset target detected as {detected_task}, "
            f"but {task_type} model selected."
        )

    registry = (
        REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
    )
    if model_name not in registry:
        return _error_frame(
            f"Model '{model_name}' is not available for {task_type}."
        )

    # Encode every non-numeric label column, not only dtype "object".
    if task_type == "Classification" and not pd.api.types.is_numeric_dtype(y):
        y = LabelEncoder().fit_transform(y)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Imported here so this block does not depend on an edit to the file's
    # import section. Fitting a clone keeps the shared registry estimator
    # unfitted, so separate requests never refit the same object.
    from sklearn.base import clone

    pipeline = Pipeline(
        steps=[
            ("preprocessing", _build_preprocessor(X)),
            ("model", clone(registry[model_name])),
        ]
    )
    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    if task_type == "Regression":
        metrics = _regression_metrics(y_test, preds)
    else:
        metrics = _classification_metrics(pipeline, X_test, y_test, preds)

    return pd.DataFrame(list(metrics.items()), columns=["Metric", "Value"])
| |
|
| | |
| | |
| | |
# Page layout and event wiring. Components render in the order they are
# created; handlers are registered afterwards.
with gr.Blocks() as app:
    gr.Markdown(value="## Supervised Learning Model Trainer")
    gr.Markdown(
        value="\n".join(
            (
                "• Upload CSV",
                "• Last column is target",
                "• Automatic preprocessing & metrics",
            )
        )
    )

    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
    csv_preview = gr.Dataframe(label="CSV Preview", interactive=False)
    task_type = gr.Dropdown(
        choices=["Regression", "Classification"],
        label="Task Type",
        value="Regression",
    )
    model_name = gr.Dropdown(label="Model")
    output = gr.Dataframe(label="Evaluation Metrics")
    run_btn = gr.Button(value="Train & Evaluate")

    # A new upload refreshes the preview and re-detects the task type.
    file_input.change(fn=preview_csv, inputs=file_input, outputs=csv_preview)
    file_input.change(fn=auto_set_task, inputs=file_input, outputs=task_type)

    # The model list follows the task type, both on change and on page load.
    task_type.change(fn=update_models, inputs=task_type, outputs=model_name)
    app.load(fn=update_models, inputs=task_type, outputs=model_name)

    run_btn.click(
        fn=train_model,
        inputs=[file_input, task_type, model_name],
        outputs=output,
    )

app.launch()