anyonehomep1mane committed on
Commit
d7e53e8
·
0 Parent(s):

Initial Changes

Browse files
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ .env
2
+ .vscode
3
+ venv
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import warnings
3
+ warnings.filterwarnings("ignore")
4
+
5
+ from ui.helpers import (
6
+ update_models,
7
+ update_graphs,
8
+ preview_csv,
9
+ reset_metrics_on_file_clear,
10
+ toggle_csv_preview
11
+ )
12
+
13
+ from ui.theme import OrangeRedTheme
14
+ from ui.styles import CSS_STYLE
15
+
16
+ from core.training import train_model
17
+ from core.detection import auto_set_task
18
+
19
# Assemble the UI. Components are created in display order; event wiring follows.
with gr.Blocks() as app:
    with gr.Column(elem_id="container"):
        gr.Markdown("## Supervised Learning Model Trainer")

        # Inputs: dataset upload plus task / model / graph selectors.
        with gr.Row(equal_height=True):
            with gr.Column():
                file_input = gr.File(label="Upload CSV", file_types=[".csv"])
                show_preview = gr.Checkbox(label="Show CSV Preview", value=False)
                task_type = gr.Dropdown(
                    choices=["Regression", "Classification"],
                    label="Task Type",
                    value="Regression",
                )
                # Choices for these two are filled in by update_models / update_graphs.
                model_name = gr.Dropdown(label="Model")
                graph_type = gr.Dropdown(label="Graph Type")

        with gr.Row(equal_height=True):
            run_btn = gr.Button("Train & Evaluate", variant="primary", size="lg")

        # Outputs: optional CSV preview (hidden by default) and the metrics table.
        with gr.Row(equal_height=True):
            with gr.Column():
                csv_preview = gr.Dataframe(
                    label="CSV Preview",
                    interactive=False,
                    visible=False,
                )
                output = gr.Dataframe(label="Evaluation Metrics", interactive=False)

        with gr.Row(equal_height=True):
            with gr.Column():
                plot = gr.Plot(label="Selected Graph")

        # A new (or cleared) upload refreshes the preview, re-detects the task
        # and clears stale results.
        file_input.change(fn=preview_csv, inputs=file_input, outputs=csv_preview)
        file_input.change(fn=auto_set_task, inputs=file_input, outputs=task_type)
        file_input.change(
            fn=reset_metrics_on_file_clear,
            inputs=file_input,
            outputs=[output, plot],
        )

        # The model and graph menus depend on the selected task.
        task_type.change(fn=update_models, inputs=task_type, outputs=model_name)
        task_type.change(fn=update_graphs, inputs=task_type, outputs=graph_type)

        show_preview.change(
            fn=toggle_csv_preview,
            inputs=show_preview,
            outputs=csv_preview,
        )

        # Populate both menus once when the page first loads.
        app.load(fn=update_models, inputs=task_type, outputs=model_name)
        app.load(fn=update_graphs, inputs=task_type, outputs=graph_type)

        run_btn.click(
            fn=train_model,
            inputs=[file_input, task_type, model_name, graph_type],
            outputs=[output, plot],
        )


if __name__ == "__main__":
    # NOTE(review): `theme` and `css` are passed to launch() here; some Gradio
    # releases expect them on gr.Blocks() instead - confirm against the
    # installed version (requirements.txt only pins gradio>=4.0.0).
    app.queue().launch(
        theme=OrangeRedTheme(),
        css=CSS_STYLE,
        show_error=True,
        server_name="0.0.0.0",
        server_port=7860,
        debug=True,
    )
core/__init__.py ADDED
File without changes
core/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (165 Bytes). View file
 
core/__pycache__/detection.cpython-310.pyc ADDED
Binary file (633 Bytes). View file
 
core/__pycache__/training.cpython-310.pyc ADDED
Binary file (1.8 kB). View file
 
core/__pycache__/visuals.cpython-310.pyc ADDED
Binary file (2.13 kB). View file
 
core/detection.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+
3
def detect_target_type(y):
    """Guess whether a target column describes a classification or regression task.

    Args:
        y: pandas Series holding the target values.

    Returns:
        "Classification" when the column is non-numeric (object, category,
        pandas string dtype, ...) or has at most 20 distinct values;
        otherwise "Regression".
    """
    # Any non-numeric dtype is categorical by nature. Checking only for
    # "object"/"category" misses the pandas string dtype, which would then be
    # labelled "Regression" as soon as it had more than 20 distinct labels.
    if not pd.api.types.is_numeric_dtype(y.dtype):
        return "Classification"

    # Numeric but low cardinality: treat the values as class labels.
    if y.nunique() <= 20:
        return "Classification"

    return "Regression"
11
+
12
+
13
def auto_set_task(file):
    """Pick the task type for an uploaded CSV from its last column.

    Args:
        file: uploaded file; either an object exposing the path as ``.name``
            or a plain path string. ``None`` means no file is selected.

    Returns:
        "Regression" or "Classification". Falls back to "Regression" when
        there is no file or the file contains no columns to inspect.
    """
    if file is None:
        return "Regression"

    # Accept both upload objects (path in .name) and bare path strings.
    path = getattr(file, "name", file)

    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # Empty upload: nothing to detect, keep the default instead of crashing
        # the change handler.
        return "Regression"

    if df.shape[1] == 0:
        return "Regression"

    # The last column is treated as the target.
    return detect_target_type(df.iloc[:, -1])
core/training.py ADDED
@@ -0,0 +1,68 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from sklearn.pipeline import Pipeline
3
+ from sklearn.model_selection import train_test_split
4
+ from sklearn.preprocessing import LabelEncoder
5
+
6
+ from core.detection import detect_target_type
7
+ from models.registry import REGRESSION_MODELS, CLASSIFICATION_MODELS
8
+ from preprocessing.transformers import build_preprocessor
9
+ from utils.metrics import regression_metrics, classification_metrics
10
+ from core.visuals import regression_graphs, classification_graphs
11
+
12
+
13
def train_model(file, task_type, model_name, graph_type):
    """Train the selected model on an uploaded CSV and report metrics plus a graph.

    The last CSV column is the target; all other columns are features.
    The data is split 80/20 with a fixed random seed.

    Args:
        file: uploaded file object exposing its path as ``.name``, or None.
        task_type: "Regression" or "Classification".
        model_name: key into the matching model registry.
        graph_type: name of the graph to draw for the fitted model.

    Returns:
        A ``(DataFrame, figure)`` tuple. On success the DataFrame has
        "Metric"/"Value" columns; on a validation problem it has a single
        "Error" column and the figure is None.
    """
    if file is None:
        return pd.DataFrame({"Error": ["Please upload a csv file first."]}), None

    if not model_name:
        return pd.DataFrame({"Error": ["Please select a model first."]}), None

    # Imported here so the early validation paths above need nothing but pandas.
    from sklearn.base import clone

    df = pd.read_csv(file.name)

    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    detected_task = detect_target_type(y)

    if task_type != detected_task:
        return pd.DataFrame({
            "Error": [f"Detected {detected_task} target, but {task_type} selected."]
        }), None

    registry = REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
    if model_name not in registry:
        # The model dropdown can briefly lag behind a task change.
        return pd.DataFrame({
            "Error": [f"Model '{model_name}' is not available for {task_type}."]
        }), None

    # Encode any non-numeric class labels (object, category, string) as integers.
    if task_type == "Classification" and not pd.api.types.is_numeric_dtype(y):
        y = LabelEncoder().fit_transform(y)

    preprocessor = build_preprocessor(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # Fit a fresh copy: the registry holds shared estimator instances, and
    # fitting them in place would leak state between runs and between users.
    model = clone(registry[model_name])

    pipeline = Pipeline([
        ("preprocessing", preprocessor),
        ("model", model),
    ])

    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    if task_type == "Regression":
        metrics = regression_metrics(y_test, preds)
        fig = regression_graphs(graph_type, X, y, model, pipeline, y_test, preds)
    else:
        metrics = classification_metrics(pipeline, X_test, y_test, preds)
        fig = classification_graphs(graph_type, pipeline, X_test, y_test, preds)

    metrics_df = pd.DataFrame(metrics.items(), columns=["Metric", "Value"])

    return metrics_df, fig
core/visuals.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import matplotlib.pyplot as plt
2
+ import pandas as pd
3
+ from sklearn.model_selection import learning_curve
4
+ from sklearn.metrics import (
5
+ roc_curve, precision_recall_curve,
6
+ confusion_matrix, classification_report
7
+ )
8
+
9
def regression_graphs(graph_type, X, y, model, pipeline, y_test, preds):
    """Build the requested diagnostic figure for a fitted regression pipeline.

    Args:
        graph_type: one of "Actual vs Predicted", "Residual Plot",
            "Residual Histogram", "Feature Importance", "Learning Curve".
        X, y: full feature table and target (used only by the learning curve).
        model: the estimator inside the pipeline (checked for
            ``feature_importances_``).
        pipeline: the pipeline passed to ``learning_curve``.
        y_test: held-out target values (must expose ``.values``).
        preds: predictions for the held-out rows.

    Returns:
        A matplotlib figure, or None when the graph type is unknown/unset or
        the model exposes no feature importances.
    """
    # Default result: without it an unknown or None graph_type hit
    # `return fig` with the name unbound and raised UnboundLocalError.
    fig = None

    if graph_type == "Actual vs Predicted":
        fig, ax = plt.subplots()
        # Only the first 100 points are drawn.
        ax.plot(y_test.values[:100])
        ax.plot(preds[:100])
        ax.legend(["Actual", "Predicted"])
    elif graph_type == "Residual Plot":
        fig, ax = plt.subplots()
        ax.scatter(preds, y_test - preds)
        ax.axhline(0)
    elif graph_type == "Residual Histogram":
        fig, ax = plt.subplots()
        ax.hist(y_test - preds, bins=30)
    elif graph_type == "Feature Importance":
        # Only some estimators expose importances; others yield no figure.
        if hasattr(model, "feature_importances_"):
            importances = model.feature_importances_
            fig, ax = plt.subplots()
            ax.bar(range(len(importances)), importances)
    elif graph_type == "Learning Curve":
        sizes, train_scores, test_scores = learning_curve(pipeline, X, y)
        fig, ax = plt.subplots()
        ax.plot(sizes, train_scores.mean(axis=1))
        ax.plot(sizes, test_scores.mean(axis=1))
        ax.legend(["Train", "Test"])

    return fig
37
+
38
def _notice_figure(message):
    """Return an empty figure showing *message*, for graphs that cannot be drawn."""
    fig, ax = plt.subplots()
    ax.axis("off")
    ax.text(0.5, 0.5, message, ha="center", va="center", wrap=True)
    return fig


def classification_graphs(graph_type, pipeline, X_test, y_test, preds):
    """Build the requested diagnostic figure for a fitted classification pipeline.

    Args:
        graph_type: one of "Confusion Matrix", "ROC Curve",
            "Per-Class Metrics Table", "Precision-Recall Curve",
            "Probability Histogram".
        pipeline: fitted pipeline; probability-based graphs need
            ``predict_proba``.
        X_test, y_test: held-out features and true labels.
        preds: predicted labels for the held-out rows.

    Returns:
        A matplotlib figure, or None when the graph type is unknown/unset.
    """
    # Default result so an unknown or None graph_type no longer raises
    # UnboundLocalError at the final return.
    fig = None

    if graph_type == "Confusion Matrix":
        cm = confusion_matrix(y_test, preds)
        fig, ax = plt.subplots()
        ax.imshow(cm)
        ax.set_title("Confusion Matrix")

    elif graph_type == "Per-Class Metrics Table":
        report = pd.DataFrame(
            classification_report(y_test, preds, output_dict=True)
        ).transpose()
        # Rendered as a figure: the caller sends the result to a plot
        # component, which cannot display a raw DataFrame.
        cell_text = [[f"{value:.3f}" for value in row] for row in report.to_numpy()]
        fig, ax = plt.subplots()
        ax.axis("off")
        ax.table(
            cellText=cell_text,
            rowLabels=[str(label) for label in report.index],
            colLabels=list(report.columns),
            loc="center",
        )
        ax.set_title("Per-Class Metrics")

    elif graph_type in ("ROC Curve", "Precision-Recall Curve", "Probability Histogram"):
        # Not every classifier can produce probabilities (e.g. Perceptron).
        if not hasattr(pipeline, "predict_proba"):
            return _notice_figure(
                f"{graph_type} needs class probabilities, "
                "which the selected model does not provide."
            )

        probs = pipeline.predict_proba(X_test)
        fig, ax = plt.subplots()

        if graph_type == "Probability Histogram":
            ax.hist(probs[:, 1], bins=20)
            ax.set_title("Prediction Probability Histogram")
        else:
            classes = list(pipeline.classes_)
            truth = pd.Series(list(y_test))
            # Binary: one curve for the positive class (column 1).
            # Multiclass: one one-vs-rest curve per class, because the curve
            # functions only accept binary targets.
            columns = [1] if len(classes) == 2 else range(len(classes))
            for col in columns:
                is_positive = (truth == classes[col]).to_numpy()
                if graph_type == "ROC Curve":
                    fpr, tpr, _ = roc_curve(is_positive, probs[:, col])
                    ax.plot(fpr, tpr, label=str(classes[col]))
                else:
                    precision, recall, _ = precision_recall_curve(
                        is_positive, probs[:, col]
                    )
                    ax.plot(recall, precision, label=str(classes[col]))
            if len(classes) > 2:
                ax.legend(title="Class (one-vs-rest)")
            ax.set_title(graph_type)

    return fig
models/__init__.py ADDED
File without changes
models/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (167 Bytes). View file
 
models/__pycache__/registry.cpython-310.pyc ADDED
Binary file (1.29 kB). View file
 
models/registry.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.linear_model import LinearRegression, LogisticRegression, Perceptron
2
+ from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
3
+ from sklearn.naive_bayes import GaussianNB
4
+ from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
5
+ from sklearn.svm import SVC, SVR
6
+ from sklearn.neural_network import MLPClassifier, MLPRegressor
7
+
8
# Display name -> estimator instance for regression tasks.
# The instances are created once, when this module is imported.
# Order matters: the UI helpers pre-select the first entry.
REGRESSION_MODELS = {
    "Linear Regression": LinearRegression(),
    "KNN Regressor": KNeighborsRegressor(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "SVR": SVR(),
    "MLP Regressor": MLPRegressor(max_iter=1000),
}

# Display name -> estimator instance for classification tasks.
CLASSIFICATION_MODELS = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    "KNN Classifier": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Perceptron": Perceptron(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    "SVM Classifier": SVC(probability=True),
    "MLP Classifier": MLPClassifier(max_iter=1000),
}

# Graph names offered for classification; each string is matched verbatim
# by core.visuals.classification_graphs.
CLASSIFICATION_GRAPHS = [
    "Confusion Matrix",
    "ROC Curve",
    "Per-Class Metrics Table",
    "Precision-Recall Curve",
    "Probability Histogram",
]

# Graph names offered for regression; each string is matched verbatim
# by core.visuals.regression_graphs.
REGRESSION_GRAPHS = [
    "Actual vs Predicted",
    "Residual Plot",
    "Residual Histogram",
    "Feature Importance",
    "Learning Curve",
]
preprocessing/__init__.py ADDED
File without changes
preprocessing/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (174 Bytes). View file
 
preprocessing/__pycache__/transformers.cpython-310.pyc ADDED
Binary file (663 Bytes). View file
 
preprocessing/transformers.py ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from sklearn.compose import ColumnTransformer
2
+ from sklearn.preprocessing import StandardScaler, OneHotEncoder
3
+
4
def build_preprocessor(X):
    """Create the feature preprocessing step for the columns of *X*.

    Numeric columns are standardised; every other column is one-hot encoded
    (categories unseen during fitting are ignored at prediction time).

    Args:
        X: pandas DataFrame of features.

    Returns:
        An unfitted ``ColumnTransformer``.
    """
    # Select by kind rather than by exact dtype name: listing only
    # int64/float64 and object/category left int32, float32, bool and
    # string-dtype columns in neither group, so the transformer silently
    # dropped them.
    num_cols = X.select_dtypes(include="number").columns
    cat_cols = X.select_dtypes(exclude="number").columns

    return ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), num_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ]
    )
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ gradio>=4.0.0
2
+ pandas
3
+ scikit-learn
4
+ numpy
5
+ matplotlib
ui/__init__.py ADDED
File without changes
ui/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (163 Bytes). View file
 
ui/__pycache__/helpers.cpython-310.pyc ADDED
Binary file (1.4 kB). View file
 
ui/__pycache__/styles.cpython-310.pyc ADDED
Binary file (362 Bytes). View file
 
ui/__pycache__/theme.cpython-310.pyc ADDED
Binary file (1.49 kB). View file
 
ui/helpers.py ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from models.registry import REGRESSION_MODELS, CLASSIFICATION_MODELS, REGRESSION_GRAPHS, CLASSIFICATION_GRAPHS
4
+
5
+
6
def update_models(task_type):
    """Return a dropdown update listing the models for *task_type*.

    "Regression" selects the regression registry; anything else selects the
    classification registry. The first model is pre-selected when one exists.
    """
    registry = REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
    names = list(registry)
    default = names[0] if names else None
    return gr.update(choices=names, value=default)
16
+
17
+
18
def update_graphs(task_type):
    """Return a dropdown update listing the graphs for *task_type*.

    "Regression" selects the regression graphs; anything else selects the
    classification graphs. The first graph is pre-selected.
    """
    if task_type == "Regression":
        options = REGRESSION_GRAPHS
    else:
        options = CLASSIFICATION_GRAPHS

    return gr.update(choices=options, value=options[0])
29
+
30
+ import os
31
+
32
def preview_csv(file, max_rows=50):
    """Load the first *max_rows* rows of an uploaded CSV for display.

    Args:
        file: uploaded file object exposing its path as ``.name``; falsy when
            nothing is uploaded.
        max_rows: maximum number of rows to read.

    Returns:
        A DataFrame with the leading rows, or None when there is no file or
        the file is larger than 50 MB.
    """
    if not file:
        return None

    bytes_per_mb = 1024 * 1024
    file_size_mb = os.path.getsize(file.name) / bytes_per_mb

    # Files above 50 MB get no preview.
    if file_size_mb <= 50:
        return pd.read_csv(file.name, nrows=max_rows)
    return None
42
+
43
+
44
+
45
def reset_metrics_on_file_clear(file):
    """Clear the metrics table and plot when the upload is removed.

    Returns a pair of values for the two outputs: no-op updates while a file
    is present, or an empty DataFrame and None once the file is cleared.
    """
    if file is not None:
        # A file is still selected: leave both outputs untouched.
        return gr.update(), gr.update()
    return pd.DataFrame(), None
49
+
50
+
51
def toggle_csv_preview(show):
    """Return a component update whose visibility follows the checkbox value."""
    visibility_update = gr.update(visible=show)
    return visibility_update
ui/styles.py ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Custom CSS for the app.
# - #container: matches the elem_id of the outer column in app.py; caps the
#   layout width and centres it, with a wider cap on large screens.
# - #title h1: NOTE(review) no component with elem_id "title" is visible in
#   app.py - confirm whether this rule is still used.
CSS_STYLE = """
#container {
    max-width: 1280px;
    margin: auto;
}

@media (min-width: 1600px) {
    #container {
        max-width: 1440px;
    }
}

#title h1 {
    font-size: 2.4em !important;
}
"""
ui/theme.py ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from gradio.themes import Soft
2
+ from gradio.themes.utils import colors, fonts, sizes
3
+
4
# Register a custom orange-red palette on gradio's colors module so the theme
# below can refer to it like a built-in hue. Shades run from lightest (c50)
# to darkest (c950).
colors.orange_red = colors.Color(
    name="orange_red",
    c50="#FFF0E5",
    c100="#FFE0CC",
    c200="#FFC299",
    c300="#FFA366",
    c400="#FF8533",
    c500="#FF4500",
    c600="#E63E00",
    c700="#CC3700",
    c800="#B33000",
    c900="#992900",
    c950="#802200",
)
10
+
11
class OrangeRedTheme(Soft):
    """Soft-based theme using the orange-red palette for primary and secondary hues."""

    def __init__(self):
        accent = colors.orange_red
        body_font = (fonts.GoogleFont("Outfit"), "Arial", "sans-serif")
        mono_font = (fonts.GoogleFont("IBM Plex Mono"), "monospace")

        super().__init__(
            primary_hue=accent,
            secondary_hue=accent,
            neutral_hue=colors.slate,
            text_size=sizes.text_lg,
            font=body_font,
            font_mono=mono_font,
        )

        # Override individual theme variables: gradient backgrounds, white
        # text on primary buttons, and heavier block borders/shadows.
        self.set(
            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
            button_primary_text_color="white",
            block_border_width="3px",
            block_shadow="*shadow_drop_lg",
        )
utils/__init__.py ADDED
File without changes
utils/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (166 Bytes). View file
 
utils/__pycache__/metrics.cpython-310.pyc ADDED
Binary file (1.25 kB). View file
 
utils/metrics.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ from sklearn.metrics import (
3
+ mean_absolute_error,
4
+ mean_squared_error,
5
+ r2_score,
6
+ accuracy_score,
7
+ precision_score,
8
+ recall_score,
9
+ f1_score,
10
+ roc_auc_score,
11
+ )
12
+
13
+ from sklearn.utils.multiclass import type_of_target
14
+
15
def regression_metrics(y_true, preds):
    """Compute MAE, MSE, RMSE and R² for regression predictions.

    Returns:
        Dict mapping metric name to value, in the order MAE, MSE, RMSE, R².
    """
    mae = mean_absolute_error(y_true, preds)
    mse = mean_squared_error(y_true, preds)
    rmse = np.sqrt(mse)  # RMSE is the square root of the MSE computed above
    r2 = r2_score(y_true, preds)

    return {"MAE": mae, "MSE": mse, "RMSE": rmse, "R²": r2}
22
+
23
+
24
def classification_metrics(pipeline, X_test, y_test, preds):
    """Compute weighted classification metrics, plus ROC-AUC when available.

    Args:
        pipeline: fitted pipeline whose final step is named "model".
        X_test, y_test: held-out features and true labels.
        preds: predicted labels for the held-out rows.

    Returns:
        Dict with "Accuracy", "Precision", "Recall" and "F1 Score"
        (weighted averages). "ROC-AUC" is added only when the model provides
        ``predict_proba`` and the score is defined for this test split.
    """
    metrics = {
        "Accuracy": accuracy_score(y_test, preds),
        # zero_division=0 keeps the default result (0 for classes that were
        # never predicted) without emitting a warning for each such class.
        "Precision": precision_score(y_test, preds, average="weighted", zero_division=0),
        "Recall": recall_score(y_test, preds, average="weighted", zero_division=0),
        "F1 Score": f1_score(y_test, preds, average="weighted", zero_division=0),
    }

    model = pipeline.named_steps["model"]
    if hasattr(model, "predict_proba"):
        probs = pipeline.predict_proba(X_test)
        # Branch on the classes the model was trained on, not on y_test: the
        # test split may be missing a class, which would otherwise mismatch
        # the number of probability columns.
        classes = model.classes_

        try:
            if len(classes) == 2:
                metrics["ROC-AUC"] = roc_auc_score(y_test, probs[:, 1])
            elif len(classes) > 2:
                metrics["ROC-AUC"] = roc_auc_score(
                    y_test,
                    probs,
                    multi_class="ovr",
                    average="weighted",
                    labels=classes,
                )
        except ValueError:
            # ROC-AUC is undefined when a class has no samples in the test
            # split; report the remaining metrics instead of failing the run.
            pass

    return metrics
version_1_app.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+
4
+ from sklearn.model_selection import train_test_split
5
+ from sklearn.metrics import (
6
+ accuracy_score,
7
+ mean_absolute_error,
8
+ mean_squared_error,
9
+ r2_score,
10
+ )
11
+ import numpy as np
12
+
13
+ # ======================
14
+ # Model imports
15
+ # ======================
16
+ from sklearn.linear_model import (
17
+ LinearRegression,
18
+ LogisticRegression,
19
+ Perceptron,
20
+ )
21
+ from sklearn.neighbors import (
22
+ KNeighborsClassifier,
23
+ KNeighborsRegressor,
24
+ )
25
+ from sklearn.naive_bayes import GaussianNB
26
+ from sklearn.tree import (
27
+ DecisionTreeClassifier,
28
+ DecisionTreeRegressor,
29
+ )
30
+ from sklearn.svm import SVC, SVR
31
+ from sklearn.neural_network import (
32
+ MLPClassifier,
33
+ MLPRegressor,
34
+ )
35
+
36
+ # ======================
37
+ # Model registry
38
+ # ======================
39
# Display name -> estimator CLASS for regression tasks.
# train_model instantiates the class with default parameters on every run.
REGRESSION_MODELS = {
    "Linear Regression": LinearRegression,
    "KNN Regressor": KNeighborsRegressor,
    "Decision Tree Regressor": DecisionTreeRegressor,
    "SVR": SVR,
    "MLP Regressor": MLPRegressor,
}

# Display name -> estimator CLASS for classification tasks.
CLASSIFICATION_MODELS = {
    "Logistic Regression": LogisticRegression,
    "KNN Classifier": KNeighborsClassifier,
    "Naive Bayes": GaussianNB,
    "Perceptron": Perceptron,
    "Decision Tree Classifier": DecisionTreeClassifier,
    "SVM Classifier": SVC,
    "MLP Classifier": MLPClassifier,
}
56
+
57
+ # ======================
58
+ # UI Logic
59
+ # ======================
60
def update_models(task_type):
    """Return a dropdown update listing the models for *task_type*, with no selection."""
    if task_type == "Regression":
        registry = REGRESSION_MODELS
    else:
        registry = CLASSIFICATION_MODELS
    return gr.update(choices=list(registry.keys()), value=None)
64
+
65
+
66
def train_model(file, task_type, model_name):
    """Train the chosen model on an uploaded CSV and return a text report.

    The last CSV column is the target; the data is split 80/20 with a fixed
    random seed.

    Args:
        file: uploaded file object exposing its path as ``.name``, or None.
        task_type: "Regression" selects a regression model; anything else
            selects a classification model.
        model_name: key into the matching model registry.

    Returns:
        A multi-line string with the metrics, or a short prompt when the file
        or model selection is missing.
    """
    # Pressing the button before choosing a file or model used to crash with
    # AttributeError / KeyError; answer with a readable message instead.
    if file is None:
        return "Please upload a CSV file first."
    if not model_name:
        return "Please select a model first."

    df = pd.read_csv(file.name)

    # Assumption: last column is target
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    if task_type == "Regression":
        model = REGRESSION_MODELS[model_name]()
        model.fit(X_train, y_train)

        preds = model.predict(X_test)

        mae = mean_absolute_error(y_test, preds)
        mse = mean_squared_error(y_test, preds)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, preds)

        return (
            f"Model: {model_name}\n"
            f"Task: Regression\n"
            f"MAE: {mae:.4f}\n"
            f"MSE: {mse:.4f}\n"
            f"RMSE: {rmse:.4f}\n"
            f"R² Score: {r2:.4f}"
        )

    model = CLASSIFICATION_MODELS[model_name]()
    model.fit(X_train, y_train)

    preds = model.predict(X_test)
    acc = accuracy_score(y_test, preds)

    return (
        f"Model: {model_name}\n"
        f"Task: Classification\n"
        f"Accuracy: {acc:.4f}"
    )
109
+
110
+
111
+ # ======================
112
+ # Gradio App
113
+ # ======================
114
# Minimal UI: upload, pick task and model, press the button, read the report.
with gr.Blocks() as demo:
    gr.Markdown("## Supervised Learning Model Trainer")
    gr.Markdown("Upload a CSV file. The **last column is treated as target**.")

    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
    task_dropdown = gr.Dropdown(
        choices=["Regression", "Classification"],
        label="Task Type",
    )
    # Choices are filled in by update_models once a task is picked.
    model_dropdown = gr.Dropdown(label="Model")
    output = gr.Textbox(label="Result", lines=5)

    train_btn = gr.Button("Generate")

    task_dropdown.change(fn=update_models, inputs=task_dropdown, outputs=model_dropdown)

    train_btn.click(
        fn=train_model,
        inputs=[file_input, task_dropdown, model_dropdown],
        outputs=output,
    )

demo.launch()
version_2_app.py ADDED
@@ -0,0 +1,266 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ import numpy as np
4
+ import warnings
5
+ warnings.filterwarnings(action="ignore")
6
+
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.pipeline import Pipeline
9
+ from sklearn.compose import ColumnTransformer
10
+ from sklearn.preprocessing import (
11
+ StandardScaler,
12
+ OneHotEncoder,
13
+ LabelEncoder,
14
+ )
15
+ from sklearn.metrics import (
16
+ mean_absolute_error,
17
+ mean_squared_error,
18
+ r2_score,
19
+ accuracy_score,
20
+ precision_score,
21
+ recall_score,
22
+ f1_score,
23
+ roc_auc_score,
24
+ )
25
+
26
+ # ======================
27
+ # Models
28
+ # ======================
29
+ from sklearn.linear_model import (
30
+ LinearRegression,
31
+ LogisticRegression,
32
+ Perceptron,
33
+ )
34
+ from sklearn.neighbors import (
35
+ KNeighborsClassifier,
36
+ KNeighborsRegressor,
37
+ )
38
+ from sklearn.naive_bayes import GaussianNB
39
+ from sklearn.tree import (
40
+ DecisionTreeClassifier,
41
+ DecisionTreeRegressor,
42
+ )
43
+ from sklearn.svm import SVC, SVR
44
+ from sklearn.neural_network import (
45
+ MLPClassifier,
46
+ MLPRegressor,
47
+ )
48
+
49
+ from sklearn.utils.multiclass import type_of_target
50
+
51
+ # ======================
52
+ # Model Registry
53
+ # ======================
54
# Display name -> estimator instance for regression tasks.
# The instances are created once, at import time.
REGRESSION_MODELS = {
    "Linear Regression": LinearRegression(),
    "KNN Regressor": KNeighborsRegressor(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "SVR": SVR(),
    "MLP Regressor": MLPRegressor(max_iter=1000),
}

# Display name -> estimator instance for classification tasks.
CLASSIFICATION_MODELS = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    "KNN Classifier": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Perceptron": Perceptron(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    "SVM Classifier": SVC(probability=True),
    "MLP Classifier": MLPClassifier(max_iter=1000),
}
71
+
72
+ # ======================
73
+ # UI Helpers
74
+ # ======================
75
def update_models(task_type):
    """Return a dropdown update listing the models for *task_type*, with no selection."""
    registry = REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
    return gr.update(choices=list(registry.keys()), value=None)
80
+
81
def preview_csv(file):
    """Return the uploaded CSV as a DataFrame, or None when no file is selected."""
    return None if file is None else pd.read_csv(file.name)
85
+
86
+
87
def detect_target_type(y):
    """Guess whether a target column describes a classification or regression task.

    Args:
        y: pandas Series holding the target values.

    Returns:
        "Classification" when the column is non-numeric (object, category,
        pandas string dtype, ...) or has at most 20 distinct values;
        otherwise "Regression".
    """
    # Categorical target: any non-numeric dtype. Checking only for
    # "object"/"category" misses the pandas string dtype.
    if not pd.api.types.is_numeric_dtype(y.dtype):
        return "Classification"

    # Numeric but low cardinality -> classification
    if y.nunique() <= 20:
        return "Classification"

    return "Regression"
97
+
98
+
99
def auto_set_task(file):
    """Pick the task type for an uploaded CSV from its last column.

    Args:
        file: uploaded file; either an object exposing the path as ``.name``
            or a plain path string. ``None`` means no file is selected.

    Returns:
        "Regression" or "Classification". Falls back to "Regression" when
        there is no file or the file contains no columns to inspect.
    """
    if file is None:
        return "Regression"

    # Accept both upload objects (path in .name) and bare path strings.
    path = getattr(file, "name", file)

    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # Empty upload: keep the default rather than crash the change handler.
        return "Regression"

    if df.shape[1] == 0:
        return "Regression"

    return detect_target_type(df.iloc[:, -1])
105
+
106
+
107
+ # ======================
108
+ # Core Training Logic
109
+ # ======================
110
def train_model(file, task_type, model_name):
    """Train the selected model on an uploaded CSV and return a metrics table.

    The last CSV column is the target; numeric features are standardised and
    categorical features one-hot encoded. The data is split 80/20 with a
    fixed random seed.

    Args:
        file: uploaded file object exposing its path as ``.name``, or None.
        task_type: "Regression" or "Classification".
        model_name: key into the matching model registry.

    Returns:
        A DataFrame with "Metric"/"Value" columns on success, or a DataFrame
        with a single "Error" column when the inputs are missing or
        inconsistent.
    """
    # Pressing the button before choosing a file or model used to crash with
    # AttributeError / KeyError; report it in the table instead.
    if file is None:
        return pd.DataFrame({"Error": ["Please upload a CSV file first."]})
    if not model_name:
        return pd.DataFrame({"Error": ["Please select a model first."]})

    # Imported here so the validation paths above need nothing but pandas.
    from sklearn.base import clone

    df = pd.read_csv(file.name)

    # Target = last column
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    detected_task = detect_target_type(y)

    # Mismatch validation
    if task_type != detected_task:
        return pd.DataFrame(
            {
                "Error": [
                    f"Dataset target detected as {detected_task}, "
                    f"but {task_type} model selected."
                ]
            }
        )

    # ---------- Automatic label encoding ----------
    if task_type == "Classification" and y.dtype == "object":
        y = LabelEncoder().fit_transform(y)

    # ---------- Feature preprocessing ----------
    num_cols = X.select_dtypes(include=["int64", "float64"]).columns
    cat_cols = X.select_dtypes(include=["object", "category"]).columns

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), num_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ]
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # ---------- Model selection ----------
    registry = REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
    # Fit a fresh copy: the registry holds shared estimator instances, and
    # fitting them in place would leak state between runs.
    model = clone(registry[model_name])

    pipeline = Pipeline(
        steps=[
            ("preprocessing", preprocessor),
            ("model", model),
        ]
    )

    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    # ---------- Metrics ----------
    if task_type == "Regression":
        mse = mean_squared_error(y_test, preds)
        metrics = {
            "MAE": mean_absolute_error(y_test, preds),
            "MSE": mse,
            "RMSE": np.sqrt(mse),
            "R²": r2_score(y_test, preds),
        }
    else:
        metrics = {
            "Accuracy": accuracy_score(y_test, preds),
            "Precision": precision_score(y_test, preds, average="weighted"),
            "Recall": recall_score(y_test, preds, average="weighted"),
            "F1 Score": f1_score(y_test, preds, average="weighted"),
        }

        # ROC-AUC only for models that can output class probabilities.
        if hasattr(pipeline.named_steps["model"], "predict_proba"):
            probs = pipeline.predict_proba(X_test)
            target_type = type_of_target(y_test)

            if target_type == "binary":
                metrics["ROC-AUC"] = roc_auc_score(y_test, probs[:, 1])
            elif target_type == "multiclass":
                metrics["ROC-AUC"] = roc_auc_score(
                    y_test,
                    probs,
                    multi_class="ovr",
                    average="weighted",
                )

    # ---------- Metric table ----------
    return pd.DataFrame(metrics.items(), columns=["Metric", "Value"])
210
+
211
+ # ======================
212
+ # Gradio UI
213
+ # ======================
214
# UI: upload, preview, task/model selection, and a metrics table.
with gr.Blocks() as app:
    gr.Markdown("## Supervised Learning Model Trainer")
    gr.Markdown(
        "• Upload CSV\n"
        "• Last column is target\n"
        "• Automatic preprocessing & metrics"
    )

    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
    csv_preview = gr.Dataframe(label="CSV Preview", interactive=False)
    task_type = gr.Dropdown(
        choices=["Regression", "Classification"],
        label="Task Type",
        value="Regression",
    )
    # Choices are filled in by update_models.
    model_name = gr.Dropdown(label="Model")
    output = gr.Dataframe(label="Evaluation Metrics")

    run_btn = gr.Button("Train & Evaluate")

    # A new upload refreshes the preview and re-detects the task type.
    file_input.change(fn=preview_csv, inputs=file_input, outputs=csv_preview)
    file_input.change(fn=auto_set_task, inputs=file_input, outputs=task_type)

    # Keep the model list in sync with the task, including on first page load.
    task_type.change(fn=update_models, inputs=task_type, outputs=model_name)
    app.load(fn=update_models, inputs=task_type, outputs=model_name)

    run_btn.click(
        fn=train_model,
        inputs=[file_input, task_type, model_name],
        outputs=output,
    )

app.launch()