anyonehomep1mane commited on
Commit ·
d7e53e8
0
Parent(s):
Initial Changes
Browse files- .gitignore +3 -0
- app.py +82 -0
- core/__init__.py +0 -0
- core/__pycache__/__init__.cpython-310.pyc +0 -0
- core/__pycache__/detection.cpython-310.pyc +0 -0
- core/__pycache__/training.cpython-310.pyc +0 -0
- core/__pycache__/visuals.cpython-310.pyc +0 -0
- core/detection.py +19 -0
- core/training.py +68 -0
- core/visuals.py +65 -0
- models/__init__.py +0 -0
- models/__pycache__/__init__.cpython-310.pyc +0 -0
- models/__pycache__/registry.cpython-310.pyc +0 -0
- models/registry.py +40 -0
- preprocessing/__init__.py +0 -0
- preprocessing/__pycache__/__init__.cpython-310.pyc +0 -0
- preprocessing/__pycache__/transformers.cpython-310.pyc +0 -0
- preprocessing/transformers.py +13 -0
- requirements.txt +5 -0
- ui/__init__.py +0 -0
- ui/__pycache__/__init__.cpython-310.pyc +0 -0
- ui/__pycache__/helpers.cpython-310.pyc +0 -0
- ui/__pycache__/styles.cpython-310.pyc +0 -0
- ui/__pycache__/theme.cpython-310.pyc +0 -0
- ui/helpers.py +52 -0
- ui/styles.py +16 -0
- ui/theme.py +28 -0
- utils/__init__.py +0 -0
- utils/__pycache__/__init__.cpython-310.pyc +0 -0
- utils/__pycache__/metrics.cpython-310.pyc +0 -0
- utils/metrics.py +44 -0
- version_1_app.py +142 -0
- version_2_app.py +266 -0
.gitignore
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
.env
|
| 2 |
+
.vscode
|
| 3 |
+
venv
|
app.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import warnings
|
| 3 |
+
warnings.filterwarnings("ignore")
|
| 4 |
+
|
| 5 |
+
from ui.helpers import (
|
| 6 |
+
update_models,
|
| 7 |
+
update_graphs,
|
| 8 |
+
preview_csv,
|
| 9 |
+
reset_metrics_on_file_clear,
|
| 10 |
+
toggle_csv_preview
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
from ui.theme import OrangeRedTheme
|
| 14 |
+
from ui.styles import CSS_STYLE
|
| 15 |
+
|
| 16 |
+
from core.training import train_model
|
| 17 |
+
from core.detection import auto_set_task
|
| 18 |
+
|
| 19 |
+
# NOTE(review): the nesting of rows/columns below is reconstructed from the
# order of the components; the original indentation was not recoverable, so
# confirm the layout matches the intended design.
with gr.Blocks() as app:
    with gr.Column(elem_id="container"):
        gr.Markdown("## Supervised Learning Model Trainer")

        # Inputs: dataset upload plus task / model / graph selectors.
        with gr.Row(equal_height=True):
            with gr.Column():
                file_input = gr.File(label="Upload CSV", file_types=[".csv"])
                show_preview = gr.Checkbox(
                    label="Show CSV Preview",
                    value=False,
                )
                task_type = gr.Dropdown(
                    ["Regression", "Classification"],
                    label="Task Type",
                    value="Regression",
                )
                # Choices for these two are filled in by update_models /
                # update_graphs (on page load and on task-type change).
                model_name = gr.Dropdown(label="Model")
                graph_type = gr.Dropdown(label="Graph Type")

        with gr.Row(equal_height=True):
            run_btn = gr.Button("Train & Evaluate", variant="primary", size="lg")

        # Outputs: optional CSV preview (hidden by default) and metrics table.
        with gr.Row(equal_height=True):
            with gr.Column():
                csv_preview = gr.Dataframe(
                    label="CSV Preview",
                    interactive=False,
                    visible=False,
                )
                output = gr.Dataframe(label="Evaluation Metrics", interactive=False)

        with gr.Row(equal_height=True):
            with gr.Column():
                plot = gr.Plot(label="Selected Graph")

    # A new (or cleared) file refreshes the preview, the auto-detected task
    # type, and resets stale results.
    file_input.change(preview_csv, file_input, csv_preview)
    file_input.change(auto_set_task, file_input, task_type)
    file_input.change(
        reset_metrics_on_file_clear,
        inputs=file_input,
        outputs=[output, plot],
    )

    # The model and graph choices depend on the selected task type.
    task_type.change(update_models, task_type, model_name)
    task_type.change(update_graphs, task_type, graph_type)

    show_preview.change(
        toggle_csv_preview,
        inputs=show_preview,
        outputs=csv_preview,
    )

    # Populate the dependent dropdowns once when the page loads.
    app.load(update_models, task_type, model_name)
    app.load(update_graphs, task_type, graph_type)

    run_btn.click(
        train_model,
        inputs=[file_input, task_type, model_name, graph_type],
        outputs=[output, plot],
    )


if __name__ == "__main__":
    orange_red_theme = OrangeRedTheme()
    # NOTE(review): passing theme/css to launch() depends on the installed
    # Gradio version accepting them there — confirm against requirements.txt.
    app.queue().launch(
        theme=orange_red_theme,
        css=CSS_STYLE,
        show_error=True,
        server_name="0.0.0.0",
        server_port=7860,
        debug=True,
    )
|
core/__init__.py
ADDED
|
File without changes
|
core/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (165 Bytes). View file
|
|
|
core/__pycache__/detection.cpython-310.pyc
ADDED
|
Binary file (633 Bytes). View file
|
|
|
core/__pycache__/training.cpython-310.pyc
ADDED
|
Binary file (1.8 kB). View file
|
|
|
core/__pycache__/visuals.cpython-310.pyc
ADDED
|
Binary file (2.13 kB). View file
|
|
|
core/detection.py
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
|
| 3 |
+
def detect_target_type(y, max_classes=20):
    """Guess whether a target column is a classification or regression target.

    Args:
        y: pandas Series holding the target values.
        max_classes: A numeric target with at most this many distinct values
            is treated as a set of class labels.

    Returns:
        "Classification" or "Regression".
    """
    # Text-like targets are always labels. Checking only `dtype == "object"`
    # misses the pandas string dtype, so the dtype helpers are used instead.
    is_label_like = (
        pd.api.types.is_object_dtype(y)
        or pd.api.types.is_string_dtype(y)
        or isinstance(y.dtype, pd.CategoricalDtype)
    )
    if is_label_like:
        return "Classification"

    # Numeric but low cardinality: treat the distinct values as classes.
    if y.nunique() <= max_classes:
        return "Classification"

    return "Regression"
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def auto_set_task(file):
    """Choose the task type for an uploaded CSV from its last column.

    Args:
        file: None, a path string, or an object exposing the path as `.name`.

    Returns:
        "Regression" when there is no file (or the file is empty), otherwise
        whatever detect_target_type reports for the last column.
    """
    if file is None:
        return "Regression"

    # Accept a plain path as well as an upload object with a `.name` path.
    path = file if isinstance(file, str) else file.name
    try:
        df = pd.read_csv(path)
    except pd.errors.EmptyDataError:
        # A zero-byte upload has no target column; keep the default task.
        return "Regression"

    return detect_target_type(df.iloc[:, -1])
|
core/training.py
ADDED
|
@@ -0,0 +1,68 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import pandas as pd
|
| 2 |
+
from sklearn.pipeline import Pipeline
|
| 3 |
+
from sklearn.model_selection import train_test_split
|
| 4 |
+
from sklearn.preprocessing import LabelEncoder
|
| 5 |
+
|
| 6 |
+
from core.detection import detect_target_type
|
| 7 |
+
from models.registry import REGRESSION_MODELS, CLASSIFICATION_MODELS
|
| 8 |
+
from preprocessing.transformers import build_preprocessor
|
| 9 |
+
from utils.metrics import regression_metrics, classification_metrics
|
| 10 |
+
from core.visuals import regression_graphs, classification_graphs
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
def train_model(file, task_type, model_name, graph_type):
    """Train the selected model on an uploaded CSV and report metrics.

    The last CSV column is the target; all other columns are features.

    Args:
        file: None, a path string, or an upload object exposing `.name`.
        task_type: "Regression" or "Classification".
        model_name: Key into the matching model registry.
        graph_type: Name of the graph to produce for the fitted model.

    Returns:
        (DataFrame, figure-or-None). On a validation failure the DataFrame
        has a single "Error" column and the figure is None; otherwise it has
        "Metric" / "Value" columns.
    """
    if file is None:
        return pd.DataFrame({"Error": ["Please upload a csv file first."]}), None

    # Imported here because only this function needs it.
    from sklearn.base import clone

    path = file if isinstance(file, str) else file.name
    df = pd.read_csv(path)

    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    detected_task = detect_target_type(y)
    if task_type != detected_task:
        return pd.DataFrame({
            "Error": [f"Detected {detected_task} target, but {task_type} selected."]
        }), None

    is_regression = task_type == "Regression"
    registry = REGRESSION_MODELS if is_regression else CLASSIFICATION_MODELS
    if model_name not in registry:
        # Covers an empty dropdown (None) as well as a stale selection.
        return pd.DataFrame({
            "Error": [f"Unknown {task_type} model: {model_name!r}."]
        }), None

    # Encode any non-numeric labels (object or pandas string dtype).
    if not is_regression and not pd.api.types.is_numeric_dtype(y):
        y = LabelEncoder().fit_transform(y)

    preprocessor = build_preprocessor(X)

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # The registry holds module-level estimator instances; fit a fresh clone
    # so one request never mutates the estimator another request is using.
    model = clone(registry[model_name])

    pipeline = Pipeline([
        ("preprocessing", preprocessor),
        ("model", model),
    ])

    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    if is_regression:
        metrics = regression_metrics(y_test, preds)
        fig = regression_graphs(graph_type, X, y, model, pipeline, y_test, preds)
    else:
        metrics = classification_metrics(pipeline, X_test, y_test, preds)
        fig = classification_graphs(graph_type, pipeline, X_test, y_test, preds)

    metrics_df = pd.DataFrame(metrics.items(), columns=["Metric", "Value"])

    return metrics_df, fig
|
core/visuals.py
ADDED
|
@@ -0,0 +1,65 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import matplotlib.pyplot as plt
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from sklearn.model_selection import learning_curve
|
| 4 |
+
from sklearn.metrics import (
|
| 5 |
+
roc_curve, precision_recall_curve,
|
| 6 |
+
confusion_matrix, classification_report
|
| 7 |
+
)
|
| 8 |
+
|
| 9 |
+
def regression_graphs(graph_type, X, y, model, pipeline, y_test, preds):
    """Build the requested diagnostic figure for a fitted regression model.

    Args:
        graph_type: One of "Actual vs Predicted", "Residual Plot",
            "Residual Histogram", "Feature Importance", "Learning Curve".
        X, y: Full feature frame and target (used by the learning curve).
        model: The estimator; only inspected for `feature_importances_`.
        pipeline: Pipeline passed to `learning_curve`.
        y_test: Held-out targets (a pandas Series; `.values` is read).
        preds: Predictions for the held-out rows.

    Returns:
        A matplotlib Figure, or None when the graph type is unknown or the
        model exposes no feature importances.
    """
    def new_axes():
        # Figures made through pyplot stay registered globally until closed,
        # which leaks memory in a long-running server. A bare Figure is
        # garbage-collected normally.
        from matplotlib.figure import Figure

        figure = Figure()
        return figure, figure.subplots()

    # Default result so an unrecognised graph type returns None instead of
    # raising UnboundLocalError.
    fig = None

    if graph_type == "Actual vs Predicted":
        fig, ax = new_axes()
        # Only the first 100 points are drawn to keep the plot readable.
        ax.plot(y_test.values[:100])
        ax.plot(preds[:100])
        ax.legend(["Actual", "Predicted"])
    elif graph_type == "Residual Plot":
        fig, ax = new_axes()
        ax.scatter(preds, y_test - preds)
        ax.axhline(0)
    elif graph_type == "Residual Histogram":
        fig, ax = new_axes()
        ax.hist(y_test - preds, bins=30)
    elif graph_type == "Feature Importance":
        if hasattr(model, "feature_importances_"):
            importances = model.feature_importances_
            fig, ax = new_axes()
            ax.bar(range(len(importances)), importances)
    elif graph_type == "Learning Curve":
        sizes, train_scores, test_scores = learning_curve(pipeline, X, y)
        fig, ax = new_axes()
        ax.plot(sizes, train_scores.mean(axis=1))
        ax.plot(sizes, test_scores.mean(axis=1))
        ax.legend(["Train", "Test"])

    return fig
|
| 37 |
+
|
| 38 |
+
def classification_graphs(graph_type, pipeline, X_test, y_test, preds):
    """Build the requested diagnostic figure for a fitted classifier.

    Args:
        graph_type: One of "Confusion Matrix", "ROC Curve",
            "Per-Class Metrics Table", "Precision-Recall Curve",
            "Probability Histogram".
        pipeline: Fitted pipeline; `predict_proba` / `classes_` are used for
            the probability-based graphs.
        X_test, y_test: Held-out features and labels.
        preds: Predicted labels for the held-out rows.

    Returns:
        A matplotlib Figure, or None when the graph type is unknown or the
        model cannot produce probabilities for a probability-based graph.
    """
    def new_axes():
        # A bare Figure avoids pyplot's global figure registry, which would
        # otherwise grow with every request.
        from matplotlib.figure import Figure

        figure = Figure()
        return figure, figure.subplots()

    def one_vs_rest(probs):
        # Binary: one curve for the second class (as before). More classes:
        # one curve per class, each class against the rest.
        classes = pipeline.classes_
        if probs.shape[1] == 2:
            return [(classes[1], probs[:, 1])]
        return [(classes[k], probs[:, k]) for k in range(probs.shape[1])]

    proba_graphs = ("ROC Curve", "Precision-Recall Curve", "Probability Histogram")
    if graph_type in proba_graphs and not hasattr(pipeline, "predict_proba"):
        # e.g. Perceptron: no probabilities, so nothing to draw.
        return None

    # Default result so an unrecognised graph type returns None instead of
    # raising UnboundLocalError.
    fig = None

    if graph_type == "Confusion Matrix":
        cm = confusion_matrix(y_test, preds)
        fig, ax = new_axes()
        ax.imshow(cm)
        ax.set_title("Confusion Matrix")
    elif graph_type == "ROC Curve":
        curves = one_vs_rest(pipeline.predict_proba(X_test))
        fig, ax = new_axes()
        for label, scores in curves:
            # A boolean target works for any label values and class count.
            fpr, tpr, _ = roc_curve(y_test == label, scores)
            ax.plot(fpr, tpr, label=str(label))
        if len(curves) > 1:
            ax.legend(title="Class")
        ax.set_title("ROC Curve")
    elif graph_type == "Per-Class Metrics Table":
        report = classification_report(y_test, preds, output_dict=True)
        report = pd.DataFrame(report).transpose()
        # The result feeds a plot component, so draw the table on a figure
        # rather than returning the DataFrame itself.
        fig, ax = new_axes()
        ax.axis("off")
        ax.table(
            cellText=report.round(3).astype(str).values.tolist(),
            rowLabels=[str(name) for name in report.index],
            colLabels=[str(name) for name in report.columns],
            loc="center",
        )
        ax.set_title("Per-Class Metrics")
    elif graph_type == "Precision-Recall Curve":
        curves = one_vs_rest(pipeline.predict_proba(X_test))
        fig, ax = new_axes()
        for label, scores in curves:
            p, r, _ = precision_recall_curve(y_test == label, scores)
            ax.plot(r, p, label=str(label))
        if len(curves) > 1:
            ax.legend(title="Class")
        ax.set_title("Precision-Recall Curve")
    elif graph_type == "Probability Histogram":
        probs = pipeline.predict_proba(X_test)[:, 1]
        fig, ax = new_axes()
        ax.hist(probs, bins=20)
        ax.set_title("Prediction Probability Histogram")

    return fig
|
models/__init__.py
ADDED
|
File without changes
|
models/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (167 Bytes). View file
|
|
|
models/__pycache__/registry.cpython-310.pyc
ADDED
|
Binary file (1.29 kB). View file
|
|
|
models/registry.py
ADDED
|
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sklearn.linear_model import LinearRegression, LogisticRegression, Perceptron
|
| 2 |
+
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor
|
| 3 |
+
from sklearn.naive_bayes import GaussianNB
|
| 4 |
+
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
|
| 5 |
+
from sklearn.svm import SVC, SVR
|
| 6 |
+
from sklearn.neural_network import MLPClassifier, MLPRegressor
|
| 7 |
+
|
| 8 |
+
# Display name -> estimator instance for regression tasks.
# NOTE: these are module-level instances shared by every caller; fitting one
# in place mutates the registry entry.
REGRESSION_MODELS = {
    "Linear Regression": LinearRegression(),
    "KNN Regressor": KNeighborsRegressor(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "SVR": SVR(),
    "MLP Regressor": MLPRegressor(max_iter=1000),
}

# Display name -> estimator instance for classification tasks.
CLASSIFICATION_MODELS = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    "KNN Classifier": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Perceptron": Perceptron(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    # probability=True enables predict_proba on the SVM.
    "SVM Classifier": SVC(probability=True),
    "MLP Classifier": MLPClassifier(max_iter=1000),
}

# Graph names offered in the UI for classification models.
CLASSIFICATION_GRAPHS = [
    "Confusion Matrix",
    "ROC Curve",
    "Per-Class Metrics Table",
    "Precision-Recall Curve",
    "Probability Histogram",
]

# Graph names offered in the UI for regression models.
REGRESSION_GRAPHS = [
    "Actual vs Predicted",
    "Residual Plot",
    "Residual Histogram",
    "Feature Importance",
    "Learning Curve",
]
|
preprocessing/__init__.py
ADDED
|
File without changes
|
preprocessing/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (174 Bytes). View file
|
|
|
preprocessing/__pycache__/transformers.cpython-310.pyc
ADDED
|
Binary file (663 Bytes). View file
|
|
|
preprocessing/transformers.py
ADDED
|
@@ -0,0 +1,13 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from sklearn.compose import ColumnTransformer
|
| 2 |
+
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
| 3 |
+
|
| 4 |
+
def build_preprocessor(X):
    """Build a ColumnTransformer that scales numeric and one-hot encodes other columns.

    Args:
        X: Feature DataFrame; only its column dtypes are inspected.

    Returns:
        An unfitted ColumnTransformer with a "num" StandardScaler step and a
        "cat" OneHotEncoder step (unknown categories are ignored at
        transform time).
    """
    # Select by kind rather than by exact dtype name: listing only
    # int64/float64 and object/category leaves int32, float32, bool and
    # string columns unmatched, and the transformer then drops them silently.
    num_cols = X.select_dtypes(include="number").columns
    cat_cols = X.select_dtypes(exclude="number").columns

    return ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), num_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ]
    )
|
requirements.txt
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
gradio>=4.0.0
|
| 2 |
+
pandas
|
| 3 |
+
scikit-learn
|
| 4 |
+
numpy
|
| 5 |
+
matplotlib
|
ui/__init__.py
ADDED
|
File without changes
|
ui/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (163 Bytes). View file
|
|
|
ui/__pycache__/helpers.cpython-310.pyc
ADDED
|
Binary file (1.4 kB). View file
|
|
|
ui/__pycache__/styles.cpython-310.pyc
ADDED
|
Binary file (362 Bytes). View file
|
|
|
ui/__pycache__/theme.cpython-310.pyc
ADDED
|
Binary file (1.49 kB). View file
|
|
|
ui/helpers.py
ADDED
|
@@ -0,0 +1,52 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
from models.registry import REGRESSION_MODELS, CLASSIFICATION_MODELS, REGRESSION_GRAPHS, CLASSIFICATION_GRAPHS
|
| 4 |
+
|
| 5 |
+
|
| 6 |
+
def update_models(task_type):
    """Return a dropdown update listing the models for *task_type*.

    Any value other than "Regression" selects the classification registry.
    The first model is pre-selected when the list is not empty.
    """
    if task_type == "Regression":
        models = list(REGRESSION_MODELS.keys())
    else:
        models = list(CLASSIFICATION_MODELS.keys())

    return gr.update(
        choices=models,
        value=models[0] if models else None,  # auto-select first
    )
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def update_graphs(task_type):
    """Return a dropdown update listing the graphs for *task_type*.

    Any value other than "Regression" selects the classification graphs.
    The first graph is pre-selected when the list is not empty.
    """
    graphs = (
        REGRESSION_GRAPHS
        if task_type == "Regression"
        else CLASSIFICATION_GRAPHS
    )

    return gr.update(
        choices=graphs,
        # Guarded like update_models so an empty list cannot raise IndexError.
        value=graphs[0] if graphs else None,
    )
|
| 29 |
+
|
| 30 |
+
import os
|
| 31 |
+
|
| 32 |
+
def preview_csv(file, max_rows=50, max_size_mb=50):
    """Load the first rows of an uploaded CSV for display.

    Args:
        file: Falsy for "no file", a path string, or an object exposing the
            path as `.name`.
        max_rows: Maximum number of rows to read.
        max_size_mb: Files larger than this (in MiB) are not previewed.

    Returns:
        A DataFrame with at most *max_rows* rows, or None when there is no
        file, the file is too large, or the file is empty.
    """
    if not file:
        return None

    path = file if isinstance(file, str) else file.name
    size_mb = os.path.getsize(path) / (1024 * 1024)

    if size_mb > max_size_mb:
        return None  # too large: no preview

    try:
        return pd.read_csv(path, nrows=max_rows)
    except pd.errors.EmptyDataError:
        # A zero-byte upload has nothing to show.
        return None
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def reset_metrics_on_file_clear(file):
    """Clear the metrics table and plot when the uploaded file is removed.

    Returns:
        (empty DataFrame, None) when *file* is None; otherwise a pair of
        no-op updates that leave both outputs unchanged.
    """
    if file is None:
        return pd.DataFrame(), None
    return gr.update(), gr.update()
|
| 49 |
+
|
| 50 |
+
|
| 51 |
+
def toggle_csv_preview(show):
    """Return an update that shows or hides the CSV preview per *show*."""
    return gr.update(visible=show)
|
ui/styles.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Custom CSS for the app: centres the "#container" column and caps its width.
# NOTE(review): the "#title h1" rule has no matching elem_id="title" element
# in the visible app layout — confirm whether it is still needed.
CSS_STYLE = """
#container {
    max-width: 1280px;
    margin: auto;
}

@media (min-width: 1600px) {
    #container {
        max-width: 1440px;
    }
}

#title h1 {
    font-size: 2.4em !important;
}
"""
|
ui/theme.py
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from gradio.themes import Soft
|
| 2 |
+
from gradio.themes.utils import colors, fonts, sizes
|
| 3 |
+
|
| 4 |
+
# Custom orange-red palette, attached to gradio's colors module so it can be
# referenced like the built-in hues.
colors.orange_red = colors.Color(
    name="orange_red",
    c50="#FFF0E5", c100="#FFE0CC", c200="#FFC299", c300="#FFA366",
    c400="#FF8533", c500="#FF4500", c600="#E63E00", c700="#CC3700",
    c800="#B33000", c900="#992900", c950="#802200",
)


class OrangeRedTheme(Soft):
    """Soft theme variant using the orange-red palette for primary/secondary hues."""

    def __init__(self):
        super().__init__(
            primary_hue=colors.orange_red,
            secondary_hue=colors.orange_red,
            neutral_hue=colors.slate,
            text_size=sizes.text_lg,
            font=(fonts.GoogleFont("Outfit"), "Arial", "sans-serif"),
            font_mono=(fonts.GoogleFont("IBM Plex Mono"), "monospace"),
        )
        # Override individual style variables on top of the Soft defaults.
        super().set(
            body_background_fill="linear-gradient(135deg, *primary_200, *primary_100)",
            button_primary_background_fill="linear-gradient(90deg, *secondary_500, *secondary_600)",
            button_primary_background_fill_hover="linear-gradient(90deg, *secondary_600, *secondary_700)",
            button_primary_text_color="white",
            block_border_width="3px",
            block_shadow="*shadow_drop_lg",
        )
|
utils/__init__.py
ADDED
|
File without changes
|
utils/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (166 Bytes). View file
|
|
|
utils/__pycache__/metrics.cpython-310.pyc
ADDED
|
Binary file (1.25 kB). View file
|
|
|
utils/metrics.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import numpy as np
|
| 2 |
+
from sklearn.metrics import (
|
| 3 |
+
mean_absolute_error,
|
| 4 |
+
mean_squared_error,
|
| 5 |
+
r2_score,
|
| 6 |
+
accuracy_score,
|
| 7 |
+
precision_score,
|
| 8 |
+
recall_score,
|
| 9 |
+
f1_score,
|
| 10 |
+
roc_auc_score,
|
| 11 |
+
)
|
| 12 |
+
|
| 13 |
+
from sklearn.utils.multiclass import type_of_target
|
| 14 |
+
|
| 15 |
+
def regression_metrics(y_true, preds):
    """Return MAE, MSE, RMSE and R² for regression predictions.

    Args:
        y_true: Ground-truth target values.
        preds: Predicted values, aligned with *y_true*.

    Returns:
        Dict mapping metric name to value, in display order.
    """
    # Compute the MSE once and derive the RMSE from it.
    mse = mean_squared_error(y_true, preds)
    return {
        "MAE": mean_absolute_error(y_true, preds),
        "MSE": mse,
        "RMSE": np.sqrt(mse),
        "R²": r2_score(y_true, preds),
    }
|
| 22 |
+
|
| 23 |
+
|
| 24 |
+
def classification_metrics(pipeline, X_test, y_test, preds):
    """Return accuracy, weighted precision/recall/F1 and, when possible, ROC-AUC.

    Args:
        pipeline: Fitted pipeline whose final step is named "model".
        X_test: Held-out features (used only for `predict_proba`).
        y_test: Held-out labels.
        preds: Predicted labels for *X_test*.

    Returns:
        Dict mapping metric name to value. "ROC-AUC" is present only when the
        model exposes `predict_proba` and the score is defined for this split.
    """
    metrics = {
        "Accuracy": accuracy_score(y_test, preds),
        "Precision": precision_score(y_test, preds, average="weighted"),
        "Recall": recall_score(y_test, preds, average="weighted"),
        "F1 Score": f1_score(y_test, preds, average="weighted"),
    }

    estimator = pipeline.named_steps["model"]
    if hasattr(estimator, "predict_proba"):
        probs = pipeline.predict_proba(X_test)
        n_classes = probs.shape[1]

        # Branch on the classes the model was trained on, not on the labels
        # that happen to appear in the test split.
        try:
            if n_classes == 2:
                metrics["ROC-AUC"] = roc_auc_score(y_test, probs[:, 1])
            elif n_classes > 2:
                metrics["ROC-AUC"] = roc_auc_score(
                    y_test,
                    probs,
                    multi_class="ovr",
                    average="weighted",
                    # Lets the score cope with a class missing from y_test.
                    labels=estimator.classes_,
                )
        except ValueError:
            # ROC-AUC is undefined here (e.g. a single class in y_test);
            # report the other metrics rather than failing the whole run.
            pass

    return metrics
|
version_1_app.py
ADDED
|
@@ -0,0 +1,142 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
|
| 4 |
+
from sklearn.model_selection import train_test_split
|
| 5 |
+
from sklearn.metrics import (
|
| 6 |
+
accuracy_score,
|
| 7 |
+
mean_absolute_error,
|
| 8 |
+
mean_squared_error,
|
| 9 |
+
r2_score,
|
| 10 |
+
)
|
| 11 |
+
import numpy as np
|
| 12 |
+
|
| 13 |
+
# ======================
|
| 14 |
+
# Model imports
|
| 15 |
+
# ======================
|
| 16 |
+
from sklearn.linear_model import (
|
| 17 |
+
LinearRegression,
|
| 18 |
+
LogisticRegression,
|
| 19 |
+
Perceptron,
|
| 20 |
+
)
|
| 21 |
+
from sklearn.neighbors import (
|
| 22 |
+
KNeighborsClassifier,
|
| 23 |
+
KNeighborsRegressor,
|
| 24 |
+
)
|
| 25 |
+
from sklearn.naive_bayes import GaussianNB
|
| 26 |
+
from sklearn.tree import (
|
| 27 |
+
DecisionTreeClassifier,
|
| 28 |
+
DecisionTreeRegressor,
|
| 29 |
+
)
|
| 30 |
+
from sklearn.svm import SVC, SVR
|
| 31 |
+
from sklearn.neural_network import (
|
| 32 |
+
MLPClassifier,
|
| 33 |
+
MLPRegressor,
|
| 34 |
+
)
|
| 35 |
+
|
| 36 |
+
# ======================
|
| 37 |
+
# Model registry
|
| 38 |
+
# ======================
|
| 39 |
+
# Display name -> estimator class; train_model instantiates one per run.
REGRESSION_MODELS = {
    "Linear Regression": LinearRegression,
    "KNN Regressor": KNeighborsRegressor,
    "Decision Tree Regressor": DecisionTreeRegressor,
    "SVR": SVR,
    "MLP Regressor": MLPRegressor,
}

CLASSIFICATION_MODELS = {
    "Logistic Regression": LogisticRegression,
    "KNN Classifier": KNeighborsClassifier,
    "Naive Bayes": GaussianNB,
    "Perceptron": Perceptron,
    "Decision Tree Classifier": DecisionTreeClassifier,
    "SVM Classifier": SVC,
    "MLP Classifier": MLPClassifier,
}
|
| 56 |
+
|
| 57 |
+
# ======================
|
| 58 |
+
# UI Logic
|
| 59 |
+
# ======================
|
| 60 |
+
def update_models(task_type):
    """Return a dropdown update listing the models for *task_type*.

    Any value other than "Regression" lists the classification models. The
    current selection is cleared.
    """
    if task_type == "Regression":
        return gr.update(choices=list(REGRESSION_MODELS.keys()), value=None)
    return gr.update(choices=list(CLASSIFICATION_MODELS.keys()), value=None)
|
| 64 |
+
|
| 65 |
+
|
| 66 |
+
def train_model(file, task_type, model_name):
    """Train the selected model on an uploaded CSV and return a text report.

    The last CSV column is the target; all other columns are features.

    Args:
        file: None, a path string, or an upload object exposing `.name`.
        task_type: "Regression"; any other value is handled as classification.
        model_name: Key into the matching model registry.

    Returns:
        A multi-line string with the metrics, or a short message when no
        file or model has been chosen.
    """
    # The model dropdown starts empty, so both inputs may be missing when the
    # button is clicked; report that instead of raising.
    if file is None:
        return "Please upload a CSV file first."

    is_regression = task_type == "Regression"
    registry = REGRESSION_MODELS if is_regression else CLASSIFICATION_MODELS
    if model_name not in registry:
        return "Please select a model first."

    path = file if isinstance(file, str) else file.name
    df = pd.read_csv(path)

    # Assumption: last column is target
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    model = registry[model_name]()
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    if is_regression:
        mae = mean_absolute_error(y_test, preds)
        mse = mean_squared_error(y_test, preds)
        rmse = np.sqrt(mse)
        r2 = r2_score(y_test, preds)

        return (
            f"Model: {model_name}\n"
            f"Task: Regression\n"
            f"MAE: {mae:.4f}\n"
            f"MSE: {mse:.4f}\n"
            f"RMSE: {rmse:.4f}\n"
            f"R² Score: {r2:.4f}"
        )

    acc = accuracy_score(y_test, preds)

    return (
        f"Model: {model_name}\n"
        f"Task: Classification\n"
        f"Accuracy: {acc:.4f}"
    )
|
| 109 |
+
|
| 110 |
+
|
| 111 |
+
# ======================
|
| 112 |
+
# Gradio App
|
| 113 |
+
# ======================
|
| 114 |
+
# NOTE(review): indentation reconstructed; all components and event wiring
# are assumed to sit directly inside the Blocks context — confirm.
with gr.Blocks() as demo:
    gr.Markdown("## Supervised Learning Model Trainer")
    gr.Markdown(
        "Upload a CSV file. The **last column is treated as target**."
    )

    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
    task_dropdown = gr.Dropdown(
        ["Regression", "Classification"],
        label="Task Type",
    )
    model_dropdown = gr.Dropdown(label="Model")
    output = gr.Textbox(label="Result", lines=5)

    train_btn = gr.Button("Generate")

    # The model choices follow the selected task type.
    task_dropdown.change(
        update_models,
        inputs=task_dropdown,
        outputs=model_dropdown,
    )

    train_btn.click(
        train_model,
        inputs=[file_input, task_dropdown, model_dropdown],
        outputs=output,
    )

demo.launch()
|
version_2_app.py
ADDED
|
@@ -0,0 +1,266 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
import gradio as gr
|
| 2 |
+
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import warnings
|
| 5 |
+
warnings.filterwarnings(action="ignore")
|
| 6 |
+
|
| 7 |
+
from sklearn.model_selection import train_test_split
|
| 8 |
+
from sklearn.pipeline import Pipeline
|
| 9 |
+
from sklearn.compose import ColumnTransformer
|
| 10 |
+
from sklearn.preprocessing import (
|
| 11 |
+
StandardScaler,
|
| 12 |
+
OneHotEncoder,
|
| 13 |
+
LabelEncoder,
|
| 14 |
+
)
|
| 15 |
+
from sklearn.metrics import (
|
| 16 |
+
mean_absolute_error,
|
| 17 |
+
mean_squared_error,
|
| 18 |
+
r2_score,
|
| 19 |
+
accuracy_score,
|
| 20 |
+
precision_score,
|
| 21 |
+
recall_score,
|
| 22 |
+
f1_score,
|
| 23 |
+
roc_auc_score,
|
| 24 |
+
)
|
| 25 |
+
|
| 26 |
+
# ======================
|
| 27 |
+
# Models
|
| 28 |
+
# ======================
|
| 29 |
+
from sklearn.linear_model import (
|
| 30 |
+
LinearRegression,
|
| 31 |
+
LogisticRegression,
|
| 32 |
+
Perceptron,
|
| 33 |
+
)
|
| 34 |
+
from sklearn.neighbors import (
|
| 35 |
+
KNeighborsClassifier,
|
| 36 |
+
KNeighborsRegressor,
|
| 37 |
+
)
|
| 38 |
+
from sklearn.naive_bayes import GaussianNB
|
| 39 |
+
from sklearn.tree import (
|
| 40 |
+
DecisionTreeClassifier,
|
| 41 |
+
DecisionTreeRegressor,
|
| 42 |
+
)
|
| 43 |
+
from sklearn.svm import SVC, SVR
|
| 44 |
+
from sklearn.neural_network import (
|
| 45 |
+
MLPClassifier,
|
| 46 |
+
MLPRegressor,
|
| 47 |
+
)
|
| 48 |
+
|
| 49 |
+
from sklearn.utils.multiclass import type_of_target
|
| 50 |
+
|
| 51 |
+
# ======================
|
| 52 |
+
# Model Registry
|
| 53 |
+
# ======================
|
| 54 |
+
# Display name -> estimator instance.
# NOTE: these are module-level instances shared by every caller; fitting one
# in place mutates the registry entry.
REGRESSION_MODELS = {
    "Linear Regression": LinearRegression(),
    "KNN Regressor": KNeighborsRegressor(),
    "Decision Tree Regressor": DecisionTreeRegressor(),
    "SVR": SVR(),
    "MLP Regressor": MLPRegressor(max_iter=1000),
}

CLASSIFICATION_MODELS = {
    "Logistic Regression": LogisticRegression(max_iter=500),
    "KNN Classifier": KNeighborsClassifier(),
    "Naive Bayes": GaussianNB(),
    "Perceptron": Perceptron(),
    "Decision Tree Classifier": DecisionTreeClassifier(),
    # probability=True enables predict_proba on the SVM.
    "SVM Classifier": SVC(probability=True),
    "MLP Classifier": MLPClassifier(max_iter=1000),
}
|
| 71 |
+
|
| 72 |
+
# ======================
|
| 73 |
+
# UI Helpers
|
| 74 |
+
# ======================
|
| 75 |
+
def update_models(task_type):
    """Return a dropdown update listing the models for *task_type*.

    Any value other than "Regression" lists the classification models. The
    current selection is cleared.
    """
    if task_type == "Regression":
        return gr.update(choices=list(REGRESSION_MODELS.keys()), value=None)
    else:
        return gr.update(choices=list(CLASSIFICATION_MODELS.keys()), value=None)
|
| 80 |
+
|
| 81 |
+
def preview_csv(file):
    """Load an uploaded CSV for display.

    Args:
        file: None, a path string, or an object exposing the path as `.name`.

    Returns:
        The whole file as a DataFrame, or None when no file is given.
    """
    if file is None:
        return None
    # Accept a plain path as well as an upload object with a `.name` path.
    path = file if isinstance(file, str) else file.name
    return pd.read_csv(path)
|
| 85 |
+
|
| 86 |
+
|
| 87 |
+
def detect_target_type(y):
    """Guess whether a target column calls for classification or regression.

    Args:
        y: pandas Series holding the target values.

    Returns:
        "Classification" when the column holds text/categorical labels or
        has at most 20 distinct values, otherwise "Regression".
    """
    dtype = y.dtype

    # Label-like dtypes are always classification, whatever the cardinality.
    # The explicit string-dtype check covers pandas' string extension dtypes,
    # which compare unequal to "object" and would otherwise be judged purely
    # by their number of distinct values.
    is_label_dtype = (
        dtype == "object"
        or dtype.name == "category"
        or pd.api.types.is_string_dtype(dtype)
    )
    if is_label_dtype:
        return "Classification"

    # Numeric but low cardinality -> treat the values as class labels.
    if y.nunique() <= 20:
        return "Classification"

    return "Regression"
|
| 97 |
+
|
| 98 |
+
|
| 99 |
+
def auto_set_task(file):
    """Pick the task type implied by the last column of the uploaded CSV.

    Returns "Regression" when no file is uploaded.
    """
    if file is None:
        return "Regression"

    target = pd.read_csv(file.name).iloc[:, -1]
    return detect_target_type(target)
|
| 105 |
+
|
| 106 |
+
|
| 107 |
+
# ======================
|
| 108 |
+
# Core Training Logic
|
| 109 |
+
# ======================
|
| 110 |
+
def train_model(file, task_type, model_name):
    """Train the selected model on an uploaded CSV and report test metrics.

    The last CSV column is the target and every other column is a feature.
    Rows are split 80/20 with a fixed seed, numeric features are
    standardised, categorical features are one-hot encoded, and the fitted
    pipeline is scored on the held-out part.

    Args:
        file: Uploaded file object exposing the CSV path as ``.name``, or
            ``None`` when nothing has been uploaded.
        task_type: "Regression" or "Classification".
        model_name: Key into the model registry matching ``task_type``.

    Returns:
        A DataFrame with "Metric" and "Value" columns on success, or a
        one-column "Error" DataFrame when the request cannot be served
        (no file, task/target mismatch, or no valid model selected).
    """
    if file is None:
        return _error_frame("Please upload a CSV file before training.")

    df = pd.read_csv(file.name)

    # Target = last column
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    detected_task = detect_target_type(y)

    # Refuse to train when the chosen task contradicts the target column.
    if task_type != detected_task:
        return _error_frame(
            f"Dataset target detected as {detected_task}, "
            f"but {task_type} model selected."
        )

    registry = (
        REGRESSION_MODELS if task_type == "Regression" else CLASSIFICATION_MODELS
    )
    # The dropdown selection is cleared whenever the task type changes, so
    # the name can be None or belong to the other registry.
    if model_name not in registry:
        return _error_frame(f"Please select a {task_type} model before training.")

    # ---------- Automatic label encoding ----------
    if task_type == "Classification" and y.dtype == "object":
        y = LabelEncoder().fit_transform(y)

    # ---------- Feature preprocessing ----------
    # Columns that match neither selector are dropped by ColumnTransformer,
    # so select every numeric width and route booleans to the encoder.
    num_cols = X.select_dtypes(include="number").columns
    cat_cols = X.select_dtypes(include=["object", "category", "bool"]).columns

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", StandardScaler(), num_cols),
            ("cat", OneHotEncoder(handle_unknown="ignore"), cat_cols),
        ]
    )

    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=42
    )

    # ---------- Model selection ----------
    # Imported here because only this step needs it.
    from sklearn.base import clone

    # The registry holds module-level estimator instances shared by every
    # call; fit a fresh clone so no fitted state leaks between runs.
    model = clone(registry[model_name])

    pipeline = Pipeline(
        steps=[
            ("preprocessing", preprocessor),
            ("model", model),
        ]
    )

    pipeline.fit(X_train, y_train)
    preds = pipeline.predict(X_test)

    # ---------- Metrics ----------
    if task_type == "Regression":
        mse = mean_squared_error(y_test, preds)
        metrics = {
            "MAE": mean_absolute_error(y_test, preds),
            "MSE": mse,
            "RMSE": np.sqrt(mse),
            "R²": r2_score(y_test, preds),
        }
    else:
        metrics = {
            "Accuracy": accuracy_score(y_test, preds),
            "Precision": precision_score(y_test, preds, average="weighted"),
            "Recall": recall_score(y_test, preds, average="weighted"),
            "F1 Score": f1_score(y_test, preds, average="weighted"),
        }

        # ROC-AUC is reported only for models exposing class probabilities.
        if hasattr(pipeline.named_steps["model"], "predict_proba"):
            probs = pipeline.predict_proba(X_test)
            # Decide binary vs. multiclass from the probability matrix: the
            # test split may contain fewer classes than the model was
            # trained on.
            n_classes = probs.shape[1]
            try:
                if n_classes == 2:
                    metrics["ROC-AUC"] = roc_auc_score(y_test, probs[:, 1])
                elif n_classes > 2:
                    metrics["ROC-AUC"] = roc_auc_score(
                        y_test,
                        probs,
                        multi_class="ovr",
                        average="weighted",
                    )
            except ValueError:
                # ROC-AUC is undefined when the test split lacks one of the
                # classes; leave it out rather than failing the whole run.
                pass

    # ---------- Metric table ----------
    return pd.DataFrame(list(metrics.items()), columns=["Metric", "Value"])


def _error_frame(message):
    """Wrap ``message`` in the one-column "Error" table shown in the UI."""
    return pd.DataFrame({"Error": [message]})
|
| 210 |
+
|
| 211 |
+
# ======================
|
| 212 |
+
# Gradio UI
|
| 213 |
+
# ======================
|
| 214 |
+
with gr.Blocks() as app:
    # ---- Header and usage notes ----
    gr.Markdown("## Supervised Learning Model Trainer")
    gr.Markdown(
        "• Upload CSV\n"
        "• Last column is target\n"
        "• Automatic preprocessing & metrics"
    )

    # ---- Inputs ----
    file_input = gr.File(label="Upload CSV", file_types=[".csv"])
    csv_preview = gr.Dataframe(label="CSV Preview", interactive=False)
    task_type = gr.Dropdown(
        ["Regression", "Classification"],
        label="Task Type",
        value="Regression",
    )
    # Choices are filled in by update_models on page load / task change.
    model_name = gr.Dropdown(label="Model")

    # ---- Output and trigger ----
    output = gr.Dataframe(label="Evaluation Metrics")
    run_btn = gr.Button("Train & Evaluate")

    # ---- Event wiring ----
    # A new upload refreshes the preview and re-detects the task type.
    file_input.change(preview_csv, inputs=file_input, outputs=csv_preview)
    file_input.change(auto_set_task, inputs=file_input, outputs=task_type)

    # The model list follows the task type, including on initial page load.
    task_type.change(update_models, inputs=task_type, outputs=model_name)
    app.load(update_models, inputs=task_type, outputs=model_name)

    run_btn.click(
        train_model,
        inputs=[file_input, task_type, model_name],
        outputs=output,
    )

app.launch()
|