anyonehomep1mane committed on
Commit ·
4928a1a
1
Parent(s): 6ea1869
Latest Code Changes and Bug Fixes
Browse files- app.py +25 -3
- core/__pycache__/training.cpython-310.pyc +0 -0
- core/training.py +87 -40
- models/__pycache__/registry.cpython-310.pyc +0 -0
- models/registry.py +46 -0
- requirements.txt +4 -1
- ui/__pycache__/helpers.cpython-310.pyc +0 -0
- ui/helpers.py +23 -9
app.py
CHANGED
|
@@ -3,6 +3,7 @@ import warnings
|
|
| 3 |
warnings.filterwarnings("ignore")
|
| 4 |
|
| 5 |
from ui.helpers import (
|
|
|
|
| 6 |
update_models,
|
| 7 |
update_graphs,
|
| 8 |
preview_csv,
|
|
@@ -32,6 +33,11 @@ with gr.Blocks() as app:
|
|
| 32 |
label="Task Type",
|
| 33 |
value="Regression",
|
| 34 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 35 |
model_name = gr.Dropdown(label="Model")
|
| 36 |
graph_type = gr.Dropdown(label="Graph Type")
|
| 37 |
|
|
@@ -51,7 +57,18 @@ with gr.Blocks() as app:
|
|
| 51 |
file_input.change(auto_set_task, file_input, task_type)
|
| 52 |
file_input.change(reset_metrics_on_file_clear,inputs=file_input,outputs=[output, plot])
|
| 53 |
|
| 54 |
-
task_type.change(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 55 |
task_type.change(update_graphs, task_type, graph_type)
|
| 56 |
|
| 57 |
show_preview.change(
|
|
@@ -60,12 +77,17 @@ with gr.Blocks() as app:
|
|
| 60 |
outputs=csv_preview,
|
| 61 |
)
|
| 62 |
|
| 63 |
-
app.load(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 64 |
app.load(update_graphs, task_type, graph_type)
|
| 65 |
|
| 66 |
run_btn.click(
|
| 67 |
train_model,
|
| 68 |
-
inputs=[file_input, task_type, model_name, graph_type],
|
| 69 |
outputs=[output, plot]
|
| 70 |
)
|
| 71 |
|
|
|
|
| 3 |
warnings.filterwarnings("ignore")
|
| 4 |
|
| 5 |
from ui.helpers import (
|
| 6 |
+
reset_on_task_change,
|
| 7 |
update_models,
|
| 8 |
update_graphs,
|
| 9 |
preview_csv,
|
|
|
|
| 33 |
label="Task Type",
|
| 34 |
value="Regression",
|
| 35 |
)
|
| 36 |
+
model_group = gr.Dropdown(
|
| 37 |
+
label="Model Group",
|
| 38 |
+
choices=["Basic", "Bagging", "Boosting", "Stacking"],
|
| 39 |
+
value="Basic",
|
| 40 |
+
)
|
| 41 |
model_name = gr.Dropdown(label="Model")
|
| 42 |
graph_type = gr.Dropdown(label="Graph Type")
|
| 43 |
|
|
|
|
| 57 |
file_input.change(auto_set_task, file_input, task_type)
|
| 58 |
file_input.change(reset_metrics_on_file_clear,inputs=file_input,outputs=[output, plot])
|
| 59 |
|
| 60 |
+
task_type.change(
|
| 61 |
+
reset_on_task_change,
|
| 62 |
+
inputs=task_type,
|
| 63 |
+
outputs=[model_group, model_name],
|
| 64 |
+
)
|
| 65 |
+
|
| 66 |
+
model_group.change(
|
| 67 |
+
update_models,
|
| 68 |
+
inputs=[task_type, model_group],
|
| 69 |
+
outputs=model_name,
|
| 70 |
+
)
|
| 71 |
+
|
| 72 |
task_type.change(update_graphs, task_type, graph_type)
|
| 73 |
|
| 74 |
show_preview.change(
|
|
|
|
| 77 |
outputs=csv_preview,
|
| 78 |
)
|
| 79 |
|
| 80 |
+
app.load(
|
| 81 |
+
reset_on_task_change,
|
| 82 |
+
inputs=task_type,
|
| 83 |
+
outputs=[model_group, model_name],
|
| 84 |
+
)
|
| 85 |
+
|
| 86 |
app.load(update_graphs, task_type, graph_type)
|
| 87 |
|
| 88 |
run_btn.click(
|
| 89 |
train_model,
|
| 90 |
+
inputs=[file_input, task_type, model_group, model_name, graph_type],
|
| 91 |
outputs=[output, plot]
|
| 92 |
)
|
| 93 |
|
core/__pycache__/training.cpython-310.pyc
CHANGED
|
Binary files a/core/__pycache__/training.cpython-310.pyc and b/core/__pycache__/training.cpython-310.pyc differ
|
|
|
core/training.py
CHANGED
|
@@ -8,61 +8,108 @@ from models.registry import REGRESSION_MODELS, CLASSIFICATION_MODELS
|
|
| 8 |
from preprocessing.transformers import build_preprocessor
|
| 9 |
from utils.metrics import regression_metrics, classification_metrics
|
| 10 |
from core.visuals import regression_graphs, classification_graphs
|
|
|
|
| 11 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 12 |
|
| 13 |
-
def train_model(file, task_type, model_name, graph_type):
|
| 14 |
-
if file is None:
|
| 15 |
-
return pd.DataFrame({
|
| 16 |
-
"Error": [f"Please upload a csv file first."]
|
| 17 |
-
|
| 18 |
-
}), None
|
| 19 |
-
|
| 20 |
-
df = pd.read_csv(file.name)
|
| 21 |
|
|
|
|
| 22 |
X = df.iloc[:, :-1]
|
| 23 |
y = df.iloc[:, -1]
|
| 24 |
|
| 25 |
-
|
|
|
|
| 26 |
|
| 27 |
-
if
|
| 28 |
-
|
| 29 |
-
"Error": [f"Detected {detected_task} target, but {task_type} selected."]
|
| 30 |
-
}), None
|
| 31 |
|
| 32 |
-
|
| 33 |
-
|
|
|
|
|
|
|
| 34 |
|
| 35 |
-
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
-
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
)
|
| 40 |
|
| 41 |
-
|
| 42 |
-
REGRESSION_MODELS[model_name]
|
| 43 |
-
if task_type == "Regression"
|
| 44 |
-
else CLASSIFICATION_MODELS[model_name]
|
| 45 |
-
)
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
("
|
|
|
|
| 50 |
])
|
| 51 |
|
| 52 |
-
pipeline.fit(X_train, y_train)
|
| 53 |
-
preds = pipeline.predict(X_test)
|
| 54 |
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
|
| 66 |
-
|
| 67 |
|
| 68 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 8 |
from preprocessing.transformers import build_preprocessor
|
| 9 |
from utils.metrics import regression_metrics, classification_metrics
|
| 10 |
from core.visuals import regression_graphs, classification_graphs
|
| 11 |
+
from models.registry import MODEL_GROUPS
|
| 12 |
|
| 13 |
+
from sklearn.compose import ColumnTransformer
|
| 14 |
+
from sklearn.pipeline import Pipeline
|
| 15 |
+
from sklearn.preprocessing import OneHotEncoder, StandardScaler
|
| 16 |
+
from sklearn.impute import SimpleImputer
|
| 17 |
+
from sklearn.model_selection import train_test_split
|
| 18 |
+
import numpy as np
|
| 19 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
+
def build_preprocessor(df):
    """Split *df* into features and target and build a matching preprocessor.

    The last column of ``df`` is taken as the target; all preceding columns
    are candidate features.

    Returns:
        A ``(X, y, preprocessor)`` tuple, where ``preprocessor`` is an
        unfitted ``ColumnTransformer`` covering the numeric and categorical
        feature columns. Columns of any other dtype are dropped by it.

    Raises:
        ValueError: if no numeric or categorical feature column exists.
    """
    features = df.iloc[:, :-1]
    target = df.iloc[:, -1]

    numeric_names = features.select_dtypes(
        include=["int64", "float64"]
    ).columns.tolist()
    categorical_names = features.select_dtypes(
        include=["object", "category", "bool"]
    ).columns.tolist()

    if not numeric_names and not categorical_names:
        raise ValueError("No usable feature columns found")

    # Numeric features: fill gaps with the median, then standardise.
    numeric_steps = [
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ]
    # Categorical features: fill gaps with the mode, then one-hot encode.
    # Categories unseen during fit are ignored at transform time.
    categorical_steps = [
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
    ]

    column_transformer = ColumnTransformer(
        transformers=[
            ("num", Pipeline(numeric_steps), numeric_names),
            ("cat", Pipeline(categorical_steps), categorical_names),
        ],
        remainder="drop",
    )

    return features, target, column_transformer
|
|
|
|
|
|
|
|
|
|
|
|
|
| 50 |
|
| 51 |
+
def build_pipeline(model, preprocessor):
    """Return a two-step ``Pipeline``: *preprocessor* followed by *model*.

    The steps are named ``"preprocessor"`` and ``"model"`` respectively.
    """
    steps = [
        ("preprocessor", preprocessor),
        ("model", model),
    ]
    return Pipeline(steps)
|
| 56 |
|
|
|
|
|
|
|
| 57 |
|
| 58 |
+
def train_model(file, task_type, model_group, model_name, graph_type):
    """Train the selected model on an uploaded CSV and report metrics and a plot.

    Args:
        file: The uploaded CSV, either a path string or an object exposing the
            path as ``.name``; ``None`` when nothing has been uploaded.
        task_type: ``"Regression"`` or ``"Classification"``.
        model_group: Key into ``MODEL_GROUPS``.
        model_name: Key of the estimator within the chosen group and task.
        graph_type: Passed through to the graph helper for the task.

    Returns:
        A ``(dataframe, figure)`` tuple. On success the dataframe has
        ``Metric`` / ``Value`` columns. On any handled failure it has a single
        ``Error`` column and the figure is ``None``.
    """
    if file is None:
        return pd.DataFrame({
            "Error": ["Please upload a csv file first."]
        }), None

    try:
        # Accept both a plain path and an upload object carrying ``.name``.
        csv_path = getattr(file, "name", file)
        df = pd.read_csv(csv_path)

        X, y, preprocessor = build_preprocessor(df)

        detected_task = detect_target_type(y)
        if task_type != detected_task:
            return pd.DataFrame({
                "Error": [f"Detected {detected_task} target, but {task_type} selected."]
            }), None

        is_classification = task_type == "Classification"

        if is_classification and y.dtype == "object":
            y = LabelEncoder().fit_transform(y)

        # A stale or empty dropdown value must surface as a readable error
        # rather than an uncaught KeyError.
        try:
            model = MODEL_GROUPS[model_group][task_type][model_name]
        except KeyError:
            return pd.DataFrame({
                "Error": [
                    f"Unknown model '{model_name}' for {model_group} / {task_type}."
                ]
            }), None

        # Stratify only class labels; stratifying a continuous target with
        # few distinct values makes the split fail on singleton values.
        stratify_on = None
        if is_classification and len(np.unique(y)) < 20:
            stratify_on = y

        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=0.2,
            random_state=42,
            stratify=stratify_on,
        )

        pipeline = build_pipeline(model, preprocessor)
        pipeline.fit(X_train, y_train)
        preds = pipeline.predict(X_test)

        if is_classification:
            metrics = classification_metrics(pipeline, X_test, y_test, preds)
            fig = classification_graphs(graph_type, pipeline, X_test, y_test, preds)
        else:
            metrics = regression_metrics(y_test, preds)
            fig = regression_graphs(graph_type, X, y, model, pipeline, y_test, preds)

        metrics_df = pd.DataFrame(metrics.items(), columns=["Metric", "Value"])

        return metrics_df, fig
    except ValueError as e:
        # Covers pandas parsing errors and scikit-learn validation errors.
        return (
            pd.DataFrame({"Error": [str(e)]}),
            None,
        )
|
models/__pycache__/registry.cpython-310.pyc
CHANGED
|
Binary files a/models/__pycache__/registry.cpython-310.pyc and b/models/__pycache__/registry.cpython-310.pyc differ
|
|
|
models/registry.py
CHANGED
|
@@ -4,6 +4,13 @@ from sklearn.naive_bayes import GaussianNB
|
|
| 4 |
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
|
| 5 |
from sklearn.svm import SVC, SVR
|
| 6 |
from sklearn.neural_network import MLPClassifier, MLPRegressor
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 7 |
|
| 8 |
REGRESSION_MODELS = {
|
| 9 |
"Linear Regression": LinearRegression(),
|
|
@@ -23,6 +30,45 @@ CLASSIFICATION_MODELS = {
|
|
| 23 |
"MLP Classifier": MLPClassifier(max_iter=1000),
|
| 24 |
}
|
| 25 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 26 |
CLASSIFICATION_GRAPHS = [
|
| 27 |
"Confusion Matrix",
|
| 28 |
"ROC Curve",
|
|
|
|
| 4 |
from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
|
| 5 |
from sklearn.svm import SVC, SVR
|
| 6 |
from sklearn.neural_network import MLPClassifier, MLPRegressor
|
| 7 |
+
from sklearn.ensemble import (
|
| 8 |
+
RandomForestClassifier, RandomForestRegressor,
|
| 9 |
+
ExtraTreesClassifier, ExtraTreesRegressor,
|
| 10 |
+
AdaBoostClassifier, AdaBoostRegressor,
|
| 11 |
+
GradientBoostingClassifier, GradientBoostingRegressor,
|
| 12 |
+
StackingClassifier, StackingRegressor
|
| 13 |
+
)
|
| 14 |
|
| 15 |
REGRESSION_MODELS = {
|
| 16 |
"Linear Regression": LinearRegression(),
|
|
|
|
| 30 |
"MLP Classifier": MLPClassifier(max_iter=1000),
|
| 31 |
}
|
| 32 |
|
| 33 |
+
# Nested lookup table: model group -> task type -> display name -> estimator.
# Every estimator below is instantiated once, when this module is imported.
# NOTE(review): the same instances are handed to every caller; confirm that
# callers refit or clone them as needed.
MODEL_GROUPS = {
    # "Basic" reuses the two flat registries defined earlier in this module.
    "Basic": {
        "Regression": REGRESSION_MODELS,
        "Classification": CLASSIFICATION_MODELS,
    },
    "Bagging": {
        "Regression": {
            "Random Forest Regressor": RandomForestRegressor(),
            "Extra Trees Regressor": ExtraTreesRegressor(),
        },
        "Classification": {
            "Random Forest Classifier": RandomForestClassifier(),
            "Extra Trees Classifier": ExtraTreesClassifier(),
        },
    },
    "Boosting": {
        "Regression": {
            "AdaBoost Regressor": AdaBoostRegressor(),
            "Gradient Boosting Regressor": GradientBoostingRegressor(),
        },
        "Classification": {
            "AdaBoost Classifier": AdaBoostClassifier(),
            "Gradient Boosting Classifier": GradientBoostingClassifier(),
        },
    },
    # Each stacking model is given a single base estimator and no explicit
    # final_estimator argument.
    "Stacking": {
        "Regression": {
            "Stacking Regressor": StackingRegressor(
                estimators=[("lr", LinearRegression())]
            ),
        },
        "Classification": {
            "Stacking Classifier": StackingClassifier(
                estimators=[("lr", LogisticRegression(max_iter=500))]
            ),
        },
    },
}
|
| 71 |
+
|
| 72 |
CLASSIFICATION_GRAPHS = [
|
| 73 |
"Confusion Matrix",
|
| 74 |
"ROC Curve",
|
requirements.txt
CHANGED
|
@@ -2,4 +2,7 @@ gradio>=4.0.0
|
|
| 2 |
pandas
|
| 3 |
scikit-learn
|
| 4 |
numpy
|
| 5 |
-
matplotlib
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
pandas
|
| 3 |
scikit-learn
|
| 4 |
numpy
|
| 5 |
+
matplotlib
|
| 6 |
+
xgboost
|
| 7 |
+
lightgbm
|
| 8 |
+
catboost
|
ui/__pycache__/helpers.cpython-310.pyc
CHANGED
|
Binary files a/ui/__pycache__/helpers.cpython-310.pyc and b/ui/__pycache__/helpers.cpython-310.pyc differ
|
|
|
ui/helpers.py
CHANGED
|
@@ -1,17 +1,31 @@
|
|
|
|
|
| 1 |
import gradio as gr
|
| 2 |
import pandas as pd
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 4 |
|
| 5 |
|
| 6 |
-
def update_models(task_type):
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
else:
|
| 10 |
-
models = list(CLASSIFICATION_MODELS.keys())
|
| 11 |
|
| 12 |
return gr.update(
|
| 13 |
-
choices=
|
| 14 |
-
value=
|
| 15 |
)
|
| 16 |
|
| 17 |
|
|
@@ -29,7 +43,7 @@ def update_graphs(task_type):
|
|
| 29 |
|
| 30 |
import os
|
| 31 |
|
| 32 |
-
def preview_csv(file, max_rows=
|
| 33 |
if not file:
|
| 34 |
return None
|
| 35 |
|
|
|
|
| 1 |
+
from models.registry import MODEL_GROUPS, REGRESSION_GRAPHS, CLASSIFICATION_GRAPHS
|
| 2 |
import gradio as gr
|
| 3 |
import pandas as pd
|
| 4 |
+
import os
|
| 5 |
+
|
| 6 |
+
def reset_on_task_change(task_type):
    """
    When task changes:
    - Model Group -> Basic
    - Model -> first model of Basic group for selected task

    Returns a pair of ``gr.update`` objects: one for the model-group
    dropdown, one for the model dropdown. An unknown or missing task type
    yields an empty model list with no selection instead of raising.
    """
    model_group = "Basic"
    # Use tolerant lookups, as update_models does, so a cleared or unexpected
    # task value cannot crash the UI callback.
    models = MODEL_GROUPS.get(model_group, {}).get(task_type, {})
    model_names = list(models.keys())

    return (
        gr.update(value=model_group),
        gr.update(
            choices=model_names,
            value=model_names[0] if model_names else None,
        ),
    )
|
| 20 |
|
| 21 |
|
| 22 |
+
def update_models(task_type, model_group):
    """Build the model-dropdown update for the given task and model group.

    The choices are the model names registered under ``model_group`` and
    ``task_type``. The first one is preselected, or nothing is selected when
    the combination has no models.
    """
    group_entry = MODEL_GROUPS.get(model_group, {})
    available = [*group_entry.get(task_type, {})]
    default_choice = available[0] if available else None

    return gr.update(choices=available, value=default_choice)
|
| 30 |
|
| 31 |
|
|
|
|
| 43 |
|
| 44 |
import os
|
| 45 |
|
| 46 |
+
def preview_csv(file, max_rows=10):
|
| 47 |
if not file:
|
| 48 |
return None
|
| 49 |
|