Spaces:

HF-Pawan
/

Supervised-Learning-Model-Trainer

Running

App Files Files Community

anyonehomep1mane committed 19 days ago

Commit

4928a1a

1 Parent(s): 6ea1869

Latest Code Changes and Bug Fixes

Browse files

Files changed (8) hide show

app.py +25 -3
core/__pycache__/training.cpython-310.pyc +0 -0
core/training.py +87 -40
models/__pycache__/registry.cpython-310.pyc +0 -0
models/registry.py +46 -0
requirements.txt +4 -1
ui/__pycache__/helpers.cpython-310.pyc +0 -0
ui/helpers.py +23 -9

app.py CHANGED Viewed

@@ -3,6 +3,7 @@ import warnings
 warnings.filterwarnings("ignore")
 from ui.helpers import (
     update_models,
     update_graphs,
     preview_csv,
@@ -32,6 +33,11 @@ with gr.Blocks() as app:
                     label="Task Type",
                     value="Regression",
                 )
                 model_name = gr.Dropdown(label="Model")
                 graph_type = gr.Dropdown(label="Graph Type")
@@ -51,7 +57,18 @@ with gr.Blocks() as app:
         file_input.change(auto_set_task, file_input, task_type)
         file_input.change(reset_metrics_on_file_clear,inputs=file_input,outputs=[output, plot])
-        task_type.change(update_models, task_type, model_name)
         task_type.change(update_graphs, task_type, graph_type)
         show_preview.change(
@@ -60,12 +77,17 @@ with gr.Blocks() as app:
             outputs=csv_preview,
         )
-        app.load(update_models, task_type, model_name)
         app.load(update_graphs, task_type, graph_type)
         run_btn.click(
             train_model,
-            inputs=[file_input, task_type, model_name, graph_type],
             outputs=[output, plot]
         )

 warnings.filterwarnings("ignore")
 from ui.helpers import (
+    reset_on_task_change,
     update_models,
     update_graphs,
     preview_csv,
                     label="Task Type",
                     value="Regression",
                 )
+                model_group = gr.Dropdown(
+                    label="Model Group",
+                    choices=["Basic", "Bagging", "Boosting", "Stacking"],
+                    value="Basic",
+                )
                 model_name = gr.Dropdown(label="Model")
                 graph_type = gr.Dropdown(label="Graph Type")
         file_input.change(auto_set_task, file_input, task_type)
         file_input.change(reset_metrics_on_file_clear,inputs=file_input,outputs=[output, plot])
+        task_type.change(
+            reset_on_task_change,
+            inputs=task_type,
+            outputs=[model_group, model_name],
+        )
+        model_group.change(
+            update_models,
+            inputs=[task_type, model_group],
+            outputs=model_name,
+        )
         task_type.change(update_graphs, task_type, graph_type)
         show_preview.change(
             outputs=csv_preview,
         )
+        app.load(
+            reset_on_task_change,
+            inputs=task_type,
+            outputs=[model_group, model_name],
+        )
         app.load(update_graphs, task_type, graph_type)
         run_btn.click(
             train_model,
+            inputs=[file_input, task_type, model_group, model_name, graph_type],
             outputs=[output, plot]
         )

core/__pycache__/training.cpython-310.pyc CHANGED Viewed

Binary files a/core/__pycache__/training.cpython-310.pyc and b/core/__pycache__/training.cpython-310.pyc differ

core/training.py CHANGED Viewed

@@ -8,61 +8,108 @@ from models.registry import REGRESSION_MODELS, CLASSIFICATION_MODELS
 from preprocessing.transformers import build_preprocessor
 from utils.metrics import regression_metrics, classification_metrics
 from core.visuals import regression_graphs, classification_graphs
-def train_model(file, task_type, model_name, graph_type):
-    if file is None:
-        return pd.DataFrame({
-            "Error": [f"Please upload a csv file first."]
-        }), None
-    df = pd.read_csv(file.name)
     X = df.iloc[:, :-1]
     y = df.iloc[:, -1]
-    detected_task = detect_target_type(y)
-    if task_type != detected_task:
-        return pd.DataFrame({
-            "Error": [f"Detected {detected_task} target, but {task_type} selected."]
-        }), None
-    if task_type == "Classification" and y.dtype == "object":
-        y = LabelEncoder().fit_transform(y)
-    preprocessor = build_preprocessor(X)
-    X_train, X_test, y_train, y_test = train_test_split(
-        X, y, test_size=0.2, random_state=42
     )
-    model = (
-        REGRESSION_MODELS[model_name]
-        if task_type == "Regression"
-        else CLASSIFICATION_MODELS[model_name]
-    )
-    pipeline = Pipeline([
-        ("preprocessing", preprocessor),
-        ("model", model),
     ])
-    pipeline.fit(X_train, y_train)
-    preds = pipeline.predict(X_test)
-    if task_type == "Regression":
-        metrics = regression_metrics(y_test, preds)
-    else:
-        metrics = classification_metrics(pipeline, X_test, y_test, preds)
-    fig = None
-    if task_type == "Regression":
-        fig = regression_graphs(graph_type, X, y, model, pipeline, y_test, preds)
-    else:
-        fig = classification_graphs(graph_type, pipeline, X_test, y_test, preds)
-    metrics_df = pd.DataFrame(metrics.items(), columns=["Metric", "Value"])
-    return metrics_df, fig

 from preprocessing.transformers import build_preprocessor
 from utils.metrics import regression_metrics, classification_metrics
 from core.visuals import regression_graphs, classification_graphs
+from models.registry import MODEL_GROUPS
+from sklearn.compose import ColumnTransformer
+from sklearn.pipeline import Pipeline
+from sklearn.preprocessing import OneHotEncoder, StandardScaler
+from sklearn.impute import SimpleImputer
+from sklearn.model_selection import train_test_split
+import numpy as np
def build_preprocessor(df):
    """Split *df* into features and target and build an unfitted preprocessor.

    The last column of ``df`` is taken as the target; all preceding columns
    are candidate features. Numeric features are median-imputed and
    standardised; object/category/bool features are imputed with the most
    frequent value and one-hot encoded (unknown categories are ignored at
    transform time, output is dense). Columns of any other dtype are dropped.

    Args:
        df: DataFrame whose last column is the prediction target.

    Returns:
        A tuple ``(X, y, preprocessor)`` holding the feature frame, the target
        column and the unfitted ``ColumnTransformer``.

    Raises:
        ValueError: If no numeric or categorical feature column is found.
    """
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]

    # "number" matches every numeric dtype (int32, float32, nullable Int64, ...),
    # not only the 64-bit defaults; anything left unselected is discarded by
    # remainder="drop" below, so a narrow filter silently loses features.
    num_cols = X.select_dtypes(include="number").columns.tolist()
    cat_cols = X.select_dtypes(include=["object", "category", "bool"]).columns.tolist()

    if not num_cols and not cat_cols:
        raise ValueError("No usable feature columns found")

    numeric_pipeline = Pipeline([
        ("imputer", SimpleImputer(strategy="median")),
        ("scaler", StandardScaler()),
    ])
    categorical_pipeline = Pipeline([
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False)),
    ])

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_pipeline, num_cols),
            ("cat", categorical_pipeline, cat_cols),
        ],
        remainder="drop",
    )
    return X, y, preprocessor
def build_pipeline(model, preprocessor):
    """Chain *preprocessor* and *model* into one two-step ``Pipeline``.

    The steps are registered under the names "preprocessor" and "model",
    in that order.
    """
    steps = [
        ("preprocessor", preprocessor),
        ("model", model),
    ]
    return Pipeline(steps)
def _error_result(message):
    """Return the ``(metrics, figure)`` pair that reports *message* to the UI."""
    return pd.DataFrame({"Error": [message]}), None


def train_model(file, task_type, model_group, model_name, graph_type):
    """Train the selected model on an uploaded CSV and report metrics and a plot.

    Args:
        file: Uploaded file object whose ``name`` attribute is the CSV path,
            or ``None`` when nothing has been uploaded.
        task_type: "Regression" or "Classification".
        model_group: Key into ``MODEL_GROUPS`` (e.g. "Basic").
        model_name: Key of the model inside the selected group and task.
        graph_type: Graph identifier forwarded to the plotting helpers.

    Returns:
        A tuple ``(frame, figure)``. On success ``frame`` has the columns
        "Metric" and "Value"; on failure it has a single "Error" column and
        ``figure`` is ``None``.
    """
    if file is None:
        return _error_result("Please upload a csv file first.")

    try:
        df = pd.read_csv(file.name)
        X, y, preprocessor = build_preprocessor(df)

        detected_task = detect_target_type(y)
        if task_type != detected_task:
            return _error_result(
                f"Detected {detected_task} target, but {task_type} selected."
            )

        is_classification = task_type == "Classification"
        if is_classification and y.dtype == "object":
            y = LabelEncoder().fit_transform(y)

        # The dropdowns can briefly hold a combination that does not exist
        # (e.g. a model name left over from another group); report it instead
        # of letting the KeyError escape to the UI.
        try:
            registered = MODEL_GROUPS[model_group][task_type][model_name]
        except KeyError:
            return _error_result(
                f"Model '{model_name}' is not available for {task_type} "
                f"in the '{model_group}' group."
            )

        # MODEL_GROUPS stores estimator instances, so fitting the registry
        # object directly would leave fitted state shared between runs.
        # Imported here so the block stays self-contained.
        from sklearn.base import clone
        model = clone(registered)

        # Stratify only class labels, and only when every class has at least
        # two rows; otherwise train_test_split raises on the split.
        stratify = None
        if is_classification:
            _, class_counts = np.unique(y, return_counts=True)
            if len(class_counts) < 20 and class_counts.min() >= 2:
                stratify = y

        X_train, X_test, y_train, y_test = train_test_split(
            X, y,
            test_size=0.2,
            random_state=42,
            stratify=stratify,
        )

        pipeline = build_pipeline(model, preprocessor)
        pipeline.fit(X_train, y_train)
        preds = pipeline.predict(X_test)

        if task_type == "Regression":
            metrics = regression_metrics(y_test, preds)
            fig = regression_graphs(graph_type, X, y, model, pipeline, y_test, preds)
        else:
            metrics = classification_metrics(pipeline, X_test, y_test, preds)
            fig = classification_graphs(graph_type, pipeline, X_test, y_test, preds)

        metrics_df = pd.DataFrame(metrics.items(), columns=["Metric", "Value"])
        return metrics_df, fig
    except ValueError as e:
        # Data and validation problems are shown to the user, not raised.
        return _error_result(str(e))

models/__pycache__/registry.cpython-310.pyc CHANGED Viewed

Binary files a/models/__pycache__/registry.cpython-310.pyc and b/models/__pycache__/registry.cpython-310.pyc differ

models/registry.py CHANGED Viewed

@@ -4,6 +4,13 @@ from sklearn.naive_bayes import GaussianNB
 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from sklearn.svm import SVC, SVR
 from sklearn.neural_network import MLPClassifier, MLPRegressor
 REGRESSION_MODELS = {
     "Linear Regression": LinearRegression(),
@@ -23,6 +30,45 @@ CLASSIFICATION_MODELS = {
     "MLP Classifier": MLPClassifier(max_iter=1000),
 }
 CLASSIFICATION_GRAPHS = [
     "Confusion Matrix",
     "ROC Curve",

 from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
 from sklearn.svm import SVC, SVR
 from sklearn.neural_network import MLPClassifier, MLPRegressor
+from sklearn.ensemble import (
+    RandomForestClassifier, RandomForestRegressor,
+    ExtraTreesClassifier, ExtraTreesRegressor,
+    AdaBoostClassifier, AdaBoostRegressor,
+    GradientBoostingClassifier, GradientBoostingRegressor,
+    StackingClassifier, StackingRegressor
+)
 REGRESSION_MODELS = {
     "Linear Regression": LinearRegression(),
     "MLP Classifier": MLPClassifier(max_iter=1000),
 }
# Estimators offered by the app, keyed as group -> task type -> model name.
MODEL_GROUPS = {
    # Reuses the two existing single-estimator tables as-is.
    "Basic": {
        "Regression": REGRESSION_MODELS,
        "Classification": CLASSIFICATION_MODELS,
    },
    "Bagging": {
        "Regression": {
            "Random Forest Regressor": RandomForestRegressor(),
            "Extra Trees Regressor": ExtraTreesRegressor(),
        },
        "Classification": {
            "Random Forest Classifier": RandomForestClassifier(),
            "Extra Trees Classifier": ExtraTreesClassifier(),
        },
    },
    "Boosting": {
        "Regression": {
            "AdaBoost Regressor": AdaBoostRegressor(),
            "Gradient Boosting Regressor": GradientBoostingRegressor(),
        },
        "Classification": {
            "AdaBoost Classifier": AdaBoostClassifier(),
            "Gradient Boosting Classifier": GradientBoostingClassifier(),
        },
    },
    # Each stacking ensemble is configured with a single linear base estimator.
    "Stacking": {
        "Regression": {
            "Stacking Regressor": StackingRegressor(estimators=[("lr", LinearRegression())]),
        },
        "Classification": {
            "Stacking Classifier": StackingClassifier(estimators=[("lr", LogisticRegression(max_iter=500))]),
        },
    },
}
 CLASSIFICATION_GRAPHS = [
     "Confusion Matrix",
     "ROC Curve",

requirements.txt CHANGED Viewed

@@ -2,4 +2,7 @@ gradio>=4.0.0
 pandas
 scikit-learn
 numpy
-matplotlib

 pandas
 scikit-learn
 numpy
+matplotlib
+xgboost
+lightgbm
+catboost

ui/__pycache__/helpers.cpython-310.pyc CHANGED Viewed

Binary files a/ui/__pycache__/helpers.cpython-310.pyc and b/ui/__pycache__/helpers.cpython-310.pyc differ

ui/helpers.py CHANGED Viewed

@@ -1,17 +1,31 @@
 import gradio as gr
 import pandas as pd
-from models.registry import REGRESSION_MODELS, CLASSIFICATION_MODELS, REGRESSION_GRAPHS, CLASSIFICATION_GRAPHS
-def update_models(task_type):
-    if task_type == "Regression":
-        models = list(REGRESSION_MODELS.keys())
-    else:
-        models = list(CLASSIFICATION_MODELS.keys())
     return gr.update(
-        choices=models,
-        value=models[0] if models else None  # ✅ auto-select first
     )
@@ -29,7 +43,7 @@ def update_graphs(task_type):
 import os
-def preview_csv(file, max_rows=50):
     if not file:
         return None

+from models.registry import MODEL_GROUPS, REGRESSION_GRAPHS, CLASSIFICATION_GRAPHS
 import gradio as gr
 import pandas as pd
+import os
def reset_on_task_change(task_type):
    """Reset the model selectors after the task type changes.

    The model group goes back to "Basic" and the model dropdown is filled
    with the Basic models registered for ``task_type``, selecting the first.

    Args:
        task_type: Currently selected task (e.g. "Regression").

    Returns:
        A pair of ``gr.update`` objects: one for the model-group dropdown and
        one for the model dropdown. If no models are registered for
        ``task_type`` the model dropdown gets no choices and no value.
    """
    model_group = "Basic"
    # Defensive lookup, as in update_models: an unknown or cleared task type
    # yields an empty dropdown instead of a KeyError/IndexError in the callback.
    models = MODEL_GROUPS.get(model_group, {}).get(task_type, {})
    model_names = list(models.keys())
    return (
        gr.update(value=model_group),
        gr.update(
            choices=model_names,
            value=model_names[0] if model_names else None,
        ),
    )
def update_models(task_type, model_group):
    """Refresh the model dropdown for the chosen task type and model group.

    Unknown groups or task types produce an empty choice list with no
    selected value; otherwise the first registered model is selected.
    """
    group_entry = MODEL_GROUPS.get(model_group, {})
    available = group_entry.get(task_type, {})
    model_names = list(available)
    if model_names:
        selected = model_names[0]
    else:
        selected = None
    return gr.update(choices=model_names, value=selected)
 import os
+def preview_csv(file, max_rows=10):
     if not file:
         return None