anyonehomep1mane committed on
Commit
4928a1a
·
1 Parent(s): 6ea1869

Latest Code Changes and Bug Fixes

Browse files
app.py CHANGED
@@ -3,6 +3,7 @@ import warnings
3
  warnings.filterwarnings("ignore")
4
 
5
  from ui.helpers import (
 
6
  update_models,
7
  update_graphs,
8
  preview_csv,
@@ -32,6 +33,11 @@ with gr.Blocks() as app:
32
  label="Task Type",
33
  value="Regression",
34
  )
 
 
 
 
 
35
  model_name = gr.Dropdown(label="Model")
36
  graph_type = gr.Dropdown(label="Graph Type")
37
 
@@ -51,7 +57,18 @@ with gr.Blocks() as app:
51
  file_input.change(auto_set_task, file_input, task_type)
52
  file_input.change(reset_metrics_on_file_clear,inputs=file_input,outputs=[output, plot])
53
 
54
- task_type.change(update_models, task_type, model_name)
 
 
 
 
 
 
 
 
 
 
 
55
  task_type.change(update_graphs, task_type, graph_type)
56
 
57
  show_preview.change(
@@ -60,12 +77,17 @@ with gr.Blocks() as app:
60
  outputs=csv_preview,
61
  )
62
 
63
- app.load(update_models, task_type, model_name)
 
 
 
 
 
64
  app.load(update_graphs, task_type, graph_type)
65
 
66
  run_btn.click(
67
  train_model,
68
- inputs=[file_input, task_type, model_name, graph_type],
69
  outputs=[output, plot]
70
  )
71
 
 
3
  warnings.filterwarnings("ignore")
4
 
5
  from ui.helpers import (
6
+ reset_on_task_change,
7
  update_models,
8
  update_graphs,
9
  preview_csv,
 
33
  label="Task Type",
34
  value="Regression",
35
  )
36
+ model_group = gr.Dropdown(
37
+ label="Model Group",
38
+ choices=["Basic", "Bagging", "Boosting", "Stacking"],
39
+ value="Basic",
40
+ )
41
  model_name = gr.Dropdown(label="Model")
42
  graph_type = gr.Dropdown(label="Graph Type")
43
 
 
57
  file_input.change(auto_set_task, file_input, task_type)
58
  file_input.change(reset_metrics_on_file_clear,inputs=file_input,outputs=[output, plot])
59
 
60
+ task_type.change(
61
+ reset_on_task_change,
62
+ inputs=task_type,
63
+ outputs=[model_group, model_name],
64
+ )
65
+
66
+ model_group.change(
67
+ update_models,
68
+ inputs=[task_type, model_group],
69
+ outputs=model_name,
70
+ )
71
+
72
  task_type.change(update_graphs, task_type, graph_type)
73
 
74
  show_preview.change(
 
77
  outputs=csv_preview,
78
  )
79
 
80
+ app.load(
81
+ reset_on_task_change,
82
+ inputs=task_type,
83
+ outputs=[model_group, model_name],
84
+ )
85
+
86
  app.load(update_graphs, task_type, graph_type)
87
 
88
  run_btn.click(
89
  train_model,
90
+ inputs=[file_input, task_type, model_group, model_name, graph_type],
91
  outputs=[output, plot]
92
  )
93
 
core/__pycache__/training.cpython-310.pyc CHANGED
Binary files a/core/__pycache__/training.cpython-310.pyc and b/core/__pycache__/training.cpython-310.pyc differ
 
core/training.py CHANGED
@@ -8,61 +8,108 @@ from models.registry import REGRESSION_MODELS, CLASSIFICATION_MODELS
8
  from preprocessing.transformers import build_preprocessor
9
  from utils.metrics import regression_metrics, classification_metrics
10
  from core.visuals import regression_graphs, classification_graphs
 
11
 
 
 
 
 
 
 
12
 
13
- def train_model(file, task_type, model_name, graph_type):
14
- if file is None:
15
- return pd.DataFrame({
16
- "Error": [f"Please upload a csv file first."]
17
-
18
- }), None
19
-
20
- df = pd.read_csv(file.name)
21
 
 
22
  X = df.iloc[:, :-1]
23
  y = df.iloc[:, -1]
24
 
25
- detected_task = detect_target_type(y)
 
26
 
27
- if task_type != detected_task:
28
- return pd.DataFrame({
29
- "Error": [f"Detected {detected_task} target, but {task_type} selected."]
30
- }), None
31
 
32
- if task_type == "Classification" and y.dtype == "object":
33
- y = LabelEncoder().fit_transform(y)
 
 
34
 
35
- preprocessor = build_preprocessor(X)
 
 
 
36
 
37
- X_train, X_test, y_train, y_test = train_test_split(
38
- X, y, test_size=0.2, random_state=42
 
 
 
 
39
  )
40
 
41
- model = (
42
- REGRESSION_MODELS[model_name]
43
- if task_type == "Regression"
44
- else CLASSIFICATION_MODELS[model_name]
45
- )
46
 
47
- pipeline = Pipeline([
48
- ("preprocessing", preprocessor),
49
- ("model", model),
 
50
  ])
51
 
52
- pipeline.fit(X_train, y_train)
53
- preds = pipeline.predict(X_test)
54
 
55
- if task_type == "Regression":
56
- metrics = regression_metrics(y_test, preds)
57
- else:
58
- metrics = classification_metrics(pipeline, X_test, y_test, preds)
59
-
60
- fig = None
61
- if task_type == "Regression":
62
- fig = regression_graphs(graph_type, X, y, model, pipeline, y_test, preds)
63
- else:
64
- fig = classification_graphs(graph_type, pipeline, X_test, y_test, preds)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
65
 
66
- metrics_df = pd.DataFrame(metrics.items(), columns=["Metric", "Value"])
67
 
68
- return metrics_df, fig
 
 
 
 
 
 
8
  from preprocessing.transformers import build_preprocessor
9
  from utils.metrics import regression_metrics, classification_metrics
10
  from core.visuals import regression_graphs, classification_graphs
11
+ from models.registry import MODEL_GROUPS
12
 
13
+ from sklearn.compose import ColumnTransformer
14
+ from sklearn.pipeline import Pipeline
15
+ from sklearn.preprocessing import OneHotEncoder, StandardScaler
16
+ from sklearn.impute import SimpleImputer
17
+ from sklearn.model_selection import train_test_split
18
+ import numpy as np
19
 
 
 
 
 
 
 
 
 
20
 
21
+ def build_preprocessor(df):
22
  X = df.iloc[:, :-1]
23
  y = df.iloc[:, -1]
24
 
25
+ num_cols = X.select_dtypes(include=["int64", "float64"]).columns.tolist()
26
+ cat_cols = X.select_dtypes(include=["object", "category", "bool"]).columns.tolist()
27
 
28
+ if len(num_cols) + len(cat_cols) == 0:
29
+ raise ValueError("No usable feature columns found")
 
 
30
 
31
+ numeric_pipeline = Pipeline([
32
+ ("imputer", SimpleImputer(strategy="median")),
33
+ ("scaler", StandardScaler())
34
+ ])
35
 
36
+ categorical_pipeline = Pipeline([
37
+ ("imputer", SimpleImputer(strategy="most_frequent")),
38
+ ("encoder", OneHotEncoder(handle_unknown="ignore", sparse_output=False))
39
+ ])
40
 
41
+ preprocessor = ColumnTransformer(
42
+ transformers=[
43
+ ("num", numeric_pipeline, num_cols),
44
+ ("cat", categorical_pipeline, cat_cols),
45
+ ],
46
+ remainder="drop"
47
  )
48
 
49
+ return X, y, preprocessor
 
 
 
 
50
 
51
+ def build_pipeline(model, preprocessor):
52
+ return Pipeline([
53
+ ("preprocessor", preprocessor),
54
+ ("model", model)
55
  ])
56
 
 
 
57
 
58
+ def train_model(file, task_type, model_group, model_name, graph_type):
59
+ try:
60
+ if file is None:
61
+ return pd.DataFrame({
62
+ "Error": [f"Please upload a csv file first."]
63
+
64
+ }), None
65
+
66
+ df = pd.read_csv(file.name)
67
+
68
+ X, y, preprocessor = build_preprocessor(df)
69
+
70
+ detected_task = detect_target_type(y)
71
+
72
+ if task_type != detected_task:
73
+ return pd.DataFrame({
74
+ "Error": [f"Detected {detected_task} target, but {task_type} selected."]
75
+ }), None
76
+
77
+ if task_type == "Classification" and y.dtype == "object":
78
+ y = LabelEncoder().fit_transform(y)
79
+
80
+ model = MODEL_GROUPS[model_group][task_type][model_name]
81
+
82
+ unique_count = len(np.unique(y))
83
+
84
+ X_train, X_test, y_train, y_test = train_test_split(
85
+ X, y,
86
+ test_size=0.2,
87
+ random_state=42,
88
+ stratify=y if unique_count < 20 else None
89
+ )
90
+
91
+ pipeline = build_pipeline(model, preprocessor)
92
+
93
+ pipeline.fit(X_train, y_train)
94
+
95
+ preds = pipeline.predict(X_test)
96
+
97
+ if task_type == "Regression":
98
+ metrics = regression_metrics(y_test, preds)
99
+ else:
100
+ metrics = classification_metrics(pipeline, X_test, y_test, preds)
101
+
102
+ fig = None
103
+ if task_type == "Regression":
104
+ fig = regression_graphs(graph_type, X, y, model, pipeline, y_test, preds)
105
+ else:
106
+ fig = classification_graphs(graph_type, pipeline, X_test, y_test, preds)
107
 
108
+ metrics_df = pd.DataFrame(metrics.items(), columns=["Metric", "Value"])
109
 
110
+ return metrics_df, fig
111
+ except ValueError as e:
112
+ return (
113
+ pd.DataFrame({"Error": [str(e)]}),
114
+ None,
115
+ )
models/__pycache__/registry.cpython-310.pyc CHANGED
Binary files a/models/__pycache__/registry.cpython-310.pyc and b/models/__pycache__/registry.cpython-310.pyc differ
 
models/registry.py CHANGED
@@ -4,6 +4,13 @@ from sklearn.naive_bayes import GaussianNB
4
  from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
5
  from sklearn.svm import SVC, SVR
6
  from sklearn.neural_network import MLPClassifier, MLPRegressor
 
 
 
 
 
 
 
7
 
8
  REGRESSION_MODELS = {
9
  "Linear Regression": LinearRegression(),
@@ -23,6 +30,45 @@ CLASSIFICATION_MODELS = {
23
  "MLP Classifier": MLPClassifier(max_iter=1000),
24
  }
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  CLASSIFICATION_GRAPHS = [
27
  "Confusion Matrix",
28
  "ROC Curve",
 
4
  from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
5
  from sklearn.svm import SVC, SVR
6
  from sklearn.neural_network import MLPClassifier, MLPRegressor
7
+ from sklearn.ensemble import (
8
+ RandomForestClassifier, RandomForestRegressor,
9
+ ExtraTreesClassifier, ExtraTreesRegressor,
10
+ AdaBoostClassifier, AdaBoostRegressor,
11
+ GradientBoostingClassifier, GradientBoostingRegressor,
12
+ StackingClassifier, StackingRegressor
13
+ )
14
 
15
  REGRESSION_MODELS = {
16
  "Linear Regression": LinearRegression(),
 
30
  "MLP Classifier": MLPClassifier(max_iter=1000),
31
  }
32
 
33
+ MODEL_GROUPS = {
34
+ "Basic": {
35
+ "Regression": REGRESSION_MODELS,
36
+ "Classification": CLASSIFICATION_MODELS,
37
+ },
38
+ "Bagging": {
39
+ "Regression": {
40
+ "Random Forest Regressor": RandomForestRegressor(),
41
+ "Extra Trees Regressor": ExtraTreesRegressor(),
42
+ },
43
+ "Classification": {
44
+ "Random Forest Classifier": RandomForestClassifier(),
45
+ "Extra Trees Classifier": ExtraTreesClassifier(),
46
+ },
47
+ },
48
+ "Boosting": {
49
+ "Regression": {
50
+ "AdaBoost Regressor": AdaBoostRegressor(),
51
+ "Gradient Boosting Regressor": GradientBoostingRegressor(),
52
+ },
53
+ "Classification": {
54
+ "AdaBoost Classifier": AdaBoostClassifier(),
55
+ "Gradient Boosting Classifier": GradientBoostingClassifier(),
56
+ },
57
+ },
58
+ "Stacking": {
59
+ "Regression": {
60
+ "Stacking Regressor": StackingRegressor(
61
+ estimators=[("lr", LinearRegression())]
62
+ ),
63
+ },
64
+ "Classification": {
65
+ "Stacking Classifier": StackingClassifier(
66
+ estimators=[("lr", LogisticRegression(max_iter=500))]
67
+ ),
68
+ },
69
+ },
70
+ }
71
+
72
  CLASSIFICATION_GRAPHS = [
73
  "Confusion Matrix",
74
  "ROC Curve",
requirements.txt CHANGED
@@ -2,4 +2,7 @@ gradio>=4.0.0
2
  pandas
3
  scikit-learn
4
  numpy
5
- matplotlib
 
 
 
 
2
  pandas
3
  scikit-learn
4
  numpy
5
+ matplotlib
6
+ xgboost
7
+ lightgbm
8
+ catboost
ui/__pycache__/helpers.cpython-310.pyc CHANGED
Binary files a/ui/__pycache__/helpers.cpython-310.pyc and b/ui/__pycache__/helpers.cpython-310.pyc differ
 
ui/helpers.py CHANGED
@@ -1,17 +1,31 @@
 
1
  import gradio as gr
2
  import pandas as pd
3
- from models.registry import REGRESSION_MODELS, CLASSIFICATION_MODELS, REGRESSION_GRAPHS, CLASSIFICATION_GRAPHS
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4
 
5
 
6
- def update_models(task_type):
7
- if task_type == "Regression":
8
- models = list(REGRESSION_MODELS.keys())
9
- else:
10
- models = list(CLASSIFICATION_MODELS.keys())
11
 
12
  return gr.update(
13
- choices=models,
14
- value=models[0] if models else None # ✅ auto-select first
15
  )
16
 
17
 
@@ -29,7 +43,7 @@ def update_graphs(task_type):
29
 
30
  import os
31
 
32
- def preview_csv(file, max_rows=50):
33
  if not file:
34
  return None
35
 
 
1
+ from models.registry import MODEL_GROUPS, REGRESSION_GRAPHS, CLASSIFICATION_GRAPHS
2
  import gradio as gr
3
  import pandas as pd
4
+ import os
5
+
6
+ def reset_on_task_change(task_type):
7
+ """
8
+ When task changes:
9
+ - Model Group → Basic
10
+ - Model → first model of Basic group for selected task
11
+ """
12
+ model_group = "Basic"
13
+ models = MODEL_GROUPS[model_group][task_type]
14
+ model_names = list(models.keys())
15
+
16
+ return (
17
+ gr.update(value=model_group),
18
+ gr.update(choices=model_names, value=model_names[0])
19
+ )
20
 
21
 
22
+ def update_models(task_type, model_group):
23
+ models = MODEL_GROUPS.get(model_group, {}).get(task_type, {})
24
+ model_names = list(models.keys())
 
 
25
 
26
  return gr.update(
27
+ choices=model_names,
28
+ value=model_names[0] if model_names else None
29
  )
30
 
31
 
 
43
 
44
  import os
45
 
46
+ def preview_csv(file, max_rows=10):
47
  if not file:
48
  return None
49