Spaces:

QSBench
/

Noise_Detection

Sleeping

App Files Files Community

QSBench committed 13 days ago

Commit

94dd65f

verified ·

1 Parent(s): a4d74cf

Update app.py

Browse files

Files changed (1) hide show

app.py +11 -13

app.py CHANGED Viewed

@@ -322,7 +322,7 @@ def train_classifier(
     max_depth: float,
     random_state: float,
 ) -> Tuple[Optional[plt.Figure], str]:
-    """Train a four-class classifier and return metrics plus a plot."""
     if not feature_columns:
         return None, "### ❌ Please select at least one feature."
@@ -332,31 +332,26 @@ def train_classifier(
     train_df = train_df[train_df["noise_label"].isin(CLASS_ORDER)]
     if len(train_df) < 20:
-        return None, "### ❌ Not enough clean rows after filtering missing values."
     X = train_df[feature_columns]
     y = train_df["noise_label"]
     seed = int(random_state)
     depth = int(max_depth) if max_depth and int(max_depth) > 0 else None
-    trees = int(n_estimators)
     try:
         X_train, X_test, y_train, y_test = train_test_split(
-            X,
-            y,
-            test_size=test_size,
-            random_state=seed,
-            stratify=y,
         )
     except ValueError:
         X_train, X_test, y_train, y_test = train_test_split(
-            X,
-            y,
-            test_size=test_size,
-            random_state=seed,
         )
     model = Pipeline(
         steps=[
             ("imputer", SimpleImputer(strategy="median")),
@@ -364,10 +359,13 @@ def train_classifier(
             (
                 "classifier",
                 HistGradientBoostingClassifier(
-                    max_iter=trees,
                     max_depth=depth,
                     random_state=seed,
                     min_samples_leaf=1,
                 ),
             ),
         ]

     max_depth: float,
     random_state: float,
 ) -> Tuple[Optional[plt.Figure], str]:
+    """Train a four-class classifier with better handling of class imbalance."""
     if not feature_columns:
         return None, "### ❌ Please select at least one feature."
     train_df = train_df[train_df["noise_label"].isin(CLASS_ORDER)]
     if len(train_df) < 20:
+        return None, "### ❌ Not enough rows after filtering missing values."
     X = train_df[feature_columns]
     y = train_df["noise_label"]
     seed = int(random_state)
     depth = int(max_depth) if max_depth and int(max_depth) > 0 else None
+    max_iter = int(n_estimators)
+    # --- Stratified split ---
     try:
         X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=test_size, random_state=seed, stratify=y
         )
     except ValueError:
         X_train, X_test, y_train, y_test = train_test_split(
+            X, y, test_size=test_size, random_state=seed
         )
+    # --- Pipeline with class_weight='balanced' ---
     model = Pipeline(
         steps=[
             ("imputer", SimpleImputer(strategy="median")),
             (
                 "classifier",
                 HistGradientBoostingClassifier(
+                    max_iter=max_iter,
                     max_depth=depth,
                     random_state=seed,
                     min_samples_leaf=1,
+                    class_weight="balanced",        # ← the key improvement
+                    learning_rate=0.1,              # worth experimenting with (0.05-0.2)
+                    max_bins=255,                   # a good standard value
                 ),
             ),
         ]