Spaces:

QSBench
/

Noise_Detection

Sleeping

App Files Community

QSBench committed 14 days ago

Commit

c752e48

verified ·

1 Parent(s): 0553f08

Update app.py

Browse files

Files changed (1) hide show

app.py +39 -26

app.py CHANGED Viewed

@@ -2,6 +2,7 @@ import ast
 import logging
 import re
 from typing import Dict, List, Optional, Tuple
 import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
@@ -9,6 +10,7 @@ import pandas as pd
 from datasets import load_dataset
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.impute import SimpleImputer
 from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
@@ -271,8 +273,8 @@ def build_dataset_profile(df: pd.DataFrame) -> str:
     """Build a short dataset summary for the explorer tab."""
     return (
         f"### Dataset profile\n\n"
-        f"**Rows:** {len(df):,} \n"
-        f"**Columns:** {len(df.columns):,} \n"
         f"**Classes:** {', '.join(CLASS_ORDER)}"
     )
@@ -292,9 +294,9 @@ def refresh_explorer(dataset_key: str, split_name: str) -> Tuple[gr.update, pd.D
     profile_box = build_dataset_profile(df)
     summary_box = (
         f"### Split summary\n\n"
-        f"**Dataset:** `{dataset_key}` \n"
-        f"**Label:** `{REPO_CONFIG[dataset_key]['label']}` \n"
-        f"**Available splits:** {', '.join(splits)} \n"
         f"**Preview rows:** {len(display_df)}"
     )
     return (
@@ -307,15 +309,16 @@ def refresh_explorer(dataset_key: str, split_name: str) -> Tuple[gr.update, pd.D
     )
-def sync_feature_picker(_dataset_key: str) -> gr.update:
-    """Refresh the feature list from the combined dataset."""
-    df = load_combined_dataset()
     features = get_available_feature_columns(df)
     defaults = default_feature_selection(features)
     return gr.update(choices=features, value=defaults)
 def train_classifier(
     feature_columns: List[str],
     test_size: float,
     n_estimators: int,
@@ -326,7 +329,7 @@ def train_classifier(
     if not feature_columns:
         return None, "### ❌ Please select at least one feature."
-    df = load_combined_dataset()
     required_cols = feature_columns + ["noise_label"]
     train_df = df.dropna(subset=required_cols).copy()
     train_df = train_df[train_df["noise_label"].isin(CLASS_ORDER)]
@@ -341,7 +344,6 @@ def train_classifier(
     depth = int(max_depth) if max_depth and int(max_depth) > 0 else None
     max_iter = int(n_estimators)
-    # --- Stratified split ---
     try:
         X_train, X_test, y_train, y_test = train_test_split(
             X, y, test_size=test_size, random_state=seed, stratify=y
@@ -351,7 +353,6 @@ def train_classifier(
             X, y, test_size=test_size, random_state=seed
         )
-    # --- Pipeline with class_weight='balanced' ---
     model = Pipeline(
         steps=[
             ("imputer", SimpleImputer(strategy="median")),
@@ -363,9 +364,9 @@ def train_classifier(
                     max_depth=depth,
                     random_state=seed,
                     min_samples_leaf=1,
-                    class_weight="balanced",        # ← главное улучшение
-                    learning_rate=0.1,              # можно поиграть (0.05-0.2)
-                    max_bins=255,                   # стандартное хорошее значение
                 ),
             ),
         ]
@@ -378,8 +379,16 @@ def train_classifier(
     macro_f1 = float(f1_score(y_test, y_pred, average="macro", zero_division=0))
     weighted_f1 = float(f1_score(y_test, y_pred, average="weighted", zero_division=0))
-    classifier = model.named_steps["classifier"]
-    importances = getattr(classifier, "feature_importances_", None)
     fig = make_classification_figure(y_test.to_numpy(), y_pred, CLASS_ORDER, list(feature_columns), importances)
@@ -389,19 +398,18 @@ def train_classifier(
         labels=CLASS_ORDER,
         zero_division=0,
     )
     results = (
         "### Classification results\n\n"
-        f"**Rows used:** {len(train_df):,} \n"
-        f"**Test size:** {test_size:.0%} \n"
-        f"**Accuracy:** {accuracy:.4f} \n"
-        f"**Macro F1:** {macro_f1:.4f} \n"
         f"**Weighted F1:** {weighted_f1:.4f}\n\n"
         "```text\n"
         f"{report}"
         "```"
     )
     return fig, results
@@ -439,6 +447,11 @@ with gr.Blocks(title=APP_TITLE) as demo:
                 transpiled_qasm = gr.Code(label="Transpiled QASM", language=None)
         with gr.TabItem("🧠 Classification"):
             feature_picker = gr.CheckboxGroup(label="Input features", choices=[])
             test_size = gr.Slider(0.1, 0.4, value=0.2, step=0.05, label="Test split")
             n_estimators = gr.Slider(50, 400, value=200, step=10, label="Trees")
@@ -470,11 +483,11 @@ with gr.Blocks(title=APP_TITLE) as demo:
         [split_dropdown, explorer_df, raw_qasm, transpiled_qasm, profile_box, summary_box],
     )
-    dataset_dropdown.change(sync_feature_picker, [dataset_dropdown], [feature_picker])
     run_btn.click(
         train_classifier,
-        [feature_picker, test_size, n_estimators, max_depth, seed],
         [plot, metrics],
     )
@@ -483,8 +496,8 @@ with gr.Blocks(title=APP_TITLE) as demo:
         [dataset_dropdown, split_dropdown],
         [split_dropdown, explorer_df, raw_qasm, transpiled_qasm, profile_box, summary_box],
     )
-    demo.load(sync_feature_picker, [dataset_dropdown], [feature_picker])
 if __name__ == "__main__":
-    demo.launch(theme=gr.themes.Soft(), css=CUSTOM_CSS)

 import logging
 import re
 from typing import Dict, List, Optional, Tuple
 import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
 from datasets import load_dataset
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.impute import SimpleImputer
+from sklearn.inspection import permutation_importance
 from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
 from sklearn.model_selection import train_test_split
 from sklearn.pipeline import Pipeline
     """Build a short dataset summary for the explorer tab."""
     return (
         f"### Dataset profile\n\n"
+        f"**Rows:** {len(df):,}  \n"
+        f"**Columns:** {len(df.columns):,}  \n"
         f"**Classes:** {', '.join(CLASS_ORDER)}"
     )
     profile_box = build_dataset_profile(df)
     summary_box = (
         f"### Split summary\n\n"
+        f"**Dataset:** `{dataset_key}`  \n"
+        f"**Label:** `{REPO_CONFIG[dataset_key]['label']}`  \n"
+        f"**Available splits:** {', '.join(splits)}  \n"
         f"**Preview rows:** {len(display_df)}"
     )
     return (
     )
+def sync_feature_picker(dataset_key: str) -> gr.update:
+    """Refresh the feature list from the selected dataset."""
+    df = load_single_dataset(dataset_key)
     features = get_available_feature_columns(df)
     defaults = default_feature_selection(features)
     return gr.update(choices=features, value=defaults)
 def train_classifier(
+    dataset_key: str,
     feature_columns: List[str],
     test_size: float,
     n_estimators: int,
     if not feature_columns:
         return None, "### ❌ Please select at least one feature."
+    df = load_single_dataset(dataset_key)
     required_cols = feature_columns + ["noise_label"]
     train_df = df.dropna(subset=required_cols).copy()
     train_df = train_df[train_df["noise_label"].isin(CLASS_ORDER)]
     depth = int(max_depth) if max_depth and int(max_depth) > 0 else None
     max_iter = int(n_estimators)
     try:
         X_train, X_test, y_train, y_test = train_test_split(
             X, y, test_size=test_size, random_state=seed, stratify=y
             X, y, test_size=test_size, random_state=seed
         )
     model = Pipeline(
         steps=[
             ("imputer", SimpleImputer(strategy="median")),
                     max_depth=depth,
                     random_state=seed,
                     min_samples_leaf=1,
+                    class_weight="balanced",
+                    learning_rate=0.1,
+                    max_bins=255,
                 ),
             ),
         ]
     macro_f1 = float(f1_score(y_test, y_pred, average="macro", zero_division=0))
     weighted_f1 = float(f1_score(y_test, y_pred, average="weighted", zero_division=0))
+    perm = permutation_importance(
+        model,
+        X_test,
+        y_test,
+        n_repeats=8,
+        random_state=seed,
+        scoring="f1_macro",
+        n_jobs=-1,
+    )
+    importances = perm.importances_mean
     fig = make_classification_figure(y_test.to_numpy(), y_pred, CLASS_ORDER, list(feature_columns), importances)
         labels=CLASS_ORDER,
         zero_division=0,
     )
     results = (
         "### Classification results\n\n"
+        f"**Rows used:** {len(train_df):,}  \n"
+        f"**Dataset:** `{dataset_key}`  \n"
+        f"**Test size:** {test_size:.0%}  \n"
+        f"**Accuracy:** {accuracy:.4f}  \n"
+        f"**Macro F1:** {macro_f1:.4f}  \n"
         f"**Weighted F1:** {weighted_f1:.4f}\n\n"
         "```text\n"
         f"{report}"
         "```"
     )
     return fig, results
                 transpiled_qasm = gr.Code(label="Transpiled QASM", language=None)
         with gr.TabItem("🧠 Classification"):
+            class_dataset_dropdown = gr.Dropdown(
+                list(REPO_CONFIG.keys()),
+                value="clean",
+                label="Dataset",
+            )
             feature_picker = gr.CheckboxGroup(label="Input features", choices=[])
             test_size = gr.Slider(0.1, 0.4, value=0.2, step=0.05, label="Test split")
             n_estimators = gr.Slider(50, 400, value=200, step=10, label="Trees")
         [split_dropdown, explorer_df, raw_qasm, transpiled_qasm, profile_box, summary_box],
     )
+    class_dataset_dropdown.change(sync_feature_picker, [class_dataset_dropdown], [feature_picker])
     run_btn.click(
         train_classifier,
+        [class_dataset_dropdown, feature_picker, test_size, n_estimators, max_depth, seed],
         [plot, metrics],
     )
         [dataset_dropdown, split_dropdown],
         [split_dropdown, explorer_df, raw_qasm, transpiled_qasm, profile_box, summary_box],
     )
+    demo.load(sync_feature_picker, [class_dataset_dropdown], [feature_picker])
 if __name__ == "__main__":
+    demo.launch(theme=gr.themes.Soft(), css=CUSTOM_CSS)