Spaces:

QSBench
/

Noise_Detection

Running

App Files Community

QSBench committed 4 days ago

Commit

3c7d3fa

verified ·

1 Parent(s): 7bc7a81

Update app.py

Browse files

Files changed (1) hide show

app.py +26 -16

app.py CHANGED Viewed

@@ -8,7 +8,7 @@ import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from datasets import load_dataset
-from sklearn.ensemble import RandomForestClassifier
 from sklearn.impute import SimpleImputer
 from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
 from sklearn.model_selection import train_test_split
@@ -19,7 +19,9 @@ logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 APP_TITLE = "Noise Detection"
-APP_SUBTITLE = "Classify quantum circuits into clean, depolarizing, amplitude_damping, or hardware-aware noise conditions."
 REPO_CONFIG = {
     "clean": {
@@ -66,6 +68,7 @@ NON_FEATURE_COLS = {
 }
 SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
 _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
 _COMBINED_CACHE: Optional[pd.DataFrame] = None
@@ -81,7 +84,7 @@ def safe_parse(value):
 def adjacency_features(adj_value) -> Dict[str, float]:
-    """Derive compact graph features from an adjacency matrix."""
     parsed = safe_parse(adj_value)
     if not isinstance(parsed, list) or len(parsed) == 0:
         return {
@@ -160,7 +163,7 @@ def enrich_dataframe(df: pd.DataFrame) -> pd.DataFrame:
 def load_single_dataset(dataset_key: str) -> pd.DataFrame:
-    """Load a single dataset shard from Hugging Face and cache it."""
     if dataset_key not in _ASSET_CACHE:
         logger.info("Loading dataset: %s", dataset_key)
         ds = load_dataset(REPO_CONFIG[dataset_key]["repo"])
@@ -172,7 +175,7 @@ def load_single_dataset(dataset_key: str) -> pd.DataFrame:
 def load_combined_dataset() -> pd.DataFrame:
-    """Load and merge all noise-condition datasets."""
     global _COMBINED_CACHE
     if _COMBINED_CACHE is None:
         frames = [load_single_dataset(key) for key in REPO_CONFIG.keys()]
@@ -192,7 +195,7 @@ def load_guide_content() -> str:
 def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
-    """Return numeric feature columns excluding metadata and the target."""
     numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
     features = []
     for col in numeric_cols:
@@ -205,7 +208,7 @@ def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
 def default_feature_selection(features: List[str]) -> List[str]:
-    """Pick a stable set of default features."""
     preferred = [
         "gate_entropy",
         "adj_density",
@@ -240,7 +243,7 @@ def make_classification_figure(
     ax3 = fig.add_subplot(gs[0, 2])
     cm = confusion_matrix(y_true, y_pred, labels=class_names)
-    im = ax1.imshow(cm, interpolation="nearest")
     ax1.set_title("Confusion Matrix")
     ax1.set_xlabel("Predicted")
     ax1.set_ylabel("Actual")
@@ -251,10 +254,10 @@ def make_classification_figure(
     for i in range(cm.shape[0]):
         for j in range(cm.shape[1]):
             ax1.text(j, i, cm[i, j], ha="center", va="center")
-    fig.colorbar(im, ax=ax1, fraction=0.046, pad=0.04)
-    residual_like = (y_true != y_pred).astype(int)
-    ax2.hist(residual_like, bins=[-0.5, 0.5, 1.5])
     ax2.set_title("Correct vs Incorrect")
     ax2.set_xlabel("0 = Correct, 1 = Incorrect")
     ax2.set_ylabel("Count")
@@ -273,7 +276,7 @@ def make_classification_figure(
 def build_dataset_profile(df: pd.DataFrame) -> str:
-    """Build a dataset summary for the explorer tab."""
     return (
         f"### Dataset profile\n\n"
         f"**Rows:** {len(df):,}  \n"
@@ -373,11 +376,13 @@ def train_classifier(
             ("scaler", StandardScaler()),
             (
                 "classifier",
-                RandomForestClassifier(
                     n_estimators=trees,
                     max_depth=depth,
                     random_state=seed,
                     n_jobs=-1,
                 ),
             ),
         ]
@@ -387,14 +392,19 @@ def train_classifier(
     y_pred = model.predict(X_test)
     accuracy = float(accuracy_score(y_test, y_pred))
-    macro_f1 = float(f1_score(y_test, y_pred, average="macro"))
-    weighted_f1 = float(f1_score(y_test, y_pred, average="weighted"))
     classifier = model.named_steps["classifier"]
     importances = getattr(classifier, "feature_importances_", None)
     fig = make_classification_figure(y_test.to_numpy(), y_pred, CLASS_ORDER, list(feature_columns), importances)
-    report = classification_report(y_test, y_pred, labels=CLASS_ORDER, output_dict=False, zero_division=0)
     results = (
         "### Classification results\n\n"
         f"**Rows used:** {len(train_df):,}  \n"

 import numpy as np
 import pandas as pd
 from datasets import load_dataset
+from sklearn.ensemble import ExtraTreesClassifier
 from sklearn.impute import SimpleImputer
 from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
 from sklearn.model_selection import train_test_split
 logger = logging.getLogger(__name__)
 APP_TITLE = "Noise Detection"
+APP_SUBTITLE = (
+    "Classify quantum circuits into clean, depolarizing, amplitude_damping, or hardware-aware noise conditions."
+)
 REPO_CONFIG = {
     "clean": {
 }
 SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
 _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
 _COMBINED_CACHE: Optional[pd.DataFrame] = None
 def adjacency_features(adj_value) -> Dict[str, float]:
+    """Derive graph statistics from an adjacency matrix."""
     parsed = safe_parse(adj_value)
     if not isinstance(parsed, list) or len(parsed) == 0:
         return {
 def load_single_dataset(dataset_key: str) -> pd.DataFrame:
+    """Load a dataset shard from Hugging Face and cache it in memory."""
     if dataset_key not in _ASSET_CACHE:
         logger.info("Loading dataset: %s", dataset_key)
         ds = load_dataset(REPO_CONFIG[dataset_key]["repo"])
 def load_combined_dataset() -> pd.DataFrame:
+    """Load and merge all four noise-condition datasets."""
     global _COMBINED_CACHE
     if _COMBINED_CACHE is None:
         frames = [load_single_dataset(key) for key in REPO_CONFIG.keys()]
 def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
+    """Return numeric feature columns excluding metadata and target columns."""
     numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
     features = []
     for col in numeric_cols:
 def default_feature_selection(features: List[str]) -> List[str]:
+    """Select a stable default feature subset."""
     preferred = [
         "gate_entropy",
         "adj_density",
     ax3 = fig.add_subplot(gs[0, 2])
     cm = confusion_matrix(y_true, y_pred, labels=class_names)
+    image = ax1.imshow(cm, interpolation="nearest")
     ax1.set_title("Confusion Matrix")
     ax1.set_xlabel("Predicted")
     ax1.set_ylabel("Actual")
     for i in range(cm.shape[0]):
         for j in range(cm.shape[1]):
             ax1.text(j, i, cm[i, j], ha="center", va="center")
+    fig.colorbar(image, ax=ax1, fraction=0.046, pad=0.04)
+    incorrect = (y_true != y_pred).astype(int)
+    ax2.hist(incorrect, bins=[-0.5, 0.5, 1.5])
     ax2.set_title("Correct vs Incorrect")
     ax2.set_xlabel("0 = Correct, 1 = Incorrect")
     ax2.set_ylabel("Count")
 def build_dataset_profile(df: pd.DataFrame) -> str:
+    """Build a short dataset summary for the explorer tab."""
     return (
         f"### Dataset profile\n\n"
         f"**Rows:** {len(df):,}  \n"
             ("scaler", StandardScaler()),
             (
                 "classifier",
+                ExtraTreesClassifier(
                     n_estimators=trees,
                     max_depth=depth,
                     random_state=seed,
                     n_jobs=-1,
+                    class_weight="balanced",
+                    min_samples_leaf=1,
                 ),
             ),
         ]
     y_pred = model.predict(X_test)
     accuracy = float(accuracy_score(y_test, y_pred))
+    macro_f1 = float(f1_score(y_test, y_pred, average="macro", zero_division=0))
+    weighted_f1 = float(f1_score(y_test, y_pred, average="weighted", zero_division=0))
     classifier = model.named_steps["classifier"]
     importances = getattr(classifier, "feature_importances_", None)
     fig = make_classification_figure(y_test.to_numpy(), y_pred, CLASS_ORDER, list(feature_columns), importances)
+    report = classification_report(
+        y_test,
+        y_pred,
+        labels=CLASS_ORDER,
+        zero_division=0,
+    )
     results = (
         "### Classification results\n\n"
         f"**Rows used:** {len(train_df):,}  \n"