Spaces:

QSBench
/

Noise_Detection

Running

App Files Community

QSBench committed 5 days ago

Commit

2aae7a5

verified ·

1 Parent(s): 0ca7d39

Update app.py

Browse files

Files changed (1) hide show

app.py +20 -26

app.py CHANGED Viewed

@@ -1,7 +1,5 @@
 import ast
-import glob
 import logging
-import os
 import re
 from typing import Dict, List, Optional, Tuple
@@ -9,6 +7,7 @@ import gradio as gr
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.impute import SimpleImputer
 from sklearn.inspection import permutation_importance
@@ -21,31 +20,29 @@ logger = logging.getLogger(__name__)
 APP_TITLE = "Noise Detection"
 APP_SUBTITLE = (
-    "Detect hardware-aware transpilation artifacts versus all other circuit conditions using structural circuit features."
 )
-DATA_DIR = os.getenv("QS_DATA_DIR", "data")
 REPO_CONFIG = {
     "clean": {
         "label": "clean",
-        "path": os.getenv("QS_CLEAN_PATH", os.path.join(DATA_DIR, "core")),
     },
     "depolarizing": {
         "label": "depolarizing",
-        "path": os.getenv("QS_DEPOLARIZING_PATH", os.path.join(DATA_DIR, "depolarizing")),
     },
     "amplitude_damping": {
         "label": "amplitude_damping",
-        "path": os.getenv("QS_AMPLITUDE_PATH", os.path.join(DATA_DIR, "amplitude")),
     },
     "hardware_aware": {
         "label": "hardware_aware",
-        "path": os.getenv("QS_HARDWARE_AWARE_PATH", os.path.join(DATA_DIR, "transpilation")),
     },
 }
-CLASS_ORDER = ["other", "hardware_aware"]
 NON_FEATURE_COLS = {
     "sample_id",
@@ -68,14 +65,12 @@ NON_FEATURE_COLS = {
     "meyer_wallach",
     "cx_count",
     "noise_label",
-    "source_dataset",
-    "target_label",
 }
 SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
 _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
-_COMBINED_CACHE: Dict[Tuple[str, ...], pd.DataFrame] = {}
 def safe_parse(value):
@@ -190,27 +185,26 @@ def _read_parquet_source(path: str) -> pd.DataFrame:
 def load_single_dataset(dataset_key: str) -> pd.DataFrame:
-    """Load a local parquet dataset and cache it in memory."""
     if dataset_key not in _ASSET_CACHE:
-        path = _resolve_path(dataset_key)
-        logger.info("Loading local dataset: %s -> %s", dataset_key, path)
-        df = _read_parquet_source(path)
         df = enrich_dataframe(df)
         df["noise_label"] = REPO_CONFIG[dataset_key]["label"]
-        df["source_dataset"] = dataset_key
         _ASSET_CACHE[dataset_key] = df
     return _ASSET_CACHE[dataset_key]
-def load_combined_dataset(dataset_keys: List[str]) -> pd.DataFrame:
-    """Load and merge selected local datasets."""
-    cache_key = tuple(sorted(dataset_keys))
-    if cache_key not in _COMBINED_CACHE:
-        frames = [load_single_dataset(key) for key in dataset_keys]
         combined = pd.concat(frames, ignore_index=True)
-        combined = combined.copy()
-        _COMBINED_CACHE[cache_key] = combined
-    return _COMBINED_CACHE[cache_key]
 def load_guide_content() -> str:

 import ast
 import logging
 import re
 from typing import Dict, List, Optional, Tuple
 import matplotlib.pyplot as plt
 import numpy as np
 import pandas as pd
+from datasets import load_dataset
 from sklearn.ensemble import HistGradientBoostingClassifier
 from sklearn.impute import SimpleImputer
 from sklearn.inspection import permutation_importance
 APP_TITLE = "Noise Detection"
 APP_SUBTITLE = (
+    "Classify quantum circuits into clean, depolarizing, amplitude_damping, or hardware-aware noise conditions."
 )
 REPO_CONFIG = {
     "clean": {
         "label": "clean",
+        "repo": "QSBench/QSBench-Core-v1.0.0-demo",
     },
     "depolarizing": {
         "label": "depolarizing",
+        "repo": "QSBench/QSBench-Depolarizing-Demo-v1.0.0",
     },
     "amplitude_damping": {
         "label": "amplitude_damping",
+        "repo": "QSBench/QSBench-Amplitude-v1.0.0-demo",
     },
     "hardware_aware": {
         "label": "hardware_aware",
+        "repo": "QSBench/QSBench-Transpilation-v1.0.0-demo",
     },
 }
+CLASS_ORDER = ["clean", "depolarizing", "amplitude_damping", "hardware_aware"]
 NON_FEATURE_COLS = {
     "sample_id",
     "meyer_wallach",
     "cx_count",
     "noise_label",
 }
 SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
 _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
+_COMBINED_CACHE: Optional[pd.DataFrame] = None
 def safe_parse(value):
 def load_single_dataset(dataset_key: str) -> pd.DataFrame:
+    """Load a dataset shard from Hugging Face and cache it in memory."""
     if dataset_key not in _ASSET_CACHE:
+        logger.info("Loading dataset: %s", dataset_key)
+        ds = load_dataset(REPO_CONFIG[dataset_key]["repo"])
+        df = pd.DataFrame(ds["train"])
         df = enrich_dataframe(df)
         df["noise_label"] = REPO_CONFIG[dataset_key]["label"]
         _ASSET_CACHE[dataset_key] = df
     return _ASSET_CACHE[dataset_key]
+def load_combined_dataset() -> pd.DataFrame:
+    """Load and merge all four noise-condition datasets."""
+    global _COMBINED_CACHE
+    if _COMBINED_CACHE is None:
+        frames = [load_single_dataset(key) for key in REPO_CONFIG.keys()]
         combined = pd.concat(frames, ignore_index=True)
+        combined = combined[combined["noise_label"].isin(CLASS_ORDER)].copy()
+        _COMBINED_CACHE = combined
+    return _COMBINED_CACHE
 def load_guide_content() -> str: