QSBench committed on
Commit
2aae7a5
·
verified ·
1 Parent(s): 0ca7d39

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -26
app.py CHANGED
@@ -1,7 +1,5 @@
1
  import ast
2
- import glob
3
  import logging
4
- import os
5
  import re
6
  from typing import Dict, List, Optional, Tuple
7
 
@@ -9,6 +7,7 @@ import gradio as gr
9
  import matplotlib.pyplot as plt
10
  import numpy as np
11
  import pandas as pd
 
12
  from sklearn.ensemble import HistGradientBoostingClassifier
13
  from sklearn.impute import SimpleImputer
14
  from sklearn.inspection import permutation_importance
@@ -21,31 +20,29 @@ logger = logging.getLogger(__name__)
21
 
22
  APP_TITLE = "Noise Detection"
23
  APP_SUBTITLE = (
24
- "Detect hardware-aware transpilation artifacts versus all other circuit conditions using structural circuit features."
25
  )
26
 
27
- DATA_DIR = os.getenv("QS_DATA_DIR", "data")
28
-
29
  REPO_CONFIG = {
30
  "clean": {
31
  "label": "clean",
32
- "path": os.getenv("QS_CLEAN_PATH", os.path.join(DATA_DIR, "core")),
33
  },
34
  "depolarizing": {
35
  "label": "depolarizing",
36
- "path": os.getenv("QS_DEPOLARIZING_PATH", os.path.join(DATA_DIR, "depolarizing")),
37
  },
38
  "amplitude_damping": {
39
  "label": "amplitude_damping",
40
- "path": os.getenv("QS_AMPLITUDE_PATH", os.path.join(DATA_DIR, "amplitude")),
41
  },
42
  "hardware_aware": {
43
  "label": "hardware_aware",
44
- "path": os.getenv("QS_HARDWARE_AWARE_PATH", os.path.join(DATA_DIR, "transpilation")),
45
  },
46
  }
47
 
48
- CLASS_ORDER = ["other", "hardware_aware"]
49
 
50
  NON_FEATURE_COLS = {
51
  "sample_id",
@@ -68,14 +65,12 @@ NON_FEATURE_COLS = {
68
  "meyer_wallach",
69
  "cx_count",
70
  "noise_label",
71
- "source_dataset",
72
- "target_label",
73
  }
74
 
75
  SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
76
 
77
  _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
78
- _COMBINED_CACHE: Dict[Tuple[str, ...], pd.DataFrame] = {}
79
 
80
 
81
  def safe_parse(value):
@@ -190,27 +185,26 @@ def _read_parquet_source(path: str) -> pd.DataFrame:
190
 
191
 
192
  def load_single_dataset(dataset_key: str) -> pd.DataFrame:
193
- """Load a local parquet dataset and cache it in memory."""
194
  if dataset_key not in _ASSET_CACHE:
195
- path = _resolve_path(dataset_key)
196
- logger.info("Loading local dataset: %s -> %s", dataset_key, path)
197
- df = _read_parquet_source(path)
198
  df = enrich_dataframe(df)
199
  df["noise_label"] = REPO_CONFIG[dataset_key]["label"]
200
- df["source_dataset"] = dataset_key
201
  _ASSET_CACHE[dataset_key] = df
202
  return _ASSET_CACHE[dataset_key]
203
 
204
 
205
- def load_combined_dataset(dataset_keys: List[str]) -> pd.DataFrame:
206
- """Load and merge selected local datasets."""
207
- cache_key = tuple(sorted(dataset_keys))
208
- if cache_key not in _COMBINED_CACHE:
209
- frames = [load_single_dataset(key) for key in dataset_keys]
210
  combined = pd.concat(frames, ignore_index=True)
211
- combined = combined.copy()
212
- _COMBINED_CACHE[cache_key] = combined
213
- return _COMBINED_CACHE[cache_key]
214
 
215
 
216
  def load_guide_content() -> str:
 
1
  import ast
 
2
  import logging
 
3
  import re
4
  from typing import Dict, List, Optional, Tuple
5
 
 
7
  import matplotlib.pyplot as plt
8
  import numpy as np
9
  import pandas as pd
10
+ from datasets import load_dataset
11
  from sklearn.ensemble import HistGradientBoostingClassifier
12
  from sklearn.impute import SimpleImputer
13
  from sklearn.inspection import permutation_importance
 
20
 
21
  APP_TITLE = "Noise Detection"
22
  APP_SUBTITLE = (
23
+ "Classify quantum circuits into clean, depolarizing, amplitude_damping, or hardware-aware noise conditions."
24
  )
25
 
 
 
26
  REPO_CONFIG = {
27
  "clean": {
28
  "label": "clean",
29
+ "repo": "QSBench/QSBench-Core-v1.0.0-demo",
30
  },
31
  "depolarizing": {
32
  "label": "depolarizing",
33
+ "repo": "QSBench/QSBench-Depolarizing-Demo-v1.0.0",
34
  },
35
  "amplitude_damping": {
36
  "label": "amplitude_damping",
37
+ "repo": "QSBench/QSBench-Amplitude-v1.0.0-demo",
38
  },
39
  "hardware_aware": {
40
  "label": "hardware_aware",
41
+ "repo": "QSBench/QSBench-Transpilation-v1.0.0-demo",
42
  },
43
  }
44
 
45
+ CLASS_ORDER = ["clean", "depolarizing", "amplitude_damping", "hardware_aware"]
46
 
47
  NON_FEATURE_COLS = {
48
  "sample_id",
 
65
  "meyer_wallach",
66
  "cx_count",
67
  "noise_label",
 
 
68
  }
69
 
70
  SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
71
 
72
  _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
73
+ _COMBINED_CACHE: Optional[pd.DataFrame] = None
74
 
75
 
76
  def safe_parse(value):
 
185
 
186
 
187
  def load_single_dataset(dataset_key: str) -> pd.DataFrame:
188
+ """Load a dataset shard from Hugging Face and cache it in memory."""
189
  if dataset_key not in _ASSET_CACHE:
190
+ logger.info("Loading dataset: %s", dataset_key)
191
+ ds = load_dataset(REPO_CONFIG[dataset_key]["repo"])
192
+ df = pd.DataFrame(ds["train"])
193
  df = enrich_dataframe(df)
194
  df["noise_label"] = REPO_CONFIG[dataset_key]["label"]
 
195
  _ASSET_CACHE[dataset_key] = df
196
  return _ASSET_CACHE[dataset_key]
197
 
198
 
199
+ def load_combined_dataset() -> pd.DataFrame:
200
+ """Load and merge all four noise-condition datasets."""
201
+ global _COMBINED_CACHE
202
+ if _COMBINED_CACHE is None:
203
+ frames = [load_single_dataset(key) for key in REPO_CONFIG.keys()]
204
  combined = pd.concat(frames, ignore_index=True)
205
+ combined = combined[combined["noise_label"].isin(CLASS_ORDER)].copy()
206
+ _COMBINED_CACHE = combined
207
+ return _COMBINED_CACHE
208
 
209
 
210
  def load_guide_content() -> str: