QSBench committed on
Commit
3c7d3fa
·
verified ·
1 Parent(s): 7bc7a81

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +26 -16
app.py CHANGED
@@ -8,7 +8,7 @@ import matplotlib.pyplot as plt
8
  import numpy as np
9
  import pandas as pd
10
  from datasets import load_dataset
11
- from sklearn.ensemble import RandomForestClassifier
12
  from sklearn.impute import SimpleImputer
13
  from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
14
  from sklearn.model_selection import train_test_split
@@ -19,7 +19,9 @@ logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
20
 
21
  APP_TITLE = "Noise Detection"
22
- APP_SUBTITLE = "Classify quantum circuits into clean, depolarizing, amplitude_damping, or hardware-aware noise conditions."
 
 
23
 
24
  REPO_CONFIG = {
25
  "clean": {
@@ -66,6 +68,7 @@ NON_FEATURE_COLS = {
66
  }
67
 
68
  SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
 
69
  _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
70
  _COMBINED_CACHE: Optional[pd.DataFrame] = None
71
 
@@ -81,7 +84,7 @@ def safe_parse(value):
81
 
82
 
83
  def adjacency_features(adj_value) -> Dict[str, float]:
84
- """Derive compact graph features from an adjacency matrix."""
85
  parsed = safe_parse(adj_value)
86
  if not isinstance(parsed, list) or len(parsed) == 0:
87
  return {
@@ -160,7 +163,7 @@ def enrich_dataframe(df: pd.DataFrame) -> pd.DataFrame:
160
 
161
 
162
  def load_single_dataset(dataset_key: str) -> pd.DataFrame:
163
- """Load a single dataset shard from Hugging Face and cache it."""
164
  if dataset_key not in _ASSET_CACHE:
165
  logger.info("Loading dataset: %s", dataset_key)
166
  ds = load_dataset(REPO_CONFIG[dataset_key]["repo"])
@@ -172,7 +175,7 @@ def load_single_dataset(dataset_key: str) -> pd.DataFrame:
172
 
173
 
174
  def load_combined_dataset() -> pd.DataFrame:
175
- """Load and merge all noise-condition datasets."""
176
  global _COMBINED_CACHE
177
  if _COMBINED_CACHE is None:
178
  frames = [load_single_dataset(key) for key in REPO_CONFIG.keys()]
@@ -192,7 +195,7 @@ def load_guide_content() -> str:
192
 
193
 
194
  def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
195
- """Return numeric feature columns excluding metadata and the target."""
196
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
197
  features = []
198
  for col in numeric_cols:
@@ -205,7 +208,7 @@ def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
205
 
206
 
207
  def default_feature_selection(features: List[str]) -> List[str]:
208
- """Pick a stable set of default features."""
209
  preferred = [
210
  "gate_entropy",
211
  "adj_density",
@@ -240,7 +243,7 @@ def make_classification_figure(
240
  ax3 = fig.add_subplot(gs[0, 2])
241
 
242
  cm = confusion_matrix(y_true, y_pred, labels=class_names)
243
- im = ax1.imshow(cm, interpolation="nearest")
244
  ax1.set_title("Confusion Matrix")
245
  ax1.set_xlabel("Predicted")
246
  ax1.set_ylabel("Actual")
@@ -251,10 +254,10 @@ def make_classification_figure(
251
  for i in range(cm.shape[0]):
252
  for j in range(cm.shape[1]):
253
  ax1.text(j, i, cm[i, j], ha="center", va="center")
254
- fig.colorbar(im, ax=ax1, fraction=0.046, pad=0.04)
255
 
256
- residual_like = (y_true != y_pred).astype(int)
257
- ax2.hist(residual_like, bins=[-0.5, 0.5, 1.5])
258
  ax2.set_title("Correct vs Incorrect")
259
  ax2.set_xlabel("0 = Correct, 1 = Incorrect")
260
  ax2.set_ylabel("Count")
@@ -273,7 +276,7 @@ def make_classification_figure(
273
 
274
 
275
  def build_dataset_profile(df: pd.DataFrame) -> str:
276
- """Build a dataset summary for the explorer tab."""
277
  return (
278
  f"### Dataset profile\n\n"
279
  f"**Rows:** {len(df):,} \n"
@@ -373,11 +376,13 @@ def train_classifier(
373
  ("scaler", StandardScaler()),
374
  (
375
  "classifier",
376
- RandomForestClassifier(
377
  n_estimators=trees,
378
  max_depth=depth,
379
  random_state=seed,
380
  n_jobs=-1,
 
 
381
  ),
382
  ),
383
  ]
@@ -387,14 +392,19 @@ def train_classifier(
387
  y_pred = model.predict(X_test)
388
 
389
  accuracy = float(accuracy_score(y_test, y_pred))
390
- macro_f1 = float(f1_score(y_test, y_pred, average="macro"))
391
- weighted_f1 = float(f1_score(y_test, y_pred, average="weighted"))
392
 
393
  classifier = model.named_steps["classifier"]
394
  importances = getattr(classifier, "feature_importances_", None)
395
  fig = make_classification_figure(y_test.to_numpy(), y_pred, CLASS_ORDER, list(feature_columns), importances)
396
 
397
- report = classification_report(y_test, y_pred, labels=CLASS_ORDER, output_dict=False, zero_division=0)
 
 
 
 
 
398
  results = (
399
  "### Classification results\n\n"
400
  f"**Rows used:** {len(train_df):,} \n"
 
8
  import numpy as np
9
  import pandas as pd
10
  from datasets import load_dataset
11
+ from sklearn.ensemble import ExtraTreesClassifier
12
  from sklearn.impute import SimpleImputer
13
  from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
14
  from sklearn.model_selection import train_test_split
 
19
  logger = logging.getLogger(__name__)
20
 
21
  APP_TITLE = "Noise Detection"
22
+ APP_SUBTITLE = (
23
+ "Classify quantum circuits into clean, depolarizing, amplitude_damping, or hardware-aware noise conditions."
24
+ )
25
 
26
  REPO_CONFIG = {
27
  "clean": {
 
68
  }
69
 
70
  SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
71
+
72
  _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
73
  _COMBINED_CACHE: Optional[pd.DataFrame] = None
74
 
 
84
 
85
 
86
  def adjacency_features(adj_value) -> Dict[str, float]:
87
+ """Derive graph statistics from an adjacency matrix."""
88
  parsed = safe_parse(adj_value)
89
  if not isinstance(parsed, list) or len(parsed) == 0:
90
  return {
 
163
 
164
 
165
  def load_single_dataset(dataset_key: str) -> pd.DataFrame:
166
+ """Load a dataset shard from Hugging Face and cache it in memory."""
167
  if dataset_key not in _ASSET_CACHE:
168
  logger.info("Loading dataset: %s", dataset_key)
169
  ds = load_dataset(REPO_CONFIG[dataset_key]["repo"])
 
175
 
176
 
177
  def load_combined_dataset() -> pd.DataFrame:
178
+ """Load and merge all four noise-condition datasets."""
179
  global _COMBINED_CACHE
180
  if _COMBINED_CACHE is None:
181
  frames = [load_single_dataset(key) for key in REPO_CONFIG.keys()]
 
195
 
196
 
197
  def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
198
+ """Return numeric feature columns excluding metadata and target columns."""
199
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
200
  features = []
201
  for col in numeric_cols:
 
208
 
209
 
210
  def default_feature_selection(features: List[str]) -> List[str]:
211
+ """Select a stable default feature subset."""
212
  preferred = [
213
  "gate_entropy",
214
  "adj_density",
 
243
  ax3 = fig.add_subplot(gs[0, 2])
244
 
245
  cm = confusion_matrix(y_true, y_pred, labels=class_names)
246
+ image = ax1.imshow(cm, interpolation="nearest")
247
  ax1.set_title("Confusion Matrix")
248
  ax1.set_xlabel("Predicted")
249
  ax1.set_ylabel("Actual")
 
254
  for i in range(cm.shape[0]):
255
  for j in range(cm.shape[1]):
256
  ax1.text(j, i, cm[i, j], ha="center", va="center")
257
+ fig.colorbar(image, ax=ax1, fraction=0.046, pad=0.04)
258
 
259
+ incorrect = (y_true != y_pred).astype(int)
260
+ ax2.hist(incorrect, bins=[-0.5, 0.5, 1.5])
261
  ax2.set_title("Correct vs Incorrect")
262
  ax2.set_xlabel("0 = Correct, 1 = Incorrect")
263
  ax2.set_ylabel("Count")
 
276
 
277
 
278
  def build_dataset_profile(df: pd.DataFrame) -> str:
279
+ """Build a short dataset summary for the explorer tab."""
280
  return (
281
  f"### Dataset profile\n\n"
282
  f"**Rows:** {len(df):,} \n"
 
376
  ("scaler", StandardScaler()),
377
  (
378
  "classifier",
379
+ ExtraTreesClassifier(
380
  n_estimators=trees,
381
  max_depth=depth,
382
  random_state=seed,
383
  n_jobs=-1,
384
+ class_weight="balanced",
385
+ min_samples_leaf=1,
386
  ),
387
  ),
388
  ]
 
392
  y_pred = model.predict(X_test)
393
 
394
  accuracy = float(accuracy_score(y_test, y_pred))
395
+ macro_f1 = float(f1_score(y_test, y_pred, average="macro", zero_division=0))
396
+ weighted_f1 = float(f1_score(y_test, y_pred, average="weighted", zero_division=0))
397
 
398
  classifier = model.named_steps["classifier"]
399
  importances = getattr(classifier, "feature_importances_", None)
400
  fig = make_classification_figure(y_test.to_numpy(), y_pred, CLASS_ORDER, list(feature_columns), importances)
401
 
402
+ report = classification_report(
403
+ y_test,
404
+ y_pred,
405
+ labels=CLASS_ORDER,
406
+ zero_division=0,
407
+ )
408
  results = (
409
  "### Classification results\n\n"
410
  f"**Rows used:** {len(train_df):,} \n"