QSBench commited on
Commit
2635a44
·
verified ·
1 Parent(s): 3c7d3fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +62 -352
app.py CHANGED
@@ -1,49 +1,30 @@
1
  import ast
2
  import logging
3
  import re
4
- from typing import Dict, List, Optional, Tuple
5
 
6
  import gradio as gr
7
  import matplotlib.pyplot as plt
8
  import numpy as np
9
  import pandas as pd
10
  from datasets import load_dataset
11
- from sklearn.ensemble import ExtraTreesClassifier
12
- from sklearn.impute import SimpleImputer
13
- from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, f1_score
14
  from sklearn.model_selection import train_test_split
15
- from sklearn.pipeline import Pipeline
16
- from sklearn.preprocessing import StandardScaler
17
 
18
  logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
20
 
21
  APP_TITLE = "Noise Detection"
22
- APP_SUBTITLE = (
23
- "Classify quantum circuits into clean, depolarizing, amplitude_damping, or hardware-aware noise conditions."
24
- )
25
 
26
  REPO_CONFIG = {
27
- "clean": {
28
- "label": "clean",
29
- "repo": "QSBench/QSBench-Core-v1.0.0-demo",
30
- },
31
- "depolarizing": {
32
- "label": "depolarizing",
33
- "repo": "QSBench/QSBench-Depolarizing-Demo-v1.0.0",
34
- },
35
- "amplitude_damping": {
36
- "label": "amplitude_damping",
37
- "repo": "QSBench/QSBench-Amplitude-v1.0.0-demo",
38
- },
39
- "hardware_aware": {
40
- "label": "hardware_aware",
41
- "repo": "QSBench/QSBench-Transpilation-v1.0.0-demo",
42
- },
43
  }
44
 
45
- CLASS_ORDER = ["clean", "depolarizing", "amplitude_damping", "hardware_aware"]
46
-
47
  NON_FEATURE_COLS = {
48
  "sample_id",
49
  "sample_seed",
@@ -61,20 +42,24 @@ NON_FEATURE_COLS = {
61
  "backend_device",
62
  "precision_mode",
63
  "circuit_signature",
64
- "entanglement",
65
- "meyer_wallach",
66
- "cx_count",
67
- "noise_label",
68
  }
69
 
70
- SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
71
 
72
  _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
73
- _COMBINED_CACHE: Optional[pd.DataFrame] = None
 
 
 
 
 
 
 
 
 
74
 
75
 
76
  def safe_parse(value):
77
- """Safely parse stringified Python literals."""
78
  if isinstance(value, str):
79
  try:
80
  return ast.literal_eval(value)
@@ -84,15 +69,9 @@ def safe_parse(value):
84
 
85
 
86
  def adjacency_features(adj_value) -> Dict[str, float]:
87
- """Derive graph statistics from an adjacency matrix."""
88
  parsed = safe_parse(adj_value)
89
  if not isinstance(parsed, list) or len(parsed) == 0:
90
- return {
91
- "adj_edge_count": np.nan,
92
- "adj_density": np.nan,
93
- "adj_degree_mean": np.nan,
94
- "adj_degree_std": np.nan,
95
- }
96
 
97
  try:
98
  arr = np.array(parsed, dtype=float)
@@ -108,32 +87,17 @@ def adjacency_features(adj_value) -> Dict[str, float]:
108
  "adj_degree_std": float(np.std(degrees)),
109
  }
110
  except Exception:
111
- return {
112
- "adj_edge_count": np.nan,
113
- "adj_density": np.nan,
114
- "adj_degree_mean": np.nan,
115
- "adj_degree_std": np.nan,
116
- }
117
 
118
 
119
  def qasm_features(qasm_value) -> Dict[str, float]:
120
- """Extract lightweight text statistics from QASM."""
121
  if not isinstance(qasm_value, str) or not qasm_value.strip():
122
- return {
123
- "qasm_length": np.nan,
124
- "qasm_line_count": np.nan,
125
- "qasm_gate_keyword_count": np.nan,
126
- "qasm_measure_count": np.nan,
127
- "qasm_comment_count": np.nan,
128
- }
129
 
130
  text = qasm_value
131
  lines = [line for line in text.splitlines() if line.strip()]
132
- gate_keywords = re.findall(
133
- r"\b(cx|h|x|y|z|rx|ry|rz|u1|u2|u3|u|swap|cz|ccx|rxx|ryy|rzz)\b",
134
- text,
135
- flags=re.IGNORECASE,
136
- )
137
  measure_count = len(re.findall(r"\bmeasure\b", text, flags=re.IGNORECASE))
138
  comment_count = sum(1 for line in lines if line.strip().startswith("//"))
139
 
@@ -147,9 +111,7 @@ def qasm_features(qasm_value) -> Dict[str, float]:
147
 
148
 
149
  def enrich_dataframe(df: pd.DataFrame) -> pd.DataFrame:
150
- """Add derived numeric features for classification."""
151
  df = df.copy()
152
-
153
  if "adjacency" in df.columns:
154
  adj_df = df["adjacency"].apply(adjacency_features).apply(pd.Series)
155
  df = pd.concat([df, adj_df], axis=1)
@@ -158,274 +120,60 @@ def enrich_dataframe(df: pd.DataFrame) -> pd.DataFrame:
158
  if qasm_source in df.columns:
159
  qasm_df = df[qasm_source].apply(qasm_features).apply(pd.Series)
160
  df = pd.concat([df, qasm_df], axis=1)
161
-
162
  return df
163
 
164
 
165
- def load_single_dataset(dataset_key: str) -> pd.DataFrame:
166
- """Load a dataset shard from Hugging Face and cache it in memory."""
167
- if dataset_key not in _ASSET_CACHE:
168
- logger.info("Loading dataset: %s", dataset_key)
169
- ds = load_dataset(REPO_CONFIG[dataset_key]["repo"])
170
- df = pd.DataFrame(ds["train"])
171
- df = enrich_dataframe(df)
172
- df["noise_label"] = REPO_CONFIG[dataset_key]["label"]
173
- _ASSET_CACHE[dataset_key] = df
174
- return _ASSET_CACHE[dataset_key]
175
-
176
-
177
- def load_combined_dataset() -> pd.DataFrame:
178
- """Load and merge all four noise-condition datasets."""
179
- global _COMBINED_CACHE
180
- if _COMBINED_CACHE is None:
181
- frames = [load_single_dataset(key) for key in REPO_CONFIG.keys()]
182
- combined = pd.concat(frames, ignore_index=True)
183
- combined = combined[combined["noise_label"].isin(CLASS_ORDER)].copy()
184
- _COMBINED_CACHE = combined
185
- return _COMBINED_CACHE
186
-
187
-
188
- def load_guide_content() -> str:
189
- """Load the markdown guide if it exists."""
190
- try:
191
- with open("GUIDE.md", "r", encoding="utf-8") as f:
192
- return f.read()
193
- except FileNotFoundError:
194
- return "# Guide\n\nGuide file not found."
195
-
196
-
197
  def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
198
- """Return numeric feature columns excluding metadata and target columns."""
199
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
200
  features = []
201
  for col in numeric_cols:
202
  if col in NON_FEATURE_COLS:
203
  continue
204
- if any(pattern in col for pattern in SOFT_EXCLUDE_PATTERNS):
205
  continue
206
  features.append(col)
207
  return sorted(features)
208
 
209
 
210
  def default_feature_selection(features: List[str]) -> List[str]:
211
- """Select a stable default feature subset."""
212
- preferred = [
213
- "gate_entropy",
214
- "adj_density",
215
- "adj_degree_mean",
216
- "adj_degree_std",
217
- "depth",
218
- "total_gates",
219
- "single_qubit_gates",
220
- "two_qubit_gates",
221
- "cx_count",
222
- "qasm_length",
223
- "qasm_line_count",
224
- "qasm_gate_keyword_count",
225
- ]
226
- selected = [feature for feature in preferred if feature in features]
227
- return selected[:8] if selected else features[:8]
228
-
229
-
230
- def make_classification_figure(
231
- y_true: np.ndarray,
232
- y_pred: np.ndarray,
233
- class_names: List[str],
234
- feature_names: Optional[List[str]] = None,
235
- importances: Optional[np.ndarray] = None,
236
- ) -> plt.Figure:
237
- """Create a compact classification summary figure."""
238
- fig = plt.figure(figsize=(20, 6))
239
- gs = fig.add_gridspec(1, 3)
240
-
241
- ax1 = fig.add_subplot(gs[0, 0])
242
- ax2 = fig.add_subplot(gs[0, 1])
243
- ax3 = fig.add_subplot(gs[0, 2])
244
-
245
- cm = confusion_matrix(y_true, y_pred, labels=class_names)
246
- image = ax1.imshow(cm, interpolation="nearest")
247
- ax1.set_title("Confusion Matrix")
248
- ax1.set_xlabel("Predicted")
249
- ax1.set_ylabel("Actual")
250
- ax1.set_xticks(np.arange(len(class_names)))
251
- ax1.set_yticks(np.arange(len(class_names)))
252
- ax1.set_xticklabels(class_names, rotation=45, ha="right")
253
- ax1.set_yticklabels(class_names)
254
- for i in range(cm.shape[0]):
255
- for j in range(cm.shape[1]):
256
- ax1.text(j, i, cm[i, j], ha="center", va="center")
257
- fig.colorbar(image, ax=ax1, fraction=0.046, pad=0.04)
258
-
259
- incorrect = (y_true != y_pred).astype(int)
260
- ax2.hist(incorrect, bins=[-0.5, 0.5, 1.5])
261
- ax2.set_title("Correct vs Incorrect")
262
- ax2.set_xlabel("0 = Correct, 1 = Incorrect")
263
- ax2.set_ylabel("Count")
264
-
265
- if importances is not None and feature_names is not None and len(importances) == len(feature_names):
266
- idx = np.argsort(importances)[-10:]
267
- ax3.barh([feature_names[i] for i in idx], importances[idx])
268
- ax3.set_title("Top-10 Feature Importances")
269
- ax3.set_xlabel("Importance")
270
- else:
271
- ax3.text(0.5, 0.5, "Feature importances are unavailable.", ha="center", va="center")
272
- ax3.set_axis_off()
273
-
274
- fig.tight_layout()
275
- return fig
276
-
277
-
278
- def build_dataset_profile(df: pd.DataFrame) -> str:
279
- """Build a short dataset summary for the explorer tab."""
280
- return (
281
- f"### Dataset profile\n\n"
282
- f"**Rows:** {len(df):,} \n"
283
- f"**Columns:** {len(df.columns):,} \n"
284
- f"**Classes:** {', '.join(CLASS_ORDER)}"
285
- )
286
 
287
 
288
- def refresh_explorer(dataset_key: str, split_name: str) -> Tuple[gr.update, pd.DataFrame, str, str, str, str]:
289
- """Refresh the explorer view for the selected source dataset."""
290
- df = load_single_dataset(dataset_key)
291
- splits = df["split"].dropna().unique().tolist() if "split" in df.columns else ["train"]
292
- if not splits:
293
- splits = ["train"]
294
-
295
- if split_name not in splits:
296
- split_name = splits[0]
297
-
298
- filtered = df[df["split"] == split_name] if "split" in df.columns else df
299
- display_df = filtered.head(12).copy()
300
-
301
- raw_qasm = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns and not display_df.empty else "// N/A"
302
- transpiled_qasm = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns and not display_df.empty else "// N/A"
303
-
304
- profile_box = build_dataset_profile(df)
305
- summary_box = (
306
- f"### Split summary\n\n"
307
- f"**Dataset:** `{dataset_key}` \n"
308
- f"**Label:** `{REPO_CONFIG[dataset_key]['label']}` \n"
309
- f"**Available splits:** {', '.join(splits)} \n"
310
- f"**Preview rows:** {len(display_df)}"
311
- )
312
-
313
- return (
314
- gr.update(choices=splits, value=split_name),
315
- display_df,
316
- raw_qasm,
317
- transpiled_qasm,
318
- profile_box,
319
- summary_box,
320
- )
321
-
322
-
323
- def sync_feature_picker(_dataset_key: str) -> gr.update:
324
- """Refresh the feature list from the combined dataset."""
325
- df = load_combined_dataset()
326
- features = get_available_feature_columns(df)
327
- defaults = default_feature_selection(features)
328
- return gr.update(choices=features, value=defaults)
329
-
330
-
331
- def train_classifier(
332
- feature_columns: List[str],
333
- test_size: float,
334
- n_estimators: int,
335
- max_depth: float,
336
- random_state: float,
337
- ) -> Tuple[Optional[plt.Figure], str]:
338
- """Train a four-class classifier and return metrics plus a plot."""
339
  if not feature_columns:
340
- return None, "### Please select at least one feature."
341
-
342
- df = load_combined_dataset()
343
- required_cols = feature_columns + ["noise_label"]
344
- train_df = df.dropna(subset=required_cols).copy()
345
- train_df = train_df[train_df["noise_label"].isin(CLASS_ORDER)]
346
 
347
- if len(train_df) < 20:
348
- return None, "### ❌ Not enough clean rows after filtering missing values."
 
349
 
350
- X = train_df[feature_columns]
351
- y = train_df["noise_label"]
352
 
353
- seed = int(random_state)
354
- depth = int(max_depth) if max_depth and int(max_depth) > 0 else None
355
- trees = int(n_estimators)
356
 
357
- try:
358
- X_train, X_test, y_train, y_test = train_test_split(
359
- X,
360
- y,
361
- test_size=test_size,
362
- random_state=seed,
363
- stratify=y,
364
- )
365
- except ValueError:
366
- X_train, X_test, y_train, y_test = train_test_split(
367
- X,
368
- y,
369
- test_size=test_size,
370
- random_state=seed,
371
- )
372
-
373
- model = Pipeline(
374
- steps=[
375
- ("imputer", SimpleImputer(strategy="median")),
376
- ("scaler", StandardScaler()),
377
- (
378
- "classifier",
379
- ExtraTreesClassifier(
380
- n_estimators=trees,
381
- max_depth=depth,
382
- random_state=seed,
383
- n_jobs=-1,
384
- class_weight="balanced",
385
- min_samples_leaf=1,
386
- ),
387
- ),
388
- ]
389
  )
390
-
391
  model.fit(X_train, y_train)
392
- y_pred = model.predict(X_test)
393
 
394
- accuracy = float(accuracy_score(y_test, y_pred))
395
- macro_f1 = float(f1_score(y_test, y_pred, average="macro", zero_division=0))
396
- weighted_f1 = float(f1_score(y_test, y_pred, average="weighted", zero_division=0))
397
 
398
- classifier = model.named_steps["classifier"]
399
- importances = getattr(classifier, "feature_importances_", None)
400
- fig = make_classification_figure(y_test.to_numpy(), y_pred, CLASS_ORDER, list(feature_columns), importances)
401
-
402
- report = classification_report(
403
- y_test,
404
- y_pred,
405
- labels=CLASS_ORDER,
406
- zero_division=0,
407
- )
408
- results = (
409
- "### Classification results\n\n"
410
- f"**Rows used:** {len(train_df):,} \n"
411
- f"**Test size:** {test_size:.0%} \n"
412
- f"**Accuracy:** {accuracy:.4f} \n"
413
- f"**Macro F1:** {macro_f1:.4f} \n"
414
- f"**Weighted F1:** {weighted_f1:.4f}\n\n"
415
- "```text\n"
416
- f"{report}"
417
- "```"
418
- )
419
- return fig, results
420
 
421
 
422
  CUSTOM_CSS = """
423
- .gradio-container {
424
- max-width: 1400px !important;
425
- }
426
- footer {
427
- margin-top: 1rem;
428
- }
429
  """
430
 
431
  with gr.Blocks(title=APP_TITLE) as demo:
@@ -433,38 +181,16 @@ with gr.Blocks(title=APP_TITLE) as demo:
433
  gr.Markdown(APP_SUBTITLE)
434
 
435
  with gr.Tabs():
436
- with gr.TabItem("🔎 Explorer"):
437
- dataset_dropdown = gr.Dropdown(
438
- list(REPO_CONFIG.keys()),
439
- value="clean",
440
- label="Dataset",
441
- )
442
- split_dropdown = gr.Dropdown(
443
- ["train"],
444
- value="train",
445
- label="Split",
446
- )
447
-
448
- profile_box = gr.Markdown(value="### Loading dataset...")
449
- summary_box = gr.Markdown(value="### Loading split summary...")
450
- explorer_df = gr.Dataframe(label="Preview", interactive=False)
451
-
452
- with gr.Row():
453
- raw_qasm = gr.Code(label="Raw QASM", language=None)
454
- transpiled_qasm = gr.Code(label="Transpiled QASM", language=None)
455
-
456
  with gr.TabItem("🧠 Classification"):
457
- feature_picker = gr.CheckboxGroup(label="Input features", choices=[])
458
- test_size = gr.Slider(0.1, 0.4, value=0.2, step=0.05, label="Test split")
459
- n_estimators = gr.Slider(50, 400, value=200, step=10, label="Trees")
460
- max_depth = gr.Slider(1, 30, value=12, step=1, label="Max depth")
461
- seed = gr.Number(value=42, precision=0, label="Random seed")
462
  run_btn = gr.Button("Train & Evaluate", variant="primary")
463
- plot = gr.Plot()
464
- metrics = gr.Markdown()
465
 
466
- with gr.TabItem("📖 Guide"):
467
- gr.Markdown(load_guide_content())
468
 
469
  gr.Markdown("---")
470
  gr.Markdown(
@@ -475,32 +201,16 @@ with gr.Blocks(title=APP_TITLE) as demo:
475
  )
476
 
477
  dataset_dropdown.change(
478
- refresh_explorer,
479
- [dataset_dropdown, split_dropdown],
480
- [split_dropdown, explorer_df, raw_qasm, transpiled_qasm, profile_box, summary_box],
481
- )
482
-
483
- split_dropdown.change(
484
- refresh_explorer,
485
- [dataset_dropdown, split_dropdown],
486
- [split_dropdown, explorer_df, raw_qasm, transpiled_qasm, profile_box, summary_box],
487
  )
488
 
489
- dataset_dropdown.change(sync_feature_picker, [dataset_dropdown], [feature_picker])
490
-
491
  run_btn.click(
492
  train_classifier,
493
- [feature_picker, test_size, n_estimators, max_depth, seed],
494
- [plot, metrics],
495
  )
496
 
497
- demo.load(
498
- refresh_explorer,
499
- [dataset_dropdown, split_dropdown],
500
- [split_dropdown, explorer_df, raw_qasm, transpiled_qasm, profile_box, summary_box],
501
- )
502
- demo.load(sync_feature_picker, [dataset_dropdown], [feature_picker])
503
-
504
-
505
  if __name__ == "__main__":
506
- demo.launch(theme=gr.themes.Soft(), css=CUSTOM_CSS)
 
1
  import ast
2
  import logging
3
  import re
4
+ from typing import Dict, List
5
 
6
  import gradio as gr
7
  import matplotlib.pyplot as plt
8
  import numpy as np
9
  import pandas as pd
10
  from datasets import load_dataset
 
 
 
11
  from sklearn.model_selection import train_test_split
12
+ from sklearn.ensemble import HistGradientBoostingClassifier
13
+ from sklearn.metrics import classification_report, confusion_matrix
14
 
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
18
  APP_TITLE = "Noise Detection"
19
+ APP_SUBTITLE = "Classify circuits by noise type: clean, depolarizing, amplitude_damping, hardware_aware."
 
 
20
 
21
  REPO_CONFIG = {
22
+ "Core (Clean)": "QSBench/QSBench-Core-v1.0.0-demo",
23
+ "Depolarizing Noise": "QSBench/QSBench-Depolarizing-Demo-v1.0.0",
24
+ "Amplitude Damping": "QSBench/QSBench-Amplitude-v1.0.0-demo",
25
+ "Hardware-Aware Noise": "QSBench/QSBench-Transpilation-v1.0.0-demo",
 
 
 
 
 
 
 
 
 
 
 
 
26
  }
27
 
 
 
28
  NON_FEATURE_COLS = {
29
  "sample_id",
30
  "sample_seed",
 
42
  "backend_device",
43
  "precision_mode",
44
  "circuit_signature",
 
 
 
 
45
  }
46
 
47
+ _SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
48
 
49
  _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
50
+
51
+
52
def load_dataset_df(dataset_key: str) -> pd.DataFrame:
    """Return the enriched dataframe for one configured repo, memoized in-process.

    On first request the repo is downloaded via ``load_dataset``, its ``train``
    split is converted to a dataframe, derived features are added, and the
    dataset key itself is stamped into ``noise_label`` as the class label.
    Subsequent calls return the cached frame unchanged.
    """
    cached = _ASSET_CACHE.get(dataset_key)
    if cached is None:
        raw = load_dataset(REPO_CONFIG[dataset_key])
        frame = enrich_dataframe(pd.DataFrame(raw["train"]))
        frame["noise_label"] = dataset_key
        _ASSET_CACHE[dataset_key] = frame
        cached = frame
    return cached
60
 
61
 
62
  def safe_parse(value):
 
63
  if isinstance(value, str):
64
  try:
65
  return ast.literal_eval(value)
 
69
 
70
 
71
  def adjacency_features(adj_value) -> Dict[str, float]:
 
72
  parsed = safe_parse(adj_value)
73
  if not isinstance(parsed, list) or len(parsed) == 0:
74
+ return {"adj_edge_count": np.nan, "adj_density": np.nan, "adj_degree_mean": np.nan, "adj_degree_std": np.nan}
 
 
 
 
 
75
 
76
  try:
77
  arr = np.array(parsed, dtype=float)
 
87
  "adj_degree_std": float(np.std(degrees)),
88
  }
89
  except Exception:
90
+ return {"adj_edge_count": np.nan, "adj_density": np.nan, "adj_degree_mean": np.nan, "adj_degree_std": np.nan}
 
 
 
 
 
91
 
92
 
93
  def qasm_features(qasm_value) -> Dict[str, float]:
 
94
  if not isinstance(qasm_value, str) or not qasm_value.strip():
95
+ return {"qasm_length": np.nan, "qasm_line_count": np.nan, "qasm_gate_keyword_count": np.nan,
96
+ "qasm_measure_count": np.nan, "qasm_comment_count": np.nan}
 
 
 
 
 
97
 
98
  text = qasm_value
99
  lines = [line for line in text.splitlines() if line.strip()]
100
+ gate_keywords = re.findall(r"\b(cx|h|x|y|z|rx|ry|rz|u1|u2|u3|u|swap|cz|ccx|rxx|ryy|rzz)\b", text, flags=re.IGNORECASE)
 
 
 
 
101
  measure_count = len(re.findall(r"\bmeasure\b", text, flags=re.IGNORECASE))
102
  comment_count = sum(1 for line in lines if line.strip().startswith("//"))
103
 
 
111
 
112
 
113
  def enrich_dataframe(df: pd.DataFrame) -> pd.DataFrame:
 
114
  df = df.copy()
 
115
  if "adjacency" in df.columns:
116
  adj_df = df["adjacency"].apply(adjacency_features).apply(pd.Series)
117
  df = pd.concat([df, adj_df], axis=1)
 
120
  if qasm_source in df.columns:
121
  qasm_df = df[qasm_source].apply(qasm_features).apply(pd.Series)
122
  df = pd.concat([df, qasm_df], axis=1)
 
123
  return df
124
 
125
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
126
def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
    """List numeric columns usable as model inputs, sorted alphabetically.

    Excludes known metadata/target columns (``NON_FEATURE_COLS``) and any
    column whose name contains one of the leaky-prefix patterns in
    ``_SOFT_EXCLUDE_PATTERNS``.
    """
    numeric = df.select_dtypes(include=[np.number]).columns
    keep = [
        col
        for col in numeric
        if col not in NON_FEATURE_COLS
        and not any(pattern in col for pattern in _SOFT_EXCLUDE_PATTERNS)
    ]
    return sorted(keep)
  return sorted(features)
136
 
137
 
138
def default_feature_selection(features: List[str]) -> List[str]:
    """Return the curated default features that are present, in preference order.

    Only names that actually occur in *features* are kept; the relative order
    of the preferred list is preserved.
    """
    preferred = (
        "gate_entropy",
        "adj_density",
        "adj_degree_mean",
        "adj_degree_std",
        "depth",
        "total_gates",
        "cx_count",
        "qasm_length",
    )
    available = set(features)
    return [name for name in preferred if name in available]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
142
 
143
 
144
def train_classifier(dataset_keys, feature_columns, test_size, seed):
    """Train a 4-way noise classifier and return UI-ready outputs.

    Parameters
    ----------
    dataset_keys : list[str]
        Keys into ``REPO_CONFIG`` selecting which datasets to combine.
    feature_columns : list[str]
        Numeric feature columns to train on.
    test_size : float
        Fraction of rows held out for evaluation.
    seed : float | int
        Random seed (coerced to ``int``).

    Returns
    -------
    tuple[str, matplotlib.figure.Figure | None]
        Markdown metrics text for the ``gr.Markdown`` output and a
        confusion-matrix figure for the ``gr.Plot`` output.

    Fixes vs. the previous revision:
    - The empty-selection early return previously yielded
      ``(None, "No features selected")`` — reversed relative to the
      ``[metrics, cm_plot]`` output wiring, sending a string to ``gr.Plot``.
    - The success path previously returned ``cm.tolist()`` (a nested list)
      for the ``gr.Plot`` output; it now returns a matplotlib Figure.
    """
    if not dataset_keys:
        return "### Please select at least one dataset.", None
    if not feature_columns:
        return "### Please select at least one feature.", None

    frames = [load_dataset_df(key) for key in dataset_keys]
    df = pd.concat(frames, axis=0, ignore_index=True)
    df = df.dropna(subset=feature_columns + ["noise_label"])
    if df.empty:
        return "### No rows remain after dropping missing feature values.", None

    X = df[feature_columns]
    y = df["noise_label"]

    # Stratify to keep per-class ratios in both splits; fall back to an
    # unstratified split when a class has too few rows for stratification.
    try:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=int(seed), stratify=y
        )
    except ValueError:
        X_train, X_test, y_train, y_test = train_test_split(
            X, y, test_size=test_size, random_state=int(seed)
        )

    model = HistGradientBoostingClassifier(
        learning_rate=0.05,
        max_iter=200,
        max_depth=5,
        min_samples_leaf=10,
        l2_regularization=0.1,
        class_weight="balanced",
        random_state=int(seed),
    )
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # zero_division=0 avoids warnings when a class is absent from the test split.
    report = classification_report(y_test, preds, zero_division=0)
    labels = sorted(y.unique())
    cm = confusion_matrix(y_test, preds, labels=labels)

    # Render the confusion matrix as a Figure so gr.Plot receives a plottable
    # object instead of a bare nested list.
    fig, ax = plt.subplots(figsize=(6, 5))
    image = ax.imshow(cm, interpolation="nearest")
    ax.set_title("Confusion Matrix")
    ax.set_xlabel("Predicted")
    ax.set_ylabel("Actual")
    ax.set_xticks(np.arange(len(labels)))
    ax.set_yticks(np.arange(len(labels)))
    ax.set_xticklabels(labels, rotation=45, ha="right")
    ax.set_yticklabels(labels)
    for i in range(cm.shape[0]):
        for j in range(cm.shape[1]):
            ax.text(j, i, cm[i, j], ha="center", va="center")
    fig.colorbar(image, ax=ax, fraction=0.046, pad=0.04)
    fig.tight_layout()

    metrics_md = (
        "### Classification results\n\n"
        f"**Rows used:** {len(df):,}\n\n"
        "```text\n"
        f"{report}"
        "```"
    )
    return metrics_md, fig
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
173
 
174
 
175
# Minimal page styling passed to demo.launch(css=...): widens the Gradio
# container beyond its default max width.
CUSTOM_CSS = """
.gradio-container {max-width: 1400px !important;}
"""
178
 
179
  with gr.Blocks(title=APP_TITLE) as demo:
 
181
  gr.Markdown(APP_SUBTITLE)
182
 
183
  with gr.Tabs():
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
184
  with gr.TabItem("🧠 Classification"):
185
+ dataset_dropdown = gr.CheckboxGroup(list(REPO_CONFIG.keys()), value=list(REPO_CONFIG.keys()), label="Datasets")
186
+ feature_picker = gr.CheckboxGroup(label="Input features")
187
+
188
+ test_size = gr.Slider(0.1, 0.5, value=0.2, step=0.05, label="Test split")
189
+ seed = gr.Number(value=42, label="Random seed")
190
  run_btn = gr.Button("Train & Evaluate", variant="primary")
 
 
191
 
192
+ metrics = gr.Markdown()
193
+ cm_plot = gr.Plot()
194
 
195
  gr.Markdown("---")
196
  gr.Markdown(
 
201
  )
202
 
203
  dataset_dropdown.change(
204
+ lambda datasets: gr.update(choices=get_available_feature_columns(pd.concat([load_dataset_df(k) for k in datasets]))),
205
+ [dataset_dropdown],
206
+ [feature_picker]
 
 
 
 
 
 
207
  )
208
 
 
 
209
  run_btn.click(
210
  train_classifier,
211
+ [dataset_dropdown, feature_picker, test_size, seed],
212
+ [metrics, cm_plot]
213
  )
214
 
 
 
 
 
 
 
 
 
215
  if __name__ == "__main__":
216
+ demo.launch(theme=gr.themes.Soft(), css=CUSTOM_CSS)