QSBench committed on
Commit c110b44 · verified · 1 Parent(s): 4d71862

Update app.py: load the QSBench demo shards from the Hugging Face Hub via `datasets` (with a dataset selector and an in-memory cache) instead of a bundled CSV, and simplify the explorer, regression, and guide callbacks.
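For context, the central change is that the demo shard is no longer read from a local CSV path; it is pulled from the Hugging Face Hub with `datasets.load_dataset` and cached in memory per selected dataset. The sketch below is a minimal, self-contained illustration of that pattern, not the file itself: the repo ID is one of the demo configs listed in `REPO_CONFIG` in the diff, and `Dataset.to_pandas()` is used here where the diff builds the frame with `pd.DataFrame(ds["train"])`.

```python
# Minimal sketch (not the app code): Hub-backed loading with an in-memory
# cache keyed by the chosen dataset, mirroring load_dataset_df() in the diff.
from typing import Dict

import pandas as pd
from datasets import load_dataset

_CACHE: Dict[str, pd.DataFrame] = {}

def load_demo_frame(repo_id: str) -> pd.DataFrame:
    """Download a QSBench demo repo once, then reuse the cached DataFrame."""
    if repo_id not in _CACHE:
        ds = load_dataset(repo_id)              # DatasetDict with a "train" split
        _CACHE[repo_id] = ds["train"].to_pandas()
    return _CACHE[repo_id]

# Example, using a repo ID taken from REPO_CONFIG in the diff:
# df = load_demo_frame("QSBench/QSBench-Amplitude-v1.0.0-demo")
```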

Files changed (1)
  1. app.py +80 -226
app.py CHANGED
@@ -1,14 +1,13 @@
1
  import ast
2
  import logging
3
- import os
4
  import re
5
- from dataclasses import dataclass
6
  from typing import Dict, List, Optional, Tuple
7
 
8
  import gradio as gr
9
  import matplotlib.pyplot as plt
10
  import numpy as np
11
  import pandas as pd
 
12
  from sklearn.ensemble import RandomForestRegressor
13
  from sklearn.impute import SimpleImputer
14
  from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
@@ -16,23 +15,19 @@ from sklearn.model_selection import train_test_split
16
  from sklearn.pipeline import Pipeline
17
  from sklearn.preprocessing import StandardScaler
18
 
19
- # -----------------------------------------------------------------------------
20
- # Logging
21
- # -----------------------------------------------------------------------------
22
  logging.basicConfig(level=logging.INFO)
23
  logger = logging.getLogger(__name__)
24
 
25
- # -----------------------------------------------------------------------------
26
- # Configuration
27
- # -----------------------------------------------------------------------------
28
  APP_TITLE = "Entanglement Score Regression"
29
  APP_SUBTITLE = "Predict the continuous Meyer-Wallach entanglement score from circuit topology and gate structure."
30
 
31
- # Set this to the CSV file you place in the Space repository.
32
- # You can also override it with an environment variable in Spaces.
33
- DATA_PATH = os.getenv("QS_DATA_PATH", "QSBench-Amplitude-v1.0.0-demo_shard_00000.csv")
 
 
 
34
 
35
- # Columns that should never be used as direct features.
36
  NON_FEATURE_COLS = {
37
  "sample_id",
38
  "sample_seed",
@@ -51,38 +46,24 @@ NON_FEATURE_COLS = {
51
  "precision_mode",
52
  "circuit_signature",
53
  "entanglement",
54
- "meyer_wallach", # target column
55
  }
56
 
57
- # Optional columns to visually hide from the feature picker because they are usually constant
58
- # or less informative in small demo shards.
59
  SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
60
 
61
  _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
62
 
63
 
64
- # -----------------------------------------------------------------------------
65
- # Data loading and feature engineering
66
- # -----------------------------------------------------------------------------
67
-
68
- def load_dataset_df() -> pd.DataFrame:
69
- """Load the demo shard from disk and cache it in memory."""
70
- if "df" not in _ASSET_CACHE:
71
- if not os.path.exists(DATA_PATH):
72
- raise FileNotFoundError(
73
- f"Dataset file not found: {DATA_PATH}. "
74
- "Place the CSV in the Space repository or set QS_DATA_PATH."
75
- )
76
-
77
- logger.info("Loading dataset from %s", DATA_PATH)
78
- df = pd.read_csv(DATA_PATH)
79
  df = enrich_dataframe(df)
80
- _ASSET_CACHE["df"] = df
81
- return _ASSET_CACHE["df"]
82
 
83
 
84
  def safe_parse(value):
85
- """Safely parse a string representation of a Python literal."""
86
  if isinstance(value, str):
87
  try:
88
  return ast.literal_eval(value)
@@ -92,7 +73,6 @@ def safe_parse(value):
92
 
93
 
94
  def adjacency_features(adj_value) -> Dict[str, float]:
95
- """Derive compact topology features from the adjacency matrix."""
96
  parsed = safe_parse(adj_value)
97
  if not isinstance(parsed, list) or len(parsed) == 0:
98
  return {
@@ -105,7 +85,6 @@ def adjacency_features(adj_value) -> Dict[str, float]:
105
  try:
106
  arr = np.array(parsed, dtype=float)
107
  n = arr.shape[0]
108
- # For an undirected adjacency matrix, sum counts both directions.
109
  edge_count = float(np.triu(arr, k=1).sum())
110
  possible_edges = float(n * (n - 1) / 2)
111
  density = edge_count / possible_edges if possible_edges > 0 else np.nan
@@ -126,7 +105,6 @@ def adjacency_features(adj_value) -> Dict[str, float]:
126
 
127
 
128
  def qasm_features(qasm_value) -> Dict[str, float]:
129
- """Extract simple string-based statistics from QASM text."""
130
  if not isinstance(qasm_value, str) or not qasm_value.strip():
131
  return {
132
  "qasm_length": np.nan,
@@ -152,7 +130,6 @@ def qasm_features(qasm_value) -> Dict[str, float]:
152
 
153
 
154
  def enrich_dataframe(df: pd.DataFrame) -> pd.DataFrame:
155
- """Create extra features that are useful for regression."""
156
  df = df.copy()
157
 
158
  if "adjacency" in df.columns:
@@ -164,33 +141,18 @@ def enrich_dataframe(df: pd.DataFrame) -> pd.DataFrame:
164
  qasm_df = df[qasm_source].apply(qasm_features).apply(pd.Series)
165
  df = pd.concat([df, qasm_df], axis=1)
166
 
167
- # Normalize obvious object columns that can be safely treated as strings.
168
- for col in ["noise_type", "backend_device", "precision_mode", "observable_mode"]:
169
- if col in df.columns:
170
- df[col] = df[col].astype("string")
171
-
172
  return df
173
 
174
 
175
  def load_guide_content() -> str:
176
- """Load the user guide if it exists in the repository."""
177
- guide_path = "GUIDE.md"
178
- if os.path.exists(guide_path):
179
- with open(guide_path, "r", encoding="utf-8") as f:
180
  return f.read()
181
- return (
182
- "# Guide\n\n"
183
- "The guide file is not added yet. In the next step, we can build a full user manual "
184
- "with dataset description, model interpretation, and example workflows."
185
- )
186
-
187
 
188
- # -----------------------------------------------------------------------------
189
- # Feature selection helpers
190
- # -----------------------------------------------------------------------------
191
 
192
  def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
193
- """Return all numeric feature columns after excluding target and metadata."""
194
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
195
  features = []
196
  for col in numeric_cols:
@@ -203,31 +165,20 @@ def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
203
 
204
 
205
  def default_feature_selection(features: List[str]) -> List[str]:
206
- """Pick a stable set of high-value defaults."""
207
  preferred = [
208
  "gate_entropy",
209
- "adjacency",
210
  "adj_density",
211
  "adj_degree_mean",
212
  "adj_degree_std",
213
  "depth",
214
  "total_gates",
215
- "single_qubit_gates",
216
- "two_qubit_gates",
217
  "cx_count",
218
  "qasm_length",
219
- "qasm_line_count",
220
- "qasm_gate_keyword_count",
221
  ]
222
- return [f for f in preferred if f in features][:8]
223
 
224
 
225
- # -----------------------------------------------------------------------------
226
- # Visualization helpers
227
- # -----------------------------------------------------------------------------
228
-
229
- def make_regression_figure(y_true: np.ndarray, y_pred: np.ndarray, feature_names: Optional[List[str]] = None, importances: Optional[np.ndarray] = None) -> plt.Figure:
230
- """Create a compact three-panel regression summary figure."""
231
  fig = plt.figure(figsize=(20, 6))
232
  gs = fig.add_gridspec(1, 3)
233
 
@@ -235,174 +186,94 @@ def make_regression_figure(y_true: np.ndarray, y_pred: np.ndarray, feature_names
235
  ax2 = fig.add_subplot(gs[0, 1])
236
  ax3 = fig.add_subplot(gs[0, 2])
237
 
238
- # Actual vs predicted.
239
  ax1.scatter(y_true, y_pred, alpha=0.75)
240
  min_v = min(float(np.min(y_true)), float(np.min(y_pred)))
241
  max_v = max(float(np.max(y_true)), float(np.max(y_pred)))
242
  ax1.plot([min_v, max_v], [min_v, max_v], linestyle="--")
243
- ax1.set_title("Actual vs Predicted")
244
- ax1.set_xlabel("Actual Meyer-Wallach")
245
- ax1.set_ylabel("Predicted Meyer-Wallach")
246
 
247
- # Residual histogram.
248
  residuals = y_true - y_pred
249
  ax2.hist(residuals, bins=20)
250
- ax2.set_title("Residual Distribution")
251
- ax2.set_xlabel("Residual")
252
- ax2.set_ylabel("Count")
253
 
254
- # Feature importance chart.
255
- if importances is not None and feature_names is not None and len(importances) == len(feature_names):
256
  idx = np.argsort(importances)[-10:]
257
  ax3.barh([feature_names[i] for i in idx], importances[idx])
258
- ax3.set_title("Top-10 Feature Importances")
259
- ax3.set_xlabel("Importance")
260
- else:
261
- ax3.text(0.5, 0.5, "Feature importances are unavailable.", ha="center", va="center")
262
- ax3.set_axis_off()
263
 
264
  fig.tight_layout()
265
  return fig
266
 
267
 
268
- # -----------------------------------------------------------------------------
269
- # UI callbacks
270
- # -----------------------------------------------------------------------------
271
-
272
- def refresh_explorer(split_name: str) -> Tuple[gr.update, pd.DataFrame, str, str, str, str]:
273
- """Refresh explorer output based on the selected split."""
274
- df = load_dataset_df()
275
  splits = df["split"].dropna().unique().tolist() if "split" in df.columns else ["train"]
276
- if not splits:
277
- splits = ["train"]
278
 
279
  if split_name not in splits:
280
  split_name = splits[0]
281
 
282
  filtered = df[df["split"] == split_name] if "split" in df.columns else df
283
- display_df = filtered.head(12).copy()
284
-
285
- raw_qasm = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns and not display_df.empty else "// N/A"
286
- transpiled_qasm = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns and not display_df.empty else "// N/A"
287
-
288
- target_info = (
289
- f"### Dataset overview\n\n"
290
- f"**Rows:** {len(df):,} \n"
291
- f"**Visible split:** `{split_name}` \n"
292
- f"**Target:** `meyer_wallach` \n"
293
- f"**Target range:** {df['meyer_wallach'].min():.4f} → {df['meyer_wallach'].max():.4f}"
294
- )
295
 
296
- summary = (
297
- f"### Split summary\n\n"
298
- f"**Available splits:** {', '.join(splits)} \n"
299
- f"**Preview rows:** {len(display_df)}"
300
- )
301
 
302
  return (
303
  gr.update(choices=splits, value=split_name),
304
  display_df,
305
  raw_qasm,
306
  transpiled_qasm,
307
- target_info,
308
- summary,
309
  )
310
 
311
 
312
- def sync_feature_picker() -> gr.update:
313
- """Refresh the feature list from the loaded dataset."""
314
- df = load_dataset_df()
315
  features = get_available_feature_columns(df)
316
  defaults = default_feature_selection(features)
317
  return gr.update(choices=features, value=defaults)
318
 
319
 
320
- def train_regressor(feature_columns: List[str], test_size: float, n_estimators: int, max_depth: int, random_state: int) -> Tuple[Optional[plt.Figure], str]:
321
- """Train a regression model and return metrics plus a plot."""
322
  if not feature_columns:
323
- return None, "### Please select at least one feature."
324
 
325
- df = load_dataset_df()
326
- required_cols = feature_columns + ["meyer_wallach"]
327
- train_df = df.dropna(subset=required_cols).copy()
328
-
329
- if len(train_df) < 10:
330
- return None, "### ❌ Not enough clean rows after filtering missing values."
331
 
332
  X = train_df[feature_columns]
333
  y = train_df["meyer_wallach"]
334
 
335
  X_train, X_test, y_train, y_test = train_test_split(
336
- X,
337
- y,
338
- test_size=test_size,
339
- random_state=random_state,
340
  )
341
 
342
- # Random forest works well for small, tabular demo data and gives feature importances.
343
- model = Pipeline(
344
- steps=[
345
- ("imputer", SimpleImputer(strategy="median")),
346
- ("scaler", StandardScaler()),
347
- (
348
- "regressor",
349
- RandomForestRegressor(
350
- n_estimators=n_estimators,
351
- max_depth=max_depth if max_depth > 0 else None,
352
- random_state=random_state,
353
- n_jobs=-1,
354
- ),
355
- ),
356
- ]
357
- )
358
 
359
  model.fit(X_train, y_train)
360
- y_pred = model.predict(X_test)
361
-
362
- rmse = float(np.sqrt(mean_squared_error(y_test, y_pred)))
363
- mae = float(mean_absolute_error(y_test, y_pred))
364
- r2 = float(r2_score(y_test, y_pred))
365
-
366
- regressor = model.named_steps["regressor"]
367
- importances = getattr(regressor, "feature_importances_", None)
368
- fig = make_regression_figure(y_test.to_numpy(), y_pred, list(feature_columns), importances)
369
-
370
- results = (
371
- "### Regression results\n\n"
372
- f"**Rows used:** {len(train_df):,} \n"
373
- f"**Test size:** {test_size:.0%} \n"
374
- f"**RMSE:** {rmse:.4f} \n"
375
- f"**MAE:** {mae:.4f} \n"
376
- f"**R²:** {r2:.4f}\n\n"
377
- "The closer the scatter points are to the diagonal line, the better the model."
378
- )
379
- return fig, results
380
 
 
 
 
381
 
382
- def build_dataset_profile() -> str:
383
- """Generate a compact dataset summary for the explorer tab."""
384
- df = load_dataset_df()
385
- target = df["meyer_wallach"]
386
- return (
387
- f"### Dataset profile\n\n"
388
- f"**Rows:** {len(df):,} \n"
389
- f"**Columns:** {len(df.columns):,} \n"
390
- f"**Meyer-Wallach mean:** {target.mean():.4f} \n"
391
- f"**Meyer-Wallach std:** {target.std():.4f} \n"
392
- f"**Meyer-Wallach min/max:** {target.min():.4f} / {target.max():.4f}"
393
- )
394
 
395
 
396
- # -----------------------------------------------------------------------------
397
- # UI
398
- # -----------------------------------------------------------------------------
399
  CUSTOM_CSS = """
400
- footer {
401
- margin-top: 1rem;
402
- }
403
- .gradio-container {
404
- max-width: 1400px !important;
405
- }
406
  """
407
 
408
  with gr.Blocks(title=APP_TITLE) as demo:
@@ -411,61 +282,44 @@ with gr.Blocks(title=APP_TITLE) as demo:
411
 
412
  with gr.Tabs():
413
  with gr.TabItem("🔎 Explorer"):
414
- with gr.Row():
415
- split_dropdown = gr.Dropdown(label="Split", choices=["train"], value="train")
416
- profile_box = gr.Markdown(value="### Loading dataset...")
417
-
418
- with gr.Row():
419
- explorer_summary = gr.Markdown(value="### Loading split summary...")
420
-
421
- explorer_df = gr.Dataframe(interactive=False, label="Preview rows")
422
-
423
- with gr.Row():
424
- raw_qasm_code = gr.Code(label="Raw QASM", language=None)
425
- transpiled_qasm_code = gr.Code(label="Transpiled QASM", language=None)
426
 
427
  with gr.TabItem("🧠 Regression"):
428
- with gr.Row():
429
- with gr.Column(scale=1):
430
- feature_picker = gr.CheckboxGroup(label="Input features", choices=[])
431
- test_size_slider = gr.Slider(0.1, 0.4, value=0.2, step=0.05, label="Test split")
432
- n_estimators_slider = gr.Slider(50, 400, value=200, step=10, label="Number of trees")
433
- max_depth_slider = gr.Slider(2, 30, value=12, step=1, label="Max tree depth")
434
- random_state_number = gr.Number(value=42, precision=0, label="Random seed")
435
- train_btn = gr.Button("Train & Evaluate", variant="primary")
436
- with gr.Column(scale=2):
437
- plot_output = gr.Plot()
438
- metrics_output = gr.Markdown()
439
 
440
  with gr.TabItem("📖 Guide"):
441
  gr.Markdown(load_guide_content())
442
 
443
  gr.Markdown("---")
444
  gr.Markdown(
445
- "### 🔗 Links \n"
 
446
  "[Website](https://qsbench.github.io) | [Hugging Face](https://huggingface.co/QSBench) | [GitHub](https://github.com/QSBench)"
447
  )
448
 
449
- # Bind events.
450
- split_dropdown.change(
451
- refresh_explorer,
452
- inputs=[split_dropdown],
453
- outputs=[split_dropdown, explorer_df, raw_qasm_code, transpiled_qasm_code, profile_box, explorer_summary],
454
- )
455
 
456
- train_btn.click(
457
- train_regressor,
458
- inputs=[feature_picker, test_size_slider, n_estimators_slider, max_depth_slider, random_state_number],
459
- outputs=[plot_output, metrics_output],
460
- )
461
 
462
- demo.load(
463
- refresh_explorer,
464
- inputs=[split_dropdown],
465
- outputs=[split_dropdown, explorer_df, raw_qasm_code, transpiled_qasm_code, profile_box, explorer_summary],
466
- )
467
- demo.load(sync_feature_picker, outputs=[feature_picker])
468
 
469
 
470
  if __name__ == "__main__":
471
  demo.launch(theme=gr.themes.Soft(), css=CUSTOM_CSS)
 
 
1
  import ast
2
  import logging
 
3
  import re
 
4
  from typing import Dict, List, Optional, Tuple
5
 
6
  import gradio as gr
7
  import matplotlib.pyplot as plt
8
  import numpy as np
9
  import pandas as pd
10
+ from datasets import load_dataset
11
  from sklearn.ensemble import RandomForestRegressor
12
  from sklearn.impute import SimpleImputer
13
  from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
 
15
  from sklearn.pipeline import Pipeline
16
  from sklearn.preprocessing import StandardScaler
17
 
 
 
 
18
  logging.basicConfig(level=logging.INFO)
19
  logger = logging.getLogger(__name__)
20
 
 
 
 
21
  APP_TITLE = "Entanglement Score Regression"
22
  APP_SUBTITLE = "Predict the continuous Meyer-Wallach entanglement score from circuit topology and gate structure."
23
 
24
+ REPO_CONFIG = {
25
+ "Core (Clean)": "QSBench/QSBench-Core-v1.0.0-demo",
26
+ "Depolarizing Noise": "QSBench/QSBench-Depolarizing-Demo-v1.0.0",
27
+ "Amplitude Damping": "QSBench/QSBench-Amplitude-v1.0.0-demo",
28
+ "Transpilation (10q)": "QSBench/QSBench-Transpilation-v1.0.0-demo",
29
+ }
30
 
 
31
  NON_FEATURE_COLS = {
32
  "sample_id",
33
  "sample_seed",
 
46
  "precision_mode",
47
  "circuit_signature",
48
  "entanglement",
49
+ "meyer_wallach",
50
  }
51
 
 
 
52
  SOFT_EXCLUDE_PATTERNS = ["ideal_", "noisy_", "error_", "sign_ideal_", "sign_noisy_"]
53
 
54
  _ASSET_CACHE: Dict[str, pd.DataFrame] = {}
55
 
56
 
57
+ def load_dataset_df(dataset_key: str) -> pd.DataFrame:
58
+ if dataset_key not in _ASSET_CACHE:
59
+ ds = load_dataset(REPO_CONFIG[dataset_key])
60
+ df = pd.DataFrame(ds["train"])
61
  df = enrich_dataframe(df)
62
+ _ASSET_CACHE[dataset_key] = df
63
+ return _ASSET_CACHE[dataset_key]
64
 
65
 
66
  def safe_parse(value):
 
67
  if isinstance(value, str):
68
  try:
69
  return ast.literal_eval(value)
 
73
 
74
 
75
  def adjacency_features(adj_value) -> Dict[str, float]:
 
76
  parsed = safe_parse(adj_value)
77
  if not isinstance(parsed, list) or len(parsed) == 0:
78
  return {
 
85
  try:
86
  arr = np.array(parsed, dtype=float)
87
  n = arr.shape[0]
 
88
  edge_count = float(np.triu(arr, k=1).sum())
89
  possible_edges = float(n * (n - 1) / 2)
90
  density = edge_count / possible_edges if possible_edges > 0 else np.nan
 
105
 
106
 
107
  def qasm_features(qasm_value) -> Dict[str, float]:
 
108
  if not isinstance(qasm_value, str) or not qasm_value.strip():
109
  return {
110
  "qasm_length": np.nan,
 
130
 
131
 
132
  def enrich_dataframe(df: pd.DataFrame) -> pd.DataFrame:
 
133
  df = df.copy()
134
 
135
  if "adjacency" in df.columns:
 
141
  qasm_df = df[qasm_source].apply(qasm_features).apply(pd.Series)
142
  df = pd.concat([df, qasm_df], axis=1)
143
 
 
 
 
 
 
144
  return df
145
 
146
 
147
  def load_guide_content() -> str:
148
+ try:
149
+ with open("GUIDE.md", "r", encoding="utf-8") as f:
 
 
150
  return f.read()
151
+ except FileNotFoundError:
152
+ return "# Guide\n\nGuide file not found."
 
 
 
 
153
 
 
 
 
154
 
155
  def get_available_feature_columns(df: pd.DataFrame) -> List[str]:
 
156
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
157
  features = []
158
  for col in numeric_cols:
 
165
 
166
 
167
  def default_feature_selection(features: List[str]) -> List[str]:
 
168
  preferred = [
169
  "gate_entropy",
 
170
  "adj_density",
171
  "adj_degree_mean",
172
  "adj_degree_std",
173
  "depth",
174
  "total_gates",
 
 
175
  "cx_count",
176
  "qasm_length",
 
 
177
  ]
178
+ return [f for f in preferred if f in features]
179
 
180
 
181
+ def make_regression_figure(y_true, y_pred, feature_names=None, importances=None):
 
 
 
 
 
182
  fig = plt.figure(figsize=(20, 6))
183
  gs = fig.add_gridspec(1, 3)
184
 
 
186
  ax2 = fig.add_subplot(gs[0, 1])
187
  ax3 = fig.add_subplot(gs[0, 2])
188
 
 
189
  ax1.scatter(y_true, y_pred, alpha=0.75)
190
  min_v = min(float(np.min(y_true)), float(np.min(y_pred)))
191
  max_v = max(float(np.max(y_true)), float(np.max(y_pred)))
192
  ax1.plot([min_v, max_v], [min_v, max_v], linestyle="--")
 
 
 
193
 
 
194
  residuals = y_true - y_pred
195
  ax2.hist(residuals, bins=20)
 
 
 
196
 
197
+ if importances is not None:
 
198
  idx = np.argsort(importances)[-10:]
199
  ax3.barh([feature_names[i] for i in idx], importances[idx])
 
 
 
 
 
200
 
201
  fig.tight_layout()
202
  return fig
203
 
204
 
205
+ def refresh_explorer(dataset_key, split_name):
206
+ df = load_dataset_df(dataset_key)
 
 
 
 
 
207
  splits = df["split"].dropna().unique().tolist() if "split" in df.columns else ["train"]
 
 
208
 
209
  if split_name not in splits:
210
  split_name = splits[0]
211
 
212
  filtered = df[df["split"] == split_name] if "split" in df.columns else df
213
+ display_df = filtered.head(10)
214
 
215
+ raw_qasm = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns else "// N/A"
216
+ transpiled_qasm = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns else "// N/A"
 
 
 
217
 
218
  return (
219
  gr.update(choices=splits, value=split_name),
220
  display_df,
221
  raw_qasm,
222
  transpiled_qasm,
223
+ f"### {dataset_key} Explorer",
224
+ f"Rows: {len(df)}",
225
  )
226
 
227
 
228
+ def sync_feature_picker(dataset_key):
229
+ df = load_dataset_df(dataset_key)
 
230
  features = get_available_feature_columns(df)
231
  defaults = default_feature_selection(features)
232
  return gr.update(choices=features, value=defaults)
233
 
234
 
235
+ def train_regressor(dataset_key, feature_columns, test_size, n_estimators, max_depth, random_state):
 
236
  if not feature_columns:
237
+ return None, "No features selected"
238
 
239
+ df = load_dataset_df(dataset_key)
240
+ train_df = df.dropna(subset=feature_columns + ["meyer_wallach"])
 
 
 
 
241
 
242
  X = train_df[feature_columns]
243
  y = train_df["meyer_wallach"]
244
 
245
  X_train, X_test, y_train, y_test = train_test_split(
246
+ X, y, test_size=test_size, random_state=random_state
 
 
 
247
  )
248
 
249
+ model = Pipeline([
250
+ ("imputer", SimpleImputer()),
251
+ ("scaler", StandardScaler()),
252
+ ("regressor", RandomForestRegressor(
253
+ n_estimators=n_estimators,
254
+ max_depth=max_depth,
255
+ random_state=random_state,
256
+ n_jobs=-1
257
+ ))
258
+ ])
 
 
 
 
 
 
259
 
260
  model.fit(X_train, y_train)
261
+ preds = model.predict(X_test)
262
 
263
+ rmse = np.sqrt(mean_squared_error(y_test, preds))
264
+ mae = mean_absolute_error(y_test, preds)
265
+ r2 = r2_score(y_test, preds)
266
 
267
+ importances = model.named_steps["regressor"].feature_importances_
268
+ fig = make_regression_figure(y_test.to_numpy(), preds, feature_columns, importances)
269
+
270
+ results = f"RMSE: {rmse:.4f}\nMAE: {mae:.4f}\nR2: {r2:.4f}"
271
+
272
+ return fig, results
 
 
 
 
 
 
273
 
274
 
 
 
 
275
  CUSTOM_CSS = """
276
+ .gradio-container {max-width: 1400px !important;}
 
 
 
 
 
277
  """
278
 
279
  with gr.Blocks(title=APP_TITLE) as demo:
 
282
 
283
  with gr.Tabs():
284
  with gr.TabItem("🔎 Explorer"):
285
+ dataset_dropdown = gr.Dropdown(list(REPO_CONFIG.keys()), value="Amplitude Damping", label="Dataset")
286
+ split_dropdown = gr.Dropdown(["train"], value="train", label="Split")
287
+ explorer_df = gr.Dataframe(label="Preview")
288
+ raw_qasm = gr.Code(label="Raw QASM", language=None)
289
+ transpiled_qasm = gr.Code(label="Transpiled QASM", language=None)
290
+ info_box = gr.Markdown()
291
+ summary_box = gr.Markdown()
 
 
 
 
 
292
 
293
  with gr.TabItem("🧠 Regression"):
294
+ feature_picker = gr.CheckboxGroup(label="Input features")
295
+ test_size = gr.Slider(0.1, 0.4, value=0.2, label="Test split")
296
+ n_estimators = gr.Slider(50, 300, value=150, label="Trees")
297
+ max_depth = gr.Slider(2, 20, value=10, label="Max depth")
298
+ seed = gr.Number(value=42, label="Random seed")
299
+ run_btn = gr.Button("Train & Evaluate", variant="primary")
300
+ plot = gr.Plot()
301
+ metrics = gr.Markdown()
 
 
 
302
 
303
  with gr.TabItem("📖 Guide"):
304
  gr.Markdown(load_guide_content())
305
 
306
  gr.Markdown("---")
307
  gr.Markdown(
308
+ "### 🔗 Links \n"
310
  "[Website](https://qsbench.github.io) | [Hugging Face](https://huggingface.co/QSBench) | [GitHub](https://github.com/QSBench)"
311
  )
312
 
313
+ dataset_dropdown.change(refresh_explorer, [dataset_dropdown, split_dropdown], [split_dropdown, explorer_df, raw_qasm, transpiled_qasm, info_box, summary_box])
314
+ split_dropdown.change(refresh_explorer, [dataset_dropdown, split_dropdown], [split_dropdown, explorer_df, raw_qasm, transpiled_qasm, info_box, summary_box])
 
 
 
 
315
 
316
+ dataset_dropdown.change(sync_feature_picker, [dataset_dropdown], [feature_picker])
317
+ run_btn.click(train_regressor, [dataset_dropdown, feature_picker, test_size, n_estimators, max_depth, seed], [plot, metrics])
 
 
 
318
 
319
+ demo.load(refresh_explorer, [dataset_dropdown, split_dropdown], [split_dropdown, explorer_df, raw_qasm, transpiled_qasm, info_box, summary_box])
320
+ demo.load(sync_feature_picker, [dataset_dropdown], [feature_picker])
 
 
 
 
321
 
322
 
323
  if __name__ == "__main__":
324
  demo.launch(theme=gr.themes.Soft(), css=CUSTOM_CSS)
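For reference, the regression path in the updated file (imputation, scaling, and a random forest) can be exercised outside Gradio. The sketch below is a standalone illustration under the assumption that a frame with the chosen feature columns and a `meyer_wallach` target is available; the synthetic frame here only stands in for a loaded QSBench shard, and the column names simply mirror the diff.

```python
# Standalone sketch of the train/evaluate flow from train_regressor();
# the random frame below is synthetic and only stands in for a QSBench shard.
import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.impute import SimpleImputer
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

rng = np.random.default_rng(42)
df = pd.DataFrame({
    "depth": rng.integers(1, 30, 200),
    "total_gates": rng.integers(5, 200, 200),
    "cx_count": rng.integers(0, 50, 200),
    "meyer_wallach": rng.random(200),          # placeholder target in [0, 1)
})

features = ["depth", "total_gates", "cx_count"]
X_train, X_test, y_train, y_test = train_test_split(
    df[features], df["meyer_wallach"], test_size=0.2, random_state=42
)

model = Pipeline([
    ("imputer", SimpleImputer()),
    ("scaler", StandardScaler()),
    ("regressor", RandomForestRegressor(
        n_estimators=150, max_depth=10, random_state=42, n_jobs=-1)),
])
model.fit(X_train, y_train)
preds = model.predict(X_test)

print(f"RMSE={np.sqrt(mean_squared_error(y_test, preds)):.4f} "
      f"MAE={mean_absolute_error(y_test, preds):.4f} "
      f"R2={r2_score(y_test, preds):.4f}")
```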