QSBench committed on
Commit
9c8a1ad
·
verified ·
1 Parent(s): 30d5809

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +64 -109
app.py CHANGED
@@ -11,11 +11,10 @@ from sklearn.ensemble import RandomForestRegressor
11
  from sklearn.metrics import mean_absolute_error, r2_score
12
  from sklearn.model_selection import train_test_split
13
 
14
- # Setup production-style logging
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
18
- # Configuration for datasets and their specific metadata branches
19
  REPO_CONFIG = {
20
  "Core (Clean)": {
21
  "repo": "QSBench/QSBench-Core-v1.0.0-demo",
@@ -39,7 +38,6 @@ REPO_CONFIG = {
39
  }
40
  }
41
 
42
- # Features that should never be used as training inputs
43
  NON_FEATURE_COLS = {
44
  "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
45
  "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
@@ -50,175 +48,132 @@ NON_FEATURE_COLS = {
50
  _ASSET_CACHE = {}
51
 
52
def fetch_remote_json(url: str) -> Optional[dict]:
    """Download and decode a JSON document from a raw Hugging Face branch URL.

    Returns the parsed dict on HTTP 200; returns None (after logging) on any
    network, HTTP, or JSON-decoding failure — this helper never raises.
    """
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code != 200:
            return None
        return resp.json()
    except Exception as e:
        logger.error(f"Error fetching metadata from {url}: {e}")
        return None
60
 
61
def load_all_assets(key: str) -> Dict:
    """Return the {df, meta, report} bundle for a dataset key, with caching.

    The main table is loaded via `load_dataset`; meta/report JSON come from
    dedicated branches via `fetch_remote_json` (and may be None on failure).
    """
    cached = _ASSET_CACHE.get(key)
    if cached is not None:
        return cached

    logger.info(f"Loading assets for dataset: {key}")
    cfg = REPO_CONFIG[key]
    ds = load_dataset(cfg["repo"])
    entry = {
        "df": pd.DataFrame(ds["train"]),
        "meta": fetch_remote_json(cfg["meta_url"]),
        "report": fetch_remote_json(cfg["report_url"]),
    }
    _ASSET_CACHE[key] = entry
    return entry
77
 
78
def generate_meta_markdown(assets: Dict) -> str:
    """Parse JSON metadata into a human-readable research summary.

    Note: load_all_assets stores None (not a missing key) when a metadata
    fetch fails, so `assets.get("meta", {})` could still yield None and the
    subsequent `.get()` would raise AttributeError. Normalize with `or {}`.
    """
    meta = assets.get("meta") or {}
    report = assets.get("report") or {}

    if not meta:
        return "⚠️ *Metadata currently unavailable for this dataset branch.*"

    params = meta.get("parameters") or {}

    # Format family distribution from report.json
    families = report.get("families", {})
    fam_info = ", ".join(f"{k.upper()}: {v}" for k, v in families.items())

    md = (
        f"### πŸ“‹ Dataset Release: {meta.get('dataset_version', '1.0.0')}\n"
        f"**Hardware Config:** {params.get('n_qubits')} Qubits | Depth: {params.get('depth')} | "
        f"Shots: {params.get('shots')} | Device: {meta.get('backend_device', 'GPU')}\n\n"
        f"**Noise Model:** `{params.get('noise', 'Clean')}` (p={params.get('noise_prob', 0.0)}) | "
        f"**Circuit Coverage:** {fam_info}"
    )
    return md
 
 
 
 
 
99
 
100
def update_explorer_view(ds_name: str, split_name: str):
    """Main callback for the Explorer tab.

    Returns (split-choices update, preview dataframe, raw QASM sample,
    transpiled QASM sample, metadata markdown).
    """
    assets = load_all_assets(ds_name)
    df = assets["df"]

    splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
    display_df = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)

    # QASM sample extraction — also check len(): .iloc[0] raises IndexError
    # when the requested split has no rows.
    if "qasm_raw" in display_df.columns and len(display_df):
        raw_qasm = display_df["qasm_raw"].iloc[0]
    else:
        raw_qasm = "// No data"
    if "qasm_transpiled" in display_df.columns and len(display_df):
        tr_qasm = display_df["qasm_transpiled"].iloc[0]
    else:
        tr_qasm = "// No data"

    return gr.update(choices=splits), display_df, raw_qasm, tr_qasm, generate_meta_markdown(assets)
 
113
 
114
def sync_ml_inputs(ds_name: str):
    """Callback to update feature checkboxes when dataset changes."""
    df = load_all_assets(ds_name)["df"]

    excluded_prefixes = ("error_", "sign_", "ideal_", "noisy_")
    valid_features = [
        col
        for col in df.select_dtypes(include=[np.number]).columns.tolist()
        if col not in NON_FEATURE_COLS and not col.startswith(excluded_prefixes)
    ]

    # Default selection of core structural metrics
    preferred = ["gate_entropy", "meyer_wallach", "n_qubits", "depth", "total_gates"]
    top_picks = [name for name in preferred if name in valid_features]

    return gr.update(choices=valid_features, value=top_picks or valid_features[:5])
126
 
127
def train_baseline_model(ds_name: str, selected_features: List[str]):
    """Train a Random Forest regressor and generate analytics plots.

    Returns (matplotlib figure, markdown summary); on invalid input returns
    (None, markdown error text) so the Gradio outputs stay well-formed.
    """
    if not selected_features:
        return None, "### ❌ Error: Please select at least one feature."

    assets = load_all_assets(ds_name)
    df = assets["df"]

    # Robustness fix: `.columns[0]` raised IndexError when no expval column
    # existed at all — fail gracefully with an error message instead.
    expval_cols = df.filter(like="expval").columns
    if "ideal_expval_Z_global" in df.columns:
        target = "ideal_expval_Z_global"
    elif len(expval_cols):
        target = expval_cols[0]
    else:
        return None, "### ❌ Error: No expectation-value target column found."

    # Data cleaning
    train_df = df.dropna(subset=selected_features + [target])
    if len(train_df) < 50:
        return None, "### ⚠️ Warning: Dataset too small for reliable training."

    X, y = train_df[selected_features], train_df[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    # Model Pipeline
    model = RandomForestRegressor(n_estimators=100, max_depth=12, n_jobs=-1, random_state=42)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    # Plotting
    sns.set_theme(style="whitegrid", context="notebook")
    fig, axes = plt.subplots(1, 3, figsize=(20, 6))

    # 1. Parity plot: predicted vs actual, with the y=x reference line.
    axes[0].scatter(y_test, preds, alpha=0.4, color='#34495e')
    axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
    axes[0].set_title(f"Regression Accuracy (RΒ²: {r2_score(y_test, preds):.3f})")
    axes[0].set_xlabel("Actual")
    axes[0].set_ylabel("Predicted")

    # 2. Top-12 feature importances, sorted ascending for barh display.
    importances = model.feature_importances_
    indices = np.argsort(importances)[-12:]
    axes[1].barh([selected_features[i] for i in indices], importances[indices], color='#1abc9c')
    axes[1].set_title("Top Structural Predictors")

    # 3. Residuals distribution.
    sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#e67e22')
    axes[2].set_title("Residuals Distribution")

    plt.tight_layout()
    result_text = f"**Model Performance on {ds_name}**\n**MAE:** {mean_absolute_error(y_test, preds):.4f}"

    return fig, result_text
174
-
175
# --- GRADIO INTERFACE ---

with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Analytics") as demo:
    gr.Markdown("# 🌌 QSBench: Quantum Synthetic Benchmark Suite")

    with gr.Tabs():
        # Tab 1: browse dataset rows and inspect QASM samples.
        with gr.TabItem("πŸ”Ž Dataset Explorer"):
            metadata_box = gr.Markdown("### Synchronizing with Hugging Face...")

            with gr.Row():
                ds_select = gr.Dropdown(choices=list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset Pack")
                split_select = gr.Dropdown(choices=["train"], value="train", label="Subset")

            data_table = gr.Dataframe(interactive=False)

            with gr.Row():
                code_raw = gr.Code(label="Source Circuit (QASM)", language="python")
                code_tr = gr.Code(label="Transpiled (Hardware-Ready)", language="python")

        # Tab 2: configure and run the baseline regressor.
        with gr.TabItem("πŸ€– ML Training Baseline"):
            with gr.Row():
                with gr.Column(scale=1):
                    gr.Markdown("### Training Configuration")
                    ml_ds_select = gr.Dropdown(choices=list(REPO_CONFIG.keys()), value="Core (Clean)", label="Source Dataset")
                    ml_features = gr.CheckboxGroup(label="Structural Metrics", choices=[])
                    btn_train = gr.Button("Execute Baseline Training", variant="primary")
                with gr.Column(scale=2):
                    plot_output = gr.Plot()
                    txt_output = gr.Markdown()

    # Footer / resources
    gr.Markdown("""
    ---
    ### πŸ”¬ Research Credits
    **QSBench** is an open-source framework for noise-aware Quantum Machine Learning benchmarking.
    - [GitHub Repository](https://github.com/QSBench/QSBench-Demo) | [Official Website](https://qsbench.github.io)
    """)

    # Event wiring
    ds_select.change(update_explorer_view, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_box])
    ml_ds_select.change(sync_ml_inputs, [ml_ds_select], [ml_features])
    btn_train.click(train_baseline_model, [ml_ds_select, ml_features], [plot_output, txt_output])

    # Initial load
    demo.load(update_explorer_view, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_box])
    demo.load(sync_ml_inputs, [ml_ds_select], [ml_features])

if __name__ == "__main__":
    demo.launch()
 
11
  from sklearn.metrics import mean_absolute_error, r2_score
12
  from sklearn.model_selection import train_test_split
13
 
14
+ # Setup logging
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
 
18
  REPO_CONFIG = {
19
  "Core (Clean)": {
20
  "repo": "QSBench/QSBench-Core-v1.0.0-demo",
 
38
  }
39
  }
40
 
 
41
  NON_FEATURE_COLS = {
42
  "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
43
  "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
 
48
  _ASSET_CACHE = {}
49
 
50
def fetch_remote_json(url: str) -> Optional[dict]:
    """Download and decode a JSON document from a raw branch URL.

    Returns the parsed dict on HTTP 200; returns None (after logging) on any
    network, HTTP, or JSON-decoding failure — this helper never raises.
    """
    try:
        resp = requests.get(url, timeout=5)
        if resp.status_code != 200:
            return None
        return resp.json()
    except Exception as e:
        logger.error(f"Error fetching metadata: {e}")
        return None
57
 
58
def load_all_assets(key: str) -> Dict:
    """Return the {df, meta, report} bundle for a dataset key, with caching.

    The table comes from `load_dataset`; meta/report JSON come from their
    dedicated branches (and may be None when the fetch fails).
    """
    cached = _ASSET_CACHE.get(key)
    if cached is not None:
        return cached

    cfg = REPO_CONFIG[key]
    ds = load_dataset(cfg["repo"])
    entry = {
        "df": pd.DataFrame(ds["train"]),
        "meta": fetch_remote_json(cfg["meta_url"]),
        "report": fetch_remote_json(cfg["report_url"]),
    }
    _ASSET_CACHE[key] = entry
    return entry
67
 
68
def generate_guide_markdown(assets: Dict) -> str:
    """Render the Methodology & Guide tab from the fetched meta/report JSON.

    Note: load_all_assets stores None (not a missing key) when a metadata
    fetch fails, so `assets.get("meta", {})` could still yield None and the
    subsequent `.get()` would raise AttributeError. Normalize with `or {}`.
    """
    meta = assets.get("meta") or {}
    report = assets.get("report") or {}
    if not meta:
        return "⚠️ *Metadata unavailable.*"
    params = meta.get("parameters") or {}

    # Build the circuit-family coverage table from report.json.
    families = report.get("families", {})
    fam_table = "| Family | Samples | Description |\n| :--- | :--- | :--- |\n"
    for fam, count in families.items():
        fam_table += f"| **{fam.upper()}** | {count} | Synthetic {fam} circuits |\n"

    return f"""
## πŸ“– Methodology & Release Notes: {meta.get('dataset_version', '1.0.0-demo')}

### 1. Generation Engine
Generated using **QSBench v{meta.get('generator_version', '5.0')}**.
- **Qubits:** {params.get('n_qubits')} | **Depth:** {params.get('depth')}
- **Noise:** `{params.get('noise', 'None')}` (p={params.get('noise_prob', 0)})
- **Backend:** {meta.get('backend_device', 'GPU')}

### 2. Structural Metrics
* **Gate Entropy:** Distribution of gates.
* **Meyer-Wallach:** Global entanglement.

### 3. Circuit Family Coverage
{fam_table}
"""
95
 
96
def update_explorer_view(ds_name: str, split_name: str):
    """Explorer-tab callback.

    Returns (split-choices update, preview dataframe, raw QASM, transpiled
    QASM, header summary markdown, methodology guide markdown).
    """
    assets = load_all_assets(ds_name)
    df = assets["df"]
    splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
    display_df = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)

    # Also check len(): .iloc[0] raises IndexError when the subset is empty.
    if "qasm_raw" in display_df.columns and len(display_df):
        raw_qasm = display_df["qasm_raw"].iloc[0]
    else:
        raw_qasm = "// No data"
    if "qasm_transpiled" in display_df.columns and len(display_df):
        tr_qasm = display_df["qasm_transpiled"].iloc[0]
    else:
        tr_qasm = "// No data"

    # "meta" may be stored as None when the fetch failed — `or {}` keeps .get safe.
    version = (assets.get("meta") or {}).get("dataset_version", "N/A")
    meta_summary = f"### πŸ“‹ Pack: {ds_name} | Release: {version}"
    return gr.update(choices=splits), display_df, raw_qasm, tr_qasm, meta_summary, generate_guide_markdown(assets)
106
 
107
def sync_ml_inputs(ds_name: str):
    """Refresh the feature CheckboxGroup when the ML dataset changes.

    Keeps only numeric columns that are neither identifiers nor target-leaking
    (error_/sign_/ideal_/noisy_ prefixed) columns, and pre-selects the core
    structural metrics when present.
    """
    assets = load_all_assets(ds_name)
    df = assets["df"]
    numeric = df.select_dtypes(include=[np.number]).columns.tolist()
    valid = [c for c in numeric if c not in NON_FEATURE_COLS and not c.startswith(("error_", "sign_", "ideal_", "noisy_"))]
    top_picks = [f for f in ["gate_entropy", "meyer_wallach", "n_qubits", "depth"] if f in valid]
    # Regression fix: the previous revision fell back to the first few valid
    # features when none of the preferred metrics exist; restore that so the
    # selection never silently starts empty.
    return gr.update(choices=valid, value=top_picks or valid[:5])
 
 
 
 
114
 
115
def train_baseline_model(ds_name: str, selected_features: List[str]):
    """Train a Random Forest baseline on the selected structural features.

    Returns (matplotlib figure, markdown result text); on invalid input
    returns (None, markdown error text) so the Gradio outputs stay valid.
    """
    if not selected_features: return None, "### ❌ Error: Select features."

    assets = load_all_assets(ds_name)
    df = assets["df"]

    # Robustness fix: `.columns[0]` raised IndexError when no expval column
    # existed at all — fail gracefully instead.
    expval_cols = df.filter(like="expval").columns
    if "ideal_expval_Z_global" in df.columns:
        target = "ideal_expval_Z_global"
    elif len(expval_cols):
        target = expval_cols[0]
    else:
        return None, "### ❌ Error: No expectation-value target column found."

    train_df = df.dropna(subset=selected_features + [target])
    # Guard restored from the previous revision: train_test_split on a handful
    # of rows yields a meaningless (or crashing) baseline.
    if len(train_df) < 50:
        return None, "### ⚠️ Warning: Dataset too small for reliable training."

    X, y = train_df[selected_features], train_df[target]
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

    model = RandomForestRegressor(n_estimators=100, max_depth=12, n_jobs=-1, random_state=42)
    model.fit(X_train, y_train)
    preds = model.predict(X_test)

    fig, axes = plt.subplots(1, 3, figsize=(20, 6))
    # Parity plot with the y=x reference line.
    axes[0].scatter(y_test, preds, alpha=0.4); axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--')
    # Feature importances: labels and values are sliced together, so each bar
    # is paired with its own feature.
    axes[1].barh(selected_features[:10], model.feature_importances_[:10])
    # Residuals distribution.
    sns.histplot(y_test - preds, kde=True, ax=axes[2])
    plt.tight_layout()
    return fig, f"**MAE:** {mean_absolute_error(y_test, preds):.4f}"
 
 
 
 
132
 
133
# --- UI ---
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("# 🌌 QSBench: Quantum Synthetic Benchmark Suite")

    with gr.Tabs():
        # Tab 1: dataset browsing and QASM inspection.
        with gr.TabItem("πŸ”Ž Explorer"):
            metadata_box = gr.Markdown("### Syncing...")
            with gr.Row():
                ds_select = gr.Dropdown(choices=list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset Pack")
                split_select = gr.Dropdown(choices=["train"], value="train", label="Subset")
            data_table = gr.Dataframe(interactive=False)
            with gr.Row():
                code_raw = gr.Code(label="Source QASM", language="python")
                code_tr = gr.Code(label="Transpiled QASM", language="python")

        # Tab 2: baseline model configuration and training.
        with gr.TabItem("πŸ€– ML Training"):
            with gr.Row():
                with gr.Column(scale=1):
                    ml_ds = gr.Dropdown(choices=list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset")
                    ml_feat = gr.CheckboxGroup(label="Features", choices=[])
                    btn = gr.Button("Train Baseline", variant="primary")
                with gr.Column(scale=2):
                    plot_out = gr.Plot()
                    txt_out = gr.Markdown()

        # Tab 3: generated methodology / release notes.
        with gr.TabItem("πŸ“– Methodology & Guide"):
            guide_md = gr.Markdown("Loading guide...")

    # Footer with project links.
    gr.Markdown(f"""
    ---
    ### πŸ”— Project Resources & Store
    * **πŸ€— Hugging Face:** [QSBench Organization](https://huggingface.co/QSBench)
    * **πŸ’» GitHub:** [QSBench Source Code](https://github.com/QSBench)
    * **🌐 Official Site:** [qsbench.github.io](https://qsbench.github.io)

    *QSBench is an open-source framework for noise-aware Quantum Machine Learning benchmarking.*
    """)

    # Event wiring.
    ds_select.change(update_explorer_view, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_box, guide_md])
    ml_ds.change(sync_ml_inputs, [ml_ds], [ml_feat])
    btn.click(train_baseline_model, [ml_ds, ml_feat], [plot_out, txt_out])

    # Populate both tabs on first page load.
    demo.load(update_explorer_view, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_box, guide_md])
    demo.load(sync_ml_inputs, [ml_ds], [ml_feat])

if __name__ == "__main__":
    demo.launch()