QSBench committed on
Commit
581d034
·
verified ·
1 Parent(s): da72050

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -89
app.py CHANGED
@@ -11,7 +11,7 @@ from sklearn.ensemble import RandomForestRegressor
11
  from sklearn.metrics import mean_absolute_error, r2_score
12
  from sklearn.model_selection import train_test_split
13
 
14
- # --- CONFIG & LOGGING ---
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
@@ -38,162 +38,140 @@ REPO_CONFIG = {
38
  }
39
  }
40
 
41
- NON_FEATURE_COLS = {
42
- "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
43
- "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
44
- "noise_type", "noise_prob", "observable_bases", "observable_mode", "backend_device",
45
- "precision_mode", "circuit_signature", "ideal_expval_Z_global", "noisy_expval_Z_global"
46
- }
47
 
48
  _ASSET_CACHE = {}
49
 
50
- def fetch_remote_json(url: str) -> Optional[dict]:
51
- try:
52
- response = requests.get(url, timeout=5)
53
- return response.json() if response.status_code == 200 else None
54
- except:
55
- return None
56
-
57
  def load_all_assets(key: str) -> Dict:
58
  if key not in _ASSET_CACHE:
59
  ds = load_dataset(REPO_CONFIG[key]["repo"])
60
- _ASSET_CACHE[key] = {
61
- "df": pd.DataFrame(ds["train"]),
62
- "meta": fetch_remote_json(REPO_CONFIG[key]["meta_url"]),
63
- "report": fetch_remote_json(REPO_CONFIG[key]["report_url"])
64
- }
65
  return _ASSET_CACHE[key]
66
 
67
- def generate_guide_markdown(assets: Dict) -> str:
68
- meta = assets.get("meta", {})
 
 
69
  params = meta.get("parameters", {})
70
- report = assets.get("report", {})
71
 
72
- if not meta:
73
- return "### ⚠️ Metadata Unreachable"
74
-
75
- # Build the table WITHOUT leading indentation (indentation breaks Markdown)
76
  families = report.get("families", {})
77
- fam_table = "| Family | Samples | Description |\n| :--- | :--- | :--- |\n"
 
78
  for f, count in families.items():
79
  fam_table += f"| {f.upper()} | {count} | Synthetic {f} circuits |\n"
80
 
81
- guide = f"""
82
- ## 📖 Methodology & Release Notes: {meta.get('dataset_version', '1.0.0-demo')}
83
 
84
- ### 1. Generation Engine
85
- Dataset produced by **QSBench v{meta.get('generator_version', '5.0')}**.
86
  - **Hardware Profile:** {params.get('n_qubits')} Qubits | Depth: {params.get('depth')}
87
- - **Noise Model:** `{params.get('noise', 'None')}` (p={params.get('noise_prob', 0)})
88
- - **Backend:** {meta.get('backend_device', 'GPU')} | {meta.get('precision_mode', 'double')}
89
-
90
- ### 2. Structural Metrics
91
- - **Gate Entropy:** Measures circuit complexity and gate distribution diversity.
92
- - **Meyer-Wallach:** Global entanglement scalar.
93
- - **Adjacency:** Topological density of the interaction graph.
94
 
95
- ### 3. Circuit Family Coverage
96
  {fam_table}
97
 
98
- ### 4. Split distribution
99
- - **Train:** {report.get('splits', {}).get('train')}
100
- - **Validation:** {report.get('splits', {}).get('val')}
101
- - **Test:** {report.get('splits', {}).get('test')}
102
  """
103
- return guide
104
 
105
- def update_explorer_view(ds_name: str, split_name: str):
106
  assets = load_all_assets(ds_name)
107
  df = assets["df"]
108
  splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
109
  display_df = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
110
 
111
- raw_qasm = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns else "// N/A"
112
- tr_qasm = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns else "// N/A"
 
113
 
114
- meta_summary = f"### 📋 Pack: {ds_name} | Version: {assets.get('meta', {}).get('dataset_version', 'N/A')}"
115
- return gr.update(choices=splits), display_df, raw_qasm, tr_qasm, meta_summary, generate_guide_markdown(assets)
116
-
117
- def sync_ml_inputs(ds_name: str):
118
- assets = load_all_assets(ds_name)
119
- df = assets["df"]
120
- numeric = df.select_dtypes(include=[np.number]).columns.tolist()
121
- valid = [c for c in numeric if c not in NON_FEATURE_COLS and not c.startswith(("error_", "sign_", "ideal_", "noisy_"))]
122
- top_picks = [f for f in ["gate_entropy", "meyer_wallach", "n_qubits", "depth", "total_gates"] if f in valid]
123
- return gr.update(choices=valid, value=top_picks)
124
 
125
- def train_baseline_model(ds_name: str, selected_features: List[str]):
126
- if not selected_features: return None, "### ❌ Error: Select features."
127
  assets = load_all_assets(ds_name)
128
  df = assets["df"]
129
- target = "ideal_expval_Z_global" if "ideal_expval_Z_global" in df.columns else df.filter(like="expval").columns[0]
130
 
131
- train_df = df.dropna(subset=selected_features + [target])
132
- X, y = train_df[selected_features], train_df[target]
133
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
134
 
135
- model = RandomForestRegressor(n_estimators=100, max_depth=12, n_jobs=-1, random_state=42)
136
- model.fit(X_train, y_train)
137
  preds = model.predict(X_test)
138
 
139
  sns.set_theme(style="whitegrid", context="talk")
140
  fig, axes = plt.subplots(1, 3, figsize=(24, 8))
141
 
142
- axes[0].scatter(y_test, preds, alpha=0.4, color='#2c3e50')
143
  axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
144
- axes[0].set_title(f"Accuracy (R²: {r2_score(y_test, preds):.3f})")
145
 
146
  imp = model.feature_importances_
147
  idx = np.argsort(imp)[-10:]
148
- axes[1].barh([selected_features[i] for i in idx], imp[idx], color='#27ae60')
149
- axes[1].set_title("Feature Importance")
150
 
151
  sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
152
- axes[2].set_title("Residuals")
153
 
154
- plt.tight_layout(pad=4.0)
155
- return fig, f"**Model Performance** | MAE: {mean_absolute_error(y_test, preds):.4f}"
156
 
 
157
  with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
158
  gr.Markdown("# 🌌 QSBench: Quantum Analytics Hub")
159
 
160
  with gr.Tabs():
 
161
  with gr.TabItem("🔎 Explorer"):
162
- metadata_box = gr.Markdown("### Synchronizing data...")
163
  with gr.Row():
164
- ds_select = gr.Dropdown(choices=list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset Pack")
165
- split_select = gr.Dropdown(choices=["train"], value="train", label="Subset")
166
  data_table = gr.Dataframe(interactive=False)
167
  with gr.Row():
168
  code_raw = gr.Code(label="Source QASM", language="python")
169
  code_tr = gr.Code(label="Transpiled QASM", language="python")
170
 
 
171
  with gr.TabItem("🤖 ML Training"):
172
  with gr.Row():
173
  with gr.Column(scale=1):
174
- ml_ds = gr.Dropdown(choices=list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset")
175
- ml_feat = gr.CheckboxGroup(label="Structural Metrics", choices=[])
176
- btn = gr.Button("Train Baseline", variant="primary")
177
  with gr.Column(scale=2):
178
- plot_out = gr.Plot(); txt_out = gr.Markdown()
 
179
 
180
- with gr.TabItem("📖 Methodology & Guide"):
181
- guide_md = gr.Markdown("Loading research guide...")
 
 
182
 
 
183
  gr.Markdown(f"""
184
  ---
185
- ### 🔗 Official Links & Resources
186
- * **🤗 Hugging Face:** [**QSBench Organization**](https://huggingface.co/QSBench)
187
- * **💻 GitHub:** [**QSBench Repository**](https://github.com/QSBench)
188
- * **🌐 Website:** [**qsbench.github.io**](https://qsbench.github.io)
189
  """)
190
 
191
- ds_select.change(update_explorer_view, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_box, guide_md])
192
- ml_ds.change(sync_ml_inputs, [ml_ds], [ml_feat])
193
- btn.click(train_baseline_model, [ml_ds, ml_feat], [plot_out, txt_out])
 
 
194
 
195
- demo.load(update_explorer_view, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_box, guide_md])
196
- demo.load(sync_ml_inputs, [ml_ds], [ml_feat])
 
197
 
198
  if __name__ == "__main__":
199
  demo.launch()
 
11
  from sklearn.metrics import mean_absolute_error, r2_score
12
  from sklearn.model_selection import train_test_split
13
 
14
+ # --- CONFIG ---
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
 
38
  }
39
  }
40
 
41
+ NON_FEATURE_COLS = {"sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm", "qasm_raw", "qasm_transpiled"}
 
 
 
 
 
42
 
43
  _ASSET_CACHE = {}
44
 
 
 
 
 
 
 
 
45
  def load_all_assets(key: str) -> Dict:
46
  if key not in _ASSET_CACHE:
47
  ds = load_dataset(REPO_CONFIG[key]["repo"])
48
+ meta = requests.get(REPO_CONFIG[key]["meta_url"]).json()
49
+ report = requests.get(REPO_CONFIG[key]["report_url"]).json()
50
+ _ASSET_CACHE[key] = {"df": pd.DataFrame(ds["train"]), "meta": meta, "report": report}
 
 
51
  return _ASSET_CACHE[key]
52
 
53
+ # --- RENDER FUNCTIONS ---
54
+ def get_methodology_content(ds_name: str):
55
+ assets = load_all_assets(ds_name)
56
+ meta = assets["meta"]
57
  params = meta.get("parameters", {})
58
+ report = assets["report"]
59
 
 
 
 
 
60
  families = report.get("families", {})
61
+ # Important: Markdown tables require no leading whitespace at the start of each line
62
+ fam_table = "| Family | Samples | Description |\n|:---|:---|:---|\n"
63
  for f, count in families.items():
64
  fam_table += f"| {f.upper()} | {count} | Synthetic {f} circuits |\n"
65
 
66
+ return f"""
67
+ ## 📖 Methodology & Release Notes: {meta.get('dataset_version')}
68
 
69
+ ### 1. Generation Profile
70
+ Dataset produced via **QSBench v{meta.get('generator_version')}**.
71
  - **Hardware Profile:** {params.get('n_qubits')} Qubits | Depth: {params.get('depth')}
72
+ - **Noise Configuration:** `{params.get('noise')}` (p={params.get('noise_prob')})
73
+ - **Backend:** {meta.get('backend_device')} | {meta.get('precision_mode')} precision
 
 
 
 
 
74
 
75
+ ### 2. Circuit Family Coverage
76
  {fam_table}
77
 
78
+ ### 3. Structural Metric Definitions
79
+ - **Gate Entropy:** Measures circuit complexity and gate distribution diversity.
80
+ - **Meyer-Wallach:** Scalar measure of global entanglement.
81
+ - **Adjacency:** Graph density of the qubit interaction map.
82
  """
 
83
 
84
+ def update_explorer(ds_name: str, split_name: str):
85
  assets = load_all_assets(ds_name)
86
  df = assets["df"]
87
  splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
88
  display_df = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
89
 
90
+ raw = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns else "// N/A"
91
+ tr = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns else "// N/A"
92
+ meta_text = f"### 📋 {ds_name} | Version: {assets['meta'].get('dataset_version')}"
93
 
94
+ return gr.update(choices=splits), display_df, raw, tr, meta_text
 
 
 
 
 
 
 
 
 
95
 
96
+ def train_model(ds_name: str, features: List[str]):
97
+ if not features: return None, "### ❌ Select features"
98
  assets = load_all_assets(ds_name)
99
  df = assets["df"]
100
+ target = "ideal_expval_Z_global"
101
 
102
+ train_df = df.dropna(subset=features + [target])
103
+ X, y = train_df[features], train_df[target]
104
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
105
 
106
+ model = RandomForestRegressor(n_estimators=100, max_depth=10, n_jobs=-1).fit(X_train, y_train)
 
107
  preds = model.predict(X_test)
108
 
109
  sns.set_theme(style="whitegrid", context="talk")
110
  fig, axes = plt.subplots(1, 3, figsize=(24, 8))
111
 
112
+ axes[0].scatter(y_test, preds, alpha=0.3, color='#2c3e50')
113
  axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
114
+ axes[0].set_title(f"R² Score: {r2_score(y_test, preds):.3f}")
115
 
116
  imp = model.feature_importances_
117
  idx = np.argsort(imp)[-10:]
118
+ axes[1].barh([features[i] for i in idx], imp[idx], color='#27ae60')
119
+ axes[1].set_title("Top Metrics Importance")
120
 
121
  sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
122
+ axes[2].set_title("Residuals Distribution")
123
 
124
+ plt.tight_layout(pad=3.0)
125
+ return fig, f"**MAE:** {mean_absolute_error(y_test, preds):.4f}"
126
 
127
+ # --- UI INTERFACE ---
128
  with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
129
  gr.Markdown("# 🌌 QSBench: Quantum Analytics Hub")
130
 
131
  with gr.Tabs():
132
+ # TAB 1: EXPLORER
133
  with gr.TabItem("🔎 Explorer"):
134
+ metadata_info = gr.Markdown("### Loading...")
135
  with gr.Row():
136
+ ds_select = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset Pack")
137
+ split_select = gr.Dropdown(["train"], value="train", label="Subset")
138
  data_table = gr.Dataframe(interactive=False)
139
  with gr.Row():
140
  code_raw = gr.Code(label="Source QASM", language="python")
141
  code_tr = gr.Code(label="Transpiled QASM", language="python")
142
 
143
+ # TAB 2: ML
144
  with gr.TabItem("🤖 ML Training"):
145
  with gr.Row():
146
  with gr.Column(scale=1):
147
+ ml_ds = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
148
+ ml_feat = gr.CheckboxGroup(label="Metrics", choices=["gate_entropy", "meyer_wallach", "n_qubits", "depth", "total_gates"], value=["gate_entropy", "meyer_wallach"])
149
+ btn = gr.Button("Run Training", variant="primary")
150
  with gr.Column(scale=2):
151
+ plot_out = gr.Plot()
152
+ txt_out = gr.Markdown()
153
 
154
+ # TAB 3: METHODOLOGY (WITH DATASET SELECTOR)
155
+ with gr.TabItem("📖 Methodology"):
156
+ method_ds_select = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="View methodology for:")
157
+ guide_md = gr.Markdown()
158
 
159
+ # LINKS FOOTER
160
  gr.Markdown(f"""
161
  ---
162
+ ### 🔗 Project Links
163
+ [**🤗 Hugging Face**](https://huggingface.co/QSBench) | [**💻 GitHub**](https://github.com/QSBench) | [**🌐 Website**](https://qsbench.github.io)
 
 
164
  """)
165
 
166
+ # EVENT HANDLERS
167
+ ds_select.change(update_explorer, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_info])
168
+ ml_ds.change(lambda x: gr.update(), [ml_ds], []) # Simple sync
169
+ method_ds_select.change(get_methodology_content, [method_ds_select], [guide_md])
170
+ btn.click(train_model, [ml_ds, ml_feat], [plot_out, txt_out])
171
 
172
+ # INITIAL LOAD
173
+ demo.load(update_explorer, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_info])
174
+ demo.load(get_methodology_content, [method_ds_select], [guide_md])
175
 
176
  if __name__ == "__main__":
177
  demo.launch()