Spaces:

QSBench
/

Multi-Target_Regression

Running

App Files Files Community

QSBench committed 1 day ago

Commit

3ba9c8c

verified ·

1 Parent(s): 2cc591e

Update app.py

Browse files

Files changed (1) hide show

app.py +54 -59

app.py CHANGED Viewed

@@ -22,14 +22,12 @@ DATASET_MAP = {
 LOCAL_BENCHMARK_CSV = "noise_benchmark_results.csv"
 TARGET_COL = "ideal_expval_Z_global"
 EXCLUDE_COLS = {
-    "sample_id", "sample_seed", "split", "circuit_qasm", "circuit_qasm_transpiled",
-    "ideal_expval_Z_global", "ideal_expval_X_global", "ideal_expval_Y_global",
-    "noisy_expval_Z_global", "noisy_expval_X_global", "noisy_expval_Y_global",
-    "error_Z_global", "error_X_global", "error_Y_global",
-    "sign_ideal_Z_global", "sign_noisy_Z_global",
-    "sign_ideal_X_global", "sign_noisy_X_global",
-    "sign_ideal_Y_global", "sign_noisy_Y_global",
 }
 dataset_cache = {}
@@ -46,7 +44,7 @@ def get_df(dataset_key):
 def get_numeric_feature_cols(df: pd.DataFrame) -> list[str]:
     numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
-    return [c for c in numeric_cols if c not in EXCLUDE_COLS and not c.startswith("error_")]
 # =========================================================
 # TAB FUNCTIONS
@@ -54,103 +52,100 @@ def get_numeric_feature_cols(df: pd.DataFrame) -> list[str]:
 def update_explorer(dataset_name, split_name):
     df = get_df(dataset_name)
     splits = df["split"].unique().tolist() if "split" in df.columns else ["all"]
     filtered = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
-    qasm_col = "circuit_qasm" if "circuit_qasm" in df.columns else None
-    qasm_sample = filtered[qasm_col].iloc[0] if qasm_col and not filtered.empty else "// QASM not available in this pack"
-    return gr.update(choices=splits), filtered, qasm_sample
-def run_model_demo(dataset_name):
     df = get_df(dataset_name)
-    feature_cols = get_numeric_feature_cols(df)
     target = TARGET_COL if TARGET_COL in df.columns else df.filter(like="expval").columns[0]
-    work_df = df.dropna(subset=feature_cols + [target]).reset_index(drop=True)
-    X, y = work_df[feature_cols], work_df[target]
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     model = RandomForestRegressor(n_estimators=50, max_depth=10, n_jobs=-1, random_state=42)
     model.fit(X_train, y_train)
     preds = model.predict(X_test)
     fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5))
     # 1. Parity Plot
     ax1.scatter(y_test, preds, alpha=0.4, color='#636EFA')
     ax1.plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
     ax1.set_title(f"Parity Plot (R²={r2_score(y_test, preds):.3f})")
     # 2. Feature Importance
     importances = model.feature_importances_
-    indices = np.argsort(importances)[-10:]
-    ax2.barh(range(10), importances[indices], color='#EF553B')
-    ax2.set_yticks(range(10))
-    ax2.set_yticklabels([feature_cols[i] for i in indices])
-    ax2.set_title("Top 10 Features")
-    # 3. Residuals (Error Distribution)
     sns.histplot(y_test - preds, kde=True, ax=ax3, color='#00CC96')
-    ax3.set_title("Residuals Distribution")
     plt.tight_layout()
-    return fig, f"### Baseline Analysis for {dataset_name}\nMAE: {mean_absolute_error(y_test, preds):.4f}"
-def load_benchmark():
-    path = Path(LOCAL_BENCHMARK_CSV)
-    if not path.exists(): return None, None, "File noise_benchmark_results.csv not found."
-    df = pd.read_csv(path)
-    fig_r2, ax = plt.subplots(figsize=(8, 4))
-    ax.bar(df["dataset"], df["r2"], color=['#636EFA', '#EF553B', '#00CC96', '#AB63FA'])
-    plt.xticks(rotation=15)
-    plt.tight_layout()
-    return df, fig_r2, "Benchmark comparison completed."
 # =========================================================
 # INTERFACE
 # =========================================================
 with gr.Blocks(title="QSBench Unified Explorer") as demo:
-    gr.Markdown("# 🌌 QSBench: Quantum Synthetic Benchmark Explorer\n**Professional-grade datasets for Noise-Aware QML and Hardware Optimization.**")
     with gr.Tabs():
         with gr.TabItem("🔎 Dataset Explorer"):
             with gr.Row():
                 ds_selector = gr.Dropdown(choices=list(DATASET_MAP.keys()), value="Core (Clean)", label="Dataset Pack")
-                split_selector = gr.Dropdown(choices=["train", "test", "validation"], value="train", label="Split")
-            data_table = gr.Dataframe(interactive=False)
-            qasm_view = gr.Code(label="Circuit QASM Preview (First row of selection)", language="python")
-            ds_selector.change(update_explorer, [ds_selector, split_selector], [split_selector, data_table, qasm_view])
-            split_selector.change(update_explorer, [ds_selector, split_selector], [split_selector, data_table, qasm_view])
         with gr.TabItem("🤖 ML Baseline Demo"):
-            gr.Markdown("Train a Random Forest regressor to evaluate how well structural circuit features predict expectation values.")
-            model_ds_selector = gr.Dropdown(choices=list(DATASET_MAP.keys()), value="Core (Clean)", label="Select Target Pack")
-            train_btn = gr.Button("Train Baseline Model", variant="primary")
-            plot_output = gr.Plot()
-            text_output = gr.Markdown()
-            train_btn.click(run_model_demo, [model_ds_selector], [plot_output, text_output])
-        with gr.TabItem("📊 Cross-Dataset Benchmark"):
-            gr.Markdown("Comparison of model performance across different noise environments and hardware transpilation stages.")
-            bench_btn = gr.Button("Analyze Robustness Across All Packs")
-            bench_table = gr.Dataframe()
-            bench_plot = gr.Plot()
-            bench_btn.click(load_benchmark, outputs=[bench_table, bench_plot, text_output])
     gr.Markdown("""
     ---
-    ### 🔬 Research Resources
-    This interface provides a structural overview of the QSBench dataset family. These datasets are designed to support reproducible research in quantum error mitigation and machine learning.
     - **GitHub**: [QSBench/QSBench-Demo](https://github.com/QSBench/QSBench-Demo)
     - **Website**: [qsbench.github.io](https://qsbench.github.io)
-    - **Hugging Face**: [Explore all datasets](https://huggingface.co/QSBench)
     """)
-    demo.load(update_explorer, [ds_selector, split_selector], [split_selector, data_table, qasm_view])
 if __name__ == "__main__":
     demo.launch(theme=gr.themes.Soft())

 LOCAL_BENCHMARK_CSV = "noise_benchmark_results.csv"
 TARGET_COL = "ideal_expval_Z_global"
+# Exclude non-numeric data and target variables from the feature set.
 EXCLUDE_COLS = {
+    "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
+    "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
+    "noise_type", "observable_bases", "observable_mode", "backend_device",
+    "precision_mode", "circuit_signature",
+    # Global expectation values are regression labels, not inputs. The X/Y
+    # entries were excluded before this change and are kept excluded so they
+    # cannot leak into the selectable feature list.
+    "ideal_expval_Z_global", "ideal_expval_X_global", "ideal_expval_Y_global",
+    "noisy_expval_Z_global", "noisy_expval_X_global", "noisy_expval_Y_global",
 }
 dataset_cache = {}
 def get_numeric_feature_cols(df: pd.DataFrame) -> list[str]:
+    """Return the numeric columns of ``df`` that may be used as model features.
+
+    A numeric column is dropped when it is listed in ``EXCLUDE_COLS`` or when
+    its name starts with ``error_`` or ``sign_``.
+    """
     numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
+    skipped_prefixes = ("error_", "sign_")
+    return [c for c in numeric_cols if c not in EXCLUDE_COLS and not c.startswith(skipped_prefixes)]
 # =========================================================
 # TAB FUNCTIONS
 def update_explorer(dataset_name, split_name):
+    """Refresh the explorer widgets for the chosen dataset pack and split.
+
+    Returns, in order: an update carrying the split choices found in the
+    dataset, the first 10 rows of the selection, the raw QASM of its first
+    row, the transpiled QASM of its first row, and an update for the feature
+    selector (all candidate features as choices, the first five pre-checked).
+    """
     df = get_df(dataset_name)
     splits = df["split"].unique().tolist() if "split" in df.columns else ["all"]
     filtered = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
+    # Take the QASM previews from the dataset's own columns. A split name that
+    # matches no rows leaves nothing to index with .iloc[0], so fall back to
+    # the placeholder text instead of raising IndexError.
+    has_rows = not filtered.empty
+    qasm_raw = filtered["qasm_raw"].iloc[0] if has_rows and "qasm_raw" in filtered.columns else "// No raw QASM"
+    qasm_tr = filtered["qasm_transpiled"].iloc[0] if has_rows and "qasm_transpiled" in filtered.columns else "// No transpiled QASM"
+    # Refresh the feature list shown on the ML tab.
+    features = get_numeric_feature_cols(df)
+    return gr.update(choices=splits), filtered, qasm_raw, qasm_tr, gr.update(choices=features, value=features[:5])
+def run_model_demo(dataset_name, selected_features):
+    """Train a Random Forest baseline on the chosen features and plot diagnostics.
+
+    Returns ``(figure, markdown)``. The figure holds a parity plot, the
+    feature importances and the residual histogram. When training cannot run
+    (no usable feature, or too few complete rows) the figure is ``None`` and
+    the markdown explains why.
+    """
+    if not selected_features:
+        return None, "### ⚠️ Please select at least one feature."
     df = get_df(dataset_name)
     target = TARGET_COL if TARGET_COL in df.columns else df.filter(like="expval").columns[0]
+    # The checkbox choices are filled from the Explorer tab's dataset, which
+    # may differ from the one trained on here; keep only columns that exist
+    # in this frame, and never train on the target itself.
+    feature_cols = [c for c in selected_features if c in df.columns and c != target]
+    if not feature_cols:
+        return None, "### ⚠️ None of the selected features are available in this dataset."
+    work_df = df.dropna(subset=feature_cols + [target]).reset_index(drop=True)
+    # An 80/20 split needs at least one row on each side.
+    if len(work_df) < 2:
+        return None, "### ⚠️ Not enough complete rows to train a model."
+    X, y = work_df[feature_cols], work_df[target]
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     model = RandomForestRegressor(n_estimators=50, max_depth=10, n_jobs=-1, random_state=42)
     model.fit(X_train, y_train)
     preds = model.predict(X_test)
+    sns.set_theme(style="whitegrid")
     fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(18, 5))
     # 1. Parity Plot
     ax1.scatter(y_test, preds, alpha=0.4, color='#636EFA')
     ax1.plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
+    ax1.set_xlabel("Actual")
+    ax1.set_ylabel("Predicted")
     ax1.set_title(f"Parity Plot (R²={r2_score(y_test, preds):.3f})")
     # 2. Feature Importance
     importances = model.feature_importances_
+    # Ascending order, so the most important feature is the top bar.
+    indices = np.argsort(importances)
+    ax2.barh(range(len(indices)), importances[indices], color='#EF553B')
+    ax2.set_yticks(range(len(indices)))
+    ax2.set_yticklabels([feature_cols[i] for i in indices])
+    ax2.set_title("Feature Importance")
+    # 3. Residuals
     sns.histplot(y_test - preds, kde=True, ax=ax3, color='#00CC96')
+    ax3.set_title("Residuals (Error Distribution)")
     plt.tight_layout()
+    return fig, f"### Results for {dataset_name}\n**MAE:** {mean_absolute_error(y_test, preds):.4f} | **Features used:** {len(feature_cols)}"
 # =========================================================
 # INTERFACE
 # =========================================================
 with gr.Blocks(title="QSBench Unified Explorer") as demo:
+    gr.Markdown("# 🌌 QSBench: Quantum Synthetic Benchmark Explorer")
     with gr.Tabs():
         with gr.TabItem("🔎 Dataset Explorer"):
             with gr.Row():
                 ds_selector = gr.Dropdown(choices=list(DATASET_MAP.keys()), value="Core (Clean)", label="Dataset Pack")
+                split_selector = gr.Dropdown(choices=["train"], value="train", label="Split")
+            # `overflow_row_behaviour` is a Gradio 3-era argument that newer
+            # releases are expected to reject; "paginate" was its default
+            # there, so leaving it out keeps the same table behaviour.
+            data_table = gr.Dataframe(interactive=False)
+            with gr.Row():
+                qasm_raw_view = gr.Code(label="Raw QASM (Source)", language="python", lines=10)
+                qasm_tr_view = gr.Code(label="Transpiled QASM (Hardware-ready)", language="python", lines=10)
         with gr.TabItem("🤖 ML Baseline Demo"):
+            with gr.Row():
+                with gr.Column(scale=1):
+                    model_ds_selector = gr.Dropdown(choices=list(DATASET_MAP.keys()), value="Core (Clean)", label="Dataset")
+                    feature_selector = gr.CheckboxGroup(label="Select Features for Training", choices=[])
+                    train_btn = gr.Button("Train Model", variant="primary")
+                with gr.Column(scale=2):
+                    plot_output = gr.Plot()
+                    text_output = gr.Markdown()
     gr.Markdown("""
     ---
+    ### 🔬 Research & Data
+    This Space provides structural validation of the **QSBench** dataset family.
     - **GitHub**: [QSBench/QSBench-Demo](https://github.com/QSBench/QSBench-Demo)
     - **Website**: [qsbench.github.io](https://qsbench.github.io)
     """)
+    # Event Linking
+    # Every explorer refresh writes to the same five components.
+    explorer_inputs = [ds_selector, split_selector]
+    explorer_outputs = [split_selector, data_table, qasm_raw_view, qasm_tr_view, feature_selector]
+    ds_selector.change(update_explorer, explorer_inputs, explorer_outputs)
+    split_selector.change(update_explorer, explorer_inputs, explorer_outputs)
+    # NOTE(review): feature_selector is refreshed only from the Explorer tab's
+    # ds_selector, while training reads model_ds_selector; the two can point
+    # at different packs. Confirm whether they should be linked.
+    train_btn.click(run_model_demo, [model_ds_selector, feature_selector], [plot_output, text_output])
+    # Initial load
+    demo.load(update_explorer, explorer_inputs, explorer_outputs)
 if __name__ == "__main__":
     demo.launch(theme=gr.themes.Soft())