QSBench committed on
Commit
581d034
·
verified ·
1 Parent(s): da72050

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +67 -89
app.py CHANGED
@@ -11,7 +11,7 @@ from sklearn.ensemble import RandomForestRegressor
11
  from sklearn.metrics import mean_absolute_error, r2_score
12
  from sklearn.model_selection import train_test_split
13
 
14
- # --- CONFIG & LOGGING ---
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
@@ -38,162 +38,140 @@ REPO_CONFIG = {
38
  }
39
  }
40
 
41
- NON_FEATURE_COLS = {
42
- "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
43
- "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
44
- "noise_type", "noise_prob", "observable_bases", "observable_mode", "backend_device",
45
- "precision_mode", "circuit_signature", "ideal_expval_Z_global", "noisy_expval_Z_global"
46
- }
47
 
48
  _ASSET_CACHE = {}
49
 
50
- def fetch_remote_json(url: str) -> Optional[dict]:
51
- try:
52
- response = requests.get(url, timeout=5)
53
- return response.json() if response.status_code == 200 else None
54
- except:
55
- return None
56
-
57
  def load_all_assets(key: str) -> Dict:
58
  if key not in _ASSET_CACHE:
59
  ds = load_dataset(REPO_CONFIG[key]["repo"])
60
- _ASSET_CACHE[key] = {
61
- "df": pd.DataFrame(ds["train"]),
62
- "meta": fetch_remote_json(REPO_CONFIG[key]["meta_url"]),
63
- "report": fetch_remote_json(REPO_CONFIG[key]["report_url"])
64
- }
65
  return _ASSET_CACHE[key]
66
 
67
- def generate_guide_markdown(assets: Dict) -> str:
68
- meta = assets.get("meta", {})
 
 
69
  params = meta.get("parameters", {})
70
- report = assets.get("report", {})
71
 
72
- if not meta:
73
- return "### ⚠️ Metadata Unreachable"
74
-
75
- # Build the table WITHOUT leading indentation (indentation breaks Markdown)
76
  families = report.get("families", {})
77
- fam_table = "| Family | Samples | Description |\n| :--- | :--- | :--- |\n"
 
78
  for f, count in families.items():
79
  fam_table += f"| {f.upper()} | {count} | Synthetic {f} circuits |\n"
80
 
81
- guide = f"""
82
- ## 📖 Methodology & Release Notes: {meta.get('dataset_version', '1.0.0-demo')}
83
 
84
- ### 1. Generation Engine
85
- Dataset produced by **QSBench v{meta.get('generator_version', '5.0')}**.
86
  - **Hardware Profile:** {params.get('n_qubits')} Qubits | Depth: {params.get('depth')}
87
- - **Noise Model:** `{params.get('noise', 'None')}` (p={params.get('noise_prob', 0)})
88
- - **Backend:** {meta.get('backend_device', 'GPU')} | {meta.get('precision_mode', 'double')}
89
-
90
- ### 2. Structural Metrics
91
- - **Gate Entropy:** Measures circuit complexity and gate distribution diversity.
92
- - **Meyer-Wallach:** Global entanglement scalar.
93
- - **Adjacency:** Topological density of the interaction graph.
94
 
95
- ### 3. Circuit Family Coverage
96
  {fam_table}
97
 
98
- ### 4. Split distribution
99
- - **Train:** {report.get('splits', {}).get('train')}
100
- - **Validation:** {report.get('splits', {}).get('val')}
101
- - **Test:** {report.get('splits', {}).get('test')}
102
  """
103
- return guide
104
 
105
- def update_explorer_view(ds_name: str, split_name: str):
106
  assets = load_all_assets(ds_name)
107
  df = assets["df"]
108
  splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
109
  display_df = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
110
 
111
- raw_qasm = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns else "// N/A"
112
- tr_qasm = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns else "// N/A"
 
113
 
114
- meta_summary = f"### 📋 Pack: {ds_name} | Version: {assets.get('meta', {}).get('dataset_version', 'N/A')}"
115
- return gr.update(choices=splits), display_df, raw_qasm, tr_qasm, meta_summary, generate_guide_markdown(assets)
116
-
117
- def sync_ml_inputs(ds_name: str):
118
- assets = load_all_assets(ds_name)
119
- df = assets["df"]
120
- numeric = df.select_dtypes(include=[np.number]).columns.tolist()
121
- valid = [c for c in numeric if c not in NON_FEATURE_COLS and not c.startswith(("error_", "sign_", "ideal_", "noisy_"))]
122
- top_picks = [f for f in ["gate_entropy", "meyer_wallach", "n_qubits", "depth", "total_gates"] if f in valid]
123
- return gr.update(choices=valid, value=top_picks)
124
 
125
- def train_baseline_model(ds_name: str, selected_features: List[str]):
126
- if not selected_features: return None, "### ❌ Error: Select features."
127
  assets = load_all_assets(ds_name)
128
  df = assets["df"]
129
- target = "ideal_expval_Z_global" if "ideal_expval_Z_global" in df.columns else df.filter(like="expval").columns[0]
130
 
131
- train_df = df.dropna(subset=selected_features + [target])
132
- X, y = train_df[selected_features], train_df[target]
133
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
134
 
135
- model = RandomForestRegressor(n_estimators=100, max_depth=12, n_jobs=-1, random_state=42)
136
- model.fit(X_train, y_train)
137
  preds = model.predict(X_test)
138
 
139
  sns.set_theme(style="whitegrid", context="talk")
140
  fig, axes = plt.subplots(1, 3, figsize=(24, 8))
141
 
142
- axes[0].scatter(y_test, preds, alpha=0.4, color='#2c3e50')
143
  axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
144
- axes[0].set_title(f"Accuracy (R²: {r2_score(y_test, preds):.3f})")
145
 
146
  imp = model.feature_importances_
147
  idx = np.argsort(imp)[-10:]
148
- axes[1].barh([selected_features[i] for i in idx], imp[idx], color='#27ae60')
149
- axes[1].set_title("Feature Importance")
150
 
151
  sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
152
- axes[2].set_title("Residuals")
153
 
154
- plt.tight_layout(pad=4.0)
155
- return fig, f"**Model Performance** | MAE: {mean_absolute_error(y_test, preds):.4f}"
156
 
 
157
  with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
158
  gr.Markdown("# 🌌 QSBench: Quantum Analytics Hub")
159
 
160
  with gr.Tabs():
 
161
  with gr.TabItem("🔎 Explorer"):
162
- metadata_box = gr.Markdown("### Synchronizing data...")
163
  with gr.Row():
164
- ds_select = gr.Dropdown(choices=list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset Pack")
165
- split_select = gr.Dropdown(choices=["train"], value="train", label="Subset")
166
  data_table = gr.Dataframe(interactive=False)
167
  with gr.Row():
168
  code_raw = gr.Code(label="Source QASM", language="python")
169
  code_tr = gr.Code(label="Transpiled QASM", language="python")
170
 
 
171
  with gr.TabItem("🤖 ML Training"):
172
  with gr.Row():
173
  with gr.Column(scale=1):
174
- ml_ds = gr.Dropdown(choices=list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset")
175
- ml_feat = gr.CheckboxGroup(label="Structural Metrics", choices=[])
176
- btn = gr.Button("Train Baseline", variant="primary")
177
  with gr.Column(scale=2):
178
- plot_out = gr.Plot(); txt_out = gr.Markdown()
 
179
 
180
- with gr.TabItem("📖 Methodology & Guide"):
181
- guide_md = gr.Markdown("Loading research guide...")
 
 
182
 
 
183
  gr.Markdown(f"""
184
  ---
185
- ### 🔗 Official Links & Resources
186
- * **🤗 Hugging Face:** [**QSBench Organization**](https://huggingface.co/QSBench)
187
- * **💻 GitHub:** [**QSBench Repository**](https://github.com/QSBench)
188
- * **🌐 Website:** [**qsbench.github.io**](https://qsbench.github.io)
189
  """)
190
 
191
- ds_select.change(update_explorer_view, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_box, guide_md])
192
- ml_ds.change(sync_ml_inputs, [ml_ds], [ml_feat])
193
- btn.click(train_baseline_model, [ml_ds, ml_feat], [plot_out, txt_out])
 
 
194
 
195
- demo.load(update_explorer_view, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_box, guide_md])
196
- demo.load(sync_ml_inputs, [ml_ds], [ml_feat])
 
197
 
198
  if __name__ == "__main__":
199
  demo.launch()
 
11
  from sklearn.metrics import mean_absolute_error, r2_score
12
  from sklearn.model_selection import train_test_split
13
 
14
+ # --- CONFIG ---
15
  logging.basicConfig(level=logging.INFO)
16
  logger = logging.getLogger(__name__)
17
 
 
38
  }
39
  }
40
 
41
+ NON_FEATURE_COLS = {"sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm", "qasm_raw", "qasm_transpiled"}
 
 
 
 
 
42
 
43
  _ASSET_CACHE = {}
44
 
 
 
 
 
 
 
 
45
  def load_all_assets(key: str) -> Dict:
46
  if key not in _ASSET_CACHE:
47
  ds = load_dataset(REPO_CONFIG[key]["repo"])
48
+ meta = requests.get(REPO_CONFIG[key]["meta_url"]).json()
49
+ report = requests.get(REPO_CONFIG[key]["report_url"]).json()
50
+ _ASSET_CACHE[key] = {"df": pd.DataFrame(ds["train"]), "meta": meta, "report": report}
 
 
51
  return _ASSET_CACHE[key]
52
 
53
+ # --- RENDER FUNCTIONS ---
54
+ def get_methodology_content(ds_name: str):
55
+ assets = load_all_assets(ds_name)
56
+ meta = assets["meta"]
57
  params = meta.get("parameters", {})
58
+ report = assets["report"]
59
 
 
 
 
 
60
  families = report.get("families", {})
61
+ # Important: Markdown tables require no leading whitespace at the start of each line
62
+ fam_table = "| Family | Samples | Description |\n|:---|:---|:---|\n"
63
  for f, count in families.items():
64
  fam_table += f"| {f.upper()} | {count} | Synthetic {f} circuits |\n"
65
 
66
+ return f"""
67
+ ## 📖 Methodology & Release Notes: {meta.get('dataset_version')}
68
 
69
+ ### 1. Generation Profile
70
+ Dataset produced via **QSBench v{meta.get('generator_version')}**.
71
  - **Hardware Profile:** {params.get('n_qubits')} Qubits | Depth: {params.get('depth')}
72
+ - **Noise Configuration:** `{params.get('noise')}` (p={params.get('noise_prob')})
73
+ - **Backend:** {meta.get('backend_device')} | {meta.get('precision_mode')} precision
 
 
 
 
 
74
 
75
+ ### 2. Circuit Family Coverage
76
  {fam_table}
77
 
78
+ ### 3. Structural Metric Definitions
79
+ - **Gate Entropy:** Measures circuit complexity and gate distribution diversity.
80
+ - **Meyer-Wallach:** Scalar measure of global entanglement.
81
+ - **Adjacency:** Graph density of the qubit interaction map.
82
  """
 
83
 
84
+ def update_explorer(ds_name: str, split_name: str):
85
  assets = load_all_assets(ds_name)
86
  df = assets["df"]
87
  splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
88
  display_df = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
89
 
90
+ raw = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns else "// N/A"
91
+ tr = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns else "// N/A"
92
+ meta_text = f"### 📋 {ds_name} | Version: {assets['meta'].get('dataset_version')}"
93
 
94
+ return gr.update(choices=splits), display_df, raw, tr, meta_text
 
 
 
 
 
 
 
 
 
95
 
96
+ def train_model(ds_name: str, features: List[str]):
97
+ if not features: return None, "### ❌ Select features"
98
  assets = load_all_assets(ds_name)
99
  df = assets["df"]
100
+ target = "ideal_expval_Z_global"
101
 
102
+ train_df = df.dropna(subset=features + [target])
103
+ X, y = train_df[features], train_df[target]
104
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
105
 
106
+ model = RandomForestRegressor(n_estimators=100, max_depth=10, n_jobs=-1).fit(X_train, y_train)
 
107
  preds = model.predict(X_test)
108
 
109
  sns.set_theme(style="whitegrid", context="talk")
110
  fig, axes = plt.subplots(1, 3, figsize=(24, 8))
111
 
112
+ axes[0].scatter(y_test, preds, alpha=0.3, color='#2c3e50')
113
  axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
114
+ axes[0].set_title(f"R² Score: {r2_score(y_test, preds):.3f}")
115
 
116
  imp = model.feature_importances_
117
  idx = np.argsort(imp)[-10:]
118
+ axes[1].barh([features[i] for i in idx], imp[idx], color='#27ae60')
119
+ axes[1].set_title("Top Metrics Importance")
120
 
121
  sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
122
+ axes[2].set_title("Residuals Distribution")
123
 
124
+ plt.tight_layout(pad=3.0)
125
+ return fig, f"**MAE:** {mean_absolute_error(y_test, preds):.4f}"
126
 
127
+ # --- UI INTERFACE ---
128
  with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
129
  gr.Markdown("# 🌌 QSBench: Quantum Analytics Hub")
130
 
131
  with gr.Tabs():
132
+ # TAB 1: EXPLORER
133
  with gr.TabItem("🔎 Explorer"):
134
+ metadata_info = gr.Markdown("### Loading...")
135
  with gr.Row():
136
+ ds_select = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset Pack")
137
+ split_select = gr.Dropdown(["train"], value="train", label="Subset")
138
  data_table = gr.Dataframe(interactive=False)
139
  with gr.Row():
140
  code_raw = gr.Code(label="Source QASM", language="python")
141
  code_tr = gr.Code(label="Transpiled QASM", language="python")
142
 
143
+ # TAB 2: ML
144
  with gr.TabItem("🤖 ML Training"):
145
  with gr.Row():
146
  with gr.Column(scale=1):
147
+ ml_ds = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
148
+ ml_feat = gr.CheckboxGroup(label="Metrics", choices=["gate_entropy", "meyer_wallach", "n_qubits", "depth", "total_gates"], value=["gate_entropy", "meyer_wallach"])
149
+ btn = gr.Button("Run Training", variant="primary")
150
  with gr.Column(scale=2):
151
+ plot_out = gr.Plot()
152
+ txt_out = gr.Markdown()
153
 
154
+ # TAB 3: METHODOLOGY (WITH DATASET SELECTOR)
155
+ with gr.TabItem("📖 Methodology"):
156
+ method_ds_select = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="View methodology for:")
157
+ guide_md = gr.Markdown()
158
 
159
+ # LINKS FOOTER
160
  gr.Markdown(f"""
161
  ---
162
+ ### 🔗 Project Links
163
+ [**🤗 Hugging Face**](https://huggingface.co/QSBench) | [**💻 GitHub**](https://github.com/QSBench) | [**🌐 Website**](https://qsbench.github.io)
 
 
164
  """)
165
 
166
+ # EVENT HANDLERS
167
+ ds_select.change(update_explorer, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_info])
168
+ ml_ds.change(lambda x: gr.update(), [ml_ds], []) # Simple sync
169
+ method_ds_select.change(get_methodology_content, [method_ds_select], [guide_md])
170
+ btn.click(train_model, [ml_ds, ml_feat], [plot_out, txt_out])
171
 
172
+ # INITIAL LOAD
173
+ demo.load(update_explorer, [ds_select, split_select], [split_select, data_table, code_raw, code_tr, metadata_info])
174
+ demo.load(get_methodology_content, [method_ds_select], [guide_md])
175
 
176
  if __name__ == "__main__":
177
  demo.launch()