QSBench committed on
Commit
c8aba73
·
verified ·
1 Parent(s): 382832c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -32
app.py CHANGED
@@ -75,7 +75,7 @@ def sync_ml_metrics(ds_name: str):
75
  # Extract all numeric columns
76
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
77
 
78
- # Filter: remove system IDs and targets (anything starting with ideal/noisy/error/sign)
79
  valid_features = [
80
  c for c in numeric_cols
81
  if c not in NON_FEATURE_COLS
@@ -89,43 +89,56 @@ def sync_ml_metrics(ds_name: str):
89
  return gr.update(choices=valid_features, value=defaults or valid_features[:5])
90
 
91
  def train_model(ds_name: str, features: List[str]):
 
92
  if not features: return None, "### ❌ Error: No metrics selected."
93
  assets = load_all_assets(ds_name)
94
  df = assets["df"]
95
 
96
- # Use global Z value as target
97
- target = "ideal_expval_Z_global"
98
 
99
- train_df = df.dropna(subset=features + [target])
100
- X, y = train_df[features], train_df[target]
 
 
 
 
 
101
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
102
 
 
103
  model = RandomForestRegressor(n_estimators=100, max_depth=10, n_jobs=-1).fit(X_train, y_train)
104
  preds = model.predict(X_test)
105
 
106
  sns.set_theme(style="whitegrid", context="talk")
107
- fig, axes = plt.subplots(1, 3, figsize=(24, 8))
 
 
 
 
 
108
 
109
- # 1. Prediction vs Reality
110
- axes[0].scatter(y_test, preds, alpha=0.3, color='#2c3e50')
111
- axes[0].plot([y.min(), y.max()], [y.min(), y.max()], 'r--', lw=2)
112
- axes[0].set_title(f"Accuracy (R²: {r2_score(y_test, preds):.3f})")
113
- axes[0].set_xlabel("Ideal ExpVal"); axes[0].set_ylabel("Predicted")
114
-
115
- # 2. Feature Importance
116
- imp = model.feature_importances_
117
- # Take top 10 if there are many, or all if few
118
- top_n = min(len(features), 10)
119
- idx = np.argsort(imp)[-top_n:]
120
- axes[1].barh([features[i] for i in idx], imp[idx], color='#27ae60')
121
- axes[1].set_title(f"Top {top_n} Metrics Importance")
122
-
123
- # 3. Residuals
124
- sns.histplot(y_test - preds, kde=True, ax=axes[2], color='#d35400')
125
- axes[2].set_title("Residuals (Error Distribution)")
 
126
 
127
  plt.tight_layout(pad=3.0)
128
- return fig, f"**Mean Absolute Error (MAE):** {mean_absolute_error(y_test, preds):.4f}"
129
 
130
  def update_explorer(ds_name: str, split_name: str):
131
  assets = load_all_assets(ds_name)
@@ -153,19 +166,18 @@ with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
153
  c_raw = gr.Code(label="Source QASM", language="python")
154
  c_tr = gr.Code(label="Transpiled QASM", language="python")
155
 
156
- with gr.TabItem("🤖 ML Training"):
 
157
  with gr.Row():
158
  with gr.Column(scale=1):
159
  ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
160
- # Dynamic metrics list extracted from CSV
161
- ml_feat_sel = gr.CheckboxGroup(label="Available Metrics (extracted from CSV)", choices=[])
162
- train_btn = gr.Button("Execute Baseline", variant="primary")
163
  with gr.Column(scale=2):
164
  p_out = gr.Plot()
165
  t_out = gr.Markdown()
166
 
167
  with gr.TabItem("📖 Methodology"):
168
- # Automatically loads content from GUIDE.md
169
  meth_md = gr.Markdown(value=load_guide_content())
170
 
171
  gr.Markdown(f"""
@@ -175,10 +187,7 @@ with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
175
  """)
176
 
177
  # --- EVENTS ---
178
- # Explorer
179
  ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
180
-
181
- # ML Tab: Dynamic metrics update
182
  ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
183
  train_btn.click(train_model, [ml_ds_sel, ml_feat_sel], [p_out, t_out])
184
 
 
75
  # Extract all numeric columns
76
  numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
77
 
78
+ # Filter: remove system IDs and ALL target components (X, Y, Z, global, local, error)
79
  valid_features = [
80
  c for c in numeric_cols
81
  if c not in NON_FEATURE_COLS
 
89
  return gr.update(choices=valid_features, value=defaults or valid_features[:5])
90
 
91
  def train_model(ds_name: str, features: List[str]):
92
+ """Trains a Multi-Target Regressor to predict X, Y, and Z expectation values."""
93
  if not features: return None, "### ❌ Error: No metrics selected."
94
  assets = load_all_assets(ds_name)
95
  df = assets["df"]
96
 
97
+ # Multi-Target: Prediction of all global expectation values
98
+ targets = ["ideal_expval_X_global", "ideal_expval_Y_global", "ideal_expval_Z_global"]
99
 
100
+ # Filter targets that actually exist in the dataframe (handle cases where some might be missing)
101
+ available_targets = [t for t in targets if t in df.columns]
102
+ if not available_targets:
103
+ return None, "### ❌ Error: Target columns not found in dataset."
104
+
105
+ train_df = df.dropna(subset=features + available_targets)
106
+ X, y = train_df[features], train_df[available_targets]
107
  X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
108
 
109
+ # RandomForestRegressor supports multi-output regression out of the box
110
  model = RandomForestRegressor(n_estimators=100, max_depth=10, n_jobs=-1).fit(X_train, y_train)
111
  preds = model.predict(X_test)
112
 
113
  sns.set_theme(style="whitegrid", context="talk")
114
+ fig, axes = plt.subplots(1, len(available_targets), figsize=(8 * len(available_targets), 7))
115
+
116
+ # If only one target available, axes is not an array
117
+ if len(available_targets) == 1: axes = [axes]
118
+
119
+ summary_text = "### 📊 Multi-Target Performance Summary\n"
120
 
121
+ colors = ['#2980b9', '#8e44ad', '#2c3e50'] # Blue for X, Purple for Y, Dark for Z
122
+
123
+ for i, target_col in enumerate(available_targets):
124
+ y_true_axis = y_test.iloc[:, i]
125
+ y_pred_axis = preds[:, i]
126
+
127
+ r2 = r2_score(y_true_axis, y_pred_axis)
128
+ mae = mean_absolute_error(y_true_axis, y_pred_axis)
129
+
130
+ # Parity Plot for each basis
131
+ axes[i].scatter(y_true_axis, y_pred_axis, alpha=0.3, color=colors[i % len(colors)])
132
+ axes[i].plot([-1, 1], [-1, 1], 'r--', lw=2) # Theoretical range of expectation values is [-1, 1]
133
+ axes[i].set_title(f"Target: {target_col}\n(R²: {r2:.3f})")
134
+ axes[i].set_xlabel("Ground Truth (Ideal)"); axes[i].set_ylabel("Model Prediction")
135
+ axes[i].set_xlim([-1.1, 1.1]); axes[i].set_ylim([-1.1, 1.1])
136
+
137
+ axis_name = target_col.split('_')[2] # Extracts X, Y, or Z
138
+ summary_text += f"- **{axis_name}-Axis:** MAE = {mae:.4f} | R² = {r2:.3f}\n"
139
 
140
  plt.tight_layout(pad=3.0)
141
+ return fig, summary_text
142
 
143
  def update_explorer(ds_name: str, split_name: str):
144
  assets = load_all_assets(ds_name)
 
166
  c_raw = gr.Code(label="Source QASM", language="python")
167
  c_tr = gr.Code(label="Transpiled QASM", language="python")
168
 
169
+ with gr.TabItem("🤖 ML Training (Multi-Target)"):
170
+ gr.Markdown("Training models to predict the full Bloch vector expectation values (X, Y, Z) simultaneously.")
171
  with gr.Row():
172
  with gr.Column(scale=1):
173
  ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
174
+ ml_feat_sel = gr.CheckboxGroup(label="Structural Metrics (Features)", choices=[])
175
+ train_btn = gr.Button("Train Multi-Output Model", variant="primary")
 
176
  with gr.Column(scale=2):
177
  p_out = gr.Plot()
178
  t_out = gr.Markdown()
179
 
180
  with gr.TabItem("📖 Methodology"):
 
181
  meth_md = gr.Markdown(value=load_guide_content())
182
 
183
  gr.Markdown(f"""
 
187
  """)
188
 
189
  # --- EVENTS ---
 
190
  ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
 
 
191
  ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
192
  train_btn.click(train_model, [ml_ds_sel, ml_feat_sel], [p_out, t_out])
193