Spaces:

QSBench
/

Multi-Target_Regression

Running

App Files Files Community

QSBench committed 1 day ago

Commit

1971b4a

verified ·

1 Parent(s): 82c3c62

Update app.py

Browse files

Files changed (1) hide show

app.py +38 -25

app.py CHANGED Viewed

@@ -71,18 +71,14 @@ def sync_ml_metrics(ds_name: str):
     """Dynamically finds all available numerical metrics (features) from CSV/Dataset"""
     assets = load_all_assets(ds_name)
     df = assets["df"]
-    # Extract all numeric columns
     numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
-    # Filter: remove system IDs and ALL target components (X, Y, Z, global, local, error)
     valid_features = [
         c for c in numeric_cols
         if c not in NON_FEATURE_COLS
         and not any(prefix in c for prefix in ["ideal_", "noisy_", "error_", "sign_"])
     ]
-    # Priority metrics for "default" selection
     top_tier = ["gate_entropy", "meyer_wallach", "adjacency", "depth", "total_gates", "cx_count"]
     defaults = [f for f in top_tier if f in valid_features]
@@ -94,11 +90,9 @@ def train_model(ds_name: str, features: List[str]):
     assets = load_all_assets(ds_name)
     df = assets["df"]
-    # Multi-Target: Prediction of all global expectation values
     targets = ["ideal_expval_X_global", "ideal_expval_Y_global", "ideal_expval_Z_global"]
-    # Filter targets that actually exist in the dataframe (handle cases where some might be missing)
     available_targets = [t for t in targets if t in df.columns]
     if not available_targets:
         return None, "### ❌ Error: Target columns not found in dataset."
@@ -106,50 +100,65 @@ def train_model(ds_name: str, features: List[str]):
     X, y = train_df[features], train_df[available_targets]
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-    # RandomForestRegressor supports multi-output regression out of the box
     model = RandomForestRegressor(n_estimators=100, max_depth=10, n_jobs=-1).fit(X_train, y_train)
     preds = model.predict(X_test)
     sns.set_theme(style="whitegrid", context="talk")
     fig, axes = plt.subplots(1, len(available_targets), figsize=(8 * len(available_targets), 7))
-    # If only one target available, axes is not an array
     if len(available_targets) == 1: axes = [axes]
     summary_text = "### 📊 Multi-Target Performance Summary\n"
-    colors = ['#2980b9', '#8e44ad', '#2c3e50'] # Blue for X, Purple for Y, Dark for Z
     for i, target_col in enumerate(available_targets):
         y_true_axis = y_test.iloc[:, i]
         y_pred_axis = preds[:, i]
         r2 = r2_score(y_true_axis, y_pred_axis)
         mae = mean_absolute_error(y_true_axis, y_pred_axis)
-        # Parity Plot for each basis
         axes[i].scatter(y_true_axis, y_pred_axis, alpha=0.3, color=colors[i % len(colors)])
-        axes[i].plot([-1, 1], [-1, 1], 'r--', lw=2) # Theoretical range of expectation values is [-1, 1]
         axes[i].set_title(f"Target: {target_col}\n(R²: {r2:.3f})")
-        axes[i].set_xlabel("Ground Truth (Ideal)"); axes[i].set_ylabel("Model Prediction")
         axes[i].set_xlim([-1.1, 1.1]); axes[i].set_ylim([-1.1, 1.1])
-        axis_name = target_col.split('_')[2] # Extracts X, Y, or Z
         summary_text += f"- **{axis_name}-Axis:** MAE = {mae:.4f} | R² = {r2:.3f}\n"
     plt.tight_layout(pad=3.0)
     return fig, summary_text
 def update_explorer(ds_name: str, split_name: str):
     assets = load_all_assets(ds_name)
     df = assets["df"]
-    splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
-    display_df = df[df["split"] == split_name].head(10) if "split" in df.columns else df.head(10)
-    raw = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns else "// N/A"
-    tr = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns else "// N/A"
-    return gr.update(choices=splits), display_df, raw, tr, f"### 📋 {ds_name} Explorer"
 # --- INTERFACE ---
 with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
@@ -166,12 +175,12 @@ with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
                 c_raw = gr.Code(label="Source QASM", language="python")
                 c_tr = gr.Code(label="Transpiled QASM", language="python")
-        with gr.TabItem("🤖 ML Training (Multi-Target)"):
-            gr.Markdown("Training models to predict the full Bloch vector expectation values (X, Y, Z) simultaneously.")
             with gr.Row():
                 with gr.Column(scale=1):
                     ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
-                    ml_feat_sel = gr.CheckboxGroup(label="Structural Metrics (Features)", choices=[])
                     train_btn = gr.Button("Train Multi-Output Model", variant="primary")
                 with gr.Column(scale=2):
                     p_out = gr.Plot()
@@ -187,7 +196,11 @@ with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
     """)
     # --- EVENTS ---
     ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
     ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
     train_btn.click(train_model, [ml_ds_sel, ml_feat_sel], [p_out, t_out])

     """Dynamically finds all available numerical metrics (features) from CSV/Dataset"""
     assets = load_all_assets(ds_name)
     df = assets["df"]
     numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
     valid_features = [
         c for c in numeric_cols
         if c not in NON_FEATURE_COLS
         and not any(prefix in c for prefix in ["ideal_", "noisy_", "error_", "sign_"])
     ]
     top_tier = ["gate_entropy", "meyer_wallach", "adjacency", "depth", "total_gates", "cx_count"]
     defaults = [f for f in top_tier if f in valid_features]
     assets = load_all_assets(ds_name)
     df = assets["df"]
     targets = ["ideal_expval_X_global", "ideal_expval_Y_global", "ideal_expval_Z_global"]
     available_targets = [t for t in targets if t in df.columns]
     if not available_targets:
         return None, "### ❌ Error: Target columns not found in dataset."
     X, y = train_df[features], train_df[available_targets]
     X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
     model = RandomForestRegressor(n_estimators=100, max_depth=10, n_jobs=-1).fit(X_train, y_train)
     preds = model.predict(X_test)
     sns.set_theme(style="whitegrid", context="talk")
     fig, axes = plt.subplots(1, len(available_targets), figsize=(8 * len(available_targets), 7))
     if len(available_targets) == 1: axes = [axes]
     summary_text = "### 📊 Multi-Target Performance Summary\n"
+    colors = ['#2980b9', '#8e44ad', '#2c3e50']
     for i, target_col in enumerate(available_targets):
         y_true_axis = y_test.iloc[:, i]
         y_pred_axis = preds[:, i]
         r2 = r2_score(y_true_axis, y_pred_axis)
         mae = mean_absolute_error(y_true_axis, y_pred_axis)
         axes[i].scatter(y_true_axis, y_pred_axis, alpha=0.3, color=colors[i % len(colors)])
+        axes[i].plot([-1, 1], [-1, 1], 'r--', lw=2)
         axes[i].set_title(f"Target: {target_col}\n(R²: {r2:.3f})")
+        axes[i].set_xlabel("Ground Truth"); axes[i].set_ylabel("Prediction")
         axes[i].set_xlim([-1.1, 1.1]); axes[i].set_ylim([-1.1, 1.1])
+        axis_name = target_col.split('_')[2]
         summary_text += f"- **{axis_name}-Axis:** MAE = {mae:.4f} | R² = {r2:.3f}\n"
     plt.tight_layout(pad=3.0)
     return fig, summary_text
 def update_explorer(ds_name: str, split_name: str):
+    """Updates the data view based on dataset and split selection."""
     assets = load_all_assets(ds_name)
     df = assets["df"]
+    # Get unique splits for the dropdown update
+    unique_splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
+    # Filter dataframe by selected split
+    if "split" in df.columns:
+        filtered_df = df[df["split"] == split_name]
+        # If split_name is not present in this dataset, fall back to the first available split
+        if filtered_df.empty:
+            split_name = unique_splits[0]
+            filtered_df = df[df["split"] == split_name]
+    else:
+        filtered_df = df
+    display_df = filtered_df.head(10)
+    # Extract QASM samples
+    raw = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns and not display_df.empty else "// N/A"
+    tr = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns and not display_df.empty else "// N/A"
+    return (
+        gr.update(choices=unique_splits, value=split_name),
+        display_df,
+        raw,
+        tr,
+        f"### 📋 {ds_name} Explorer"
+    )
 # --- INTERFACE ---
 with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Hub") as demo:
                 c_raw = gr.Code(label="Source QASM", language="python")
                 c_tr = gr.Code(label="Transpiled QASM", language="python")
+        with gr.TabItem("🤖 ML Training"):
+            gr.Markdown("Multi-target regression: predicting X, Y, and Z components simultaneously.")
             with gr.Row():
                 with gr.Column(scale=1):
                     ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Select Dataset")
+                    ml_feat_sel = gr.CheckboxGroup(label="Structural Metrics", choices=[])
                     train_btn = gr.Button("Train Multi-Output Model", variant="primary")
                 with gr.Column(scale=2):
                     p_out = gr.Plot()
     """)
     # --- EVENTS ---
+    # Explorer: refresh the view when either the dataset or the split selection changes
     ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
+    sp_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
+    # ML Tab
     ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
     train_btn.click(train_model, [ml_ds_sel, ml_feat_sel], [p_out, t_out])