Spaces:

QSBench
/

Circuit_Family_Classifier

Running

App Files Community

QSBench committed 3 days ago

Commit

30add96

verified ·

1 Parent(s): a0ad65f

Update app.py

Browse files

Files changed (1) hide show

app.py +48 -83

app.py CHANGED Viewed

@@ -39,7 +39,7 @@ REPO_CONFIG = {
     }
 }
-# Columns that are NOT features
 NON_FEATURE_COLS = {
     "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
     "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
@@ -61,150 +61,115 @@ def load_all_assets(key: str) -> Dict:
 # --- UI LOGIC ---
 def load_guide_content():
-    """Reads the content of GUIDE.md from the local directory."""
     try:
         with open("GUIDE.md", "r", encoding="utf-8") as f:
             return f.read()
-    except FileNotFoundError:
-        return "### ⚠️ Error: GUIDE.md not found. Please ensure it is in the root directory."
 def sync_ml_metrics(ds_name: str):
-    """Extracts numerical features available for classification."""
     assets = load_all_assets(ds_name)
     df = assets["df"]
     numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
-    valid_features = [
-        c for c in numeric_cols
-        if c not in NON_FEATURE_COLS
-        and not any(prefix in c for prefix in ["ideal_", "noisy_", "error_", "sign_"])
-    ]
-    # Pre-select logical structural indicators
     defaults = [f for f in ["gate_entropy", "meyer_wallach", "adjacency", "depth", "cx_count"] if f in valid_features]
-    return gr.update(choices=valid_features, value=defaults or valid_features[:5])
 def train_classifier(ds_name: str, features: List[str]):
-    """Trains a Classifier to identify the Circuit Family based on topology."""
-    if not features: return None, "### ❌ Error: No features selected."
     assets = load_all_assets(ds_name)
     df = assets["df"]
-    target_col = "circuit_type_requested"
-    if target_col not in df.columns:
-        return None, f"### ❌ Error: Target column '{target_col}' not found."
-    # Data Cleaning
-    train_df = df.dropna(subset=features + [target_col])
-    X = train_df[features]
-    y = train_df[target_col]
-    # Encoding targets
     le = LabelEncoder()
     y_encoded = le.fit_transform(y)
-    class_names = le.classes_
     X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
-    # Classification Model
-    clf = RandomForestClassifier(n_estimators=100, max_depth=12, n_jobs=-1, random_state=42)
-    clf.fit(X_train, y_train)
     preds = clf.predict(X_test)
-    # Metrics
-    acc = accuracy_score(y_test, preds)
-    # Visualization
-    sns.set_theme(style="whitegrid", context="talk")
     fig, axes = plt.subplots(1, 2, figsize=(20, 8))
-    # 1. Confusion Matrix
     cm = confusion_matrix(y_test, preds)
-    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
-                xticklabels=class_names, yticklabels=class_names, ax=axes[0], cbar=False)
-    axes[0].set_title(f"Confusion Matrix (Accuracy: {acc:.2%})")
-    axes[0].set_xlabel("Predicted Family")
-    axes[0].set_ylabel("Actual Family")
-    # 2. Feature Importance
     importances = clf.feature_importances_
-    indices = np.argsort(importances)[-10:] # Top 10
-    axes[1].barh([features[i] for i in indices], importances[indices], color='#16a085')
-    axes[1].set_title("Top Structural Discriminators")
     plt.tight_layout()
-    report_dict = classification_report(y_test, preds, target_names=class_names)
-    summary = f"### 🏆 Classification Results\n**Overall Accuracy:** {acc:.2%}\n\n**Detailed Report:**\n```\n{report_dict}\n```"
-    return fig, summary
 def update_explorer(ds_name: str, split_name: str):
-    """Updates the data view for the Explorer tab."""
     assets = load_all_assets(ds_name)
     df = assets["df"]
-    unique_splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
-    if "split" in df.columns:
-        filtered_df = df[df["split"] == split_name]
-        if filtered_df.empty:
-            split_name = unique_splits[0]
-            filtered_df = df[df["split"] == split_name]
-    else:
-        filtered_df = df
-    display_df = filtered_df.head(10)
     raw = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns and not display_df.empty else "// N/A"
     tr = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns and not display_df.empty else "// N/A"
-    return gr.update(choices=unique_splits, value=split_name), display_df, raw, tr, f"### 📋 {ds_name} Explorer"
 # --- INTERFACE ---
 with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Classifier") as demo:
     gr.Markdown("# 🌌 QSBench: Circuit Family Classifier")
-    gr.Markdown("Identify circuit types (QFT, HEA, RANDOM, etc.) using high-level structural complexity metrics.")
     with gr.Tabs():
-        with gr.TabItem("🔎 Dataset Explorer"):
             meta_txt = gr.Markdown("### Loading...")
             with gr.Row():
-                ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset Type")
-                sp_sel = gr.Dropdown(["train"], value="train", label="Subset (Split)")
             data_view = gr.Dataframe(interactive=False)
             with gr.Row():
-                c_raw = gr.Code(label="Original QASM (Logic)", language="python")
-                c_tr = gr.Code(label="Transpiled QASM (Hardware-ready)", language="python")
-        with gr.TabItem("🧠 Classification Model"):
-            gr.Markdown("Predict the **Circuit Family** by analyzing topology signatures.")
             with gr.Row():
                 with gr.Column(scale=1):
                     ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Environment")
-                    ml_feat_sel = gr.CheckboxGroup(label="Structural Features", choices=[])
-                    train_btn = gr.Button("Run Classification", variant="primary")
                 with gr.Column(scale=2):
                     p_out = gr.Plot()
                     t_out = gr.Markdown()
-        with gr.TabItem("📖 User Guide"):
-            meth_md = gr.Markdown(value=load_guide_content())
-    gr.Markdown(f"""
----
-### 🔗 Project Resources
-[**🌐 Website**](https://qsbench.github.io) | [**🤗 Hugging Face**](https://huggingface.co/QSBench) | [**💻 GitHub**](https://github.com/QSBench)
-    """)
-    # --- EVENTS ---
-    # Explorer events
     ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
     sp_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
-    # ML events
     ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
     train_btn.click(train_classifier, [ml_ds_sel, ml_feat_sel], [p_out, t_out])
-    # Initial Load
     demo.load(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
     demo.load(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])

     }
 }
+TARGET_FAMILIES = ['QFT', 'HEA', 'RANDOM', 'EFFICIENT', 'REAL_AMPLITUDES']
 NON_FEATURE_COLS = {
     "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
     "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
 # --- UI LOGIC ---
 def load_guide_content():
+    """Return the text of GUIDE.md, or a Markdown warning if it cannot be read."""
     try:
         with open("GUIDE.md", "r", encoding="utf-8") as f:
             return f.read()
+    # Catch only file-access and decoding failures; a bare ``except:`` would
+    # also swallow KeyboardInterrupt and SystemExit.
+    except (OSError, UnicodeDecodeError):
+        return "### ⚠️ GUIDE.md not found."
 def sync_ml_metrics(ds_name: str):
+    """Refresh the feature checkbox group for the chosen dataset.
+
+    Choices are the numeric columns that are not listed in
+    ``NON_FEATURE_COLS`` and do not contain ``ideal_``, ``noisy_`` or
+    ``error_`` anywhere in their name.
+    """
     assets = load_all_assets(ds_name)
     df = assets["df"]
     numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
+    valid_features = [c for c in numeric_cols if c not in NON_FEATURE_COLS and not any(p in c for p in ["ideal_", "noisy_", "error_"])]
     defaults = [f for f in ["gate_entropy", "meyer_wallach", "adjacency", "depth", "cx_count"] if f in valid_features]
+    # Fall back to the first five candidates when none of the preferred
+    # columns exist, so the group does not start empty while features are available.
+    return gr.update(choices=valid_features, value=defaults or valid_features[:5])
 def train_classifier(ds_name: str, features: List[str]):
+    """Train a random forest on the selected features and plot its results.
+
+    Only rows whose ``circuit_type_requested`` is in ``TARGET_FAMILIES`` and
+    that have no missing value in ``features`` are used.
+
+    Returns:
+        ``(figure, markdown)``: a two-panel figure (confusion matrix and
+        feature importances) plus the classification report, or
+        ``(None, message)`` when training cannot run.
+    """
+    if not features: return None, "### ❌ Select features first."
     assets = load_all_assets(ds_name)
     df = assets["df"]
+    # Report a readable message instead of a KeyError when the label column is absent.
+    if 'circuit_type_requested' not in df.columns:
+        return None, "### ❌ Error: Target column 'circuit_type_requested' not found."
+    # Filter for the 5 target families only
+    train_df = df[df['circuit_type_requested'].isin(TARGET_FAMILIES)].dropna(subset=features)
+    # train_test_split raises when it cannot form a non-empty train and test set.
+    if len(train_df) < 2:
+        return None, "### ❌ Error: Not enough rows left after filtering for the target families."
+    X, y = train_df[features], train_df['circuit_type_requested']
     le = LabelEncoder()
     y_encoded = le.fit_transform(y)
     X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
+    # Same fixed seed as the split, so repeated runs give the same model and plots.
+    clf = RandomForestClassifier(n_estimators=100, max_depth=12, n_jobs=-1, random_state=42).fit(X_train, y_train)
     preds = clf.predict(X_test)
+    sns.set_theme(style="whitegrid")
     fig, axes = plt.subplots(1, 2, figsize=(20, 8))
     cm = confusion_matrix(y_test, preds)
+    sns.heatmap(cm, annot=True, fmt='d', cmap='magma', xticklabels=le.classes_, yticklabels=le.classes_, ax=axes[0], cbar=False)
+    axes[0].set_title(f"Confusion Matrix (Acc: {accuracy_score(y_test, preds):.2%})")
     importances = clf.feature_importances_
+    # argsort is ascending, so the last ten indices are the most important features.
+    idx = np.argsort(importances)[-10:]
+    axes[1].barh([features[i] for i in idx], importances[idx], color='#3498db')
+    axes[1].set_title("Feature Importance")
     plt.tight_layout()
+    report = classification_report(y_test, preds, target_names=le.classes_)
+    return fig, f"### 🏆 Results\n```\n{report}\n```"
 def update_explorer(ds_name: str, split_name: str):
+    """Build the Explorer tab outputs for one dataset and split.
+
+    Returns a 5-tuple: the split-dropdown update, the first 10 rows of the
+    selected split, the first row's raw and transpiled QASM (``"// N/A"``
+    when unavailable), and the Markdown heading.
+    """
     assets = load_all_assets(ds_name)
     df = assets["df"]
+    # Identify splits; fall back to "train" when the column is missing or has
+    # no values, so splits[0] below cannot raise IndexError.
+    splits = (df["split"].unique().tolist() if "split" in df.columns else []) or ["train"]
+    # Ensure current split_name exists in this dataset
+    if split_name not in splits:
+        split_name = splits[0]
+    filtered = df[df["split"] == split_name] if "split" in df.columns else df
+    display_df = filtered.head(10)
     raw = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns and not display_df.empty else "// N/A"
     tr = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns and not display_df.empty else "// N/A"
+    return (
+        gr.update(choices=splits, value=split_name),
+        display_df,
+        raw,
+        tr,
+        f"### 📋 {ds_name} Explorer"
+    )
 # --- INTERFACE ---
 with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Classifier") as demo:
     gr.Markdown("# 🌌 QSBench: Circuit Family Classifier")
     with gr.Tabs():
+        # Tab 1: preview rows of a dataset split and the first row's QASM.
+        with gr.TabItem("🔎 Explorer"):
             meta_txt = gr.Markdown("### Loading...")
             with gr.Row():
+                ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset")
+                sp_sel = gr.Dropdown(["train"], value="train", label="Split")
             data_view = gr.Dataframe(interactive=False)
             with gr.Row():
+                # NOTE(review): QASM is displayed with Python highlighting, presumably
+                # because gr.Code has no QASM mode; confirm.
+                c_raw = gr.Code(label="Logic QASM", language="python")
+                c_tr = gr.Code(label="Transpiled QASM", language="python")
+        # Tab 2: choose features, train the classifier, show the plot and report.
+        with gr.TabItem("🧠 Classification"):
             with gr.Row():
                 with gr.Column(scale=1):
                     ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Environment")
+                    ml_feat_sel = gr.CheckboxGroup(label="Features", choices=[])
+                    train_btn = gr.Button("Run Analysis", variant="primary")
                 with gr.Column(scale=2):
                     p_out = gr.Plot()
                     t_out = gr.Markdown()
+        # Tab 3: static guide text, read once while the interface is built.
+        with gr.TabItem("📖 Guide"):
+            gr.Markdown(load_guide_content())
+    gr.Markdown("--- \n ### 🔗 [Website](https://qsbench.github.io) | [Hugging Face](https://huggingface.co/QSBench) | [GitHub](https://github.com/QSBench)")
+    # --- EVENTS ---
+    # Both explorer dropdowns call update_explorer, which also rewrites the
+    # split dropdown's own choices and value.
     ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
     sp_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
     ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
     train_btn.click(train_classifier, [ml_ds_sel, ml_feat_sel], [p_out, t_out])
     demo.load(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
     demo.load(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])