Spaces:

QSBench
/

Circuit_Family_Classifier

Running

App Files Community

QSBench committed on 3 days ago

Commit

980efa8

verified ·

1 Parent(s): 9c99c87

Update app.py

Browse files

Files changed (1) hide show

app.py +69 -48

app.py CHANGED Viewed

@@ -12,10 +12,11 @@ from sklearn.metrics import accuracy_score, confusion_matrix, classification_rep
 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import LabelEncoder
-# --- CONFIG & LOGGING ---
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
 REPO_CONFIG = {
     "Core (Clean)": {
         "repo": "QSBench/QSBench-Core-v1.0.0-demo",
@@ -39,6 +40,7 @@ REPO_CONFIG = {
     }
 }
 NON_FEATURE_COLS = {
     "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
     "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
@@ -49,25 +51,31 @@ NON_FEATURE_COLS = {
 _ASSET_CACHE = {}
 def load_all_assets(key: str) -> Dict:
-    """Fetch dataset and metadata from Hugging Face."""
     if key not in _ASSET_CACHE:
-        logger.info(f"Fetching {key}...")
         ds = load_dataset(REPO_CONFIG[key]["repo"])
         meta = requests.get(REPO_CONFIG[key]["meta_url"]).json()
         report = requests.get(REPO_CONFIG[key]["report_url"]).json()
         _ASSET_CACHE[key] = {"df": pd.DataFrame(ds["train"]), "meta": meta, "report": report}
     return _ASSET_CACHE[key]
-def load_guide_content():
-    """Load content for the methodology tab."""
     try:
         with open("GUIDE.md", "r", encoding="utf-8") as f:
             return f.read()
-    except:
-        return "### ⚠️ GUIDE.md not found. Please upload it to the root directory."
-def sync_ml_metrics(ds_name: str):
-    """Identify numerical features for classification."""
     assets = load_all_assets(ds_name)
     df = assets["df"]
     numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
@@ -75,18 +83,20 @@ def sync_ml_metrics(ds_name: str):
     defaults = [f for f in ["gate_entropy", "meyer_wallach", "adjacency", "depth", "cx_count"] if f in valid_features]
     return gr.update(choices=valid_features, value=defaults)
-def train_classifier(ds_name: str, features: List[str]):
-    """Train a classifier to detect circuit families."""
     if not features:
-        return None, "### ❌ Error: Select features first."
     assets = load_all_assets(ds_name)
     df = assets["df"]
-    # Logic: use 'resolved' if 'requested' contains 'mixed' tags
     target_col = 'circuit_type_resolved' if 'circuit_type_resolved' in df.columns else 'circuit_type_requested'
-    # Filter 'mixed' out if other classes exist
     train_df = df.dropna(subset=features + [target_col])
     if 'mixed' in train_df[target_col].unique() and len(train_df[target_col].unique()) > 1:
         train_df = train_df[train_df[target_col] != 'mixed']
@@ -95,38 +105,49 @@ def train_classifier(ds_name: str, features: List[str]):
     y = train_df[target_col]
     if len(y.unique()) < 2:
-        return None, f"### ❌ Error: At least 2 classes needed. Found only: {y.unique()}"
     le = LabelEncoder()
     y_encoded = le.fit_transform(y)
     try:
         X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)
-    except:
         X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
-    clf = RandomForestClassifier(n_estimators=100, max_depth=12, n_jobs=-1).fit(X_train, y_train)
     preds = clf.predict(X_test)
-    # Plotting
     sns.set_theme(style="whitegrid")
     fig, axes = plt.subplots(1, 2, figsize=(20, 8))
     cm = confusion_matrix(y_test, preds)
     sns.heatmap(cm, annot=True, fmt='d', cmap='viridis', xticklabels=le.classes_, yticklabels=le.classes_, ax=axes[0], cbar=False)
-    axes[0].set_title(f"Confusion Matrix (Acc: {accuracy_score(y_test, preds):.2%})")
     importances = clf.feature_importances_
     idx = np.argsort(importances)[-10:]
     axes[1].barh([features[i] for i in idx], importances[idx], color='#2ecc71')
-    axes[1].set_title("Top-10 Discriminative Features")
     plt.tight_layout()
-    report = classification_report(y_test, preds, target_names=le.classes_)
-    return fig, f"### 🏆 Results\n**Target Column:** `{target_col}`\n```\n{report}\n```"
-def update_explorer(ds_name: str, split_name: str):
-    """Manage the Explorer tab data view."""
     assets = load_all_assets(ds_name)
     df = assets["df"]
     splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
@@ -137,56 +158,56 @@ def update_explorer(ds_name: str, split_name: str):
     filtered = df[df["split"] == split_name] if "split" in df.columns else df
     display_df = filtered.head(10)
-    raw = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns and not display_df.empty else "// N/A"
-    tr = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns and not display_df.empty else "// N/A"
     return (
         gr.update(choices=splits, value=split_name),
         display_df,
-        raw,
-        tr,
         f"### 📋 {ds_name} Explorer"
     )
-# --- GRADIO INTERFACE ---
 with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Classifier") as demo:
     gr.Markdown("# 🌌 QSBench: Circuit Family Classifier")
     with gr.Tabs():
         with gr.TabItem("🔎 Explorer"):
-            meta_txt = gr.Markdown("### Initializing...")
             with gr.Row():
-                ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset")
-                sp_sel = gr.Dropdown(["train"], value="train", label="Split")
-            data_view = gr.Dataframe(interactive=False)
             with gr.Row():
-                c_raw = gr.Code(label="Source QASM", language="python")
-                c_tr = gr.Code(label="Transpiled QASM", language="python")
         with gr.TabItem("🧠 Classification"):
             with gr.Row():
                 with gr.Column(scale=1):
-                    ml_ds_sel = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Environment")
-                    ml_feat_sel = gr.CheckboxGroup(label="Structural Metrics", choices=[])
-                    train_btn = gr.Button("Train Classifier", variant="primary")
                 with gr.Column(scale=2):
-                    p_out = gr.Plot()
-                    t_out = gr.Markdown()
         with gr.TabItem("📖 Guide"):
             gr.Markdown(load_guide_content())
     gr.Markdown("--- \n ### 🔗 [Website](https://qsbench.github.io) | [Hugging Face](https://huggingface.co/QSBench) | [GitHub](https://github.com/QSBench)")
-    # Event Mapping
-    ds_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
-    sp_sel.change(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
-    ml_ds_sel.change(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
-    train_btn.click(train_classifier, [ml_ds_sel, ml_feat_sel], [p_out, t_out])
-    # Startup Load
-    demo.load(update_explorer, [ds_sel, sp_sel], [sp_sel, data_view, c_raw, c_tr, meta_txt])
-    demo.load(sync_ml_metrics, [ml_ds_sel], [ml_feat_sel])
 if __name__ == "__main__":
     demo.launch()

 from sklearn.model_selection import train_test_split
 from sklearn.preprocessing import LabelEncoder
+# Logging configuration
 logging.basicConfig(level=logging.INFO)
 logger = logging.getLogger(__name__)
+# Dataset repository configuration
 REPO_CONFIG = {
     "Core (Clean)": {
         "repo": "QSBench/QSBench-Core-v1.0.0-demo",
     }
 }
+# Define non-feature columns to exclude from training
 NON_FEATURE_COLS = {
     "sample_id", "sample_seed", "circuit_hash", "split", "circuit_qasm",
     "qasm_raw", "qasm_transpiled", "circuit_type_resolved", "circuit_type_requested",
 _ASSET_CACHE = {}
 def load_all_assets(key: str) -> Dict:
+    """
+    Fetch and cache dataset and metadata from Hugging Face.
+    """
     if key not in _ASSET_CACHE:
+        logger.info(f"Fetching {key} assets...")
         ds = load_dataset(REPO_CONFIG[key]["repo"])
         meta = requests.get(REPO_CONFIG[key]["meta_url"]).json()
         report = requests.get(REPO_CONFIG[key]["report_url"]).json()
         _ASSET_CACHE[key] = {"df": pd.DataFrame(ds["train"]), "meta": meta, "report": report}
     return _ASSET_CACHE[key]
+def load_guide_content() -> str:
+    """
+    Load Markdown content for the Methodology/Guide tab.
+    """
     try:
         with open("GUIDE.md", "r", encoding="utf-8") as f:
             return f.read()
+    except FileNotFoundError:
+        return "### ⚠️ GUIDE.md not found."
+def sync_ml_metrics(ds_name: str) -> gr.update:
+    """
+    Filter and return available numerical features for the selected dataset.
+    """
     assets = load_all_assets(ds_name)
     df = assets["df"]
     numeric_cols = df.select_dtypes(include=[np.number]).columns.tolist()
     defaults = [f for f in ["gate_entropy", "meyer_wallach", "adjacency", "depth", "cx_count"] if f in valid_features]
     return gr.update(choices=valid_features, value=defaults)
+def train_classifier(ds_name: str, features: List[str]) -> Tuple[Optional[plt.Figure], str]:
+    """
+    Perform multi-class classification on circuit families and return metrics/plots.
+    """
     if not features:
+        return None, "### ❌ Error: No features selected."
     assets = load_all_assets(ds_name)
     df = assets["df"]
+    # Target column selection fallback logic
     target_col = 'circuit_type_resolved' if 'circuit_type_resolved' in df.columns else 'circuit_type_requested'
+    # Data preprocessing and cleaning
     train_df = df.dropna(subset=features + [target_col])
     if 'mixed' in train_df[target_col].unique() and len(train_df[target_col].unique()) > 1:
         train_df = train_df[train_df[target_col] != 'mixed']
     y = train_df[target_col]
     if len(y.unique()) < 2:
+        return None, f"### ❌ Error: Dataset contains insufficient classes for training ({y.unique()})."
+    # Label encoding and dataset splitting
     le = LabelEncoder()
     y_encoded = le.fit_transform(y)
     try:
         X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42, stratify=y_encoded)
+    except (ValueError, TypeError):
         X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
+    # Model initialization and training
+    clf = RandomForestClassifier(n_estimators=100, max_depth=12, n_jobs=-1, random_state=42)
+    clf.fit(X_train, y_train)
     preds = clf.predict(X_test)
+    # Visualization generation
     sns.set_theme(style="whitegrid")
     fig, axes = plt.subplots(1, 2, figsize=(20, 8))
+    # Confusion Matrix Plot
     cm = confusion_matrix(y_test, preds)
     sns.heatmap(cm, annot=True, fmt='d', cmap='viridis', xticklabels=le.classes_, yticklabels=le.classes_, ax=axes[0], cbar=False)
+    axes[0].set_title(f"Confusion Matrix (Accuracy: {accuracy_score(y_test, preds):.2%})")
+    # Feature Importance Plot
     importances = clf.feature_importances_
     idx = np.argsort(importances)[-10:]
     axes[1].barh([features[i] for i in idx], importances[idx], color='#2ecc71')
+    axes[1].set_title("Top-10 Predictive Features")
     plt.tight_layout()
+    # Performance metrics string generation
+    cls_report = classification_report(y_test, preds, target_names=le.classes_, output_dict=False)
+    results_md = f"### 🏆 Classification Results\n**Target:** `{target_col}`\n**Accuracy:** {accuracy_score(y_test, preds):.2%}\n\n**Metrics:**\n```text\n{cls_report}\n```"
+    return fig, results_md
+def update_explorer(ds_name: str, split_name: str) -> Tuple[gr.update, pd.DataFrame, str, str, str]:
+    """
+    Refresh the Explorer view based on dataset and split selection.
+    """
     assets = load_all_assets(ds_name)
     df = assets["df"]
     splits = df["split"].unique().tolist() if "split" in df.columns else ["train"]
     filtered = df[df["split"] == split_name] if "split" in df.columns else df
     display_df = filtered.head(10)
+    raw_qasm = display_df["qasm_raw"].iloc[0] if "qasm_raw" in display_df.columns and not display_df.empty else "// N/A"
+    transpiled_qasm = display_df["qasm_transpiled"].iloc[0] if "qasm_transpiled" in display_df.columns and not display_df.empty else "// N/A"
     return (
         gr.update(choices=splits, value=split_name),
         display_df,
+        raw_qasm,
+        transpiled_qasm,
         f"### 📋 {ds_name} Explorer"
     )
+# Gradio interface definition
 with gr.Blocks(theme=gr.themes.Soft(), title="QSBench Classifier") as demo:
     gr.Markdown("# 🌌 QSBench: Circuit Family Classifier")
     with gr.Tabs():
         with gr.TabItem("🔎 Explorer"):
+            meta_label = gr.Markdown("### Initializing...")
             with gr.Row():
+                ds_dropdown = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Dataset Type")
+                split_dropdown = gr.Dropdown(["train"], value="train", label="Split")
+            explorer_df = gr.Dataframe(interactive=False)
             with gr.Row():
+                raw_qasm_code = gr.Code(label="Logical QASM", language="python")
+                tr_qasm_code = gr.Code(label="Transpiled QASM", language="python")
         with gr.TabItem("🧠 Classification"):
             with gr.Row():
                 with gr.Column(scale=1):
+                    ml_ds_dropdown = gr.Dropdown(list(REPO_CONFIG.keys()), value="Core (Clean)", label="Noise Environment")
+                    ml_feature_checks = gr.CheckboxGroup(label="Input Metrics", choices=[])
+                    run_btn = gr.Button("Train & Evaluate", variant="primary")
                 with gr.Column(scale=2):
+                    plot_output = gr.Plot()
+                    results_output = gr.Markdown()
         with gr.TabItem("📖 Guide"):
             gr.Markdown(load_guide_content())
     gr.Markdown("--- \n ### 🔗 [Website](https://qsbench.github.io) | [Hugging Face](https://huggingface.co/QSBench) | [GitHub](https://github.com/QSBench)")
+    # UI Event bindings
+    ds_dropdown.change(update_explorer, [ds_dropdown, split_dropdown], [split_dropdown, explorer_df, raw_qasm_code, tr_qasm_code, meta_label])
+    split_dropdown.change(update_explorer, [ds_dropdown, split_dropdown], [split_dropdown, explorer_df, raw_qasm_code, tr_qasm_code, meta_label])
+    ml_ds_dropdown.change(sync_ml_metrics, [ml_ds_dropdown], [ml_feature_checks])
+    run_btn.click(train_classifier, [ml_ds_dropdown, ml_feature_checks], [plot_output, results_output])
+    # Application startup triggers
+    demo.load(update_explorer, [ds_dropdown, split_dropdown], [split_dropdown, explorer_df, raw_qasm_code, tr_qasm_code, meta_label])
+    demo.load(sync_ml_metrics, [ml_ds_dropdown], [ml_feature_checks])
 if __name__ == "__main__":
     demo.launch()