Spaces:

QSBench
/

Circuit_Family_Classifier

Running

App Files Community

QSBench committed 3 days ago

Commit

170aab6

verified ·

1 Parent(s): 30add96

Update app.py

Browse files

Files changed (1) hide show

app.py +23 -5

app.py CHANGED Viewed

@@ -80,13 +80,29 @@ def train_classifier(ds_name: str, features: List[str]):
     assets = load_all_assets(ds_name)
     df = assets["df"]
-    # Filter for the 5 target families only
-    train_df = df[df['circuit_type_requested'].isin(TARGET_FAMILIES)].dropna(subset=features)
     X, y = train_df[features], train_df['circuit_type_requested']
     le = LabelEncoder()
     y_encoded = le.fit_transform(y)
-    X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
     clf = RandomForestClassifier(n_estimators=100, max_depth=12, n_jobs=-1).fit(X_train, y_train)
     preds = clf.predict(X_test)
@@ -95,7 +111,9 @@ def train_classifier(ds_name: str, features: List[str]):
     fig, axes = plt.subplots(1, 2, figsize=(20, 8))
     cm = confusion_matrix(y_test, preds)
-    sns.heatmap(cm, annot=True, fmt='d', cmap='magma', xticklabels=le.classes_, yticklabels=le.classes_, ax=axes[0], cbar=False)
     axes[0].set_title(f"Confusion Matrix (Acc: {accuracy_score(y_test, preds):.2%})")
     importances = clf.feature_importances_
@@ -105,7 +123,7 @@ def train_classifier(ds_name: str, features: List[str]):
     plt.tight_layout()
     report = classification_report(y_test, preds, target_names=le.classes_)
-    return fig, f"### 🏆 Results\n```\n{report}\n```"
 def update_explorer(ds_name: str, split_name: str):
     assets = load_all_assets(ds_name)

     assets = load_all_assets(ds_name)
     df = assets["df"]
+    # Automatically determine available classes in the dataset, excluding empty values
+    available_in_df = df['circuit_type_requested'].dropna().unique()
+    # Filter: keep only those that are in our list of interests (case-insensitive)
+    # Or simply take all available types if we want universality
+    train_df = df[df['circuit_type_requested'].isin(available_in_df)].dropna(subset=features)
+    if train_df.empty:
+        return None, f"### ❌ Error: No data found for features {features}. Check if these columns are empty in the dataset."
     X, y = train_df[features], train_df['circuit_type_requested']
+    # Check number of classes
+    if len(y.unique()) < 2:
+        return None, f"### ❌ Error: Need at least 2 classes to train. Found only: {y.unique()}"
     le = LabelEncoder()
     y_encoded = le.fit_transform(y)
+    try:
+        X_train, X_test, y_train, y_test = train_test_split(X, y_encoded, test_size=0.2, random_state=42)
+    except ValueError as e:
+        return None, f"### ❌ Split Error: {str(e)}"
     clf = RandomForestClassifier(n_estimators=100, max_depth=12, n_jobs=-1).fit(X_train, y_train)
     preds = clf.predict(X_test)
     fig, axes = plt.subplots(1, 2, figsize=(20, 8))
     cm = confusion_matrix(y_test, preds)
+    sns.heatmap(cm, annot=True, fmt='d', cmap='magma',
+                xticklabels=le.classes_, yticklabels=le.classes_,
+                ax=axes[0], cbar=False)
     axes[0].set_title(f"Confusion Matrix (Acc: {accuracy_score(y_test, preds):.2%})")
     importances = clf.feature_importances_
     plt.tight_layout()
     report = classification_report(y_test, preds, target_names=le.classes_)
+    return fig, f"### 🏆 Results for {ds_name}\n```\n{report}\n```"
 def update_explorer(ds_name: str, split_name: str):
     assets = load_all_assets(ds_name)