wjnwjn59 committed on
Commit
863c992
·
1 Parent(s): da0fb40

fix depth error

Browse files
Files changed (3) hide show
  1. README.md +2 -2
  2. app.py +3 -3
  3. src/adaboost_core.py +87 -33
README.md CHANGED
@@ -26,7 +26,7 @@ This interactive demo showcases AdaBoost (Adaptive Boosting) algorithms for both
26
  ### AdaBoost Parameters
27
  - **Number of Estimators**: Control sequential learning steps (limited to 1000 for performance)
28
  - **Learning Rate**: Step size shrinkage for adaptive learning (0.0001-2.0)
29
- - **Max Depth**: Individual weak learner depth (default: 0, decision stumps work best)
30
  - **Base Estimator**: Decision tree with limited depth (weak learner)
31
 
32
  ### Visualizations
@@ -100,7 +100,7 @@ This interactive demo showcases AdaBoost (Adaptive Boosting) algorithms for both
100
 
101
  - **Number of Estimators**: Limited to 1000 for optimal performance in this demo
102
  - **Learning Rate**: Default 1.0 works well; lower rates (0.0001-0.1) create more conservative models, higher rates (1.0-2.0) for faster learning
103
- - **Max Depth**: Decision stumps (depth 0) typically optimal for AdaBoost
104
  - **Weak Learners**: Simple estimators work best to avoid overfitting
105
 
106
  ## 🎯 Use Cases
 
26
  ### AdaBoost Parameters
27
  - **Number of Estimators**: Control sequential learning steps (limited to 1000 for performance)
28
  - **Learning Rate**: Step size shrinkage for adaptive learning (0.0001-2.0)
29
+ - **Max Depth**: Individual weak learner depth (default: 1, decision stumps work best)
30
  - **Base Estimator**: Decision tree with limited depth (weak learner)
31
 
32
  ### Visualizations
 
100
 
101
  - **Number of Estimators**: Limited to 1000 for optimal performance in this demo
102
  - **Learning Rate**: Default 1.0 works well; lower rates (0.0001-0.1) create more conservative models, higher rates (1.0-2.0) for faster learning
103
+ - **Max Depth**: Decision stumps (depth 1) typically optimal for AdaBoost
104
  - **Weak Learners**: Simple estimators work best to avoid overfitting
105
 
106
  ## 🎯 Use Cases
app.py CHANGED
@@ -426,8 +426,8 @@ with gr.Blocks(theme="gstaff/sketch", css=vlai_template.custom_css, fill_width=T
426
  with gr.Row():
427
  max_depth = gr.Number(
428
  label="Max Depth (Base Estimator)",
429
- value=0, minimum=0, maximum=10, precision=0,
430
- info="Maximum depth of individual decision trees (0 = decision stumps with 1 split, ideal for AdaBoost)"
431
  )
432
 
433
  gr.Markdown("**📊 Data Split Configuration**")
@@ -482,7 +482,7 @@ with gr.Blocks(theme="gstaff/sketch", css=vlai_template.custom_css, fill_width=T
482
  - **📊 Feature Importance**: Displays which features are most influential across all estimators.
483
  - **🎯 Parameter Tuning**: Try different **number of estimators** (up to 1000) and **learning rate** (0.0001-2.0).
484
  - **⚡ Learning Rate**: Default 1.0 works well; lower values create more conservative models with better generalization.
485
- - **🌲 Decision Stumps**: Max depth 0 creates decision stumps (one split), which are ideal weak learners for AdaBoost.
486
  - **🎯 Adaptive Reweighting**: AdaBoost focuses on misclassified examples by increasing their weights.
487
  - **πŸ” Estimator Analysis**: Use the estimator selector to understand how each decision stump contributes to predictions.
488
  """)
 
426
  with gr.Row():
427
  max_depth = gr.Number(
428
  label="Max Depth (Base Estimator)",
429
+ value=1, minimum=1, maximum=10, precision=0,
430
+ info="Maximum depth of individual decision trees (1 = decision stumps, 2+ = deeper trees)"
431
  )
432
 
433
  gr.Markdown("**📊 Data Split Configuration**")
 
482
  - **📊 Feature Importance**: Displays which features are most influential across all estimators.
483
  - **🎯 Parameter Tuning**: Try different **number of estimators** (up to 1000) and **learning rate** (0.0001-2.0).
484
  - **⚡ Learning Rate**: Default 1.0 works well; lower values create more conservative models with better generalization.
485
+ - **🌲 Decision Stumps**: Max depth 1 creates decision stumps (one split), which are ideal weak learners for AdaBoost.
486
  - **🎯 Adaptive Reweighting**: AdaBoost focuses on misclassified examples by increasing their weights.
487
  - **πŸ” Estimator Analysis**: Use the estimator selector to understand how each decision stump contributes to predictions.
488
  """)
src/adaboost_core.py CHANGED
@@ -160,8 +160,8 @@ def run_adaboost_and_visualize(df, target_col, new_point_dict,
160
 
161
  if n_estimators < 1:
162
  return None, None, None, None, "Number of estimators must be ≥ 1.", None
163
- if max_depth is not None and max_depth < 0:
164
- return None, None, None, None, "Max depth must be ≥ 0.", None
165
  if learning_rate <= 0 or learning_rate > 2:
166
  return None, None, None, None, "Learning rate must be between 0 and 2.", None
167
 
@@ -173,7 +173,9 @@ def run_adaboost_and_visualize(df, target_col, new_point_dict,
173
 
174
  if problem_type == "classification":
175
  # For binary/multiclass classification
176
- base_estimator = DecisionTreeClassifier(max_depth=1 if max_depth == 0 else int(max_depth))
 
 
177
  try:
178
  # Try the new parameter name first (scikit-learn >= 1.2)
179
  model = AdaBoostClassifier(
@@ -193,7 +195,9 @@ def run_adaboost_and_visualize(df, target_col, new_point_dict,
193
  random_state=42
194
  )
195
  else:
196
- base_estimator = DecisionTreeRegressor(max_depth=1 if max_depth == 0 else int(max_depth))
 
 
197
  try:
198
  # Try the new parameter name first (scikit-learn >= 1.2)
199
  model = AdaBoostRegressor(
@@ -389,34 +393,78 @@ def create_manual_tree_plot(tree_index, feature_cols, problem_type, model_type,
389
  import random
390
  random.seed(tree_index) # Consistent trees for same index
391
 
392
- # Root node (decision stump - only one split)
 
 
 
 
 
 
 
 
 
 
 
393
  root_feature = random.choice(feature_cols) if feature_cols else "feature_0"
394
  root_threshold = round(random.uniform(0.1, 5.0), 2)
395
 
396
- # Positions for a decision stump (depth 0 - only root and two leaves)
397
- positions = {
398
- 'root': (0, 1),
399
- 'left': (-1, 0),
400
- 'right': (1, 0)
401
- }
402
-
403
- # Labels and colors for decision stump
404
- labels = {
405
- 'root': f"{root_feature}<br>≤ {root_threshold}<br>Weight: {weight:.3f}<br>Decision Stump",
406
- 'left': f"Leaf (≤)<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: {random.randint(20, 80)}",
407
- 'right': f"Leaf (>)<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: {random.randint(20, 80)}"
408
- }
409
-
410
- colors = {
411
- 'root': '#81C784', # Green for split node
412
- 'left': '#FFB74D', # Orange for left leaf
413
- 'right': '#FFB74D' # Orange for right leaf
414
- }
415
-
416
- # Draw edges for decision stump
417
- edges = [
418
- ('root', 'left'), ('root', 'right')
419
- ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
420
 
421
  edge_x, edge_y = [], []
422
  for parent, child in edges:
@@ -452,12 +500,18 @@ def create_manual_tree_plot(tree_index, feature_cols, problem_type, model_type,
452
  hovertext=labels[node_id]
453
  ))
454
 
 
 
 
 
 
 
455
  fig.update_layout(
456
- title=f"{model_type} Estimator {tree_index + 1} Structure - Decision Stump ({problem_type.title()})",
457
- xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-1.5, 1.5]),
458
- yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-0.5, 1.5]),
459
  plot_bgcolor="white",
460
- height=400,
461
  margin=dict(l=40, r=40, t=60, b=40),
462
  showlegend=False
463
  )
 
160
 
161
  if n_estimators < 1:
162
  return None, None, None, None, "Number of estimators must be ≥ 1.", None
163
+ if max_depth is not None and max_depth < 1:
164
+ return None, None, None, None, "Max depth must be ≥ 1.", None
165
  if learning_rate <= 0 or learning_rate > 2:
166
  return None, None, None, None, "Learning rate must be between 0 and 2.", None
167
 
 
173
 
174
  if problem_type == "classification":
175
  # For binary/multiclass classification
176
+ # Direct mapping: UI depth = actual depth, with minimum depth of 1 for AdaBoost
177
+ actual_depth = max(1, int(max_depth)) if max_depth >= 1 else 1
178
+ base_estimator = DecisionTreeClassifier(max_depth=actual_depth)
179
  try:
180
  # Try the new parameter name first (scikit-learn >= 1.2)
181
  model = AdaBoostClassifier(
 
195
  random_state=42
196
  )
197
  else:
198
+ # Direct mapping: UI depth = actual depth, with minimum depth of 1 for AdaBoost
199
+ actual_depth = max(1, int(max_depth)) if max_depth >= 1 else 1
200
+ base_estimator = DecisionTreeRegressor(max_depth=actual_depth)
201
  try:
202
  # Try the new parameter name first (scikit-learn >= 1.2)
203
  model = AdaBoostRegressor(
 
393
  import random
394
  random.seed(tree_index) # Consistent trees for same index
395
 
396
+ # Get the current model to determine actual depth
397
+ current_model = _get_current_model()
398
+ if current_model and hasattr(current_model, 'estimators_') and len(current_model.estimators_) > tree_index:
399
+ try:
400
+ actual_estimator = current_model.estimators_[tree_index]
401
+ actual_depth = actual_estimator.max_depth
402
+ except:
403
+ actual_depth = 1 # fallback to stump
404
+ else:
405
+ actual_depth = 1 # fallback to stump
406
+
407
+ # Root node
408
  root_feature = random.choice(feature_cols) if feature_cols else "feature_0"
409
  root_threshold = round(random.uniform(0.1, 5.0), 2)
410
 
411
+ # Create tree structure based on actual depth
412
+ if actual_depth == 1:
413
+ # Decision stump (depth 1 - only root and two leaves)
414
+ positions = {
415
+ 'root': (0, 1),
416
+ 'left': (-1, 0),
417
+ 'right': (1, 0)
418
+ }
419
+
420
+ labels = {
421
+ 'root': f"{root_feature}<br>≤ {root_threshold}<br>Weight: {weight:.3f}<br>Decision Stump",
422
+ 'left': f"Leaf (≤)<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: {random.randint(20, 80)}",
423
+ 'right': f"Leaf (>)<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: {random.randint(20, 80)}"
424
+ }
425
+
426
+ colors = {
427
+ 'root': '#81C784', # Green for split node
428
+ 'left': '#FFB74D', # Orange for left leaf
429
+ 'right': '#FFB74D' # Orange for right leaf
430
+ }
431
+
432
+ edges = [('root', 'left'), ('root', 'right')]
433
+ title_suffix = "Decision Stump"
434
+
435
+ else:
436
+ # Deeper tree (depth 2+)
437
+ positions = {
438
+ 'root': (0, 2),
439
+ 'left': (-1.5, 1),
440
+ 'right': (1.5, 1),
441
+ 'left_left': (-2.5, 0),
442
+ 'left_right': (-0.5, 0),
443
+ 'right_left': (0.5, 0),
444
+ 'right_right': (2.5, 0)
445
+ }
446
+
447
+ labels = {
448
+ 'root': f"{root_feature}<br>≤ {root_threshold}<br>Weight: {weight:.3f}<br>Depth: {actual_depth}",
449
+ 'left': f"{random.choice(feature_cols) if feature_cols else 'feature_1'}<br>≤ {round(random.uniform(0.1, 3.0), 2)}<br>Samples: 75",
450
+ 'right': f"{random.choice(feature_cols) if feature_cols else 'feature_2'}<br>≤ {round(random.uniform(0.1, 3.0), 2)}<br>Samples: 75",
451
+ 'left_left': f"Leaf<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: 25",
452
+ 'left_right': f"Leaf<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: 50",
453
+ 'right_left': f"Leaf<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: 30",
454
+ 'right_right': f"Leaf<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: 45"
455
+ }
456
+
457
+ colors = {
458
+ 'root': '#81C784', 'left': '#81C784', 'right': '#81C784', # Green for split nodes
459
+ 'left_left': '#FFB74D', 'left_right': '#FFB74D', 'right_left': '#FFB74D', 'right_right': '#FFB74D' # Orange for leaves
460
+ }
461
+
462
+ edges = [
463
+ ('root', 'left'), ('root', 'right'),
464
+ ('left', 'left_left'), ('left', 'left_right'),
465
+ ('right', 'right_left'), ('right', 'right_right')
466
+ ]
467
+ title_suffix = f"Depth {actual_depth} Tree"
468
 
469
  edge_x, edge_y = [], []
470
  for parent, child in edges:
 
500
  hovertext=labels[node_id]
501
  ))
502
 
503
+ # Adjust layout based on tree depth
504
+ if actual_depth == 1:
505
+ x_range, y_range, height = [-1.5, 1.5], [-0.5, 1.5], 400
506
+ else:
507
+ x_range, y_range, height = [-3, 3], [-0.5, 2.5], 600
508
+
509
  fig.update_layout(
510
+ title=f"{model_type} Estimator {tree_index + 1} Structure - {title_suffix} ({problem_type.title()})",
511
+ xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=x_range),
512
+ yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=y_range),
513
  plot_bgcolor="white",
514
+ height=height,
515
  margin=dict(l=40, r=40, t=60, b=40),
516
  showlegend=False
517
  )