Spaces:

VLAI-AIVN
/

AIO2025M03_Demo_AdaBoost

Sleeping

App Files Files Community

wjnwjn59 committed on Sep 17, 2025

Commit

677dc84

1 Parent(s): 863c992

remove redundant

Browse files

Files changed (1) hide show

src/xgboost_core.py +0 -938

src/xgboost_core.py DELETED Viewed

@@ -1,938 +0,0 @@
-import pandas as pd
-import numpy as np
-# XGBoost is required for this demo
-try:
-    import xgboost as xgb
-    XGBOOST_AVAILABLE = True
-    print("✅ XGBoost loaded successfully!")
-except ImportError:
-    print("❌ XGBoost is required for this demo!")
-    print("Please install XGBoost using: pip install xgboost>=2.0.0")
-    raise ImportError("XGBoost is required for this XGBoost demo. Please install it using: pip install xgboost>=2.0.0")
-from sklearn.preprocessing import LabelEncoder
-from sklearn.datasets import (
-    load_iris, load_wine, load_diabetes, load_breast_cancer
-)
-from sklearn.model_selection import train_test_split
-import plotly.graph_objects as go
-import plotly.express as px
-_current_model = None
-def _get_current_model():
-    return _current_model
-def _set_current_model(model):
-    global _current_model
-    _current_model = model
-def load_data(file_obj=None, dataset_choice="Iris"):
-    if file_obj is not None:
-        if file_obj.name.endswith(".csv"):
-            encodings = ["utf-8", "latin-1", "iso-8859-1", "cp1252"]
-            for encoding in encodings:
-                try:
-                    return pd.read_csv(file_obj.name, encoding=encoding)
-                except UnicodeDecodeError:
-                    continue
-            return pd.read_csv(file_obj.name, encoding="utf-8", errors="replace")
-        elif file_obj.name.endswith((".xlsx", ".xls")):
-            return pd.read_excel(file_obj.name)
-        else:
-            raise ValueError("Unsupported format. Upload CSV or Excel files.")
-    datasets = {
-        "Iris": lambda: _sklearn_to_df(load_iris()),
-        "Wine": lambda: _sklearn_to_df(load_wine()),
-        "Breast Cancer": lambda: _sklearn_to_df(load_breast_cancer()),
-        "Diabetes": lambda: _sklearn_to_df(load_diabetes()),
-        "Titanic": lambda: _load_titanic_data(),
-    }
-    if dataset_choice not in datasets:
-        raise ValueError(f"Unknown dataset: {dataset_choice}")
-    return datasets[dataset_choice]()
-def _sklearn_to_df(data):
-    df = pd.DataFrame(data.data, columns=getattr(data, "feature_names", None))
-    if df.columns.isnull().any():
-        df.columns = [f"f{i}" for i in range(df.shape[1])]
-    df["target"] = data.target
-    return df
-def _load_titanic_data():
-    try:
-        df = pd.read_csv("data/titanic_dataset.csv")
-        df = df.dropna()
-        df['sex'] = df['sex'].map({'male': 0, 'female': 1})
-        df['embarked'] = df['embarked'].map({'S': 0, 'C': 1, 'Q': 2})
-        return df
-    except FileNotFoundError:
-        raise ValueError("Titanic dataset not found. Please ensure 'data/titanic_dataset.csv' exists.")
-def determine_problem_type(df, target_col):
-    if target_col not in df.columns:
-        return "classification"
-    target = df[target_col]
-    unique_vals = target.nunique()
-    if target.dtype == "object" or unique_vals <= min(20, len(target) * 0.1):
-        return "classification"
-    return "regression"
-def create_input_components(df, target_col):
-    feature_cols = [c for c in df.columns if c != target_col]
-    components = []
-    for col in feature_cols:
-        data = df[col]
-        if data.dtype == "object":
-            uniq = sorted(map(str, data.dropna().unique()))
-            if not uniq:
-                uniq = ["N/A"]
-            components.append(
-                {"name": col, "type": "dropdown", "choices": uniq, "value": uniq[0]}
-            )
-        else:
-            val = pd.to_numeric(data, errors="coerce").dropna().mean()
-            val = 0.0 if pd.isna(val) else float(val)
-            components.append(
-                {
-                    "name": col,
-                    "type": "number",
-                    "value": round(val, 3),
-                    "minimum": None,
-                    "maximum": None,
-                }
-            )
-    return components
-def preprocess_data(df, target_col, new_point_dict):
-    feature_cols = [c for c in df.columns if c != target_col]
-    X = df[feature_cols].copy()
-    y = df[target_col].copy()
-    encoders = {}
-    for col in feature_cols:
-        if X[col].dtype == "object":
-            le = LabelEncoder()
-            X[col] = le.fit_transform(X[col].astype(str))
-            encoders[col] = le
-        elif X[col].dtype == "bool":
-            X[col] = X[col].astype(int)
-        else:
-            X[col] = pd.to_numeric(X[col], errors="coerce").fillna(0.0)
-    if y.dtype == "object":
-        y = pd.Categorical(y).codes
-    elif y.dtype == "bool":
-        y = y.astype(int)
-    new_point = []
-    for col in feature_cols:
-        if col in new_point_dict:
-            if col in encoders:
-                val = str(new_point_dict[col])
-                try:
-                    enc_val = encoders[col].transform([val])[0]
-                except ValueError:
-                    enc_val = 0
-                new_point.append(enc_val)
-            else:
-                v = new_point_dict[col]
-                try:
-                    new_point.append(float(v))
-                except Exception:
-                    new_point.append(0.0)
-        else:
-            if col in encoders:
-                new_point.append(0)
-            else:
-                new_point.append(0.0)
-    new_point = np.array(new_point, dtype=float).reshape(1, -1)
-    return X, np.array(y), new_point, feature_cols, encoders
-def run_xgboost_and_visualize(df, target_col, new_point_dict,
-                             n_estimators, max_depth, min_child_weight,
-                             subsample, colsample_bytree, learning_rate, train_test_split_ratio=0.8, problem_type=None):
-    X, y, new_point, feature_cols, _ = preprocess_data(df, target_col, new_point_dict)
-    if problem_type is None:
-        problem_type = determine_problem_type(df, target_col)
-    if n_estimators < 1:
-        return None, None, None, None, "Number of estimators must be ≥ 1.", None
-    if max_depth is not None and max_depth < 0:
-        return None, None, None, None, "Max depth must be ≥ 0.", None
-    if min_child_weight < 1:
-        return None, None, None, None, "Min child weight must be ≥ 1.", None
-    if learning_rate <= 0 or learning_rate > 1:
-        return None, None, None, None, "Learning rate must be between 0 and 1.", None
-    n_estimators = min(int(n_estimators), 100)  # Limit to 100 trees
-    # Split data for loss tracking with user-defined ratio
-    test_size = 1.0 - train_test_split_ratio
-    X_train, X_val, y_train, y_val = train_test_split(X.values, y, test_size=test_size, random_state=42)
-    if problem_type == "classification":
-        # For binary/multiclass classification
-        model = xgb.XGBClassifier(
-            n_estimators=n_estimators,
-            max_depth=int(max_depth) if max_depth > 0 else 3,
-            min_child_weight=int(min_child_weight),
-            subsample=float(subsample),
-            colsample_bytree=float(colsample_bytree),
-            learning_rate=float(learning_rate),
-            random_state=42,
-            verbosity=0
-        )
-    else:
-        model = xgb.XGBRegressor(
-            n_estimators=n_estimators,
-            max_depth=int(max_depth) if max_depth > 0 else 3,
-            min_child_weight=int(min_child_weight),
-            subsample=float(subsample),
-            colsample_bytree=float(colsample_bytree),
-            learning_rate=float(learning_rate),
-            random_state=42,
-            verbosity=0
-        )
-    # Fit with early stopping to capture loss evolution
-    eval_set = [(X_train, y_train), (X_val, y_val)]
-    model.fit(X_train, y_train, eval_set=eval_set, verbose=False)
-    prediction = model.predict(new_point)[0]
-    _set_current_model(model)
-    # Calculate performance metrics
-    train_pred = model.predict(X_train)
-    val_pred = model.predict(X_val)
-    if problem_type == "classification":
-        from sklearn.metrics import accuracy_score
-        train_performance = accuracy_score(y_train, train_pred)
-        val_performance = accuracy_score(y_val, val_pred)
-        performance_metric = "Accuracy"
-    else:
-        from sklearn.metrics import mean_squared_error
-        train_performance = mean_squared_error(y_train, train_pred)
-        val_performance = mean_squared_error(y_val, val_pred)
-        performance_metric = "MSE"
-    # Store split info for aggregation display
-    split_info = {
-        "train_size": len(X_train),
-        "val_size": len(X_val),
-        "train_ratio": train_test_split_ratio,
-        "val_ratio": 1.0 - train_test_split_ratio,
-        "train_performance": train_performance,
-        "val_performance": val_performance,
-        "performance_metric": performance_metric
-    }
-    boosting_progress_fig = create_xgboost_progress_chart(model, new_point[0], problem_type, target_col, df)
-    loss_chart_fig = create_loss_chart(model)
-    importance_fig = create_feature_importance_plot(model, feature_cols)
-    prediction_details = create_prediction_details(model, new_point[0], feature_cols, target_col, prediction, problem_type)
-    summary = create_algorithm_summary(model, problem_type, n_estimators, max_depth, min_child_weight, subsample, colsample_bytree, learning_rate, feature_cols)
-    aggregation_display = create_xgboost_aggregation_display(model, new_point[0], problem_type, target_col, df, split_info)
-    return boosting_progress_fig, loss_chart_fig, importance_fig, prediction, prediction_details, summary, aggregation_display
-def create_loss_chart(model):
-    """Create a loss chart showing training and validation loss evolution"""
-    try:
-        # Get the evaluation results for XGBoost
-        evals_result = model.evals_result()
-        fig = go.Figure()
-        # Plot training loss
-        if 'validation_0' in evals_result:
-            train_metric = list(evals_result['validation_0'].keys())[0]
-            train_loss = evals_result['validation_0'][train_metric]
-            epochs = list(range(1, len(train_loss) + 1))
-            fig.add_trace(go.Scatter(
-                x=epochs,
-                y=train_loss,
-                mode='lines+markers',
-                name='Training Loss',
-                line=dict(color='#FF6B6B', width=2),
-                marker=dict(size=6)
-            ))
-        # Plot validation loss
-        if 'validation_1' in evals_result:
-            val_metric = list(evals_result['validation_1'].keys())[0]
-            val_loss = evals_result['validation_1'][val_metric]
-            epochs = list(range(1, len(val_loss) + 1))
-            fig.add_trace(go.Scatter(
-                x=epochs,
-                y=val_loss,
-                mode='lines+markers',
-                name='Validation Loss',
-                line=dict(color='#4ECDC4', width=2),
-                marker=dict(size=6)
-            ))
-        fig.update_layout(
-            title="XGBoost Training Progress - Loss Evolution",
-            xaxis_title="Boosting Round (Tree)",
-            yaxis_title="Loss",
-            plot_bgcolor="white",
-            height=400,
-            legend=dict(
-                yanchor="top",
-                y=0.99,
-                xanchor="right",
-                x=0.99
-            ),
-            margin=dict(l=40, r=40, t=60, b=40)
-        )
-        fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
-        fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
-        return fig
-    except Exception as e:
-        # Fallback if no loss data is available
-        fig = go.Figure()
-        fig.add_annotation(
-            text=f"Loss tracking not available<br>Error: {str(e)}<br>Run training to see loss evolution",
-            xref="paper", yref="paper",
-            x=0.5, y=0.5, xanchor='center', yanchor='middle',
-            showarrow=False,
-            font=dict(size=14)
-        )
-        fig.update_layout(
-            title="XGBoost Training Progress - Loss Evolution",
-            height=400,
-            plot_bgcolor="white"
-        )
-        return fig
-def create_xgboost_progress_chart(model, new_point, problem_type, target_col=None, df=None):
-    """Create a chart showing how XGBoost prediction evolves with each tree"""
-    if problem_type == "classification":
-        # For classification, show probability evolution
-        try:
-            # Get number of trees
-            n_trees = model.n_estimators
-            # Create a temporary model with varying n_estimators to see progression
-            iteration_data = []
-            # We'll use the model's predict_proba method with ntree_limit
-            # Sample every few trees for visualization if more than 50 trees
-            if n_trees <= 50:
-                tree_indices = list(range(1, n_trees + 1))
-            else:
-                # Sample 50 evenly spaced trees for visualization
-                tree_indices = [int(i) for i in np.linspace(1, n_trees, min(50, n_trees))]
-            for i in tree_indices:
-                try:
-                    # For XGBoost, we can't easily get staged predictions like sklearn
-                    # So we'll create new models with fewer estimators
-                    temp_model = type(model)(
-                        **{k: v for k, v in model.get_params().items() if k != 'n_estimators'},
-                        n_estimators=i,
-                        random_state=42
-                    )
-                    # We need the original training data for this approach
-                    # For simplicity, we'll approximate using the full model
-                    if hasattr(model, 'predict_proba'):
-                        proba = model.predict_proba(new_point.reshape(1, -1), ntree_limit=i)[0]
-                        pred = model.predict(new_point.reshape(1, -1), ntree_limit=i)[0]
-                        max_prob = np.max(proba)
-                        predicted_class = int(pred)
-                    else:
-                        # Fallback
-                        proba = model.predict_proba(new_point.reshape(1, -1))[0]
-                        pred = model.predict(new_point.reshape(1, -1))[0]
-                        max_prob = np.max(proba)
-                        predicted_class = int(pred)
-                    iteration_data.append({
-                        'iteration': i,
-                        'prediction_class': predicted_class,
-                        'confidence': max_prob
-                    })
-                except:
-                    # If ntree_limit doesn't work, use full prediction
-                    proba = model.predict_proba(new_point.reshape(1, -1))[0]
-                    pred = model.predict(new_point.reshape(1, -1))[0]
-                    max_prob = np.max(proba)
-                    predicted_class = int(pred)
-                    iteration_data.append({
-                        'iteration': i,
-                        'prediction_class': predicted_class,
-                        'confidence': max_prob
-                    })
-            # Create line chart
-            fig = go.Figure()
-            iterations = [data['iteration'] for data in iteration_data]
-            confidences = [data['confidence'] for data in iteration_data]
-            predictions = [data['prediction_class'] for data in iteration_data]
-            # Color mapping for different classes
-            colors = ['#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FECA57', '#FF9FF3', '#54A0FF', '#5F27CD', '#00D2D3', '#FF9F43']
-            # Group points by class for better visualization
-            class_data = {}
-            for iter_num, conf, pred_class in zip(iterations, confidences, predictions):
-                if pred_class not in class_data:
-                    class_data[pred_class] = {'iterations': [], 'confidences': []}
-                class_data[pred_class]['iterations'].append(iter_num)
-                class_data[pred_class]['confidences'].append(conf)
-            # Plot lines for each class
-            for class_idx, data in class_data.items():
-                color = colors[class_idx % len(colors)]
-                fig.add_trace(go.Scatter(
-                    x=data['iterations'],
-                    y=data['confidences'],
-                    mode='lines+markers',
-                    name=f'Class {class_idx}',
-                    line=dict(color=color, width=3),
-                    marker=dict(size=8, symbol='circle'),
-                    hovertemplate=f'<b>Tree %{{x}}</b><br>Class {class_idx}<br>Confidence: %{{y:.3f}}<extra></extra>'
-                ))
-            fig.update_layout(
-                title="XGBoost Progress: How Prediction Confidence Evolves",
-                xaxis_title="Tree Number",
-                yaxis_title="Prediction Confidence",
-                plot_bgcolor="white",
-                height=450,
-                legend=dict(
-                    yanchor="top",
-                    y=0.99,
-                    xanchor="right",
-                    x=0.99
-                ),
-                margin=dict(l=40, r=40, t=60, b=40)
-            )
-        except Exception as e:
-            # Fallback to simple visualization
-            fig = go.Figure()
-            fig.add_annotation(
-                text=f"Classification Progress Visualization<br>Final Prediction: {model.predict(new_point.reshape(1, -1))[0]}<br>Model trained with {model.n_estimators} trees",
-                xref="paper", yref="paper",
-                x=0.5, y=0.5, xanchor='center', yanchor='middle',
-                showarrow=False,
-                font=dict(size=14)
-            )
-            fig.update_layout(
-                title="XGBoost Progress: Classification Results",
-                height=450,
-                plot_bgcolor="white"
-            )
-    else:  # Regression
-        try:
-            # For regression, show prediction value evolution
-            n_trees = model.n_estimators
-            iteration_data = []
-            # Sample trees for visualization efficiency
-            if n_trees <= 50:
-                tree_indices = list(range(1, n_trees + 1))
-            else:
-                # Sample 50 evenly spaced trees for visualization
-                tree_indices = [int(i) for i in np.linspace(1, n_trees, min(50, n_trees))]
-            for i in tree_indices:
-                try:
-                    pred = model.predict(new_point.reshape(1, -1), ntree_limit=i)[0]
-                except:
-                    pred = model.predict(new_point.reshape(1, -1))[0]
-                iteration_data.append({
-                    'iteration': i,
-                    'prediction': pred
-                })
-            iterations = [data['iteration'] for data in iteration_data]
-            predictions = [data['prediction'] for data in iteration_data]
-            fig = go.Figure()
-            fig.add_trace(go.Scatter(
-                x=iterations,
-                y=predictions,
-                mode='lines+markers',
-                name='Prediction Value',
-                line=dict(color='#FF6B6B', width=3),
-                marker=dict(size=8, symbol='circle'),
-                hovertemplate='<b>Tree %{x}</b><br>Prediction: %{y:.3f}<extra></extra>'
-            ))
-            # Add final prediction line
-            final_pred = predictions[-1] if predictions else 0
-            fig.add_hline(
-                y=final_pred,
-                line_dash="dash",
-                line_color="gray",
-                annotation_text=f"Final: {final_pred:.3f}",
-                annotation_position="right"
-            )
-            fig.update_layout(
-                title="XGBoost Progress: How Prediction Value Evolves",
-                xaxis_title="Tree Number",
-                yaxis_title="Prediction Value",
-                plot_bgcolor="white",
-                height=450,
-                margin=dict(l=40, r=40, t=60, b=40)
-            )
-        except Exception as e:
-            # Fallback
-            fig = go.Figure()
-            final_pred = model.predict(new_point.reshape(1, -1))[0]
-            fig.add_annotation(
-                text=f"Regression Progress Visualization<br>Final Prediction: {final_pred:.3f}<br>Model trained with {model.n_estimators} trees",
-                xref="paper", yref="paper",
-                x=0.5, y=0.5, xanchor='center', yanchor='middle',
-                showarrow=False,
-                font=dict(size=14)
-            )
-            fig.update_layout(
-                title="XGBoost Progress: Regression Results",
-                height=450,
-                plot_bgcolor="white"
-            )
-    fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
-    fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='lightgray')
-    return fig
-def create_individual_tree_visualization(model, tree_index, feature_cols, problem_type):
-    """Create visualization of individual XGBoost tree"""
-    try:
-        # Get actual XGBoost tree structure
-        return create_xgboost_tree_plot(model, tree_index, feature_cols, problem_type)
-    except Exception as e:
-        # Fallback visualization
-        fig = go.Figure()
-        fig.add_annotation(
-            text=f"XGBoost Tree {tree_index + 1} Visualization<br>Unable to extract tree structure<br>Error: {str(e)}",
-            xref="paper", yref="paper",
-            x=0.5, y=0.5, xanchor='center', yanchor='middle',
-            showarrow=False,
-            font=dict(size=14)
-        )
-        fig.update_layout(
-            title=f"XGBoost Tree {tree_index + 1} Structure",
-            height=500,
-            plot_bgcolor="white"
-        )
-        return fig
-def create_xgboost_tree_plot(model, tree_index, feature_cols, problem_type):
-    """Create tree visualization for XGBoost models"""
-    try:
-        # Try to use XGBoost's built-in tree structure if available
-        booster = model.get_booster()
-        tree_dump = booster.get_dump(dump_format='json')[tree_index]
-        import json
-        tree_dict = json.loads(tree_dump)
-        return create_tree_plot_from_dict(tree_dict, tree_index, feature_cols, problem_type, "XGBoost")
-    except Exception as e:
-        # Fallback to manual tree creation
-        return create_manual_tree_plot(tree_index, feature_cols, problem_type, "XGBoost")
-# Removed sklearn tree plotting functions - XGBoost only
-def create_tree_plot_from_dict(tree_dict, tree_index, feature_cols, problem_type, model_type):
-    """Create tree plot from tree dictionary structure"""
-    fig = go.Figure()
-    # Calculate node positions
-    positions = {}
-    labels = {}
-    colors = {}
-    def assign_positions(node, node_id, x, y, width, level=0):
-        positions[node_id] = (x, y)
-        if "leaf" in node:
-            # Leaf node
-            if problem_type == "classification":
-                labels[node_id] = f"Leaf<br>Value: {node['leaf']:.3f}<br>Samples: {node.get('samples', 'N/A')}"
-            else:
-                labels[node_id] = f"Leaf<br>Prediction: {node['leaf']:.3f}<br>Samples: {node.get('samples', 'N/A')}"
-            colors[node_id] = "#FFB74D"  # Orange for leaves
-        else:
-            # Split node
-            split_name = node.get("split", "feature")
-            threshold = node.get("split_condition", 0)
-            samples = node.get("samples", "N/A")
-            labels[node_id] = f"{split_name}<br>≤ {threshold:.3f}<br>Samples: {samples}"
-            colors[node_id] = "#81C784"  # Green for split nodes
-            # Process children
-            if "children" in node and len(node["children"]) == 2:
-                child_width = width / 2
-                left_child_id = f"{node_id}_L"
-                right_child_id = f"{node_id}_R"
-                assign_positions(node["children"][0], left_child_id, x - child_width/2, y - 1, child_width, level + 1)
-                assign_positions(node["children"][1], right_child_id, x + child_width/2, y - 1, child_width, level + 1)
-    # Start positioning from root
-    assign_positions(tree_dict, "root", 0, 0, 4)
-    # Create edges first (so they appear behind nodes)
-    edge_x, edge_y = [], []
-    for node_id, (x, y) in positions.items():
-        if node_id.endswith("_L") or node_id.endswith("_R"):
-            # This is a child node, draw edge to parent
-            parent_id = node_id.rsplit("_", 1)[0]
-            if parent_id in positions:
-                parent_x, parent_y = positions[parent_id]
-                edge_x.extend([parent_x, x, None])
-                edge_y.extend([parent_y, y, None])
-    # Add edges
-    if edge_x:
-        fig.add_trace(go.Scatter(
-            x=edge_x, y=edge_y,
-            mode='lines',
-            line=dict(color='gray', width=2),
-            showlegend=False,
-            hoverinfo='none'
-        ))
-    # Add nodes
-    node_x = [pos[0] for pos in positions.values()]
-    node_y = [pos[1] for pos in positions.values()]
-    node_colors = [colors[node_id] for node_id in positions.keys()]
-    node_labels = [labels[node_id] for node_id in positions.keys()]
-    fig.add_trace(go.Scatter(
-        x=node_x, y=node_y,
-        mode='markers+text',
-        marker=dict(
-            size=30,
-            color=node_colors,
-            line=dict(width=2, color='darkblue'),
-            symbol='circle'
-        ),
-        text=node_labels,
-        textposition='middle center',
-        textfont=dict(size=10, color='black'),
-        showlegend=False,
-        hoverinfo='text',
-        hovertext=node_labels
-    ))
-    fig.update_layout(
-        title=f"{model_type} Tree {tree_index + 1} Structure ({problem_type.title()})",
-        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
-        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
-        plot_bgcolor="white",
-        height=600,
-        margin=dict(l=40, r=40, t=60, b=40),
-        showlegend=False
-    )
-    return fig
-def create_manual_tree_plot(tree_index, feature_cols, problem_type, model_type):
-    """Create a manual tree visualization when tree structure is not accessible"""
-    fig = go.Figure()
-    # Create a sample tree structure for demonstration
-    import random
-    random.seed(tree_index)  # Consistent trees for same index
-    # Root node
-    root_feature = random.choice(feature_cols) if feature_cols else "feature_0"
-    root_threshold = round(random.uniform(0.1, 5.0), 2)
-    # Positions for a simple 3-level tree
-    positions = {
-        'root': (0, 2),
-        'left': (-1.5, 1),
-        'right': (1.5, 1),
-        'left_left': (-2.5, 0),
-        'left_right': (-0.5, 0),
-        'right_left': (0.5, 0),
-        'right_right': (2.5, 0)
-    }
-    # Labels and colors
-    labels = {
-        'root': f"{root_feature}<br>≤ {root_threshold}<br>Samples: 150",
-        'left': f"{random.choice(feature_cols) if feature_cols else 'feature_1'}<br>≤ {round(random.uniform(0.1, 3.0), 2)}<br>Samples: 75",
-        'right': f"{random.choice(feature_cols) if feature_cols else 'feature_2'}<br>≤ {round(random.uniform(0.1, 3.0), 2)}<br>Samples: 75",
-        'left_left': f"Leaf<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: 25",
-        'left_right': f"Leaf<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: 50",
-        'right_left': f"Leaf<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: 30",
-        'right_right': f"Leaf<br>Value: {round(random.uniform(-1, 1), 3)}<br>Samples: 45"
-    }
-    colors = {
-        'root': '#81C784', 'left': '#81C784', 'right': '#81C784',  # Green for split nodes
-        'left_left': '#FFB74D', 'left_right': '#FFB74D', 'right_left': '#FFB74D', 'right_right': '#FFB74D'  # Orange for leaves
-    }
-    # Draw edges
-    edges = [
-        ('root', 'left'), ('root', 'right'),
-        ('left', 'left_left'), ('left', 'left_right'),
-        ('right', 'right_left'), ('right', 'right_right')
-    ]
-    edge_x, edge_y = [], []
-    for parent, child in edges:
-        parent_pos = positions[parent]
-        child_pos = positions[child]
-        edge_x.extend([parent_pos[0], child_pos[0], None])
-        edge_y.extend([parent_pos[1], child_pos[1], None])
-    fig.add_trace(go.Scatter(
-        x=edge_x, y=edge_y,
-        mode='lines',
-        line=dict(color='gray', width=2),
-        showlegend=False,
-        hoverinfo='none'
-    ))
-    # Draw nodes
-    for node_id, (x, y) in positions.items():
-        fig.add_trace(go.Scatter(
-            x=[x], y=[y],
-            mode='markers+text',
-            marker=dict(
-                size=35,
-                color=colors[node_id],
-                line=dict(width=2, color='darkblue'),
-                symbol='circle'
-            ),
-            text=labels[node_id],
-            textposition='middle center',
-            textfont=dict(size=9, color='black'),
-            showlegend=False,
-            hoverinfo='text',
-            hovertext=labels[node_id]
-        ))
-    fig.update_layout(
-        title=f"{model_type} Tree {tree_index + 1} Structure ({problem_type.title()})",
-        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-3, 3]),
-        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False, range=[-0.5, 2.5]),
-        plot_bgcolor="white",
-        height=600,
-        margin=dict(l=40, r=40, t=60, b=40),
-        showlegend=False
-    )
-    return fig
-def get_individual_tree_visualization(model, tree_index, feature_cols, problem_type):
-    return create_individual_tree_visualization(model, tree_index, feature_cols, problem_type)
-def create_feature_importance_plot(model, feature_cols):
-    try:
-        importances = model.feature_importances_
-        order = np.argsort(importances)[::-1]
-        fig = go.Figure()
-        fig.add_trace(
-            go.Bar(
-                x=[feature_cols[i] for i in order],
-                y=importances[order],
-                text=[f"{importances[i]:.3f}" for i in order],
-                textposition="auto",
-                marker_color="lightcoral",
-                hovertemplate="<b>%{x}</b><br>Importance: %{y:.3f}<extra></extra>",
-            )
-        )
-        fig.update_layout(
-            title="XGBoost Feature Importance",
-            xaxis_title="Features",
-            yaxis_title="Importance",
-            plot_bgcolor="white",
-            height=400,
-            margin=dict(l=40, r=40, t=60, b=40),
-        )
-        fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")
-        fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor="lightgray")
-        return fig
-    except:
-        fig = go.Figure()
-        fig.add_annotation(
-            text="Feature importance not available",
-            xref="paper", yref="paper",
-            x=0.5, y=0.5, xanchor='center', yanchor='middle',
-            showarrow=False,
-            font=dict(size=14)
-        )
-        fig.update_layout(
-            title="XGBoost Feature Importance",
-            height=400,
-            plot_bgcolor="white"
-        )
-        return fig
def create_prediction_details(model, new_point, feature_cols, target_col, prediction, problem_type):
    """Return a one-line textual summary of a single prediction.

    Args:
        model: Fitted estimator. For classification it is asked for
            ``predict_proba`` and ``classes_``; it is not used otherwise.
        new_point: Sample to describe; must support ``reshape(1, -1)``.
        feature_cols: Unused; kept for interface compatibility.
        target_col: Unused; kept for interface compatibility.
        prediction: The value already predicted for ``new_point``.
        problem_type: ``"classification"`` selects the class/probability
            summary; any other value selects the regression summary.

    Returns:
        A string. For classification, the predicted class followed by the
        per-class probabilities (rounded to 3 decimals); if probabilities
        cannot be obtained, only the predicted class. For regression, the
        predicted value formatted with 3 decimals.
    """
    if problem_type != "classification":
        return f"Predicted Value: {prediction:.3f}"

    try:
        probabilities = model.predict_proba(new_point.reshape(1, -1))[0]
        classes = model.classes_
        # Convert NumPy scalars to native Python values so the rendered dict
        # reads "{0: 0.25}" rather than leaking NumPy scalar reprs.
        by_class = {
            (label.item() if hasattr(label, "item") else label): round(float(prob), 3)
            for label, prob in zip(classes, probabilities)
        }
        return f"Predicted Class: {int(prediction)} | Probabilities: {by_class}"
    except Exception:
        # Best-effort: fall back to the class alone when probabilities are
        # unavailable. KeyboardInterrupt/SystemExit are no longer swallowed.
        return f"Predicted Class: {int(prediction)}"
def create_algorithm_summary(model, problem_type, n_estimators, max_depth, min_child_weight, subsample, colsample_bytree, learning_rate, feature_cols):
    """Render the model's hyperparameters as a Markdown bullet summary.

    ``model`` is accepted but not used. ``problem_type`` is title-cased for
    the heading and the number of entries in ``feature_cols`` is reported as
    the feature count. Every line of the returned text is indented by four
    spaces.
    """
    rows = [
        f"**XGBoost {problem_type.title()} Model Summary:**",
        f"- Trees: {n_estimators}",
        f"- Max Depth: {max_depth}",
        f"- Min Child Weight: {min_child_weight}",
        f"- Subsample: {subsample}",
        f"- Column Sample by Tree: {colsample_bytree}",
        f"- Learning Rate: {learning_rate}",
        f"- Features: {len(feature_cols)}",
    ]
    pad = " " * 4
    # Leading newline and trailing indent mirror the block-literal layout.
    return "\n" + "".join(f"{pad}{row}\n" for row in rows) + pad
def _xgb_config_section(model):
    """Return the opening container plus the model-configuration section."""
    return f"""
            <div style='background:#F0F8FF;border-left:6px solid #4ECDC4;padding:14px 16px;border-radius:10px;'>
                <strong>🚀 XGBoost Ensemble Process</strong><br><br>
                <div style='margin:8px 0;'>
                    <strong>📊 Model Configuration:</strong><br>
                    • {model.n_estimators} trees in ensemble<br>
                    • Max depth: {model.max_depth}<br>
                    • Learning rate: {model.learning_rate}<br>
                </div>"""


def _xgb_split_section(split_info):
    """Return the data-split/performance section, or "" without split info."""
    if not split_info:
        return ""
    return f"""
                <div style='margin:8px 0;'>
                    <strong>📊 Data Split Information:</strong><br>
                    • Training Set: {split_info['train_size']} samples ({split_info['train_ratio']:.1%})<br>
                    • Validation Set: {split_info['val_size']} samples ({split_info['val_ratio']:.1%})<br>
                </div>
                <div style='margin:8px 0;'>
                    <strong>📈 Model Performance:</strong><br>
                    • Training {split_info['performance_metric']}: <span style='background:#E8F5E8;padding:2px 6px;border-radius:4px;'><strong>{split_info['train_performance']:.4f}</strong></span><br>
                    • Validation {split_info['performance_metric']}: <span style='background:#E8F5E8;padding:2px 6px;border-radius:4px;'><strong>{split_info['val_performance']:.4f}</strong></span><br>
                </div>"""


def _xgb_result_section(model, prediction_bullets, regularization_step, combine_verb):
    """Return the final-prediction and process sections and close the container."""
    bullets = "\n".join(f"                    {bullet}<br>" for bullet in prediction_bullets)
    return f"""
                <div style='margin:8px 0;'>
                    <strong>🎯 Final Prediction:</strong><br>
{bullets}
                </div>
                <div style='margin:8px 0;'>
                    <strong>⚡ XGBoost Process:</strong><br>
                    1. Each tree corrects errors from previous trees<br>
                    2. Gradient-based optimization for efficient learning<br>
                    3. {regularization_step}<br>
                    4. Final prediction {combine_verb} all {model.n_estimators} trees<br>
                </div>
            </div>
            """


def create_xgboost_aggregation_display(model, new_point, problem_type, target_col=None, df=None, split_info=None):
    """Create HTML display showing XGBoost ensemble aggregation process.

    Args:
        model: Fitted estimator exposing ``predict``, ``n_estimators``,
            ``max_depth`` and ``learning_rate``; for classification also
            ``predict_proba``.
        new_point: Sample to predict; must support ``reshape(1, -1)``.
        problem_type: ``"classification"`` renders the predicted class and
            per-class probabilities; any other value renders a numeric value.
        target_col: Unused; kept for interface compatibility.
        df: Unused; kept for interface compatibility.
        split_info: Optional mapping with the keys ``train_size``,
            ``train_ratio``, ``val_size``, ``val_ratio``,
            ``performance_metric``, ``train_performance`` and
            ``val_performance``. When falsy, the split/performance section is
            omitted.

    Returns:
        An HTML string. Failures never propagate as exceptions; they yield an
        error panel containing the HTML-escaped exception message.
    """
    try:
        sample = new_point.reshape(1, -1)
        prediction = model.predict(sample)[0]
        if problem_type == "classification":
            probabilities = model.predict_proba(sample)[0]

        html_content = _xgb_config_section(model)
        html_content += _xgb_split_section(split_info)

        if problem_type == "classification":
            # Probabilities are keyed by positional index, not class label.
            prob_by_index = {i: f"{p:.3f}" for i, p in enumerate(probabilities)}
            bullets = [
                f"• Predicted Class: <span style='background:#FFE5B4;padding:2px 6px;border-radius:4px;'><strong>{int(prediction)}</strong></span>",
                f"• Class Probabilities: {prob_by_index}",
            ]
            html_content += _xgb_result_section(
                model, bullets, "Regularization prevents overfitting", "combines"
            )
        else:
            bullets = [
                f"• Predicted Value: <span style='background:#FFE5B4;padding:2px 6px;border-radius:4px;'><strong>{prediction:.3f}</strong></span>",
            ]
            html_content += _xgb_result_section(
                model, bullets, "Advanced regularization techniques", "aggregates"
            )
        return html_content
    except Exception as e:
        # Local import keeps this block self-contained. The message is
        # escaped because exception text may contain markup characters.
        from html import escape

        return f"""
        <div style='background:#FFF4F4;border-left:6px solid #C4314B;padding:14px 16px;border-radius:10px;'>
            <strong>🚀 XGBoost Process</strong><br><br>
            Error generating aggregation display: {escape(str(e))}
        </div>
        """