Spaces:

VLAI-AIVN
/

AIO2025M03_DEMO_DECISION_TREE

Running

App Files Files Community

wjnwjn59 committed on Aug 5

Commit

dbb8268

1 Parent(s): 8a68df4

first init

Browse files

Files changed (11) hide show

README.md +66 -6
__pycache__/vlai_template.cpython-312.pyc +0 -0
app.py +400 -0
requirements.txt +5 -0
src/__init__.py +0 -0
src/__pycache__/__init__.cpython-312.pyc +0 -0
src/__pycache__/decision_tree_core.cpython-312.pyc +0 -0
src/decision_tree_core.py +364 -0
static/aivn_logo.png +0 -0
static/vlai_logo.png +0 -0
vlai_template.py +142 -0

README.md CHANGED Viewed

@@ -1,12 +1,72 @@
 ---
-title: AIO2025M03 DEMO DECISION TREE
-emoji: 🔥
-colorFrom: blue
-colorTo: green
 sdk: gradio
-sdk_version: 5.40.0
 app_file: app.py
 pinned: false
 ---
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

 ---
+title: AIO2025M03 DEMO Decision Tree
+emoji: 🌳
+colorFrom: green
+colorTo: purple
 sdk: gradio
+sdk_version: 5.38.2
 app_file: app.py
 pinned: false
 ---
+# 🌳 Decision Tree Interactive Demo
+An interactive web application demonstrating Decision Tree algorithms with real-time visualization and educational features.
+## ✨ Features
+- **📊 Multiple Datasets**: 4 built-in datasets (Iris, Wine, Breast Cancer, Diabetes)
+- **🎮 Interactive Interface**: Real-time parameter adjustment and prediction
+- **🌳 Tree Visualization**: Interactive decision tree structure with zoom capabilities
+- **📊 Feature Importance**: Visual representation of feature importance scores
+- **🎛️ Flexible Parameters**: Adjustable max depth, split criteria, and leaf constraints
+- **📱 Responsive Design**: Works on desktop and mobile devices
+## 🚀 Quick Start
+### Local Installation
+```bash
+git clone <repository-url>
+cd AIO2025M03_DEMO_DECISION_TREE
+pip install -r requirements.txt
+python app.py
+```
+### Usage
+1. **Select Dataset**: Choose from pre-loaded datasets or upload your own CSV/Excel file
+2. **Configure Target**: Select target column and problem type (classification/regression)
+3. **Set Parameters**: Adjust max depth, split criteria, and leaf constraints
+4. **Input New Point**: Enter feature values for prediction
+5. **Run Prediction**: Get results with interactive tree visualization
+## 🧠 Technical Highlights
+- **Tree Structure**: Interactive visualization of decision tree nodes and splits
+- **Feature Importance**: Automatic calculation and visualization of feature importance scores
+- **Auto-Detection**: Automatically determines classification vs regression problems
+- **Error Handling**: Robust validation and user-friendly error messages
+## 📋 Requirements
+- Python 3.8+
+- Gradio 5.38+
+- Scikit-learn
+- Pandas
+- NumPy
+- Plotly
+## 🎓 Educational Value
+Perfect for:
+- Understanding Decision Tree algorithm mechanics
+- Learning about tree-based splitting criteria
+- Exploring feature importance and tree pruning
+- Comparing classification vs regression approaches
+## 📄 License
+Educational use for AIO2025 course materials.
+---
+**Live Demo**: [Decision Tree Demo](https://huggingface.co/spaces/VLAI-AIVN/AIO2025M03_DEMO_DECISION_TREE)

__pycache__/vlai_template.cpython-312.pyc ADDED Viewed

Binary file (5.12 kB). View file

app.py ADDED Viewed

	@@ -0,0 +1,400 @@

+import gradio as gr
+import pandas as pd
+from src import decision_tree_core
+import vlai_template
+# Global state
+# Holds the most recently loaded DataFrame. It is written by
+# load_and_configure_data() and read by update_configuration() and
+# execute_prediction(), which use it instead of the preview table they receive.
+current_dataframe = None
+# Dataset configurations
+# Maps each sample dataset name (also used as the dropdown choices) to the
+# target column and problem type that are pre-selected when it is loaded.
+SAMPLE_DATA_CONFIG = {
+    "Iris": {"target_column": "target", "problem_type": "classification"},
+    "Wine": {"target_column": "target", "problem_type": "classification"},
+    "Breast Cancer": {"target_column": "target", "problem_type": "classification"},
+    "Diabetes": {"target_column": "target", "problem_type": "regression"},
+}
+# JavaScript handed to gr.Blocks(js=...). When the URL has no `__theme` query
+# parameter it sets `__theme=light` and assigns the new query string back to
+# window.location.search.
+force_light_theme_js = """
+() => {
+  const params = new URLSearchParams(window.location.search);
+  if (!params.has('__theme')) {
+    params.set('__theme', 'light');
+    window.location.search = params.toString();
+  }
+}
+"""
+def validate_config(df, target_col, problem_type):
+    """Validate target column and problem type compatibility"""
+    if not target_col or target_col not in df.columns:
+        return False, "❌ Please select a valid target column from the dropdown."
+    if not problem_type:
+        return False, "❌ Please select either Classification or Regression as problem type."
+    target_series = df[target_col]
+    unique_vals = target_series.nunique()
+    if problem_type == "classification":
+        if unique_vals > 50:
+            return False, f"⚠️ Too many classes ({unique_vals}). Consider using Regression instead."
+        if target_series.isnull().any():
+            return False, "⚠️ Target column contains missing values. Please clean your data."
+    elif problem_type == "regression":
+        if target_series.dtype == 'object':
+            return False, "⚠️ Text values detected in target. Use Classification for categories."
+        if unique_vals < 5:
+            return False, f"⚠️ Too few unique values ({unique_vals}). Consider using Classification."
+    return True, f"\n✅ Configuration is valid! Ready for {unique_vals} {'classes' if problem_type == 'classification' else 'values'}."
+def get_status_message(is_sample, dataset_choice, target_col, problem_type, is_valid, validation_msg):
+    """Generate status message"""
+    if is_sample:
+        return f"✅ **Sample Dataset**: {dataset_choice} | **Target**: {target_col} | **Type**: {problem_type.title()}"
+    elif target_col and problem_type:
+        status_icon = "✅" if is_valid else "⚠️"
+        return f"{status_icon} **Custom Data** | **Target**: {target_col} | **Type**: {problem_type.title()} | {validation_msg}"
+    else:
+        return "📁 **Custom data uploaded!** 👆 Please select target column and problem type above to continue."
+def load_and_configure_data(file_obj=None, dataset_choice="Iris"):
+    """Load data and configure target/problem type
+
+    Loads either the uploaded file or the named sample dataset, stores it in
+    the module-level ``current_dataframe`` and returns a flat list of
+    22 values for the bound Gradio outputs, in this order: data preview,
+    target dropdown, problem-type dropdown, status text, 16 feature-input
+    updates, inputs-group visibility update, input status text.
+
+    Any exception while loading resets ``current_dataframe`` to ``None`` and
+    returns an "empty" version of the same 22-value list.
+    """
+    global current_dataframe
+    try:
+        df = decision_tree_core.load_data(file_obj, dataset_choice)
+        current_dataframe = df
+        target_options = df.columns.tolist()
+        # No uploaded file means one of the bundled sample datasets was requested.
+        is_sample = file_obj is None
+        if is_sample:
+            # Sample datasets come with a preset target column and problem type.
+            config = SAMPLE_DATA_CONFIG.get(dataset_choice, {})
+            target_col = config.get("target_column")
+            problem_type = config.get("problem_type")
+        else:
+            # Custom uploads start unconfigured; the user picks both afterwards.
+            target_col = None
+            problem_type = None
+        # Validate and generate status
+        if target_col and problem_type:
+            is_valid, validation_msg = validate_config(df, target_col, problem_type)
+            status_msg = get_status_message(is_sample, dataset_choice, target_col, problem_type, is_valid, validation_msg)
+        else:
+            status_msg = get_status_message(is_sample, dataset_choice, target_col, problem_type, False, "")
+        # Generate input components
+        # Default: all 16 pre-built input slots hidden until a valid configuration exists.
+        input_updates = [gr.update(visible=False)] * 16
+        inputs_visible = gr.update(visible=False)
+        input_status = "⚙️ Configure target and problem type above to enable feature inputs."
+        # is_valid / validation_msg are only bound when target_col and problem_type
+        # are both set, which the first two operands of this condition guarantee.
+        if target_col and problem_type and (not is_sample or is_valid):
+            try:
+                components_info = decision_tree_core.create_input_components(df, target_col)
+                # Only the first 16 features get an input slot; extra features are not shown.
+                for i in range(min(16, len(components_info))):
+                    comp_info = components_info[i]
+                    if comp_info['type'] == 'number':
+                        update_params = {
+                            'visible': True, 'label': comp_info['name'], 'value': comp_info['value']
+                        }
+                        # Bounds are forwarded only when the component spec provides them.
+                        if comp_info['minimum'] is not None:
+                            update_params['minimum'] = comp_info['minimum']
+                        if comp_info['maximum'] is not None:
+                            update_params['maximum'] = comp_info['maximum']
+                        input_updates[i] = gr.update(**update_params)
+                    else:
+                        # Non-numeric specs carry a list of choices instead of bounds.
+                        input_updates[i] = gr.update(
+                            visible=True, label=comp_info['name'],
+                            choices=comp_info['choices'], value=comp_info['value']
+                        )
+                inputs_visible = gr.update(visible=True)
+                input_status = f"📝 **Ready!** Enter values for {len(components_info)} features below, then click Run Prediction! | {validation_msg}"
+            except Exception as e:
+                # Keep the loaded data and status; only the inputs area reports the failure.
+                input_status = f"❌ Error generating inputs: {str(e)}"
+        return [df.head(5).round(2), gr.Dropdown(choices=target_options, value=target_col),
+                gr.Dropdown(value=problem_type), status_msg] + input_updates + [inputs_visible, input_status]
+    except Exception as e:
+        # Loading failed: drop any previously loaded data so later handlers see "no data".
+        current_dataframe = None
+        empty_updates = [pd.DataFrame(), gr.Dropdown(choices=[], value=None),
+                        gr.Dropdown(value=None), f"❌ **Error loading data**: {str(e)} | Please try a different file or dataset."]
+        return empty_updates + [gr.update(visible=False)] * 16 + [gr.update(visible=False), "No data loaded."]
+def update_criterion_choices(problem_type):
+    """Update criterion choices based on problem type"""
+    if problem_type == "classification":
+        return gr.Dropdown(choices=["gini", "entropy", "log_loss"], value="gini")
+    else:
+        return gr.Dropdown(choices=["squared_error", "absolute_error", "friedman_mse", "poisson"], value="squared_error")
+def update_configuration(df_preview, target_col, problem_type):
+    """Update configuration when target or problem type changes"""
+    global current_dataframe
+    df = current_dataframe
+    if df is None or df.empty:
+        return [gr.update(visible=False)] * 16 + [gr.update(visible=False), "No data available."]
+    if not target_col or not problem_type:
+        return [gr.update(visible=False)] * 16 + [gr.update(visible=False), "Select target column and problem type."]
+    try:
+        is_valid, validation_msg = validate_config(df, target_col, problem_type)
+        if not is_valid:
+            return [gr.update(visible=False)] * 16 + [gr.update(visible=False), f"⚠️ {validation_msg}"]
+        # Generate input components
+        components_info = decision_tree_core.create_input_components(df, target_col)
+        input_updates = [gr.update(visible=False)] * 16
+        for i in range(min(16, len(components_info))):
+            comp_info = components_info[i]
+            if comp_info['type'] == 'number':
+                # Không giới hạn min/max để cho phép user nhập giá trị ngoài phạm vi training data
+                update_params = {
+                    'visible': True, 'label': comp_info['name'], 'value': comp_info['value']
+                }
+                if comp_info['minimum'] is not None:
+                    update_params['minimum'] = comp_info['minimum']
+                if comp_info['maximum'] is not None:
+                    update_params['maximum'] = comp_info['maximum']
+                input_updates[i] = gr.update(**update_params)
+            else:
+                input_updates[i] = gr.update(
+                    visible=True, label=comp_info['name'],
+                    choices=comp_info['choices'], value=comp_info['value']
+                )
+        input_status = f"📝 Enter values for {len(components_info)} features | {validation_msg}"
+        return input_updates + [gr.update(visible=True), input_status]
+    except Exception as e:
+        return [gr.update(visible=False)] * 16 + [gr.update(visible=False), f"❌ Error: {str(e)}"]
+def execute_prediction(df_preview, target_col, problem_type, max_depth, min_samples_split, min_samples_leaf, criterion, *input_values):
+    """Execute Decision Tree prediction"""
+    global current_dataframe
+    df = current_dataframe
+    # Validation checks
+    if df is None or df.empty:
+        return None, "❌ **No data loaded!** 📊 Please select a sample dataset or upload a file first.", None, "Load data to get started."
+    if not target_col or not problem_type:
+        return None, "❌ **Configuration incomplete!** 🎯 Please select target column and problem type above.", None, "Complete configuration to proceed."
+    is_valid, validation_msg = validate_config(df, target_col, problem_type)
+    if not is_valid:
+        return None, f"❌ **Configuration issue**: {validation_msg}", None, "Fix the configuration and try again."
+    try:
+        components_info = decision_tree_core.create_input_components(df, target_col)
+        new_point_dict = {}
+        for i, comp_info in enumerate(components_info):
+            if i < len(input_values) and input_values[i] is not None:
+                new_point_dict[comp_info['name']] = input_values[i]
+            else:
+                new_point_dict[comp_info['name']] = comp_info['value']
+        tree_fig, importance_fig, prediction, prediction_details, summary, error = decision_tree_core.run_decision_tree_and_visualize(
+            df, target_col, new_point_dict, max_depth, min_samples_split, min_samples_leaf, criterion, problem_type
+        )
+        if error:
+            return None, f"❌ **Prediction failed**: {error} | Please check your input values and try again.", None, "Adjust inputs and retry."
+        if problem_type == "classification":
+            result_header = f"## 🎯 **Classification Result**: {prediction}\n*Based on decision tree with {criterion} criterion*"
+        else:
+            result_header = f"## 🎯 **Regression Result**: {prediction:.3f}\n*Based on decision tree with {criterion} criterion*"
+        return tree_fig, importance_fig, result_header, prediction_details, summary
+    except Exception as e:
+        return None, None, f"❌ **Execution error**: {str(e)} | Please verify your input values are correct.", None, "Check inputs and try again."
+# Main Application
+# Layout: a left column (data/config accordion, parameter accordion with the
+# 16 pre-built feature inputs, run button) and a right column (plots and
+# Markdown result areas). Event bindings follow the layout.
+with gr.Blocks(theme='gstaff/sketch', css=vlai_template.custom_css, fill_width=True, js=force_light_theme_js) as demo:
+    vlai_template.create_header()
+    # Main guidance text
+    gr.Markdown("### 🌳 **How to Use**: Select data → Configure target → Set tree parameters → Enter new point → Run prediction!")
+    with gr.Row(equal_height=False, variant="panel"):
+        # Left column: data source, configuration and inputs.
+        with gr.Column(scale=45):
+            with gr.Accordion("📊 Data & Configuration", open=True):
+                with gr.Row():
+                    with gr.Column(scale=1):
+                        gr.Markdown("Start with sample datasets or upload your own CSV/Excel files.")
+                        file_upload = gr.File(
+                            label="📁 Upload Your Data",
+                            file_types=[".csv", ".xlsx", ".xls"],
+                        )
+                    with gr.Column(scale=3):
+                        sample_dataset = gr.Dropdown(
+                            choices=list(SAMPLE_DATA_CONFIG.keys()),
+                            value="Iris",
+                            label="🗂️ Sample Datasets",
+                        )
+                        problem_type_selector = gr.Dropdown(
+                            choices=["classification", "regression"],
+                            label="🎲 Problem Type",
+                            interactive=True,
+                        )
+                        # Choices start empty; load_and_configure_data fills them with the column names.
+                        target_column = gr.Dropdown(
+                            choices=[],
+                            label="🎯 Target Column",
+                            interactive=True,
+                        )
+                status_message = gr.Markdown("🔄 Loading sample data...")
+                data_preview = gr.DataFrame(
+                    label="📋 Data Preview (First 5 Rows)",
+                    row_count=5,
+                    interactive=False,
+                    max_height=250
+                )
+            with gr.Accordion("⚙️ Parameters & Input", open=True):
+                gr.Markdown("**🌳 Decision Tree Parameters**")
+                with gr.Row():
+                    max_depth = gr.Number(
+                        label="Max Depth",
+                        value=5,
+                        minimum=0,
+                        maximum=20,
+                        precision=0,
+                        info="Set to 0 for unlimited depth"
+                    )
+                    min_samples_split = gr.Number(
+                        label="Min Samples Split",
+                        value=2,
+                        minimum=2,
+                        maximum=100,
+                        precision=0,
+                    )
+                    min_samples_leaf = gr.Number(
+                        label="Min Samples Leaf",
+                        value=1,
+                        minimum=1,
+                        maximum=50,
+                        precision=0,
+                    )
+                with gr.Row():
+                    # Starts with the classification criteria; update_criterion_choices
+                    # swaps the list when the problem type changes.
+                    criterion = gr.Dropdown(
+                        choices=["gini", "entropy", "log_loss"],
+                        value="gini",
+                        label="🎯 Criterion",
+                    )
+                inputs_group = gr.Group(visible=False)
+                with inputs_group:
+                    input_status = gr.Markdown("Configure inputs above.")
+                    gr.Markdown("**📝 New Data Point** - Enter feature values for prediction:")
+                    # A fixed 4x4 grid of hidden Number inputs; the handlers relabel
+                    # and reveal as many as the dataset has features (up to 16).
+                    input_components = []
+                    for row in range(4):
+                        with gr.Row():
+                            for col in range(4):
+                                idx = row * 4 + col
+                                if idx < 16:
+                                    input_components.append(
+                                        gr.Number(label=f"Feature {idx+1}", visible=False)
+                                    )
+                run_prediction_btn = gr.Button(
+                    "🚀 Run Prediction",
+                    variant="primary",
+                    size="lg",
+                )
+        # Right column: plots and textual results.
+        with gr.Column(scale=55):
+            gr.Markdown("### 🌳 **Decision Tree Results & Visualization**")
+            with gr.Tabs():
+                with gr.TabItem("Decision Tree"):
+                    tree_visualization = gr.Plot(
+                        label="Interactive Decision Tree",
+                        visible=True,
+                    )
+                with gr.TabItem("Feature Importance"):
+                    feature_importance_plot = gr.Plot(
+                        label="Feature Importance",
+                        visible=True,
+                    )
+            prediction_result = gr.Markdown(
+                "## 🎯 Prediction Result\n**Run prediction to see the result.**",
+                label="📈 Final Prediction"
+            )
+            prediction_details = gr.Markdown(
+                "**📝 Prediction Details**\n\nDetailed prediction information will appear here.",
+                label="🔍 Prediction Details"
+            )
+            algorithm_summary = gr.Markdown(
+                "**📋 Algorithm Summary**\n\nAlgorithm details will appear here after prediction.",
+                label="🔍 Technical Details"
+            )
+    # Bottom guidance
+    gr.Markdown("""💡 **Tips**:
+    - **Interactive tree visualization** allows you to zoom and explore the decision tree structure.
+    - **Feature importance** shows which features are most critical for making decisions.
+    - Try different **max depth** and **criterion** values to see how the tree structure changes!
+    - **Min samples split/leaf** help control tree complexity and prevent overfitting.
+    """)
+    vlai_template.create_footer()
+    # Event Bindings
+    # The three data-loading events share one 22-item output list:
+    # preview, target, problem type, status, 16 inputs, inputs group, input status.
+    demo.load(
+        fn=lambda: load_and_configure_data(None, "Iris"),
+        outputs=[data_preview, target_column, problem_type_selector, status_message] + input_components + [inputs_group, input_status]
+    )
+    # The dataset name passed here is a placeholder; load_data reads the file when one is given.
+    file_upload.upload(
+        fn=lambda file: load_and_configure_data(file, "Iris"),
+        inputs=[file_upload],
+        outputs=[data_preview, target_column, problem_type_selector, status_message] + input_components + [inputs_group, input_status]
+    )
+    sample_dataset.change(
+        fn=lambda choice: load_and_configure_data(None, choice),
+        inputs=[sample_dataset],
+        outputs=[data_preview, target_column, problem_type_selector, status_message] + input_components + [inputs_group, input_status]
+    )
+    # Changing the target or the problem type rebuilds the feature inputs.
+    target_column.change(
+        fn=update_configuration,
+        inputs=[data_preview, target_column, problem_type_selector],
+        outputs=input_components + [inputs_group, input_status]
+    )
+    problem_type_selector.change(
+        fn=update_configuration,
+        inputs=[data_preview, target_column, problem_type_selector],
+        outputs=input_components + [inputs_group, input_status]
+    )
+    # Second listener on the same event: keep the criterion list in sync with the problem type.
+    problem_type_selector.change(
+        fn=update_criterion_choices,
+        inputs=[problem_type_selector],
+        outputs=[criterion]
+    )
+    # execute_prediction must return one value per entry in this 5-item output list.
+    run_prediction_btn.click(
+        fn=execute_prediction,
+        inputs=[data_preview, target_column, problem_type_selector, max_depth, min_samples_split, min_samples_leaf, criterion] + input_components,
+        outputs=[tree_visualization, feature_importance_plot, prediction_result, prediction_details, algorithm_summary]
+    )
+if __name__ == "__main__":
+    # allowed_paths lists the static logo files and their folder.
+    demo.launch(allowed_paths=["static/aivn_logo.png", "static/vlai_logo.png", "static"])

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+gradio==5.38.0
+pandas>=1.5.0
+scikit-learn>=1.3.0
+numpy>=1.24.0
+supertree==0.5.5

src/__init__.py ADDED Viewed

File without changes

src/__pycache__/__init__.cpython-312.pyc ADDED Viewed

Binary file (205 Bytes). View file

src/__pycache__/decision_tree_core.cpython-312.pyc ADDED Viewed

Binary file (16.2 kB). View file

src/decision_tree_core.py ADDED Viewed

	@@ -0,0 +1,364 @@

+import pandas as pd
+import numpy as np
+from sklearn.tree import DecisionTreeClassifier, DecisionTreeRegressor
+from sklearn.preprocessing import StandardScaler, LabelEncoder
+from sklearn.datasets import (
+    load_iris, load_wine, load_diabetes, load_breast_cancer
+)
+import plotly.express as px
+import plotly.graph_objects as go
+def load_data(file_obj=None, dataset_choice="Iris"):
+    """Load data from file or sample dataset"""
+    if file_obj is not None:
+        if file_obj.name.endswith('.csv'):
+            return pd.read_csv(file_obj.name)
+        elif file_obj.name.endswith(('.xlsx', '.xls')):
+            return pd.read_excel(file_obj.name)
+        else:
+            raise ValueError("Unsupported format. Upload CSV or Excel files.")
+    # Sample datasets
+    datasets = {
+        "Iris": lambda: _sklearn_to_df(load_iris()),
+        "Wine": lambda: _sklearn_to_df(load_wine()),
+        "Breast Cancer": lambda: _sklearn_to_df(load_breast_cancer()),
+        "Diabetes": lambda: _sklearn_to_df(load_diabetes()),
+    }
+    if dataset_choice not in datasets:
+        raise ValueError(f"Unknown dataset: {dataset_choice}")
+    return datasets[dataset_choice]()
+def _sklearn_to_df(data):
+    """Convert sklearn dataset to DataFrame"""
+    df = pd.DataFrame(data.data, columns=data.feature_names)
+    df['target'] = data.target
+    return df
+def analyze_dataframe(df):
+    """Analyze DataFrame and return target options"""
+    return df.columns.tolist(), df.columns[-1]
+def determine_problem_type(df, target_col):
+    """Auto-detect classification or regression"""
+    if target_col not in df.columns:
+        return "classification"
+    target = df[target_col]
+    unique_vals = target.nunique()
+    if target.dtype == 'object' or unique_vals <= min(20, len(target) * 0.1):
+        return "classification"
+    return "regression"
+def create_input_components(df, target_col):
+    """Generate UI component specifications for features"""
+    feature_cols = [col for col in df.columns if col != target_col]
+    components = []
+    for col in feature_cols:
+        data = df[col]
+        if data.dtype == 'object':
+            unique_vals = sorted(data.unique())
+            components.append({
+                'name': col, 'type': 'dropdown',
+                'choices': unique_vals, 'value': unique_vals[0]
+            })
+        else:
+            components.append({
+                'name': col, 'type': 'number',
+                'value': round(float(data.mean()), 2),
+                'minimum': None,
+                'maximum': None
+            })
+    return components
+def preprocess_data(df, target_col, new_point_dict):
+    """Preprocess data for decision tree training"""
+    feature_cols = [col for col in df.columns if col != target_col]
+    X = df[feature_cols].copy()
+    y = df[target_col].copy()
+    # Encode categorical variables
+    encoders = {}
+    for col in feature_cols:
+        if X[col].dtype == 'object':
+            le = LabelEncoder()
+            X[col] = le.fit_transform(X[col].astype(str))
+            encoders[col] = le
+    # Process new point
+    new_point = []
+    for col in feature_cols:
+        if col in encoders:
+            try:
+                val = encoders[col].transform([str(new_point_dict[col])])[0]
+            except ValueError:
+                available_categories = list(encoders[col].classes_)
+                raise ValueError(f"Unknown category '{new_point_dict[col]}' for column '{col}'. Available options: {available_categories}")
+            new_point.append(val)
+        else:
+            new_point.append(float(new_point_dict[col]))
+    new_point = np.array(new_point).reshape(1, -1)
+    return X.values, y, new_point, feature_cols, encoders
+def run_decision_tree_and_visualize(df, target_col, new_point_dict, max_depth, min_samples_split, min_samples_leaf, criterion, problem_type=None):
+    """Execute Decision Tree algorithm and generate visualization"""
+    X, y, new_point, feature_cols, encoders = preprocess_data(df, target_col, new_point_dict)
+    if problem_type is None:
+        problem_type = determine_problem_type(df, target_col)
+    # Validate parameters
+    if max_depth is not None and max_depth < 0:
+        return None, None, None, None, "Max depth must be at least 0 (unlimited) or 1+ for specific depth."
+    if min_samples_split < 2:
+        return None, None, None, None, "Min samples split must be at least 2."
+    if min_samples_leaf < 1:
+        return None, None, None, None, "Min samples leaf must be at least 1."
+    # Train decision tree
+    ModelClass = DecisionTreeClassifier if problem_type == "classification" else DecisionTreeRegressor
+    model = ModelClass(
+        max_depth=None if max_depth == 0 else max_depth,
+        min_samples_split=min_samples_split,
+        min_samples_leaf=min_samples_leaf,
+        criterion=criterion,
+        random_state=42
+    )
+    model.fit(X, y)
+    prediction = model.predict(new_point)[0]
+    # Get prediction path
+    path = model.decision_path(new_point)
+    node_indices = path.indices
+    # Create tree visualization
+    tree_fig = create_tree_visualization(model, feature_cols, target_col, problem_type, new_point_dict, prediction)
+    # Create feature importance plot
+    importance_fig = create_feature_importance_plot(model, feature_cols)
+    # Create prediction details
+    prediction_details = create_prediction_details(model, new_point[0], feature_cols, target_col, prediction, problem_type)
+    # Generate algorithm summary
+    summary = create_algorithm_summary(model, problem_type, max_depth, min_samples_split, min_samples_leaf, criterion, feature_cols)
+    return tree_fig, importance_fig, prediction, prediction_details, summary, None
+def create_tree_visualization(model, feature_cols, target_col, problem_type, new_point_dict, prediction):
+    """Create interactive decision tree visualization using plotly"""
+    # Create a hierarchical tree visualization
+    fig = go.Figure()
+    # Get tree structure
+    tree_data = get_tree_structure(model, feature_cols, target_col, problem_type)
+    # Create tree layout
+    positions = calculate_tree_positions(tree_data)
+    # Add nodes
+    for node_id, pos in positions.items():
+        node_info = tree_data[node_id]
+        if node_info['is_leaf']:
+            color = 'lightgreen'
+            text = f"Leaf: {node_info['prediction']}"
+        else:
+            color = 'lightblue'
+            text = f"{node_info['feature']} ≤ {node_info['threshold']:.3f}"
+        fig.add_trace(go.Scatter(
+            x=[pos['x']], y=[pos['y']],
+            mode='markers+text',
+            marker=dict(size=15, color=color),
+            text=[text],
+            textposition='middle center',
+            textfont=dict(size=10),
+            showlegend=False,
+            hovertemplate=f"<b>{text}</b><br>Samples: {node_info['samples']}<extra></extra>"
+        ))
+    # Add edges
+    for node_id, pos in positions.items():
+        node_info = tree_data[node_id]
+        if not node_info['is_leaf']:
+            # Left child
+            if node_info['left_child'] in positions:
+                left_pos = positions[node_info['left_child']]
+                fig.add_trace(go.Scatter(
+                    x=[pos['x'], left_pos['x']], y=[pos['y'], left_pos['y']],
+                    mode='lines',
+                    line=dict(color='gray', width=1),
+                    showlegend=False,
+                    hoverinfo='skip'
+                ))
+            # Right child
+            if node_info['right_child'] in positions:
+                right_pos = positions[node_info['right_child']]
+                fig.add_trace(go.Scatter(
+                    x=[pos['x'], right_pos['x']], y=[pos['y'], right_pos['y']],
+                    mode='lines',
+                    line=dict(color='gray', width=1),
+                    showlegend=False,
+                    hoverinfo='skip'
+                ))
+    fig.update_layout(
+        title="Decision Tree Structure",
+        xaxis_title="",
+        yaxis_title="",
+        showlegend=False,
+        height=600,
+        width=800,
+        xaxis=dict(showgrid=False, zeroline=False, showticklabels=False),
+        yaxis=dict(showgrid=False, zeroline=False, showticklabels=False)
+    )
+    return fig
+def get_tree_structure(model, feature_cols, target_col, problem_type):
+    """Extract tree structure from sklearn model"""
+    tree_data = {}
+    def process_node(node_id):
+        if model.tree_.children_left[node_id] == -1:  # Leaf node
+            if problem_type == "classification":
+                class_counts = model.tree_.value[node_id][0]
+                predicted_class = np.argmax(class_counts)
+            else:
+                predicted_value = model.tree_.value[node_id][0][0]
+                predicted_class = predicted_value
+            tree_data[node_id] = {
+                'is_leaf': True,
+                'samples': int(model.tree_.n_node_samples[node_id]),
+                'prediction': predicted_class
+            }
+        else:  # Internal node
+            feature_idx = model.tree_.feature[node_id]
+            threshold = model.tree_.threshold[node_id]
+            feature_name = feature_cols[feature_idx] if feature_idx < len(feature_cols) else f'Feature_{feature_idx}'
+            tree_data[node_id] = {
+                'is_leaf': False,
+                'feature': feature_name,
+                'threshold': threshold,
+                'samples': int(model.tree_.n_node_samples[node_id]),
+                'left_child': model.tree_.children_left[node_id],
+                'right_child': model.tree_.children_right[node_id]
+            }
+            # Process children
+            process_node(model.tree_.children_left[node_id])
+            process_node(model.tree_.children_right[node_id])
+    process_node(0)
+    return tree_data
+def calculate_tree_positions(tree_data):
+    """Assign an ``{'x', 'y'}`` coordinate to every node reachable from node 0.
+
+    The root sits at (0, 0) with a horizontal span of 4. Each level down
+    lowers ``y`` by 1 and halves the span; the two children are placed a
+    quarter of the parent's span to the left and right of the parent. Child
+    ids that are missing from ``tree_data`` are skipped.
+    """
+    layout = {}
+    # Work items: (node id, x, y, horizontal span available at this node).
+    work = [(0, 0, 0, 4)]
+    while work:
+        node_id, x, y, span = work.pop()
+        if node_id not in tree_data:
+            continue
+        layout[node_id] = {'x': x, 'y': y}
+        node = tree_data[node_id]
+        if node['is_leaf']:
+            continue
+        left_id = node['left_child']
+        right_id = node['right_child']
+        child_span = span / 2
+        # Right goes on the stack first so the left subtree is laid out first.
+        work.append((right_id, x + child_span/2, y - 1, child_span))
+        work.append((left_id, x - child_span/2, y - 1, child_span))
+    return layout
+def create_feature_importance_plot(model, feature_cols):
+    """Return a bar chart of ``model.feature_importances_``, highest first.
+
+    Bars are labelled with the matching names from ``feature_cols`` and
+    annotated with the score rounded to three decimals.
+    """
+    scores = model.feature_importances_
+    ranked = np.argsort(scores)[::-1]  # argsort is ascending; reverse it
+    bar_names = [feature_cols[idx] for idx in ranked]
+    bar_labels = [f'{scores[idx]:.3f}' for idx in ranked]
+    bars = go.Bar(
+        x=bar_names,
+        y=scores[ranked],
+        marker_color='lightblue',
+        text=bar_labels,
+        textposition='auto',
+    )
+    fig = go.Figure()
+    fig.add_trace(bars)
+    fig.update_layout(
+        title="Feature Importance",
+        xaxis_title="Features",
+        yaxis_title="Importance Score",
+        showlegend=False,
+        height=400
+    )
+    return fig
+def create_prediction_details(model, new_point, feature_cols, target_col, prediction, problem_type):
+    """Build a Markdown report describing one prediction.
+
+    Args:
+        model: fitted estimator exposing ``tree_.node_count``,
+            ``get_n_leaves()`` and ``get_depth()``; ``predict_proba`` is
+            called when the attribute exists.
+        new_point: feature values of the sample, paired positionally with
+            ``feature_cols``. Any sequence works (list, tuple, numpy array).
+        feature_cols: feature names shown next to each value.
+        target_col: accepted for interface compatibility; not used here.
+        prediction: predicted label (classification) or number (regression).
+        problem_type: ``"classification"`` adds the class/confidence lines;
+            any other value formats ``prediction`` with three decimals.
+
+    Returns:
+        The report sections joined with newlines into a single string.
+    """
+    details = ["## 📝 **Input Features**"]
+    details.extend(f"- **{col}**: {val}" for col, val in zip(feature_cols, new_point))
+    details.append("\n## 🎯 **Prediction**")
+    if problem_type == "classification":
+        details.append(f"- **Predicted Class**: {prediction}")
+        # Get prediction probabilities if available
+        if hasattr(model, 'predict_proba'):
+            # np.asarray lets plain lists/tuples through; an existing array
+            # is passed along unchanged before being reshaped to one row.
+            sample = np.asarray(new_point).reshape(1, -1)
+            proba = model.predict_proba(sample)[0]
+            details.append(f"- **Confidence**: {max(proba):.3f}")
+    else:
+        details.append(f"- **Predicted Value**: {prediction:.3f}")
+    # Add tree statistics
+    details.extend([
+        "\n## 🌳 **Tree Statistics**",
+        f"- **Total Nodes**: {model.tree_.node_count}",
+        f"- **Leaf Nodes**: {model.get_n_leaves()}",
+        f"- **Max Depth**: {model.get_depth()}",
+    ])
+    return "\n".join(details)
+def create_algorithm_summary(model, problem_type, max_depth, min_samples_split, min_samples_leaf, criterion, feature_cols):
+    """Generate a Markdown summary of the fitted tree and its settings.
+
+    Args:
+        model: fitted estimator exposing ``tree_.node_count``,
+            ``get_n_leaves()``, ``get_depth()`` and ``feature_importances_``.
+        problem_type: shown title-cased in the summary.
+        max_depth: depth limit that was requested; ``0`` or ``None`` is
+            reported as "Unlimited".
+        min_samples_split: echoed into the summary.
+        min_samples_leaf: echoed into the summary.
+        criterion: echoed into the summary.
+        feature_cols: feature names, positionally matching the importances.
+
+    Returns:
+        The summary text, ending with up to three "- name: score" lines
+        ordered from most to least important.
+    """
+    # scikit-learn itself uses None for "no depth limit"; report it the same
+    # way as the 0 sentinel instead of printing the literal text "None".
+    unlimited = max_depth is None or max_depth == 0
+    max_depth_str = "Unlimited" if unlimited else str(max_depth)
+    summary = f"""## Algorithm Summary
+**Criterion:** {criterion} | **Max Depth:** {max_depth_str} | **Min Samples Split:** {min_samples_split} | **Min Samples Leaf:** {min_samples_leaf}
+**Features:** {len(feature_cols)} | **Total Nodes:** {model.tree_.node_count} | **Leaf Nodes:** {model.get_n_leaves()}
+**Tree Depth:** {model.get_depth()} | **Problem Type:** {problem_type.title()}
+**Top 3 Most Important Features:**
+"""
+    importances = model.feature_importances_
+    indices = np.argsort(importances)[::-1]
+    # Never index past the scores the model actually has.
+    top_n = min(3, len(feature_cols), len(importances))
+    top_lines = []
+    for idx in indices[:top_n]:
+        # Same generic label get_tree_structure uses for an unnamed column.
+        name = feature_cols[idx] if idx < len(feature_cols) else f'Feature_{idx}'
+        top_lines.append(f"- {name}: {importances[idx]:.3f}\n")
+    return summary + "".join(top_lines)

static/aivn_logo.png ADDED Viewed

static/vlai_logo.png ADDED Viewed

vlai_template.py ADDED Viewed

	@@ -0,0 +1,142 @@

+import os, base64
+import gradio as gr
+# Branding values interpolated into the title block built by create_header().
+PROJECT_NAME = "Decision Tree Demo"
+AIO_YEAR = "2025"
+AIO_MODULE = "03"
+# END
+def image_to_base64(image_path: str):
+    """Return the contents of an image file as a base64-encoded string.
+
+    ``image_path`` is joined onto the directory that contains this module,
+    the file is read in binary mode, and the encoded bytes are decoded as
+    UTF-8 text.
+    """
+    module_dir = os.path.dirname(os.path.abspath(__file__))
+    resolved_path = os.path.join(module_dir, image_path)
+    with open(resolved_path, "rb") as image_file:
+        raw_bytes = image_file.read()
+    return base64.b64encode(raw_bytes).decode("utf-8")
+def create_header():
+    """Render the page header row: a logo column followed by a title column.
+
+    The logo is embedded as a base64 data URI; the title block shows
+    PROJECT_NAME with the AIO year and module underneath.
+    """
+    with gr.Row():
+        with gr.Column(scale=2):
+            encoded_logo = image_to_base64("static/aivn_logo.png")
+            logo_markup = f"""<img src="data:image/png;base64,{encoded_logo}"
+                        alt="Logo"
+                        style="height:120px;width:auto;margin:0 auto;margin-bottom:16px; display:block;">"""
+            gr.HTML(logo_markup)
+        with gr.Column(scale=2):
+            title_markup = f"""
+<div style="display:flex;justify-content:flex-start;align-items:center;gap:30px;">
+    <div>
+        <h1 style="margin-bottom:0; color: #2E7D32; font-size: 2.5em; font-weight: bold;"> {PROJECT_NAME} </h1>
+        <h3 style="color: #888; font-style: italic"> AIO{AIO_YEAR}: Module {AIO_MODULE}. </h3>
+    </div>
+</div>
+"""
+            gr.HTML(title_markup)
+def create_footer():
+    """Return a ``gr.HTML`` component holding the fixed "Created by" footer.
+
+    The markup is an inline ``<style>`` block followed by the footer ``<div>``
+    with the VLAI logo embedded as a base64 data URI.
+    """
+    encoded_logo = image_to_base64("static/vlai_logo.png")
+    # Plain string: the CSS braces must not be treated as f-string fields.
+    style_block = """
+<style>
+  .sticky-footer{position:fixed;bottom:0px;left:0;width:100%;background:#E8F5E8;
+                 padding:10px;box-shadow:0 -2px 10px rgba(0,0,0,0.1);z-index:1000;}
+  .content-wrap{padding-bottom:60px;}
+</style>"""
+    body_block = f"""
+<div class="sticky-footer">
+  <div style="text-align:center;font-size:18px; color: #888">
+    Created by
+    <a href="https://vlai.work" target="_blank" style="color:#465C88;text-decoration:none;font-weight:bold; display:inline-flex; align-items:center;"> VLAI
+    <img src="data:image/png;base64,{encoded_logo}" alt="Logo" style="height:20px; width:auto;">
+    </a> from <a href="https://aivietnam.edu.vn/" target="_blank" style="color:#355724;text-decoration:none;font-weight:bold">AI VIET NAM</a>
+  </div>
+</div>
+"""
+    return gr.HTML(style_block + body_block)
+# Stylesheet text for the demo UI: an animated green gradient background on
+# .gradio-container, tightened margins/padding/gaps for the .gr-* component
+# classes, and a fixed-position .sticky-footer bar.
+# NOTE(review): presumably handed to the Gradio app as its custom CSS by the
+# caller - confirm where this is consumed.
+# NOTE(review): .sticky-footer and .content-wrap are also styled by the inline
+# <style> block that create_footer() emits; keep the two definitions in sync.
+custom_css = """
+.gradio-container {
+    min-height: 100vh !important;
+    width: 100vw !important;
+    margin: 0 !important;
+    padding: 0px !important;
+    background: linear-gradient(135deg, #E8F5E8 0%, #D4E6D4 50%, #A8D8A8 100%);
+    background-size: 600% 600%;
+    animation: gradientBG 7s ease infinite;
+}
+@keyframes gradientBG {
+    0% {background-position: 0% 50%;}
+    50% {background-position: 100% 50%;}
+    100% {background-position: 0% 50%;}
+}
+/* Minimize spacing and padding */
+.content-wrap {
+    padding: 2px !important;
+    margin: 0 !important;
+}
+/* Reduce component spacing */
+.gr-row {
+    gap: 5px !important;
+    margin: 2px 0 !important;
+}
+.gr-column {
+    gap: 4px !important;
+    padding: 4px !important;
+}
+/* Accordion optimization */
+.gr-accordion {
+    margin: 4px 0 !important;
+}
+.gr-accordion .gr-accordion-content {
+    padding: 2px !important;
+}
+/* Form elements spacing */
+.gr-form {
+    gap: 2px !important;
+}
+/* Button styling */
+.gr-button {
+    margin: 2px 0 !important;
+}
+/* DataFrame optimization */
+.gr-dataframe {
+    margin: 4px 0 !important;
+}
+/* Remove horizontal scroll from data preview */
+.gr-dataframe .wrap {
+    overflow-x: auto !important;
+    max-width: 100% !important;
+}
+/* Plot optimization */
+.gr-plot {
+    margin: 4px 0 !important;
+}
+/* Reduce markdown margins */
+.gr-markdown {
+    margin: 2px 0 !important;
+}
+/* Footer positioning */
+.sticky-footer {
+    position: fixed;
+    bottom: 0px;
+    left: 0;
+    width: 100%;
+    background: #E8F5E8;
+    padding: 6px !important;
+    box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
+    z-index: 1000;
+}
+"""