wjnwjn59 commited on
Commit
47faf49
·
1 Parent(s): 655cae9

first init

Browse files
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ __pycache__/
2
+ __MACOSX/
3
+
4
+ .DS_Store
5
+ *.csv
README.md CHANGED
@@ -1,12 +1,72 @@
1
  ---
2
- title: AIO2025M03 HEART DISEASE PREDICTION
3
- emoji: 🏃
4
- colorFrom: yellow
5
- colorTo: pink
6
  sdk: gradio
7
- sdk_version: 5.42.0
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  ---
2
+ title: AIO2025M03 DEMO Decision Tree
3
+ emoji: 🌳
4
+ colorFrom: green
5
+ colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 5.38.2
8
  app_file: app.py
9
  pinned: false
10
  ---
11
 
12
+ # 🌳 Decision Tree Interactive Demo
13
+
14
+ An interactive web application demonstrating Decision Tree algorithms with real-time visualization and educational features.
15
+
16
+ ## ✨ Features
17
+
18
+ - **📊 Multiple Datasets**: 4 built-in datasets (Iris, Wine, Breast Cancer, Diabetes)
19
+ - **🎮 Interactive Interface**: Real-time parameter adjustment and prediction
20
+ - **🌳 Tree Visualization**: Interactive decision tree structure with zoom capabilities
21
+ - **📊 Feature Importance**: Visual representation of feature importance scores
22
+ - **🎛️ Flexible Parameters**: Adjustable max depth, split criteria, and leaf constraints
23
+ - **📱 Responsive Design**: Works on desktop and mobile devices
24
+
25
+ ## 🚀 Quick Start
26
+
27
+ ### Local Installation
28
+ ```bash
29
+ git clone <repository-url>
30
+ cd AIO2025M03_DEMO_DECISION_TREE
31
+ pip install -r requirements.txt
32
+ python app.py
33
+ ```
34
+
35
+ ### Usage
36
+ 1. **Select Dataset**: Choose from pre-loaded datasets or upload your own CSV/Excel file
37
+ 2. **Configure Target**: Select target column and problem type (classification/regression)
38
+ 3. **Set Parameters**: Adjust max depth, split criteria, and leaf constraints
39
+ 4. **Input New Point**: Enter feature values for prediction
40
+ 5. **Run Prediction**: Get results with interactive tree visualization
41
+
42
+ ## 🧠 Technical Highlights
43
+
44
+ - **Tree Structure**: Interactive visualization of decision tree nodes and splits
45
+ - **Feature Importance**: Automatic calculation and visualization of feature importance scores
46
+ - **Auto-Detection**: Automatically determines classification vs regression problems
47
+ - **Error Handling**: Robust validation and user-friendly error messages
48
+
49
+ ## 📋 Requirements
50
+
51
+ - Python 3.8+
52
+ - Gradio 5.38+
53
+ - Scikit-learn
54
+ - Pandas
55
+ - NumPy
56
+ - Plotly
57
+
58
+ ## 🎓 Educational Value
59
+
60
+ Perfect for:
61
+ - Understanding Decision Tree algorithm mechanics
62
+ - Learning about tree-based splitting criteria
63
+ - Exploring feature importance and tree pruning
64
+ - Comparing classification vs regression approaches
65
+
66
+ ## 📄 License
67
+
68
+ Educational use for AIO2025 course materials.
69
+
70
+ ---
71
+
72
+ **Live Demo**: [Decision Tree Demo](https://huggingface.co/spaces/VLAI-AIVN/AIO2025M03_DEMO_DECISION_TREE)
app.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# app.py — Gradio front-end for the Cleveland heart-disease ensemble demo.
import os
import gradio as gr
import plotly.graph_objects as go
import pandas as pd

from src.heart_disease_core import (
    CLEVELAND_FEATURES_ORDER, TARGET_COL, CATEGORICAL_CHOICES,
    load_cleveland_dataframe, fit_all_models, predict_all, example_patient
)

# Theme colors reused by the custom CSS and the confidence plot.
APP_PRIMARY = "#0F6CBD"  # medical calm blue
APP_ACCENT = "#C4314B"   # medical alert red
APP_BG = "#F7FAFC"

# Module-level cache: uploaded dataset plus lazily fitted models/metrics.
# NOTE(review): this is shared across every session of the app — acceptable
# for a single-user demo, but concurrent users would share one dataset.
STATE = {
    "df": None,       # cleaned Cleveland DataFrame (set by load_dataset)
    "models": None,   # dict of fitted sklearn Pipelines (None until first predict)
    "metrics": None,  # holdout-metrics DataFrame from fit_all_models
}
20
+
21
def _ensure_models(df: pd.DataFrame):
    """Fit and cache the model suite on first use; no-op on later calls."""
    if STATE["models"] is not None:
        return
    fitted, holdout_metrics = fit_all_models(df)
    STATE["models"] = fitted
    STATE["metrics"] = holdout_metrics
26
+
27
def load_dataset(file):
    """Load an uploaded Cleveland-format CSV/XLSX into module STATE.

    Returns a 3-tuple of Gradio updates for (status markdown, preview
    dataframe, metrics markdown).

    Fix: the original returned `gr.Markdown.update(...)` /
    `gr.DataFrame.update(...)` — those classmethods were removed in Gradio
    4.0 and raise AttributeError under the gradio 5.x pinned in
    requirements.txt. `gr.update(...)` is the supported replacement.
    """
    try:
        if file is None:
            return (
                gr.update(value="❌ Please upload a Cleveland-format dataset (CSV/XLSX)."),
                gr.update(value=pd.DataFrame()),
                gr.update(visible=False),
            )
        # Case-insensitive extension check; anything not .csv is read as Excel.
        if file.name.lower().endswith(".csv"):
            df = pd.read_csv(file.name)
        else:
            df = pd.read_excel(file.name)
        df = load_cleveland_dataframe(uploaded_df=df)
        STATE["df"] = df
        STATE["models"] = None  # reset, will refit lazily on next predict
        STATE["metrics"] = None
        head = df.head(8)
        return (
            gr.update(value="✅ Dataset loaded successfully."),
            gr.update(value=head, interactive=False),
            gr.update(visible=False),
        )
    except Exception as e:
        # Surface validation/parsing problems to the user instead of crashing.
        return (
            gr.update(value=f"❌ Error: {e}"),
            gr.update(value=pd.DataFrame()),
            gr.update(visible=False),
        )
43
+
44
def fill_example(idx):
    """Return the chosen example patient's values in strict feature order."""
    patient = example_patient(idx)
    ordered = []
    for feature in CLEVELAND_FEATURES_ORDER:
        ordered.append(patient[feature])
    return ordered
47
+
48
def _bar_for_models(results: dict):
    """Build a bar chart of P(disease) per model plus human-readable labels.

    Returns (plotly Figure, list of "Disease"/"No disease" strings).
    """
    names = list(results)
    probs = [results[name]["prob_1"] for name in names]
    labels = []
    for name in names:
        labels.append("Disease" if results[name]["label"] == 1 else "No disease")

    fig = go.Figure()
    fig.add_bar(x=names, y=probs, text=[f"{p:.2f}" for p in probs], textposition="auto")
    fig.update_layout(
        title="Model Confidence (P[Heart Disease = 1])",
        yaxis_title="Probability",
        xaxis_title="Model",
        yaxis=dict(range=[0, 1]),
        plot_bgcolor="white",
        height=420,
        margin=dict(l=30, r=20, t=60, b=40),
    )
    # Emphasize the ensemble bar (last entry) with the accent color.
    if names:
        fig.data[0].marker.color = ["#9BB8D3"] * (len(names) - 1) + [APP_ACCENT]
    return fig, labels
68
+
69
def run_predict(*vals):
    """Predict heart disease for the entered patient with every model.

    *vals* arrive in CLEVELAND_FEATURES_ORDER (wired in the UI). Returns a
    4-tuple of Gradio updates for (title markdown, confidence plot, table
    caption, per-model table).

    Fix: the original used `gr.Markdown.update` / `gr.Plot.update` /
    `gr.DataFrame.update` — removed in Gradio 4.0, AttributeError under the
    pinned gradio 5.x. `gr.update(...)` is the supported replacement.
    """
    # Ensure a dataset has been uploaded first.
    if STATE["df"] is None:
        return (
            gr.update(value="❌ No dataset yet. Please upload a Cleveland-format dataset."),
            gr.update(value=None),
            gr.update(visible=False),
            gr.update(visible=False),
        )

    # Build the input row as a dict in strict feature order.
    input_dict = {col: vals[i] for i, col in enumerate(CLEVELAND_FEATURES_ORDER)}

    # Fit models lazily on first prediction.
    _ensure_models(STATE["df"])

    results = predict_all(STATE["models"], input_dict)

    # Compose the headline summary from the ensemble result.
    final_label = results["Ensemble (Soft Voting)"]["label"]
    final_prob = results["Ensemble (Soft Voting)"]["prob_1"]
    title_md = (
        f"### 🫀 Cleveland Heart Disease Diagnosis\n"
        f"**Ensemble Prediction**: **{'Positive' if final_label == 1 else 'Negative'}** \n"
        f"**Confidence (P=1)**: `{final_prob:.3f}`"
    )

    # One table row per model.
    pred_table = []
    for name, r in results.items():
        pred_table.append({
            "Model": name,
            "Predicted label": "Positive" if r["label"] == 1 else "Negative",
            "P(No disease)": round(r["prob_0"], 3),
            "P(Heart disease)": round(r["prob_1"], 3),
        })
    table_df = pd.DataFrame(pred_table)

    fig, labels = _bar_for_models(results)

    return (
        gr.update(value=title_md),
        gr.update(value=fig),
        gr.update(value="**Per-Model Predictions**", visible=True),
        gr.update(value=table_df, visible=True, interactive=False),
    )
115
+
116
+ # -----------------------------
117
+ # UI
118
+ # -----------------------------
119
+ with gr.Blocks(theme="soft", css=f"""
120
+ :root {{
121
+ --primary-600: {APP_PRIMARY};
122
+ }}
123
+ .gradio-container {{ background: {APP_BG}; }}
124
+ .footer-note a {{ color: {APP_PRIMARY}; }}
125
+ h1, h2, h3, h4 {{ color: {APP_PRIMARY}; }}
126
+ """) as demo:
127
+ gr.Markdown("# 🫀 Cleveland Heart Disease Diagnosis (Ensemble Demo)")
128
+
129
+ with gr.Row(equal_height=False):
130
+ # LEFT: inputs
131
+ with gr.Column(scale=45):
132
+ with gr.Box():
133
+ gr.Markdown("### 📁 Load Dataset")
134
+ info_md = gr.Markdown("Upload a CSV/XLSX in **Cleveland** format (13 features + `target`).")
135
+ file_u = gr.File(file_count="single", file_types=[".csv", ".xlsx", ".xls"], label="Upload Cleveland Dataset")
136
+ preview = gr.DataFrame(label="Data Preview (first rows)", interactive=False)
137
+ metrics_box = gr.Markdown(visible=False)
138
+
139
+ with gr.Box():
140
+ gr.Markdown("### ✍️ Enter Patient Features")
141
+ with gr.Row():
142
+ age = gr.Number(label="age (years)", value=58)
143
+ sex = gr.Dropdown(label="sex (0=female, 1=male)", choices=[0,1], value=1)
144
+ cp = gr.Dropdown(label="cp (chest pain type 0..3)", choices=[0,1,2,3], value=2)
145
+ trestbps = gr.Number(label="trestbps (resting BP mmHg)", value=130)
146
+
147
+ with gr.Row():
148
+ chol = gr.Number(label="chol (serum cholestrol mg/dl)", value=250)
149
+ fbs = gr.Dropdown(label="fbs (>120 mg/dl? 1/0)", choices=[0,1], value=0)
150
+ restecg = gr.Dropdown(label="restecg (0..2)", choices=[0,1,2], value=1)
151
+ thalach = gr.Number(label="thalach (max heart rate)", value=150)
152
+
153
+ with gr.Row():
154
+ exang = gr.Dropdown(label="exang (exercise angina 1/0)", choices=[0,1], value=0)
155
+ oldpeak = gr.Number(label="oldpeak (ST depression)", value=1.0)
156
+ slope = gr.Dropdown(label="slope (0..2)", choices=[0,1,2], value=1)
157
+ ca = gr.Dropdown(label="ca (major vessels 0..3)", choices=[0,1,2,3], value=0)
158
+
159
+ thal = gr.Dropdown(label="thal (1=normal,2=fixed,3=reversible)", choices=[1,2,3], value=2)
160
+
161
+ with gr.Row():
162
+ ex_selector = gr.Dropdown(
163
+ label="Fill Example",
164
+ choices=["Example 1 (likely negative)", "Example 2 (borderline)", "Example 3 (likely positive)"],
165
+ value="Example 2 (borderline)"
166
+ )
167
+ fill_btn = gr.Button("🧪 Use Example", variant="secondary")
168
+ predict_btn = gr.Button("🔍 Predict", variant="primary")
169
+
170
+ # RIGHT: outputs
171
+ with gr.Column(scale=55):
172
+ with gr.Box():
173
+ title_out = gr.Markdown("### Ensemble Prediction will appear here.")
174
+ bar_out = gr.Plot(label="Model Confidence")
175
+ sub_md = gr.Markdown(visible=False)
176
+ table_out = gr.DataFrame(visible=False)
177
+
178
+ with gr.Accordion("ℹ️ Notes", open=False):
179
+ gr.Markdown(
180
+ "- This demo **fits models** on your uploaded dataset (80/20 split) the first time you predict.\n"
181
+ "- **Target** is automatically binarized (0 = no disease, >0 = disease).\n"
182
+ "- Ensemble is **soft voting** over Decision Tree, k-NN, and Naive Bayes.\n"
183
+ "- This is **for demo/education**; not medical advice."
184
+ )
185
+
186
+ # Events
187
+ file_u.upload(fn=load_dataset, inputs=[file_u], outputs=[info_md, preview, metrics_box])
188
+
189
+ def _example_index(choice: str):
190
+ return {"Example 1 (likely negative)": 0, "Example 2 (borderline)": 1, "Example 3 (likely positive)": 2}[choice]
191
+
192
+ fill_btn.click(
193
+ fn=lambda choice: tuple(fill_example(_example_index(choice))),
194
+ inputs=[ex_selector],
195
+ outputs=[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal]
196
+ )
197
+
198
+ predict_btn.click(
199
+ fn=run_predict,
200
+ inputs=[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal],
201
+ outputs=[title_out, bar_out, sub_md, table_out]
202
+ )
203
+
204
+ if __name__ == "__main__":
205
+ # Optional: allow GraphViz logos etc. from static if you keep them
206
+ demo.launch()
packages.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ graphviz
2
+ fonts-liberation
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ gradio==5.38.0
2
+ pandas>=1.5.0
3
+ scikit-learn>=1.3.0
4
+ numpy>=1.24.0
5
+ dtreeviz>=2.2.2
6
+ graphviz>=0.20.3
7
+ plotly>=5.15.0
8
+ supertree>=0.5.5
src/__init__.py ADDED
File without changes
src/heart_disease_core.py ADDED
@@ -0,0 +1,228 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # src/heart_disease_core.py
2
+ import os
3
+ import numpy as np
4
+ import pandas as pd
5
+ from typing import Dict, Tuple, Optional, List
6
+
7
+ from sklearn.model_selection import train_test_split
8
+ from sklearn.preprocessing import OneHotEncoder
9
+ from sklearn.compose import ColumnTransformer
10
+ from sklearn.pipeline import Pipeline
11
+ from sklearn.impute import SimpleImputer
12
+ from sklearn.metrics import roc_auc_score
13
+ from sklearn.tree import DecisionTreeClassifier
14
+ from sklearn.neighbors import KNeighborsClassifier
15
+ from sklearn.naive_bayes import GaussianNB
16
+ from sklearn.ensemble import VotingClassifier
17
+
18
+
19
# Canonical Cleveland feature order — the UI and prediction code index by it,
# so this ordering must not change.
CLEVELAND_FEATURES_ORDER: List[str] = [
    "age", "sex", "cp", "trestbps", "chol", "fbs", "restecg",
    "thalach", "exang", "oldpeak", "slope", "ca", "thal",
]

# Label column; raw UCI values 0..4 are binarized to 0 (no disease) / 1 (disease).
TARGET_COL = "target"

# Legal encoded values for each categorical feature (Cleveland conventions).
CATEGORICAL_CHOICES = {
    "sex": [0, 1],            # 0: female, 1: male
    "cp": [0, 1, 2, 3],       # chest pain type
    "fbs": [0, 1],            # fasting blood sugar > 120 mg/dl (1 true, 0 false)
    "restecg": [0, 1, 2],     # resting ECG results
    "exang": [0, 1],          # exercise-induced angina
    "slope": [0, 1, 2],       # slope of ST
    "ca": [0, 1, 2, 3],       # number of major vessels (0-3) colored by fluoroscopy
    "thal": [1, 2, 3],        # 1: normal, 2: fixed defect, 3: reversible defect
                              # (raw UCI data also uses 3/6/7 variants; we standardize)
}

# Split of the feature set by preprocessing treatment (see build_preprocessor).
NUMERIC_COLS = ["age", "trestbps", "chol", "thalach", "oldpeak"]
CATEGORICAL_COLS = ["sex", "cp", "fbs", "restecg", "exang", "slope", "ca", "thal"]
38
+
39
def _coerce_and_clean(df: pd.DataFrame) -> pd.DataFrame:
    """Normalize a raw Cleveland frame.

    Canonicalizes column names (any casing), replaces the UCI '?' missing
    marker with NaN, coerces all known columns to numeric, and binarizes
    the target (0 = no disease, >0 = disease).
    """
    out = df.copy()
    wanted = CLEVELAND_FEATURES_ORDER + [TARGET_COL]
    lower_to_actual = {c.lower(): c for c in out.columns}

    for col in wanted:
        # Adopt an any-cased variant of the canonical name, if present.
        if col not in out.columns and col in lower_to_actual:
            out[col] = out.pop(lower_to_actual[col])
        # '?' marks missing values in the UCI files; coerce to numeric.
        if col in out.columns:
            out[col] = pd.to_numeric(out[col].replace("?", np.nan), errors="coerce")

    # UCI encodes disease severity 0..4; collapse to presence/absence.
    if TARGET_COL in out.columns:
        out[TARGET_COL] = (out[TARGET_COL] > 0).astype(int)

    return out
58
+
59
def load_cleveland_dataframe(file_path: Optional[str] = None, uploaded_df: Optional[pd.DataFrame] = None) -> pd.DataFrame:
    """
    Load the Cleveland Heart Disease dataset.
    Priority: uploaded_df > file_path > raise.
    Expect columns CLEVELAND_FEATURES_ORDER + TARGET_COL.
    """
    required = CLEVELAND_FEATURES_ORDER + [TARGET_COL]

    def _clean_and_validate(raw: pd.DataFrame, source: str) -> pd.DataFrame:
        # Shared clean + schema check for both input paths.
        cleaned = _coerce_and_clean(raw)
        missing = [c for c in required if c not in cleaned.columns]
        if missing:
            raise ValueError(f"{source} missing required columns: {missing}")
        return cleaned

    if uploaded_df is not None:
        return _clean_and_validate(uploaded_df, "Uploaded data")

    if file_path is not None and os.path.exists(file_path):
        reader = pd.read_csv if file_path.endswith(".csv") else pd.read_excel
        return _clean_and_validate(reader(file_path), "File")

    raise FileNotFoundError(
        "No dataset found. Please upload a CSV/XLSX with columns: "
        f"{CLEVELAND_FEATURES_ORDER + [TARGET_COL]}"
    )
87
+
88
+ # -----------------------------
89
+ # Preprocess & Modeling
90
+ # -----------------------------
91
def build_preprocessor() -> ColumnTransformer:
    """Build the shared feature preprocessor.

    - Numeric: impute median.
    - Categorical: impute most_frequent + one-hot (dense output).

    Fix: OneHotEncoder defaults to sparse output, so the ColumnTransformer
    could emit a scipy sparse matrix — GaussianNB (used in build_models)
    rejects sparse input with a TypeError. `sparse_output=False` forces a
    dense array (available since scikit-learn 1.2; requirements pin >=1.3).
    """
    numeric_pipe = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="median")),
    ])

    categorical_pipe = Pipeline(steps=[
        ("imputer", SimpleImputer(strategy="most_frequent")),
        ("ohe", OneHotEncoder(handle_unknown="ignore", sparse_output=False))
    ])

    preprocessor = ColumnTransformer(
        transformers=[
            ("num", numeric_pipe, NUMERIC_COLS),
            ("cat", categorical_pipe, CATEGORICAL_COLS)
        ],
        remainder="drop"
    )
    return preprocessor
113
+
114
def build_models() -> Dict[str, Pipeline]:
    """Create one sklearn Pipeline per model (plus the soft-voting ensemble).

    Fix: the original constructed a single preprocessor instance and put the
    SAME object inside all four Pipelines. sklearn Pipelines do not clone
    their steps on fit, so fitting each pipeline silently re-fitted the
    transformer shared by the others (harmless only because all were fitted
    on identical data). Each pipeline now gets its own preprocessor.
    """
    def _tree() -> DecisionTreeClassifier:
        # One place for the tree hyperparameters, reused by the ensemble.
        return DecisionTreeClassifier(
            random_state=42,
            max_depth=5,
            min_samples_split=2,
            min_samples_leaf=1,
            criterion="gini"
        )

    dt = Pipeline(steps=[
        ("prep", build_preprocessor()),
        ("clf", _tree())
    ])

    knn = Pipeline(steps=[
        ("prep", build_preprocessor()),
        ("clf", KNeighborsClassifier(n_neighbors=5))
    ])

    nb = Pipeline(steps=[
        ("prep", build_preprocessor()),
        ("clf", GaussianNB())
    ])

    # Ensemble: one Pipeline whose classifier is a soft VotingClassifier over
    # fresh copies of the three base estimators.
    ensemble = Pipeline(steps=[
        ("prep", build_preprocessor()),
        ("clf", VotingClassifier(
            estimators=[
                ("dt", _tree()),
                ("knn", KNeighborsClassifier(n_neighbors=5)),
                ("nb", GaussianNB()),
            ],
            voting="soft",
            weights=None  # uniform weights; can tweak later
        ))
    ])

    return {"Decision Tree": dt, "k-NN": knn, "Naive Bayes": nb, "Ensemble (Soft Voting)": ensemble}
157
+
158
def fit_all_models(df: pd.DataFrame, test_size: float = 0.2, random_state: int = 42) -> Tuple[Dict[str, Pipeline], pd.DataFrame]:
    """Fit every model on a stratified train split and score it on the holdout.

    Returns (fitted models dict, metrics DataFrame sorted by ROC-AUC desc).
    """
    features = df[CLEVELAND_FEATURES_ORDER]
    target = df[TARGET_COL].astype(int)

    X_tr, X_te, y_tr, y_te = train_test_split(
        features, target, test_size=test_size, random_state=random_state, stratify=target
    )

    models = build_models()
    rows = []

    for name, pipe in models.items():
        pipe.fit(X_tr, y_tr)
        # Prefer probability scores for AUC; fall back to hard predictions.
        if hasattr(pipe, "predict_proba"):
            scores = pipe.predict_proba(X_te)[:, 1]
        else:
            scores = pipe.predict(X_te)
        auc = roc_auc_score(y_te, scores)
        rows.append({"model": name, "ROC-AUC": round(float(auc), 4)})

    metrics_df = pd.DataFrame(rows).sort_values("ROC-AUC", ascending=False, ignore_index=True)
    return models, metrics_df
185
+
186
def predict_all(models: Dict[str, Pipeline], input_dict: Dict[str, float]) -> Dict[str, Dict[str, float]]:
    """Predict probability of heart disease for each model.

    Returns: {model_name: {"prob_1": float, "prob_0": float, "label": int}}

    Fix: the original assumed `predict_proba` columns are ordered [0, 1] and
    took `argmax` as the label. Per the sklearn contract the columns follow
    `classes_`; map through it so the result stays correct regardless of the
    class ordering seen at fit time.
    """
    # Build a one-row frame in strict feature order.
    row = [[input_dict[c] for c in CLEVELAND_FEATURES_ORDER]]
    X_new = pd.DataFrame(row, columns=CLEVELAND_FEATURES_ORDER)

    out = {}
    for name, pipe in models.items():
        if hasattr(pipe, "predict_proba"):
            proba = pipe.predict_proba(X_new)[0]
            # Columns of predict_proba follow classes_; default to [0, 1]
            # for duck-typed models that don't expose it.
            classes = list(getattr(pipe, "classes_", [0, 1]))
            out[name] = {
                "prob_0": float(proba[classes.index(0)]),
                "prob_1": float(proba[classes.index(1)]),
                "label": int(classes[int(np.argmax(proba))])
            }
        else:
            # Fallback for models without probabilities (unlikely here).
            label = int(pipe.predict(X_new)[0])
            out[name] = {"prob_0": 1.0 - label, "prob_1": float(label), "label": label}
    return out
209
+
210
+
211
def example_patient(index: int = 0) -> Dict[str, float]:
    """Return one of three canned Cleveland-style patients.

    Index 0 is a likely-negative case, 1 borderline, 2 likely positive;
    out-of-range indices are clamped into [0, 2].
    """
    likely_negative = dict(age=45, sex=0, cp=0, trestbps=120, chol=230, fbs=0, restecg=1,
                           thalach=168, exang=0, oldpeak=0.0, slope=2, ca=0, thal=2)
    borderline = dict(age=58, sex=1, cp=2, trestbps=138, chol=250, fbs=0, restecg=0,
                      thalach=150, exang=0, oldpeak=1.0, slope=1, ca=1, thal=2)
    likely_positive = dict(age=63, sex=1, cp=3, trestbps=145, chol=320, fbs=1, restecg=2,
                           thalach=130, exang=1, oldpeak=2.8, slope=0, ca=2, thal=3)

    catalog = [likely_negative, borderline, likely_positive]
    clamped = min(max(index, 0), len(catalog) - 1)
    return catalog[clamped]
static/aivn_logo.png ADDED
static/vlai_logo.png ADDED
vlai_template.py ADDED
@@ -0,0 +1,142 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# vlai_template.py — shared AIVN/VLAI page chrome (header, footer, CSS).
import os, base64
import gradio as gr


# Branding constants rendered in the page header HTML.
# NOTE(review): PROJECT_NAME says "Decision Tree Demo" while app.py implements
# heart-disease prediction — confirm which branding is intended.
PROJECT_NAME = "Decision Tree Demo"
AIO_YEAR = "2025"
AIO_MODULE = "03"
# END
9
+
10
+
11
def image_to_base64(image_path: str):
    """Return the base64-encoded bytes of *image_path*.

    Relative paths are resolved against this module's directory, so the
    result is independent of the process working directory.
    """
    base_dir = os.path.dirname(os.path.abspath(__file__))
    resolved = os.path.join(base_dir, image_path)
    with open(resolved, "rb") as fh:
        raw = fh.read()
    return base64.b64encode(raw).decode("utf-8")
17
+
18
def create_header():
    """Render the two-column page header: AIVN logo plus project title."""
    with gr.Row():
        with gr.Column(scale=2):
            # Inline the logo as a data URI so no static file route is needed.
            logo_base64 = image_to_base64("static/aivn_logo.png")
            gr.HTML(
                f"""<img src="data:image/png;base64,{logo_base64}"
                alt="Logo"
                style="height:120px;width:auto;margin:0 auto;margin-bottom:16px; display:block;">"""
            )
        with gr.Column(scale=2):
            # Title block: project name + course/module subtitle.
            gr.HTML(f"""
            <div style="display:flex;justify-content:flex-start;align-items:center;gap:30px;">
                <div>
                    <h1 style="margin-bottom:0; color: #2E7D32; font-size: 2.5em; font-weight: bold;"> {PROJECT_NAME} </h1>
                    <h3 style="color: #888; font-style: italic"> AIO{AIO_YEAR}: Module {AIO_MODULE}. </h3>
                </div>
            </div>
            """)
36
+
37
def create_footer():
    """Render the fixed bottom credit bar (VLAI logo inlined as base64)."""
    logo_base64_vlai = image_to_base64("static/vlai_logo.png")
    # The <style> part is a plain string (its CSS braces would collide with
    # f-string format fields); only the body that embeds the logo is an f-string.
    footer_html = """
    <style>
    .sticky-footer{position:fixed;bottom:0px;left:0;width:100%;background:#E8F5E8;
    padding:10px;box-shadow:0 -2px 10px rgba(0,0,0,0.1);z-index:1000;}
    .content-wrap{padding-bottom:60px;}
    </style>""" + f"""
    <div class="sticky-footer">
        <div style="text-align:center;font-size:18px; color: #888">
            Created by
            <a href="https://vlai.work" target="_blank" style="color:#465C88;text-decoration:none;font-weight:bold; display:inline-flex; align-items:center;"> VLAI
                <img src="data:image/png;base64,{logo_base64_vlai}" alt="Logo" style="height:20px; width:auto;">
            </a> from <a href="https://aivietnam.edu.vn/" target="_blank" style="color:#355724;text-decoration:none;font-weight:bold">AI VIET NAM</a>
        </div>
    </div>
    """
    return gr.HTML(footer_html)
55
+
56
# Global CSS for the host app: animated gradient page background plus spacing
# overrides that tighten Gradio's default component margins. The .sticky-footer
# rules duplicate those injected by create_footer's inline <style> block.
custom_css = """

.gradio-container {
    min-height: 100vh !important;
    width: 100vw !important;
    margin: 0 !important;
    padding: 0px !important;
    background: linear-gradient(135deg, #E8F5E8 0%, #D4E6D4 50%, #A8D8A8 100%);
    background-size: 600% 600%;
    animation: gradientBG 7s ease infinite;
}

@keyframes gradientBG {
    0% {background-position: 0% 50%;}
    50% {background-position: 100% 50%;}
    100% {background-position: 0% 50%;}
}

/* Minimize spacing and padding */
.content-wrap {
    padding: 2px !important;
    margin: 0 !important;
}

/* Reduce component spacing */
.gr-row {
    gap: 5px !important;
    margin: 2px 0 !important;
}

.gr-column {
    gap: 4px !important;
    padding: 4px !important;
}

/* Accordion optimization */
.gr-accordion {
    margin: 4px 0 !important;
}

.gr-accordion .gr-accordion-content {
    padding: 2px !important;
}

/* Form elements spacing */
.gr-form {
    gap: 2px !important;
}

/* Button styling */
.gr-button {
    margin: 2px 0 !important;
}

/* DataFrame optimization */
.gr-dataframe {
    margin: 4px 0 !important;
}

/* Remove horizontal scroll from data preview */
.gr-dataframe .wrap {
    overflow-x: auto !important;
    max-width: 100% !important;
}

/* Plot optimization */
.gr-plot {
    margin: 4px 0 !important;
}

/* Reduce markdown margins */
.gr-markdown {
    margin: 2px 0 !important;
}

/* Footer positioning */
.sticky-footer {
    position: fixed;
    bottom: 0px;
    left: 0;
    width: 100%;
    background: #E8F5E8;
    padding: 6px !important;
    box-shadow: 0 -2px 10px rgba(0,0,0,0.1);
    z-index: 1000;
}
"""