File size: 4,157 Bytes
c0573cd
1a21c11
 
c0573cd
 
1a21c11
 
c0573cd
1a21c11
 
c0573cd
1a21c11
c0573cd
1a21c11
 
c0573cd
1a21c11
 
c0573cd
1a21c11
 
c0573cd
1a21c11
c0573cd
 
1a21c11
 
 
c0573cd
1a21c11
c0573cd
1a21c11
 
c0573cd
 
 
1a21c11
 
c0573cd
1a21c11
 
 
 
c0573cd
1a21c11
 
 
 
 
 
 
 
 
c0573cd
1a21c11
 
 
c0573cd
1a21c11
 
 
 
 
 
 
 
 
c0573cd
1a21c11
 
 
c0573cd
1a21c11
c0573cd
1a21c11
 
 
 
 
 
 
c0573cd
1a21c11
 
 
c0573cd
 
1a21c11
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
import streamlit as st
from sklearn.datasets import make_moons
from sklearn.model_selection import train_test_split
from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
import numpy as np

# --- Page chrome -----------------------------------------------------------
st.set_page_config(
    page_title="Ensemble Methods",
    page_icon="πŸ€–",
    layout="wide",
)

st.markdown(
    "<h1 style='text-align: center;'>πŸ€– Ensemble Learning Visualized</h1>",
    unsafe_allow_html=True,
)
st.markdown("### Select an Ensemble Method from the options below:")

# --- Sidebar: pick which ensemble technique to demo ------------------------
st.sidebar.title("πŸ€– Choose an Ensemble Technique")
_METHODS = ["Voting", "Bagging", "Random Forest"]
model_choice = st.sidebar.radio("Select Ensemble Method:", _METHODS)

# --- Toy data: two interleaving half-moons with a fixed 70/30 split --------
X, y = make_moons(n_samples=300, noise=0.25, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)

def plot_decision_boundary(model, X, y, title):
    """Draw the model's 2-D decision regions with the data points overlaid.

    Parameters
    ----------
    model : fitted estimator exposing ``predict`` on (n, 2) arrays.
    X : array of shape (n_samples, 2) — 2-D feature matrix.
    y : array of shape (n_samples,) — class labels, used to colour points.
    title : str — title for the axes.

    Renders the figure into the Streamlit app; returns nothing.
    """
    # Pad the plotting window by 1 unit on each side of the data.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))
    # Classify every point of the 200x200 grid, then reshape the flat
    # predictions back to the grid so contourf can shade the regions.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    fig, ax = plt.subplots()
    ax.contourf(xx, yy, Z, alpha=0.3, cmap='RdYlBu')
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap='RdYlBu', edgecolor='k')
    ax.set_title(title)
    st.pyplot(fig)
    # Streamlit re-runs the whole script on every interaction; close the
    # figure so matplotlib does not accumulate open figures across reruns
    # (it warns and leaks memory after 20 open figures otherwise).
    plt.close(fig)

# ---------------------------------------------------------------------------
# Model training and plotting. Each branch only *builds* its ensemble; the
# helper below performs the fit / score / plot / notebook-link sequence that
# was previously duplicated verbatim in every branch.
# ---------------------------------------------------------------------------
def _train_report_and_plot(model, plot_title, link_text, link_url):
    """Fit `model` on the train split, show its test accuracy, plot its
    decision region over the full dataset, and link the companion notebook."""
    model.fit(X_train, y_train)
    acc = accuracy_score(y_test, model.predict(X_test))
    st.write(f"🎯 **Accuracy:** {acc:.2f}")
    plot_decision_boundary(model, X, y, plot_title)
    st.markdown(f"πŸ‘‰ [{link_text}]({link_url})", unsafe_allow_html=True)


if model_choice == "Voting":
    st.subheader("πŸ—³οΈ Voting Classifier")
    st.write("""
    Combines multiple classifiers (Logistic Regression, SVM, and Decision Tree) to vote on predictions.  
    You can choose between **Hard Voting** (majority class) and **Soft Voting** (average probabilities).
    """)
    voting_clf = VotingClassifier(
        estimators=[
            ('lr', LogisticRegression()),
            # probability=True is required for soft voting, which averages
            # the per-estimator predict_proba outputs.
            ('svc', SVC(probability=True)),
            ('dt', DecisionTreeClassifier(max_depth=5)),
        ],
        voting='soft',
    )
    _train_report_and_plot(
        voting_clf,
        "Voting Classifier Decision Region",
        "Open Voting Ensemble Notebook",
        "https://colab.research.google.com/drive/1LPZR9RnvEXP8mzOLOBfSVVyHHZ7GFns4?usp=sharing",
    )

elif model_choice == "Bagging":
    st.subheader("🧺 Bagging Classifier")
    st.write("""
    Trains multiple Decision Trees on random subsets (with replacement) of data and averages their predictions.  
    Helps reduce variance and overfitting.
    """)
    bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=100, random_state=42)
    _train_report_and_plot(
        bag_clf,
        "Bagging Classifier Decision Region",
        "Open Bagging Ensemble Notebook",
        "https://colab.research.google.com/drive/1cumZl7H9fqyORfaw236WWxQViJxvSKHV?usp=sharing",
    )

elif model_choice == "Random Forest":
    st.subheader("🌲 Random Forest")
    st.write("""
    A forest of randomized decision trees.  
    Each tree sees a bootstrapped sample and a random subset of features at every split.
    """)
    rf_clf = RandomForestClassifier(n_estimators=100, random_state=42)
    _train_report_and_plot(
        rf_clf,
        "Random Forest Decision Region",
        "Open Random Forest Notebook",
        "https://colab.research.google.com/drive/1S6YyfTx9N35E5fpPF0z6ZDm85BSp1deT?usp=sharing",
    )

st.markdown("---")
st.success("βœ… Ensemble techniques improve model stability, reduce overfitting, and deliver better results. Try them on your data!")