Spaces:
Sleeping
Sleeping
| import streamlit as st | |
| from sklearn.datasets import make_moons | |
| from sklearn.model_selection import train_test_split | |
| from sklearn.ensemble import VotingClassifier, BaggingClassifier, RandomForestClassifier | |
| from sklearn.tree import DecisionTreeClassifier | |
| from sklearn.linear_model import LogisticRegression | |
| from sklearn.svm import SVC | |
| from sklearn.metrics import accuracy_score | |
| import matplotlib.pyplot as plt | |
| import numpy as np | |
# --- Page configuration, navigation, and shared dataset ------------------
st.set_page_config(page_title="Ensemble Methods", page_icon="π€", layout="wide")

st.markdown("<h1 style='text-align: center;'>π€ Ensemble Learning Visualized</h1>", unsafe_allow_html=True)
st.markdown("### Select an Ensemble Method from the options below:")

# Sidebar radio drives which demo branch below is rendered.
st.sidebar.title("π€ Choose an Ensemble Technique")
ensemble_options = ["Voting", "Bagging", "Random Forest"]
model_choice = st.sidebar.radio("Select Ensemble Method:", ensemble_options)

# Synthetic two-class "moons" dataset shared by every ensemble demo;
# fixed random_state keeps the split reproducible across reruns.
X, y = make_moons(n_samples=300, noise=0.25, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
def plot_decision_boundary(model, X, y, title):
    """Render a fitted classifier's decision regions over a 2-D dataset.

    Args:
        model: Fitted classifier exposing ``predict`` on rows of 2 features.
        X: Feature matrix of shape (n_samples, 2).
        y: Class labels used to colour the scatter points.
        title: Title drawn above the plot.

    The figure is pushed to the Streamlit page via ``st.pyplot``.
    """
    # Pad the plotting window by 1 unit on every side of the data.
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    # 200x200 grid of candidate points covering the window.
    xx, yy = np.meshgrid(np.linspace(x_min, x_max, 200),
                         np.linspace(y_min, y_max, 200))
    # Classify every grid point, then reshape back for contourf.
    Z = model.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)
    fig, ax = plt.subplots()
    ax.contourf(xx, yy, Z, alpha=0.3, cmap='RdYlBu')
    ax.scatter(X[:, 0], X[:, 1], c=y, cmap='RdYlBu', edgecolor='k')
    ax.set_title(title)
    st.pyplot(fig)
    # Fix: close the figure so repeated Streamlit reruns don't accumulate
    # open matplotlib figures (memory leak / "too many open figures" warning).
    plt.close(fig)
# Model Training and Plotting
if model_choice == "Voting":
    st.subheader("π³οΈ Voting Classifier")
    st.write("""
    Combines multiple classifiers (Logistic Regression, SVM, and Decision Tree) to vote on predictions.
    You can choose between **Hard Voting** (majority class) and **Soft Voting** (average probabilities).
    """)
    # Three heterogeneous base learners; SVC needs probability=True so that
    # soft voting can average predicted class probabilities.
    base_estimators = [
        ('lr', LogisticRegression()),
        ('svc', SVC(probability=True)),
        ('dt', DecisionTreeClassifier(max_depth=5)),
    ]
    voting_clf = VotingClassifier(estimators=base_estimators, voting='soft')
    voting_clf.fit(X_train, y_train)
    acc = accuracy_score(y_test, voting_clf.predict(X_test))
    st.write(f"π― **Accuracy:** {acc:.2f}")
    plot_decision_boundary(voting_clf, X, y, "Voting Classifier Decision Region")
    st.markdown("π [Open Voting Ensemble Notebook](https://colab.research.google.com/drive/1LPZR9RnvEXP8mzOLOBfSVVyHHZ7GFns4?usp=sharing)", unsafe_allow_html=True)
| elif model_choice == "Bagging": | |
| st.subheader("π§Ί Bagging Classifier") | |
| st.write(""" | |
| Trains multiple Decision Trees on random subsets (with replacement) of data and averages their predictions. | |
| Helps reduce variance and overfitting. | |
| """) | |
| bag_clf = BaggingClassifier(DecisionTreeClassifier(), n_estimators=100, random_state=42) | |
| bag_clf.fit(X_train, y_train) | |
| y_pred = bag_clf.predict(X_test) | |
| acc = accuracy_score(y_test, y_pred) | |
| st.write(f"π― **Accuracy:** {acc:.2f}") | |
| plot_decision_boundary(bag_clf, X, y, "Bagging Classifier Decision Region") | |
| st.markdown("π [Open Bagging Ensemble Notebook](https://colab.research.google.com/drive/1cumZl7H9fqyORfaw236WWxQViJxvSKHV?usp=sharing)", unsafe_allow_html=True) | |
| elif model_choice == "Random Forest": | |
| st.subheader("π² Random Forest") | |
| st.write(""" | |
| A forest of randomized decision trees. | |
| Each tree sees a bootstrapped sample and a random subset of features at every split. | |
| """) | |
| rf_clf = RandomForestClassifier(n_estimators=100, random_state=42) | |
| rf_clf.fit(X_train, y_train) | |
| y_pred = rf_clf.predict(X_test) | |
| acc = accuracy_score(y_test, y_pred) | |
| st.write(f"π― **Accuracy:** {acc:.2f}") | |
| plot_decision_boundary(rf_clf, X, y, "Random Forest Decision Region") | |
| st.markdown("π [Open Random Forest Notebook](https://colab.research.google.com/drive/1S6YyfTx9N35E5fpPF0z6ZDm85BSp1deT?usp=sharing)", unsafe_allow_html=True) | |
# Footer shown regardless of the selected technique.
st.markdown("---")
closing_note = "β Ensemble techniques improve model stability, reduce overfitting, and deliver better results. Try them on your data!"
st.success(closing_note)