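"""Streamlit demo: visualize decision boundaries and learning curves for several
scikit-learn classifiers (KNN, Decision Tree, Naive Bayes, Logistic Regression, SVC)
on synthetic 2-D datasets."""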
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification, make_moons, make_circles, make_blobs
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from mlxtend.plotting import plot_decision_regions
# Image
st.image("https://huggingface.co/spaces/varshitha22/DecisionBoundaries_Learningcurves_Algorithms/resolve/main/logo.png")
st.markdown("<br>", unsafe_allow_html=True)
# Sidebar for dataset selection
st.sidebar.header("Dataset Options")
data_type = st.sidebar.selectbox("Select Data Type:", ["Blobs", "Circles", "Moons", "Classification"])
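# Note: the noise value below is reused as cluster_std for Blobs and as the noise
# parameter for Circles/Moons; the Classification dataset does not use it.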
noise = st.sidebar.slider("Add Noise:", 0.0, 1.0, 0.2, step=0.05)
# Sidebar for model selection
st.sidebar.header("Model Selection")
model_name = st.sidebar.radio("Choose a Model:", ["KNN", "Decision Tree", "Naive Bayes", "Logistic Regression", "SVC"])
# Display KNN specific settings only if KNN is selected
if model_name == "KNN":
    neighbors = st.sidebar.number_input("Neighbors", min_value=1, max_value=25, value=5, step=1)
    knn_weights = st.sidebar.radio("KNN Weights:", ["uniform", "distance"])
    # KNN Algorithm
    st.sidebar.subheader("KNN Algorithm")
    algorithms_selected = []
    if st.sidebar.checkbox("auto", value=True):
        algorithms_selected.append("auto")
    if st.sidebar.checkbox("ball_tree"):
        algorithms_selected.append("ball_tree")
    if st.sidebar.checkbox("kd_tree"):
        algorithms_selected.append("kd_tree")
    if st.sidebar.checkbox("brute"):
        algorithms_selected.append("brute")
    # KNN Metric
    st.sidebar.subheader("KNN Metric")
    metrics_selected = []
    if st.sidebar.checkbox("euclidean", value=True):
        metrics_selected.append("euclidean")
    if st.sidebar.checkbox("manhattan"):
        metrics_selected.append("manhattan")
    if st.sidebar.checkbox("minkowski"):
        metrics_selected.append("minkowski")
else:
    neighbors = None
    knn_weights = None
    algorithms_selected = []
    metrics_selected = []
# Generate dataset
if data_type == "Blobs":
    X, y = make_blobs(n_samples=5000, centers=2, cluster_std=noise, random_state=42)
elif data_type == "Circles":
    X, y = make_circles(n_samples=5000, noise=noise, factor=0.5, random_state=42)
elif data_type == "Moons":
    X, y = make_moons(n_samples=5000, noise=noise, random_state=42)
else:
    X, y = make_classification(n_samples=5000, n_features=2, n_classes=2, n_informative=2, n_redundant=0, random_state=42)
# Split dataset
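# 80/20 hold-out split; a fixed random_state keeps the split stable across Streamlit reruns.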
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Model selection
if model_name == "KNN":
    # Only the first checked algorithm/metric is passed to the classifier.
    model = KNeighborsClassifier(
        n_neighbors=neighbors,
        weights=knn_weights,
        algorithm=algorithms_selected[0] if algorithms_selected else 'auto',
        metric=metrics_selected[0] if metrics_selected else 'minkowski',
    )
elif model_name == "Decision Tree":
    model = DecisionTreeClassifier(random_state=42)
elif model_name == "Naive Bayes":
    model = GaussianNB()
elif model_name == "Logistic Regression":
    model = LogisticRegression(max_iter=200, random_state=42)
else:
    model = SVC(probability=True, kernel='linear', random_state=42)
# Fit the model
model.fit(X_train, y_train)
# Predict and calculate metrics
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
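# ROC AUC needs probability estimates for the positive class; every model above
# provides predict_proba (SVC only because probability=True), so the "N/A"
# fallback is just a safety net.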
auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]) if hasattr(model, "predict_proba") else "N/A"
# --- Display model performance under the radio button ---
with st.sidebar:
    st.subheader(f"{model_name} Model Evaluation Metrics")
    st.write(f"Accuracy: {accuracy:.2f}")
    st.write(f"Precision: {precision:.2f}")
    st.write(f"Recall: {recall:.2f}")
    st.write(f"F1 Score: {f1:.2f}")
    # Guard against the "N/A" fallback so the f-string format spec does not fail
    st.write(f"AUC Score: {auc:.2f}" if auc != "N/A" else "AUC Score: N/A")
# Plot dataset
st.subheader("Dataset Visualization")
fig, ax = plt.subplots()
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, palette="coolwarm", s=50, edgecolor="k", ax=ax)
st.pyplot(fig)
# Decision Boundary
st.subheader("Decision Boundary")
fig, ax = plt.subplots()
plot_decision_regions(X_train, y_train, clf=model, legend=2, ax=ax)
st.pyplot(fig)
# Learning Curve
st.subheader("Learning Curve")
train_sizes, train_scores, test_scores = learning_curve(model, X_train, y_train, cv=5, train_sizes=np.linspace(0.1, 1.0, 10))
train_mean = np.mean(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)
fig, ax = plt.subplots()
ax.plot(train_sizes, train_mean, label='Train Accuracy', marker='o')
ax.plot(train_sizes, test_mean, label='Validation Accuracy (5-fold CV)', marker='s')
ax.set_xlabel("Training Size")
ax.set_ylabel("Accuracy")
ax.legend()
st.pyplot(fig)