import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification, make_moons, make_circles, make_blobs
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, roc_auc_score
from mlxtend.plotting import plot_decision_regions

# Image
st.image("https://huggingface.co/spaces/varshitha22/DecisionBoundaries_Learningcurves_Algorithms/resolve/main/logo.png")
st.markdown("<br>", unsafe_allow_html=True)

# Sidebar for dataset selection
st.sidebar.header("Dataset Options")
data_type = st.sidebar.selectbox("Select Data Type:", ["Blobs", "Circles", "Moons", "Classification"])
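# Noise slider: sets cluster_std for Blobs and the Gaussian noise level for Circles/Moons
# (the Classification dataset below does not use it)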
noise = st.sidebar.slider("Add Noise:", 0.0, 1.0, 0.2, step=0.05)

# Sidebar for model selection
st.sidebar.header("Model Selection")
model_name = st.sidebar.radio("Choose a Model:", ["KNN", "Decision Tree", "Naive Bayes", "Logistic Regression", "SVC"])

# Display KNN specific settings only if KNN is selected
if model_name == "KNN":
    neighbors = st.sidebar.number_input("Neighbors", min_value=1, max_value=25, value=5, step=1)
    knn_weights = st.sidebar.radio("KNN Weights:", ["uniform", "distance"])

    # KNN Algorithm
    st.sidebar.subheader("KNN Algorithm")
    algorithms_selected = []
    if st.sidebar.checkbox("auto", value=True):
        algorithms_selected.append("auto")
    if st.sidebar.checkbox("ball_tree"):
        algorithms_selected.append("ball_tree")
    if st.sidebar.checkbox("kd_tree"):
        algorithms_selected.append("kd_tree")
    if st.sidebar.checkbox("brute"):
        algorithms_selected.append("brute")

    # KNN Metric
    st.sidebar.subheader("KNN Metric")
    metrics_selected = []
    if st.sidebar.checkbox("euclidean", value=True):
        metrics_selected.append("euclidean")
    if st.sidebar.checkbox("manhattan"):
        metrics_selected.append("manhattan")
    if st.sidebar.checkbox("minkowski"):
        metrics_selected.append("minkowski")
else:
    neighbors = None
    knn_weights = None
    algorithms_selected = []
    metrics_selected = []

# Generate dataset
if data_type == "Blobs":
    X, y = make_blobs(n_samples=5000, centers=2, cluster_std=noise, random_state=42)
elif data_type == "Circles":
    X, y = make_circles(n_samples=5000, noise=noise, factor=0.5, random_state=42)
elif data_type == "Moons":
    X, y = make_moons(n_samples=5000, noise=noise, random_state=42)
else:
    X, y = make_classification(n_samples=5000, n_features=2, n_classes=2, n_informative=2, n_redundant=0, random_state=42)

# Split dataset
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Model selection
if model_name == "KNN":
    model = KNeighborsClassifier(n_neighbors=neighbors, weights=knn_weights, algorithm=algorithms_selected[0] if algorithms_selected else 'auto', metric=metrics_selected[0] if metrics_selected else 'minkowski')
elif model_name == "Decision Tree":
    model = DecisionTreeClassifier(random_state=42)
elif model_name == "Naive Bayes":
    model = GaussianNB()
elif model_name == "Logistic Regression":
    model = LogisticRegression(max_iter=200, random_state=42)
else:
    # probability=True enables predict_proba so the ROC AUC metric below can be computed
    model = SVC(probability=True, kernel='linear', random_state=42)

# Fit the model
model.fit(X_train, y_train)

# Predict and calculate metrics
y_pred = model.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)
precision = precision_score(y_test, y_pred)
recall = recall_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)
auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1]) if hasattr(model, "predict_proba") else None

# --- Display model performance under the radio button ---
with st.sidebar:
    st.subheader(f"{model_name} Model Evaluation Metrics")
    st.write(f" Accuracy: {accuracy:.2f}")
    st.write(f" Precision: {precision:.2f}")
    st.write(f" Recall: {recall:.2f}")
    st.write(f" F1 Score: {f1:.2f}")
    st.write(f" AUC Score: {auc:.2f}")

# Plot dataset
st.subheader("Dataset Visualization")
fig, ax = plt.subplots()
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, palette="coolwarm", s=50, edgecolor="k", ax=ax)
st.pyplot(fig)

# Decision Boundary
st.subheader("Decision Boundary")
fig, ax = plt.subplots()
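# mlxtend's plot_decision_regions shades each class's predicted region over the training data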
plot_decision_regions(X_train, y_train, clf=model, legend=2, ax=ax)
st.pyplot(fig)

# Learning Curve
st.subheader("Learning Curve")
train_sizes, train_scores, test_scores = learning_curve(model, X_train, y_train, cv=5, train_sizes=np.linspace(0.1, 1.0, 10))
train_mean = np.mean(train_scores, axis=1)
test_mean = np.mean(test_scores, axis=1)

fig, ax = plt.subplots()
ax.plot(train_sizes, train_mean, label='Train Accuracy', marker='o')
ax.plot(train_sizes, test_mean, label='Cross-validation Accuracy', marker='s')
ax.set_xlabel("Training Set Size")
ax.set_ylabel("Accuracy")
ax.legend()
st.pyplot(fig)