File size: 4,554 Bytes
ab61b9b
 
 
 
 
 
 
 
 
 
 
f86e0c4
ab61b9b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification, make_moons, make_circles, make_blobs
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
from mlxtend.plotting import plot_decision_regions

# Page header: show the project logo, then an HTML line break as a spacer.
_LOGO_URL = "https://huggingface.co/spaces/varshitha22/KNN_Algorithm/resolve/main/logo.png"
st.image(_LOGO_URL)
st.markdown("<br>", unsafe_allow_html=True)

def plot_learning_curves(X_train, y_train, X_test, y_test, model, scoring='accuracy'):
    """Plot the learning curve of ``model`` and render it in the Streamlit page.

    The curve is computed with ``sklearn.model_selection.learning_curve``
    using 5-fold cross-validation on the training data only.

    Args:
        X_train: Training features passed to ``learning_curve``.
        y_train: Training labels passed to ``learning_curve``.
        X_test: Unused; kept so existing callers keep working.
        y_test: Unused; kept so existing callers keep working.
        model: Estimator whose learning curve is evaluated.
        scoring: Scoring name forwarded to ``learning_curve``.

    Returns:
        None. The figure is sent to Streamlit via ``st.pyplot``.
    """
    train_sizes, train_scores, test_scores = learning_curve(
        model, X_train, y_train, cv=5, scoring=scoring
    )
    # Average along axis 1 so there is one score per training-set size.
    train_mean = np.mean(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)

    # Draw on the explicit Axes rather than pyplot's implicit "current
    # figure", so the plot cannot land on a figure created elsewhere.
    fig, ax = plt.subplots()
    ax.plot(train_sizes, train_mean, 'o-', color="r", label="Training Score")
    ax.plot(train_sizes, test_mean, 'o-', color="g", label="Cross-validation Score")
    ax.set_xlabel("Training Examples")
    ax.set_ylabel("Score")
    ax.legend()
    st.pyplot(fig)
    # The script body is re-executed on each rerun; close the figure so
    # pyplot does not keep accumulating open figures.
    plt.close(fig)

# Sidebar for dataset selection
st.sidebar.header("Dataset Options")
data_type = st.sidebar.selectbox("Select Data Type:", ["Blobs", "Circles", "Moons", "Classification"])
noise = st.sidebar.slider("Add Noise:", 0.0, 1.0, 0.2, step=0.05)

# Sidebar for model selection
st.sidebar.header("Model")
# Options are passed as a list. A bare string is itself a sequence of
# characters, so (depending on the Streamlit version) it may be offered as
# the choices "K", "N", "N", and the `== "KNN"` checks would never match.
model_name = st.sidebar.radio("Model: ", ["KNN"])

# Display number of neighbors selector only if KNN is selected
if model_name == "KNN":
    neighbors = st.sidebar.number_input("Neighbors", min_value=1, max_value=25, value=5, step=1)
    knn_weights = st.sidebar.radio("KNN Weights:", ["uniform", "distance"])

# KNN Algorithm: one checkbox per option, with "auto" ticked by default.
# Several boxes can be ticked; the list keeps them in display order.
st.sidebar.subheader("KNN Algorithm")
algorithms_selected = [
    name
    for name in ("auto", "ball_tree", "kd_tree", "brute")
    if st.sidebar.checkbox(name, value=(name == "auto"))
]

# KNN Metric: same pattern, with "euclidean" ticked by default.
st.sidebar.subheader("KNN Metric")
metrics_selected = [
    name
    for name in ("euclidean", "manhattan", "minkowski")
    if st.sidebar.checkbox(name, value=(name == "euclidean"))
]

# Generate dataset
# Each entry builds a 5000-sample, two-class dataset with a fixed seed.
# The builders are lambdas so only the selected one is actually run.
# NOTE: the "Classification" builder does not use the `noise` slider value.
_dataset_builders = {
    "Blobs": lambda: make_blobs(n_samples=5000, centers=2, cluster_std=noise, random_state=27),
    "Circles": lambda: make_circles(n_samples=5000, noise=noise, factor=0.5, random_state=27),
    "Moons": lambda: make_moons(n_samples=5000, noise=noise, random_state=27),
    "Classification": lambda: make_classification(
        n_samples=5000,
        n_features=2,
        n_classes=2,
        n_informative=2,
        n_redundant=0,
        random_state=27,
    ),
}
# Any unrecognised data type falls back to the "Classification" builder.
_build_dataset = _dataset_builders.get(data_type, _dataset_builders["Classification"])
X, y = _build_dataset()

# Split dataset: hold out 20% of the samples for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=27)

# Model selection
if model_name == "KNN":
    # Several algorithm/metric boxes may be ticked in the sidebar; only the
    # first ticked entry is used, with a default when none is ticked.
    knn_algorithm = algorithms_selected[0] if algorithms_selected else 'auto'
    knn_metric = metrics_selected[0] if metrics_selected else 'minkowski'
    model = KNeighborsClassifier(
        n_neighbors=neighbors,
        weights=knn_weights,
        algorithm=knn_algorithm,
        metric=knn_metric,
    )
else:
    # Without a model, every later step would fail with a NameError;
    # report the problem in the page and halt this script run instead.
    st.error(f"Unsupported model: {model_name}")
    st.stop()

# Fit the model
model.fit(X_train, y_train)

# Display performance metrics (only KNN can reach this point)
st.subheader("KNN Model Evaluation Metrics")
y_pred = model.predict(X_test)

# Performance metrics calculation and display
accuracy = accuracy_score(y_test, y_pred)
st.write(f"Accuracy: {accuracy:.2f}")

precision = precision_score(y_test, y_pred)
st.write(f"Precision: {precision:.2f}")

recall = recall_score(y_test, y_pred)
st.write(f"Recall: {recall:.2f}")

f1 = f1_score(y_test, y_pred)
st.write(f"F1 Score: {f1:.2f}")

# AUC needs class probabilities. Format the number only when it exists:
# applying ":.2f" to the "N/A" placeholder string would raise ValueError.
if hasattr(model, "predict_proba"):
    auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
    st.write(f"AUC Score: {auc:.2f}")
else:
    auc = "N/A"
    st.write(f"AUC Score: {auc}")

# Plot dataset: scatter of the full dataset, coloured by class label.
st.subheader("Dataset Visualization")
fig, ax = plt.subplots()
# Pass the Axes explicitly instead of relying on pyplot's current axes.
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, palette="coolwarm", s=50, edgecolor="k", ax=ax)
st.pyplot(fig)
# Close after rendering so figures do not accumulate across reruns.
plt.close(fig)

# Decision Boundary: regions predicted by the fitted model, drawn with the
# training points.
st.subheader("Decision Boundary")
fig, ax = plt.subplots()
plot_decision_regions(X_train, y_train, clf=model, legend=2, ax=ax)
st.pyplot(fig)
plt.close(fig)

# Learning Curve
st.subheader("Learning Curve")
plot_learning_curves(X_train, y_train, X_test, y_test, model, scoring='accuracy')