# KNN_Algorithm / KNN.py
# varshitha22's picture
# Rename knn.py to KNN.py
# 4ffdeaf verified
# raw
# history blame
# 4.55 kB
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.datasets import make_classification, make_moons, make_circles, make_blobs
from sklearn.model_selection import train_test_split, learning_curve
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
from mlxtend.plotting import plot_decision_regions
# Page header: show the app logo, followed by one line of vertical spacing.
LOGO_URL = "https://huggingface.co/spaces/varshitha22/KNN_Algorithm/resolve/main/logo.png"
st.image(LOGO_URL)
# The spacer is raw HTML, which Streamlit only renders with unsafe_allow_html.
st.markdown("<br>", unsafe_allow_html=True)
def plot_learning_curves(X_train, y_train, X_test, y_test, model, scoring='accuracy'):
    """Render a learning curve for ``model`` in the Streamlit app.

    Runs ``learning_curve`` with 5-fold cross-validation on the training
    data and plots the mean training score and mean cross-validation score
    against the number of training examples.

    Args:
        X_train: Training features passed to ``learning_curve``.
        y_train: Training labels passed to ``learning_curve``.
        X_test: Unused; kept so existing callers keep working.
        y_test: Unused; kept so existing callers keep working.
        model: Estimator evaluated by ``learning_curve``.
        scoring: Scoring name forwarded to ``learning_curve``.
    """
    train_sizes, train_scores, test_scores = learning_curve(
        model, X_train, y_train, cv=5, scoring=scoring
    )
    # Average the per-fold scores for each training-set size.
    train_mean = np.mean(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)

    # Draw on the explicit Axes instead of relying on pyplot's implicit
    # "current figure" state.
    fig, ax = plt.subplots()
    ax.plot(train_sizes, train_mean, 'o-', color="r", label="Training Score")
    ax.plot(train_sizes, test_mean, 'o-', color="g", label="Cross-validation Score")
    ax.set_xlabel("Training Examples")
    ax.set_ylabel("Score")
    ax.legend()
    st.pyplot(fig)
    # pyplot keeps every figure alive until it is closed; without this each
    # Streamlit rerun would leave another figure in memory.
    plt.close(fig)
# Sidebar for dataset selection
st.sidebar.header("Dataset Options")
data_type = st.sidebar.selectbox("Select Data Type:", ["Blobs", "Circles", "Moons", "Classification"])
noise = st.sidebar.slider("Add Noise:", 0.0, 1.0, 0.2, step=0.05)

# Sidebar for model selection
st.sidebar.header("Model")
# The options must be a list. A bare "KNN" string may be treated as the
# sequence of its characters, in which case model_name would never equal "KNN".
model_name = st.sidebar.radio("Model: ", ["KNN"])

# Display the KNN hyper-parameter controls only if KNN is selected
if model_name == "KNN":
    neighbors = st.sidebar.number_input("Neighbors", min_value=1, max_value=25, value=5, step=1)
    knn_weights = st.sidebar.radio("KNN Weights:", ["uniform", "distance"])

    # KNN Algorithm: one checkbox per option, with "auto" ticked by default.
    # Ticked options are collected in display order.
    st.sidebar.subheader("KNN Algorithm")
    algorithms_selected = []
    for algorithm_name in ("auto", "ball_tree", "kd_tree", "brute"):
        if st.sidebar.checkbox(algorithm_name, value=(algorithm_name == "auto")):
            algorithms_selected.append(algorithm_name)

    # KNN Metric: one checkbox per option, with "euclidean" ticked by default.
    st.sidebar.subheader("KNN Metric")
    metrics_selected = []
    for metric_name in ("euclidean", "manhattan", "minkowski"):
        if st.sidebar.checkbox(metric_name, value=(metric_name == "euclidean")):
            metrics_selected.append(metric_name)
# Generate dataset
# Each generator is wrapped in a lambda so that only the selected one runs.
dataset_builders = {
    "Blobs": lambda: make_blobs(n_samples=5000, centers=2, cluster_std=noise, random_state=27),
    "Circles": lambda: make_circles(n_samples=5000, noise=noise, factor=0.5, random_state=27),
    "Moons": lambda: make_moons(n_samples=5000, noise=noise, random_state=27),
}
build_dataset = dataset_builders.get(data_type)
if build_dataset is not None:
    X, y = build_dataset()
else:
    # Any other choice (i.e. "Classification") uses make_classification;
    # the noise slider value is not passed to this generator.
    X, y = make_classification(
        n_samples=5000,
        n_features=2,
        n_classes=2,
        n_informative=2,
        n_redundant=0,
        random_state=27,
    )

# Split dataset: 80% train / 20% test with a fixed seed.
split_parts = train_test_split(X, y, test_size=0.2, random_state=27)
X_train, X_test, y_train, y_test = split_parts
# Model selection
if model_name == "KNN":
    # Only the first ticked algorithm / metric is used; when nothing is
    # ticked, fall back to 'auto' and 'minkowski' respectively.
    chosen_algorithm = next(iter(algorithms_selected), 'auto')
    chosen_metric = next(iter(metrics_selected), 'minkowski')
    model = KNeighborsClassifier(
        n_neighbors=neighbors,
        weights=knn_weights,
        algorithm=chosen_algorithm,
        metric=chosen_metric,
    )

# Fit the model on the training split
model.fit(X_train, y_train)
# Display performance metrics only for KNN
if model_name == "KNN":
    st.subheader("KNN Model Evaluation Metrics")
    y_pred = model.predict(X_test)

    # Performance metrics calculation and display
    accuracy = accuracy_score(y_test, y_pred)
    st.write(f"Accuracy: {accuracy:.2f}")
    precision = precision_score(y_test, y_pred)
    st.write(f"Precision: {precision:.2f}")
    recall = recall_score(y_test, y_pred)
    st.write(f"Recall: {recall:.2f}")
    f1 = f1_score(y_test, y_pred)
    st.write(f"F1 Score: {f1:.2f}")

    # AUC is computed from the second column of predict_proba. The fallback
    # value is a string, so it must not go through the ":.2f" float format
    # (that raises ValueError for str).
    if hasattr(model, "predict_proba"):
        auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
        st.write(f"AUC Score: {auc:.2f}")
    else:
        auc = "N/A"
        st.write(f"AUC Score: {auc}")
# Plot dataset: scatter of the full dataset, coloured by class label.
st.subheader("Dataset Visualization")
fig, ax = plt.subplots()
# Pass the Axes explicitly rather than relying on pyplot's current-axes state.
sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, palette="coolwarm", s=50, edgecolor="k", ax=ax)
st.pyplot(fig)
# pyplot retains figures until they are closed; close after rendering so
# repeated Streamlit reruns do not accumulate open figures.
plt.close(fig)

# Decision Boundary: regions predicted by the fitted model, drawn with the
# training points.
st.subheader("Decision Boundary")
fig, ax = plt.subplots()
plot_decision_regions(X_train, y_train, clf=model, legend=2, ax=ax)
st.pyplot(fig)
plt.close(fig)

# Learning Curve
st.subheader("Learning Curve")
plot_learning_curves(X_train, y_train, X_test, y_test, model, scoring='accuracy')