Spaces:

varshitha22
/

KNN_Algorithm

Sleeping

App Files Files Community

KNN_Algorithm / KNN.py

varshitha22

Rename knn.py to KNN.py

4ffdeaf verified 11 months ago

raw

history blame

4.55 kB

	import streamlit as st
	import numpy as np
	import matplotlib.pyplot as plt
	import seaborn as sns
	from sklearn.datasets import make_classification, make_moons, make_circles, make_blobs
	from sklearn.model_selection import train_test_split, learning_curve
	from sklearn.neighbors import KNeighborsClassifier
	from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
	from mlxtend.plotting import plot_decision_regions

	# Page header: show the project logo, then insert one line of vertical space.
	LOGO_URL = "https://huggingface.co/spaces/varshitha22/KNN_Algorithm/resolve/main/logo.png"
	st.image(LOGO_URL)
	# The spacer is a raw HTML <br>, hence unsafe_allow_html=True.
	st.markdown("<br>", unsafe_allow_html=True)

	def plot_learning_curves(X_train, y_train, X_test, y_test, model, scoring='accuracy'):
	    """Draw a learning curve for ``model`` and render it in the Streamlit app.

	    Calls ``learning_curve`` with 5-fold cross-validation on the training
	    data, averages the per-fold scores, and plots the mean training score
	    and mean cross-validation score against the number of training examples.

	    Args:
	        X_train: Training features handed to ``learning_curve``.
	        y_train: Training labels handed to ``learning_curve``.
	        X_test: Not used by this function; kept so existing callers work.
	        y_test: Not used by this function; kept so existing callers work.
	        model: Estimator to evaluate.
	        scoring: Scoring name forwarded to ``learning_curve``.
	    """
	    train_sizes, train_scores, test_scores = learning_curve(
	        model, X_train, y_train, cv=5, scoring=scoring
	    )
	    # Scores come back as one row per training size; average over axis 1
	    # to collapse them to a single value per size.
	    train_mean = np.mean(train_scores, axis=1)
	    test_mean = np.mean(test_scores, axis=1)

	    fig, ax = plt.subplots()
	    # Draw on the axes we just created instead of pyplot's implicit
	    # "current axes", so the output cannot land on some other figure.
	    ax.plot(train_sizes, train_mean, 'o-', color="r", label="Training Score")
	    ax.plot(train_sizes, test_mean, 'o-', color="g", label="Cross-validation Score")
	    ax.set_xlabel("Training Examples")
	    ax.set_ylabel("Score")
	    ax.legend()
	    st.pyplot(fig)
	    # pyplot keeps a reference to every figure it creates; close this one so
	    # figures do not pile up each time the function is called.
	    plt.close(fig)

	# Sidebar for dataset selection
	st.sidebar.header("Dataset Options")
	data_type = st.sidebar.selectbox("Select Data Type:", ["Blobs", "Circles", "Moons", "Classification"])
	noise = st.sidebar.slider("Add Noise:", 0.0, 1.0, 0.2, step=0.05)

	# Sidebar for model selection
	st.sidebar.header("Model")
	# Pass the options as a list. A bare string is itself a sequence of
	# characters, so "KNN" could be offered as the choices "K", "N", "N" and
	# model_name would then never equal "KNN".
	model_name = st.sidebar.radio("Model: ", ["KNN"])

	# Display number of neighbors selector only if KNN is selected
	if model_name == "KNN":
	    neighbors = st.sidebar.number_input("Neighbors", min_value=1, max_value=25, value=5, step=1)
	    knn_weights = st.sidebar.radio("KNN Weights:", ["uniform", "distance"])

	# KNN Algorithm
	# One checkbox per neighbour-search algorithm, rendered in this order;
	# "auto" starts ticked. The ticked names are collected in display order.
	st.sidebar.subheader("KNN Algorithm")
	algorithms_selected = [
	    algorithm_name
	    for algorithm_name in ("auto", "ball_tree", "kd_tree", "brute")
	    if st.sidebar.checkbox(algorithm_name, value=(algorithm_name == "auto"))
	]

	# KNN Metric
	# Same pattern for the distance metric; "euclidean" starts ticked.
	st.sidebar.subheader("KNN Metric")
	metrics_selected = [
	    metric_name
	    for metric_name in ("euclidean", "manhattan", "minkowski")
	    if st.sidebar.checkbox(metric_name, value=(metric_name == "euclidean"))
	]

	# Build the synthetic two-class, two-feature dataset picked in the sidebar.
	# Each entry defers the call so only the selected generator actually runs.
	# The sidebar "noise" value feeds cluster_std for blobs and noise for
	# circles/moons; it is not passed to make_classification.
	dataset_builders = {
	    "Blobs": lambda: make_blobs(n_samples=5000, centers=2, cluster_std=noise, random_state=27),
	    "Circles": lambda: make_circles(n_samples=5000, noise=noise, factor=0.5, random_state=27),
	    "Moons": lambda: make_moons(n_samples=5000, noise=noise, random_state=27),
	}
	# Any other selection falls back to make_classification.
	build_dataset = dataset_builders.get(
	    data_type,
	    lambda: make_classification(
	        n_samples=5000,
	        n_features=2,
	        n_classes=2,
	        n_informative=2,
	        n_redundant=0,
	        random_state=27,
	    ),
	)
	X, y = build_dataset()

	# Hold out 20% of the samples for evaluation; the seed is fixed so the
	# split is reproducible.
	X_train, X_test, y_train, y_test = train_test_split(
	    X, y, test_size=0.2, random_state=27
	)

	# Model selection
	if model_name == "KNN":
	    # Several algorithm/metric boxes can be ticked at once, but the
	    # classifier takes a single value for each: use the first ticked entry
	    # and fall back to a default when nothing is ticked.
	    knn_algorithm = algorithms_selected[0] if algorithms_selected else 'auto'
	    knn_metric = metrics_selected[0] if metrics_selected else 'minkowski'
	    model = KNeighborsClassifier(
	        n_neighbors=neighbors,
	        weights=knn_weights,
	        algorithm=knn_algorithm,
	        metric=knn_metric,
	    )

	# Fit the model
	model.fit(X_train, y_train)

	# Display performance metrics only for KNN
	if model_name == "KNN":
	    st.subheader("KNN Model Evaluation Metrics")
	    y_pred = model.predict(X_test)

	    # Performance metrics calculation and display
	    accuracy = accuracy_score(y_test, y_pred)
	    st.write(f"Accuracy: {accuracy:.2f}")

	    precision = precision_score(y_test, y_pred)
	    st.write(f"Precision: {precision:.2f}")

	    recall = recall_score(y_test, y_pred)
	    st.write(f"Recall: {recall:.2f}")

	    f1 = f1_score(y_test, y_pred)
	    st.write(f"F1 Score: {f1:.2f}")

	    # AUC is computed from the predicted probability of class 1. When the
	    # model has no predict_proba the value is the string "N/A", which must
	    # not go through the ":.2f" float format (that raises ValueError).
	    if hasattr(model, "predict_proba"):
	        auc = roc_auc_score(y_test, model.predict_proba(X_test)[:, 1])
	        st.write(f"AUC Score: {auc:.2f}")
	    else:
	        auc = "N/A"
	        st.write(f"AUC Score: {auc}")

	# Plot dataset
	# Scatter of every generated sample, coloured by class label.
	st.subheader("Dataset Visualization")
	fig, ax = plt.subplots()
	# Pass ax explicitly so the plot is drawn on this figure rather than on
	# whatever pyplot currently considers the active axes.
	sns.scatterplot(x=X[:, 0], y=X[:, 1], hue=y, palette="coolwarm", s=50, edgecolor="k", ax=ax)
	st.pyplot(fig)
	# pyplot keeps every figure alive until it is closed; the script creates
	# new figures on each run, so release this one once it has been rendered.
	plt.close(fig)

	# Decision Boundary
	# Regions predicted by the fitted model, with the training points on top.
	st.subheader("Decision Boundary")
	fig, ax = plt.subplots()
	plot_decision_regions(X_train, y_train, clf=model, legend=2, ax=ax)
	st.pyplot(fig)
	plt.close(fig)

	# Learning Curve
	st.subheader("Learning Curve")
	plot_learning_curves(X_train, y_train, X_test, y_test, model, scoring='accuracy')