|
|
import streamlit as st |
|
|
import numpy as np |
|
|
import matplotlib.pyplot as plt |
|
|
from sklearn.datasets import make_classification, make_circles, make_blobs, make_moons |
|
|
from sklearn.model_selection import train_test_split, learning_curve |
|
|
from sklearn.neighbors import KNeighborsClassifier |
|
|
from sklearn.naive_bayes import GaussianNB |
|
|
from sklearn.linear_model import LogisticRegression |
|
|
from sklearn.tree import DecisionTreeClassifier |
|
|
from sklearn.ensemble import RandomForestClassifier |
|
|
from sklearn.svm import SVC |
|
|
from sklearn.metrics import accuracy_score, f1_score |
|
|
from mlxtend.plotting import plot_decision_regions |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
from PIL import Image |
|
|
|
|
|
|
|
|
|
|
|
# --- Page header -------------------------------------------------------------
# NOTE(review): both `use_container_width=True` and `width=300` are passed;
# confirm which sizing is actually intended and drop the other.
st.image("innomatics-footer-logo.webp", use_container_width=True, width=300)

st.markdown(
    "<h1 style='text-align: center; color: #FF5733;'>Boundary Surfaces Visualization</h1>",
    unsafe_allow_html=True,
)

# --- Dataset selection -------------------------------------------------------
data = st.sidebar.selectbox('Select Dataset', ('Classification', 'Circles', 'Blobs', 'Moons'))

# Every generator is seeded with the same random_state so that a Streamlit
# rerun (triggered by any widget change) reproduces the same points instead of
# drawing a new sample.
if data == 'Classification':
    X, y = make_classification(n_samples=100, n_features=2, n_redundant=0, random_state=27)
elif data == 'Circles':
    # The seed was missing here, so this dataset alone changed on every rerun.
    X, y = make_circles(n_samples=100, factor=0.5, noise=0.05, random_state=27)
elif data == 'Blobs':
    X, y = make_blobs(n_samples=250, centers=2, n_features=2, cluster_std=1.0, random_state=27)
elif data == 'Moons':
    X, y = make_moons(n_samples=250, noise=0.1, random_state=27)

# Hold out 30% of the samples for the accuracy / F1 report.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=27)
|
|
|
|
|
def plot_decision_surface(X, y, model, title):
    """Draw a classifier's decision regions and show them in the Streamlit page.

    Args:
        X: Feature array forwarded to ``plot_decision_regions``.
        y: Label array forwarded to ``plot_decision_regions``.
        model: Classifier passed as ``clf``; the caller is expected to have
            fitted it already.
        title: Text used as the plot title.
    """
    # Use an explicit figure/axes pair rather than pyplot's implicit "current
    # figure", so the function does not depend on global pyplot state.
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        plot_decision_regions(X, y, clf=model, colors="#7f7f7f,#bcbd22,#17becf", ax=ax)
        ax.set_title(title)
        st.pyplot(fig, clear_figure=True)
    finally:
        # clear_figure only empties the figure; closing it removes it from
        # pyplot's registry so figures do not pile up across reruns.
        plt.close(fig)
|
|
|
|
|
|
|
|
# --- Classifier selection ----------------------------------------------------
classifier_name = st.sidebar.selectbox(
    'Select Classifier',
    ('KNN', 'Naive Bayes', 'Logistic Regression', 'Decision Tree', 'Random Forest', 'SVM'),
)

# Build the estimator for the chosen classifier; hyper-parameter widgets are
# only shown for the classifiers that expose them.
if classifier_name == 'KNN':
    n_neighbors = st.sidebar.slider('Number of Neighbors (k)', 1, 15, 3)
    weights = st.sidebar.radio('Weight Function', ('uniform', 'distance'))
    algorithm = st.sidebar.selectbox('Algorithm', ('auto', 'ball_tree', 'kd_tree', 'brute'))
    p = st.sidebar.slider("Distance Parameter (p)", 1, 5, 2)
    model = KNeighborsClassifier(n_neighbors=n_neighbors, weights=weights, algorithm=algorithm, p=p)

elif classifier_name == 'Naive Bayes':
    model = GaussianNB()

elif classifier_name == 'Logistic Regression':
    model = LogisticRegression()

elif classifier_name == 'Decision Tree':
    # Seeded like the datasets and the split so reruns give the same tree.
    model = DecisionTreeClassifier(random_state=27)

elif classifier_name == 'Random Forest':
    n_estimators = st.sidebar.slider('Number of Trees', 10, 200, 100)
    # Seeded so the reported metrics do not jitter between reruns.
    model = RandomForestClassifier(n_estimators=n_estimators, random_state=27)

elif classifier_name == 'SVM':
    kernel = st.sidebar.selectbox('Kernel Type', ('linear', 'poly', 'rbf', 'sigmoid'))
    C = st.sidebar.slider('Regularization (C)', 0.01, 10.0, 1.0)
    model = SVC(kernel=kernel, C=C)

# --- Train and evaluate ------------------------------------------------------
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
f1 = f1_score(y_test, y_pred)

# --- Report ------------------------------------------------------------------
# NOTE(review): the leading "π" in the strings below looks like a mis-decoded
# emoji; the original glyph cannot be recovered from this file, so it is left
# untouched. Restore the intended character when known.
st.markdown("<h3 style='color: #4CAF50;'>π Model Performance</h3>", unsafe_allow_html=True)
# This literal was split across two lines by a mis-decoded check-mark emoji,
# which made it an unterminated string; it is rejoined with the check mark.
st.write(f"✅ **Accuracy:** {accuracy:.2f}")
st.write(f"π **F1-score:** {f1:.2f}")

st.subheader("π Decision Boundary")
# The surface is drawn over the full dataset (train + test points).
plot_decision_surface(X, y, model, f'{classifier_name} Decision Surface')
|
|
|
|
|
|
|
|
def plot_learning_curve(model, X, y):
    """Plot mean training and validation accuracy against training-set size.

    Calls ``learning_curve`` with 5-fold cross-validation, accuracy scoring and
    ten evenly spaced training-size fractions from 0.1 to 1.0, averages the
    scores over the folds, and shows the two curves in the Streamlit page.

    The plot title reads the module-level ``classifier_name``.

    Args:
        model: Estimator handed to ``learning_curve``.
        X: Feature array used for the cross-validated curve.
        y: Label array matching ``X``.
    """
    train_sizes, train_scores, test_scores = learning_curve(
        model,
        X,
        y,
        cv=5,
        scoring='accuracy',
        train_sizes=np.linspace(0.1, 1.0, 10),
    )

    # Scores are averaged over axis 1, giving one value per training size.
    train_mean = np.mean(train_scores, axis=1)
    test_mean = np.mean(test_scores, axis=1)

    # Use an explicit figure/axes pair rather than pyplot's implicit "current
    # figure", so the function does not depend on global pyplot state.
    fig, ax = plt.subplots(figsize=(6, 4))
    try:
        ax.plot(train_sizes, train_mean, 'o-', label="Training Accuracy", color="blue")
        ax.plot(train_sizes, test_mean, 'o-', label="Validation Accuracy", color="red")

        ax.set_xlabel("Training Samples")
        ax.set_ylabel("Accuracy")
        ax.set_title(f"Learning Curve: {classifier_name}")
        ax.legend()

        st.pyplot(fig, clear_figure=True)
    finally:
        # clear_figure only empties the figure; closing it removes it from
        # pyplot's registry so figures do not pile up across reruns.
        plt.close(fig)
|
|
|
|
|
|
|
|
# --- Learning curve ----------------------------------------------------------
# NOTE(review): the leading "π" looks like a mis-decoded emoji; it is kept
# as-is because the original glyph cannot be recovered from this file.
st.subheader("π Learning Curve")
# The curve is cross-validated over the full dataset, not the train split.
plot_learning_curve(model, X, y)
|
|
|