import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import streamlit as st
from sklearn.datasets import load_breast_cancer
from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC

# Browser-tab title and wide layout; kept ahead of every other Streamlit call.
st.set_page_config(page_title="SVM Classifier", layout="wide")
st.title("🔬 SVM Classifier")

# Introductory copy: what an SVM is and which dataset the app uses.
# The class encoding quoted below follows scikit-learn's breast-cancer
# dataset, whose target is 0 for "malignant" and 1 for "benign".
st.markdown("""
## 🤖 What is a Support Vector Machine (SVM)?

Support Vector Machine is a powerful classification algorithm that works by finding the optimal decision boundary (hyperplane) that best separates different classes.

### Key Features:

- Maximizes the margin between classes
- Uses support vectors — data points closest to the margin
- Can handle linear and non-linear data using **kernels**

---

## 📊 Dataset: Breast Cancer Diagnosis

We’ll classify tumors as **Malignant (0)** or **Benign (1)** based on features from cell nuclei in digitized images.
""")

@st.cache_data
def load_data():
    """Load the scikit-learn breast-cancer dataset for the app.

    The function is wrapped in ``st.cache_data``, so Streamlit caches its
    return value.

    Returns:
        A ``(frame, bunch)`` tuple. ``frame`` is a DataFrame built from
        ``bunch.data`` with ``bunch.feature_names`` as column labels, plus a
        trailing ``target`` column holding ``bunch.target``. ``bunch`` is the
        object returned by ``load_breast_cancer()``.
    """
    bunch = load_breast_cancer()
    features = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    # assign() appends the label column last, matching `frame["target"] = ...`.
    return features.assign(target=bunch.target), bunch


# `df` holds the features plus the `target` column; `data_info` keeps the raw
# scikit-learn object so its `target_names` can label the report later on.
df, data_info = load_data()

# Show the first rows so users can see the feature columns and the target.
st.subheader("📋 Data Preview")
st.dataframe(df.head(), use_container_width=True)

# Sidebar controls for the two SVC hyper-parameters the app exposes.
# `kernel` and `C` are read by both the full model and the 2-feature model.
st.sidebar.header("⚙️ SVM Settings")
kernel = st.sidebar.selectbox("Kernel Type", ["linear", "rbf", "poly"])
C = st.sidebar.slider("Regularization (C)", min_value=0.01, max_value=10.0, value=1.0)

# Separate the feature matrix from the label column.
X = df.drop("target", axis=1)
y = df["target"]

# Split BEFORE scaling: the scaler's mean/variance must come from the training
# rows only. Fitting it on the full data set lets the held-out rows influence
# preprocessing (data leakage) and makes the reported accuracy optimistic.
# The same random_state yields the same row partition as splitting afterwards.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)
# Full matrix under the training-set scaling; kept under its original name.
X_scaled = scaler.transform(X)

# Fit the classifier with the sidebar settings. probability=True is kept from
# the original configuration even though nothing here calls predict_proba.
model = SVC(kernel=kernel, C=C, probability=True, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Headline accuracy on the held-out 20 %.
acc = accuracy_score(y_test, y_pred)
st.success(f"✅ Accuracy: {acc * 100:.2f}%")

# Per-class precision/recall/F1, labelled with the dataset's class names.
st.markdown("### 📋 Classification Report")
st.text(classification_report(y_test, y_pred, target_names=data_info.target_names))

# Decision-boundary demo: retrain a 2-feature SVC on a user-chosen pair of
# columns and draw its predicted regions behind the data points.
st.subheader("📈 Visualizing with 2 Features")
# df.columns[:-1] drops the trailing "target" column from the choices.
feature_x = st.selectbox("X-axis Feature", df.columns[:-1], index=0)
feature_y = st.selectbox("Y-axis Feature", df.columns[:-1], index=1)

X_vis = df[[feature_x, feature_y]]
X_train_vis_raw, X_test_vis_raw, y_train_vis, y_test_vis = train_test_split(
    X_vis, y, test_size=0.2, random_state=42
)

# Use a dedicated scaler fitted on the training split only. Re-fitting the
# shared `scaler` here would overwrite the statistics learned for the full
# 30-feature model and would also leak the held-out rows into preprocessing.
scaler_vis = StandardScaler()
X_train_vis = scaler_vis.fit_transform(X_train_vis_raw)
X_test_vis = scaler_vis.transform(X_test_vis_raw)
X_vis_scaled = scaler_vis.transform(X_vis)

model_vis = SVC(kernel=kernel, C=C)
model_vis.fit(X_train_vis, y_train_vis)

# Evaluate the model on a dense grid (step h) that extends one unit beyond the
# data in each direction, then reshape the predictions back to the grid.
h = 0.02
x_min, x_max = X_vis_scaled[:, 0].min() - 1, X_vis_scaled[:, 0].max() + 1
y_min, y_max = X_vis_scaled[:, 1].min() - 1, X_vis_scaled[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = model_vis.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Draw through the Axes object rather than pyplot's implicit "current figure",
# so every call targets the figure that is handed to Streamlit.
fig, ax = plt.subplots(figsize=(8, 6))
ax.contourf(xx, yy, Z, alpha=0.3, cmap="coolwarm")
sns.scatterplot(
    x=X_vis_scaled[:, 0],
    y=X_vis_scaled[:, 1],
    hue=df["target"],
    palette="coolwarm",
    ax=ax,
)
# The plotted coordinates are standardized values, not raw measurements.
ax.set_xlabel(f"{feature_x} (standardized)")
ax.set_ylabel(f"{feature_y} (standardized)")
ax.set_title("SVM Decision Boundary")
st.pyplot(fig)
# pyplot keeps figures alive until closed; the script reruns on every widget
# change, so release the figure once it has been rendered.
plt.close(fig)

# Closing notes: a short recap plus tuning tips for the sidebar controls.
st.markdown("""
---

## 💡 Summary

- SVM creates a hyperplane that separates classes.
- Works well for small and high-dimensional datasets.
- The `C` parameter controls the trade-off between margin and misclassification.

### Tips:

- Use **RBF kernel** for non-linear data.
- Try adjusting C to see how the margin changes.
""")