UmaKumpatla committed on
Commit
6d676dd
·
verified ·
1 Parent(s): ed9e02d

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -0
app.py CHANGED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import pandas as pd
3
+ import numpy as np
4
+ from sklearn.datasets import load_breast_cancer
5
+ from sklearn.model_selection import train_test_split
6
+ from sklearn.preprocessing import StandardScaler
7
+ from sklearn.svm import SVC
8
+ from sklearn.metrics import accuracy_score, classification_report
9
+ import matplotlib.pyplot as plt
10
+ import seaborn as sns
11
+
12
# --- Streamlit Page Config ---
# NOTE: set_page_config must be the first Streamlit call in the script.
st.set_page_config(page_title="SVM Classifier", layout="wide")
st.title("πŸ”¬ SVM Classifier on Breast Cancer Dataset")

# Intro Section: static explanatory markdown rendered at the top of the page.
st.markdown("""
## πŸ€– What is a Support Vector Machine (SVM)?
Support Vector Machine is a powerful classification algorithm that works by finding the optimal decision boundary (hyperplane) that best separates different classes.

### Key Features:
- Maximizes the margin between classes
- Uses support vectors β€” data points closest to the margin
- Can handle linear and non-linear data using **kernels**

---

## πŸ“Š Dataset: Breast Cancer Diagnosis
We’ll classify tumors as **Malignant (1)** or **Benign (0)** based on features from cell nuclei in digitized images.
""")
31
+
32
+ # Load Dataset
33
@st.cache_data
def load_data():
    """Load the breast-cancer dataset once and cache it.

    Returns:
        tuple: (DataFrame of features plus a ``target`` column,
                the raw sklearn Bunch — kept for ``target_names`` etc.).
    """
    bunch = load_breast_cancer()
    frame = pd.DataFrame(bunch.data, columns=bunch.feature_names)
    frame["target"] = bunch.target
    return frame, bunch
39
+
40
# Materialize the cached dataset: df is the feature frame (+ 'target' column),
# data_info is the raw sklearn Bunch (used later for target_names).
df, data_info = load_data()

# Show Data
st.subheader("πŸ” Data Preview")
st.dataframe(df.head(), use_container_width=True)

# Sidebar Settings — kernel and C are module-level and feed both SVM models below.
st.sidebar.header("βš™οΈ SVM Settings")
kernel = st.sidebar.selectbox("Kernel Type", ["linear", "rbf", "poly"])
C = st.sidebar.slider("Regularization (C)", min_value=0.01, max_value=10.0, value=1.0)
50
+
51
# --- Preprocess ---
X = df.drop("target", axis=1)
y = df["target"]

# BUGFIX: the original fit the StandardScaler on the FULL dataset before
# splitting, leaking test-set statistics (mean/std) into training. Split the
# raw features first, fit the scaler on the training portion only, and apply
# the same transform to the held-out test set.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

scaler = StandardScaler()
X_train = scaler.fit_transform(X_train_raw)
X_test = scaler.transform(X_test_raw)

# --- Model Training ---
# probability=True enables predict_proba (slower fit, needed for probability output).
model = SVC(kernel=kernel, C=C, probability=True, random_state=42)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# --- Results ---
acc = accuracy_score(y_test, y_pred)
st.success(f"βœ… Accuracy: {acc * 100:.2f}%")

st.markdown("### πŸ“‹ Classification Report")
st.text(classification_report(y_test, y_pred, target_names=data_info.target_names))
71
+
72
# --- Feature Visualization ---
st.subheader("πŸ“ˆ Visualizing with 2 Features")
feature_x = st.selectbox("X-axis Feature", df.columns[:-1], index=0)
feature_y = st.selectbox("Y-axis Feature", df.columns[:-1], index=1)

# BUGFIX: use a dedicated scaler for the 2-feature view instead of re-fitting
# (and clobbering) the scaler that was fitted on the full feature matrix above.
X_vis = df[[feature_x, feature_y]]
vis_scaler = StandardScaler()
X_vis_scaled = vis_scaler.fit_transform(X_vis)
X_train_vis, X_test_vis, y_train_vis, y_test_vis = train_test_split(
    X_vis_scaled, y, test_size=0.2, random_state=42
)

# A second SVM trained on just the two selected (scaled) features, so a 2-D
# decision boundary can be drawn.
model_vis = SVC(kernel=kernel, C=C)
model_vis.fit(X_train_vis, y_train_vis)

# --- Decision Boundary ---
h = 0.02  # mesh step, in scaled-feature units
x_min, x_max = X_vis_scaled[:, 0].min() - 1, X_vis_scaled[:, 0].max() + 1
y_min, y_max = X_vis_scaled[:, 1].min() - 1, X_vis_scaled[:, 1].max() + 1
xx, yy = np.meshgrid(np.arange(x_min, x_max, h), np.arange(y_min, y_max, h))
Z = model_vis.predict(np.c_[xx.ravel(), yy.ravel()])
Z = Z.reshape(xx.shape)

# Draw on the Axes object directly rather than mixing stateful plt.* calls
# with an explicit figure — safer under Streamlit where pyplot state is shared.
fig, ax = plt.subplots(figsize=(8, 6))
ax.contourf(xx, yy, Z, alpha=0.3, cmap="coolwarm")
sns.scatterplot(x=X_vis_scaled[:, 0], y=X_vis_scaled[:, 1], hue=df["target"], palette="coolwarm", ax=ax)
ax.set_xlabel(feature_x)
ax.set_ylabel(feature_y)
ax.set_title("SVM Decision Boundary")
st.pyplot(fig)
99
+
100
# Summary — static closing markdown; no computation below this point.
st.markdown("""
---
## πŸ’‘ Summary
- SVM creates a hyperplane that separates classes.
- Works well for small and high-dimensional datasets.
- The `C` parameter controls the trade-off between margin and misclassification.

### Tips:
- Use **RBF kernel** for non-linear data.
- Try adjusting C to see how the margin changes.
""")