sree4411 committed on
Commit
0a8122d
·
verified ·
1 Parent(s): 56126e3

Update pages/SVM.py

Browse files
Files changed (1) hide show
  1. pages/SVM.py +101 -83
pages/SVM.py CHANGED
@@ -1,119 +1,137 @@
1
  import streamlit as st
2
 
3
- st.set_page_config(page_title="Support Vector Machine (SVM)", page_icon="🧠", layout="wide")
4
 
5
- # Header
6
- st.title("🧠 Support Vector Machine (SVM) - Classification")
7
 
 
 
8
  st.markdown("""
9
- SVM is a powerful **supervised machine learning algorithm** used for **classification** and **regression**,
10
- but it's mostly used for **classification** tasks.
 
11
  """)
12
 
13
- # Section 1 β€” Core Idea
14
- st.header("🎯 What is SVM?")
15
  st.markdown("""
16
- SVM aims to **find the best decision boundary** (called a **hyperplane**) that separates different classes.
17
- It does this by **maximizing the margin** between the closest points of each class, known as **support vectors**.
 
 
 
18
  """)
19
 
20
- st.latex(r"f(x) = w^T x + b")
21
- st.markdown("- \\( w \\): weight vector \n- \\( b \\): bias \n- If \\( f(x) > 0 \\): class +1, else class -1")
22
-
23
- st.image("https://upload.wikimedia.org/wikipedia/commons/7/72/SVM_margin.png",
24
- caption="SVM - Maximizing the Margin", use_column_width=True)
25
-
26
- # Section 2 β€” How it Works
27
- st.header("βš™οΈ How Does SVM Work?")
28
- col1, col2 = st.columns(2)
29
 
30
- with col1:
31
  st.markdown("""
32
- - Find the **hyperplane** that best separates the classes
33
- - Support vectors are the **critical points** that define the boundary
34
- - Maximize the distance (margin) from support vectors to hyperplane
35
  """)
36
 
37
- with col2:
38
- st.markdown("### ✨ Objective Function")
39
- st.latex(r"\min \frac{1}{2} ||w||^2 \quad \text{subject to: } y_i(w^T x_i + b) \geq 1")
40
- st.markdown("We minimize weight norm to maximize margin")
41
-
42
- # Section 3 β€” Kernels
43
- st.header("🌀 Kernel Trick")
44
 
45
- st.markdown("""
46
- Sometimes, data isn't linearly separable.
47
- **Kernel functions** help SVM project data into a higher-dimensional space.
 
48
 
49
- Common kernels:
50
- - **Linear Kernel**: Works when data is linearly separable
51
- - **Polynomial Kernel**: Curved boundaries
52
- - **RBF (Gaussian)**: Handles complex boundaries
53
 
54
- ### 🧠 Kernel Formula (Example: RBF)
55
- """)
56
- st.latex(r"K(x, x') = \exp\left(-\frac{||x - x'||^2}{2\sigma^2}\right)")
 
 
 
 
57
 
58
- st.markdown("This allows SVM to classify non-linear data!")
 
59
 
60
- # Section 4 β€” Hard Margin vs Soft Margin
61
- st.header("🧱 Hard Margin vs Soft Margin")
62
- col1, col2 = st.columns(2)
63
 
64
- with col1:
65
- st.markdown("### Hard Margin SVM")
66
  st.markdown("""
67
- - No misclassification allowed
68
- - Only works if data is perfectly separable
69
  """)
70
 
71
- with col2:
72
- st.markdown("### Soft Margin SVM")
 
 
 
 
73
  st.markdown("""
74
- - Allows misclassifications
75
- - Adds penalty to error via regularization term (\\( C \\))
76
- - Better generalization on noisy data
77
  """)
78
 
79
- st.latex(r"\min \frac{1}{2} ||w||^2 + C \sum \xi_i")
80
-
81
- # Section 5 β€” Evaluation Metrics
82
- st.header("πŸ“ Evaluation Metrics for Classification")
83
 
84
- col1, col2, col3 = st.columns(3)
 
 
85
 
86
- with col1:
87
- st.subheader("βœ”οΈ Accuracy")
88
- st.latex(r"Accuracy = \frac{TP + TN}{TP + TN + FP + FN}")
89
 
90
- with col2:
91
- st.subheader("🎯 Precision")
92
- st.latex(r"Precision = \frac{TP}{TP + FP}")
93
 
94
- with col3:
95
- st.subheader("πŸ” Recall")
96
- st.latex(r"Recall = \frac{TP}{TP + FN}")
97
 
98
- col4, col5 = st.columns(2)
99
-
100
- with col4:
101
- st.subheader("📊 F1-Score")
102
- st.latex(r"F1 = 2 \cdot \frac{Precision \cdot Recall}{Precision + Recall}")
103
-
104
- with col5:
105
- st.subheader("🧠 ROC-AUC")
106
- st.markdown("Area under ROC curve (TPR vs FPR)")
107
 
108
- # Section 6 β€” Summary
109
- st.header("🧾 Summary & Key Takeaways")
 
 
 
 
 
 
110
 
 
111
  st.markdown("""
112
- - SVM aims to **maximize the margin** between classes
113
- - Works with **linear and non-linear** data using **kernels**
114
- - **Support vectors** are the most critical data points
115
- - Use **soft margin + kernels** for real-world problems
116
- - Evaluate using **Accuracy, Precision, Recall, F1, ROC-AUC**
117
  """)
118
 
119
- st.success("✅ Use SVM when you have clean, medium-sized datasets and need robust classification!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Streamlit page: an educational walkthrough of Support Vector Machines (SVM).

Renders static explanatory content only — no model is trained here. Sections:
concept introduction, use cases, step-by-step mechanics (in expanders), kernel
functions, the mathematical formulation, evaluation metrics, pros/cons, and a
summary.

NOTE(review): this file was recovered from a mojibake'd diff rendering.
Emoji in the UI strings were restored from UTF-8-read-as-cp1252/cp1253
garbling; where the original byte was unrecoverable (the "Evaluation
Metrics" header), 📏 is a best-effort guess — confirm against the app.
"""

import streamlit as st

# Must be the first Streamlit call on the page.
st.set_page_config(page_title="Support Vector Machine", page_icon="🧠", layout="wide")

# Title — raw HTML is used only for the colored heading, hence unsafe_allow_html.
st.markdown("<h1 style='color:#4CAF50;'>🧠 Support Vector Machine (SVM)</h1>", unsafe_allow_html=True)

# Introduction
st.markdown("### 📚 What is SVM?")
st.markdown("""
Support Vector Machine (SVM) is a powerful **supervised learning algorithm** used for both **classification** and **regression**, though it is mostly used for classification tasks.

The core idea is to find the **optimal hyperplane** that best separates the data points of different classes by maximizing the **margin** between them.
""")

# Use Cases
st.markdown("### 🎯 Where is SVM Used?")
st.markdown("""
- Face Recognition
- Handwriting Recognition
- Bioinformatics (e.g., gene classification)
- Email Spam Detection
- Image Classification
""")

# How It Works — one collapsible expander per conceptual step.
st.markdown("### ⚙️ How Does SVM Work?")

with st.expander("🔹 Step 1: Find a Hyperplane"):
    st.markdown("""
    A **hyperplane** is a decision boundary that separates the data points of different classes.
    SVM tries to find the hyperplane that **maximizes the margin** between classes.
    """)

with st.expander("🔹 Step 2: Identify Support Vectors"):
    st.markdown("""
    **Support vectors** are the data points that lie closest to the hyperplane.
    These points are critical in defining the position and orientation of the hyperplane.
    """)

with st.expander("🔹 Step 3: Handle Non-Linearly Separable Data"):
    st.markdown("""
    When the data is not linearly separable, SVM uses the **kernel trick** to project it into a higher-dimensional space where it becomes separable.
    """)

# Kernel Functions
st.markdown("### 🧪 Kernels in SVM")

with st.expander("📌 Common Kernel Functions"):
    st.markdown("""
    - **Linear Kernel**: For linearly separable data
    - **Polynomial Kernel**: For curved decision boundaries
    - **RBF (Radial Basis Function)**: Most popular, handles complex data
    - **Sigmoid Kernel**: Similar to neural networks
    """)

# Mathematical Intuition — formulas rendered via st.latex (raw strings keep backslashes literal).
st.markdown("### 🧠 Mathematical Formulation")

with st.expander("📌 Decision Function"):
    st.latex(r"f(x) = w \cdot x + b")

with st.expander("📌 Classification Rule"):
    st.markdown("""
    - If \\( f(x) > 0 \\): Predict **Class 1**
    - If \\( f(x) < 0 \\): Predict **Class 0**
    """)

with st.expander("📌 Optimization Objective"):
    st.latex(r"\text{Maximize Margin} = \frac{2}{\|w\|}")
    st.markdown("We want to maximize the margin between support vectors and the hyperplane.")

with st.expander("📌 Soft Margin & C Parameter"):
    st.latex(r" \min \frac{1}{2} \|w\|^2 + C \sum \xi_i ")
    st.markdown("""
    - The **C parameter** balances margin maximization vs classification error.
    - A **small C** allows for a wider margin but more errors.
    - A **large C** aims for perfect classification but might overfit.
    """)

# Evaluation Metrics — standard confusion-matrix-derived metrics.
st.markdown("### 📏 Evaluation Metrics")

st.markdown("#### ✅ Accuracy")
st.latex(r"Accuracy = \frac{TP + TN}{TP + TN + FP + FN}")
st.markdown("The percentage of correct predictions.")

st.markdown("#### 🎯 Precision")
st.latex(r"Precision = \frac{TP}{TP + FP}")
st.markdown("Out of all predicted positives, how many are actually positive?")

st.markdown("#### 📣 Recall (Sensitivity)")
st.latex(r"Recall = \frac{TP}{TP + FN}")
st.markdown("Out of all actual positives, how many did we correctly predict?")

st.markdown("#### ⚖️ F1 Score")
st.latex(r"F1 = 2 \cdot \frac{Precision \cdot Recall}{Precision + Recall}")
st.markdown("Balances precision and recall — especially useful in imbalanced datasets.")

st.markdown("#### 📈 ROC-AUC")
st.markdown("""
- Plots True Positive Rate (TPR) vs False Positive Rate (FPR).
- **AUC (Area Under Curve)** closer to 1 indicates a better model.
""")

# Pros and Cons
st.markdown("### ✅ Advantages of SVM")
st.markdown("""
- Effective in high-dimensional spaces
- Works well even when features > samples
- Memory efficient (uses support vectors)
- Handles non-linearity with kernels
""")

st.markdown("### ❌ Limitations of SVM")
st.markdown("""
- Not ideal for large datasets (computationally expensive)
- Requires careful parameter tuning (C, kernel)
- Hard to interpret compared to decision trees
""")

# Summary
st.markdown("### 🔚 Summary")
st.markdown("""
Support Vector Machine is a **robust**, **flexible**, and **accurate** classification algorithm.
Great for:
- Text data
- Image recognition
- Biomedical data

Make sure to:
- Scale your features
- Use kernel wisely
- Tune the **C** and **gamma** parameters

✅ Powerful for **both linear and non-linear** decision boundaries!
""")