Spaces:
Sleeping
Sleeping
Update pages/9_KNN.py
Browse files- pages/9_KNN.py +79 -102
pages/9_KNN.py
CHANGED
|
@@ -1,132 +1,109 @@
|
|
| 1 |
import streamlit as st
|
| 2 |
|
| 3 |
-
st.set_page_config(page_title="KNN
|
| 4 |
|
| 5 |
-
st.markdown(""
|
| 6 |
-
<style>
|
| 7 |
-
.stApp {
|
| 8 |
-
background: linear-gradient(to right, #141E30, #243B55);
|
| 9 |
-
color: white;
|
| 10 |
-
font-family: 'Segoe UI', sans-serif;
|
| 11 |
-
}
|
| 12 |
-
h1, h2, h3 {
|
| 13 |
-
color: #00CED1;
|
| 14 |
-
}
|
| 15 |
-
.sidebar .sidebar-content {
|
| 16 |
-
background-color: #1e1e1e;
|
| 17 |
-
}
|
| 18 |
-
.block-container {
|
| 19 |
-
padding-top: 2rem;
|
| 20 |
-
padding-bottom: 2rem;
|
| 21 |
-
}
|
| 22 |
-
a {
|
| 23 |
-
color: #00BFFF;
|
| 24 |
-
text-decoration: none;
|
| 25 |
-
}
|
| 26 |
-
a:hover {
|
| 27 |
-
color: #1E90FF;
|
| 28 |
-
}
|
| 29 |
-
</style>
|
| 30 |
-
""", unsafe_allow_html=True)
|
| 31 |
|
| 32 |
-
st.sidebar.title("
|
| 33 |
-
st.sidebar.markdown("
|
| 34 |
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
"๐ Introduction to KNN",
|
| 41 |
-
"โ๏ธ How KNN Works",
|
| 42 |
"๐ฏ Underfitting vs Overfitting",
|
| 43 |
-
"๐ Cross-Validation",
|
| 44 |
"๐ ๏ธ Hyperparameter Tuning",
|
| 45 |
"โ๏ธ Feature Scaling",
|
| 46 |
"๐งฎ Weighted KNN",
|
| 47 |
-
"๐บ๏ธ Decision
|
| 48 |
-
|
|
|
|
| 49 |
)
|
| 50 |
|
| 51 |
-
if
|
| 52 |
-
st.
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
โ
|
| 58 |
-
|
| 59 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 60 |
""")
|
| 61 |
|
| 62 |
-
elif
|
| 63 |
-
st.
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
2. Measure distance (Euclidean, Manhattan, etc.) to all training points
|
| 68 |
-
3. Pick `K` nearest ones
|
| 69 |
-
4. ๐ Classification โ Majority vote
|
| 70 |
-
๐ Regression โ Average/weighted average
|
| 71 |
""")
|
| 72 |
|
| 73 |
-
elif
|
| 74 |
-
st.
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
โ
|
|
|
|
| 79 |
""")
|
| 80 |
|
| 81 |
-
elif
|
| 82 |
-
st.
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
|
| 87 |
""")
|
| 88 |
|
| 89 |
-
elif
|
| 90 |
-
st.
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
-
|
| 94 |
-
|
| 95 |
-
- `metric`: Distance type (e.g., Euclidean)
|
| 96 |
-
๐ง Use Grid Search, Random Search, or Optuna for optimization
|
| 97 |
""")
|
| 98 |
|
| 99 |
-
elif
|
| 100 |
-
st.
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
-
|
| 104 |
-
- ๐ป **Normalization**: Rescales between 0 and 1
|
| 105 |
-
โ Always scale after train-test split to avoid data leakage
|
| 106 |
""")
|
| 107 |
|
| 108 |
-
elif
|
| 109 |
-
st.
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
-
|
| 113 |
-
- Lower influence from distant points
|
| 114 |
-
๐ Improves performance when neighbor relevance varies
|
| 115 |
""")
|
| 116 |
|
| 117 |
-
elif
|
| 118 |
-
st.
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
-
|
| 122 |
-
- `k > 1` โ Smoother, more general
|
| 123 |
-
๐ Helps interpret model behavior in 2D/3D space
|
| 124 |
""")
|
| 125 |
|
|
|
|
| 126 |
st.markdown("""
|
| 127 |
-
<
|
| 128 |
-
|
| 129 |
-
|
| 130 |
""", unsafe_allow_html=True)
|
| 131 |
|
| 132 |
-
st.success("
|
|
|
|
"""Streamlit page: interactive K-Nearest Neighbors (KNN) tutorial.

A single linear script (Streamlit pages are plain top-to-bottom scripts):
configures the page, styles a header, offers a sidebar intro, then lets the
user pick one KNN concept via a radio control and renders the matching
markdown explanation. Ends with a Colab link and a closing success banner.

NOTE(review): every emoji in the original was mojibake'd by the page scrape
this file was recovered from (e.g. "๐ค" is a mis-decoded UTF-8 emoji).
The garbled strings are preserved byte-for-byte so the radio labels and the
if/elif comparisons stay consistent — TODO: restore the intended emoji from
the upstream repository.
"""

import streamlit as st

# Must be the first Streamlit call on the page.
st.set_page_config(page_title="KNN", page_icon="๐ค", layout="wide")

st.markdown(
    "<h1 style='text-align: center; color: #FF4C60;'>๐ K-Nearest Neighbors (KNN) Algorithm</h1>",
    unsafe_allow_html=True,
)

st.sidebar.title("๐ค KNN App")
st.sidebar.markdown("Explore KNN concepts step-by-step using the sections below.")

# Section selector. The labels below are compared verbatim in the
# if/elif chain further down, so they must match character-for-character.
option = st.radio(
    "Select a concept to learn:",
    (
        "๐ What is KNN?",
        "โ๏ธ How Does KNN Work?",
        "๐ฏ Underfitting vs Overfitting",
        "๐ Training vs Cross-Validation Error",
        "๐ ๏ธ Hyperparameter Tuning",
        "โ๏ธ Feature Scaling",
        "๐งฎ Weighted KNN",
        "๐บ๏ธ Decision Regions",
        "๐ Cross-Validation Explained"
    )
)

# Render the explanation that matches the selected label.
if option == "๐ What is KNN?":
    st.write("""
    K-Nearest Neighbors (KNN) is a **non-parametric**, **lazy learning** algorithm used for both classification and regression.

    โ It stores all training data instead of learning a function.
    โ It uses distance metrics (e.g., Euclidean, Manhattan) to make predictions.
    โ Suitable for small to moderately sized datasets.
    """)

elif option == "โ๏ธ How Does KNN Work?":
    st.write("""
    **Training Phase:**
    - No actual training occurs. KNN memorizes the training dataset.

    **Prediction Phase (Classification):**
    1. Choose a value of **K**
    2. Calculate distances from the new point to all others
    3. Pick **K closest** points
    4. Use majority vote to classify

    **Prediction Phase (Regression):**
    - Average the values of the K nearest neighbors.
    """)

elif option == "๐ฏ Underfitting vs Overfitting":
    st.write("""
    - **Overfitting**: The model is too specific to the training data. Poor on unseen data.
    - **Underfitting**: The model is too simple. Poor even on training data.
    - **Ideal Model**: A balance that performs well on both seen and unseen data.
    """)

elif option == "๐ Training vs Cross-Validation Error":
    st.write("""
    - **Training Error** is the error on the known training data.
    - **Cross-Validation Error** is from unseen validation data.

    โ Use cross-validation to pick the best value of `K`.
    ๐ Big gap = Overfitting; Both high = Underfitting.
    """)

elif option == "๐ ๏ธ Hyperparameter Tuning":
    st.write("""
    - **K**: Number of neighbors โ test multiple values.
    - **Weights**: Equal (`uniform`) or based on distance (`distance`).
    - **Metric**: How distance is measured (Euclidean, Manhattan).
    - Use **Grid Search**, **Random Search**, or **Optuna** for best tuning.
    """)

elif option == "โ๏ธ Feature Scaling":
    st.write("""
    KNN uses distances โ so features must be on the same scale.
    - **Normalization** scales data between 0 and 1.
    - **Standardization** centers data around mean 0.
    โ ๏ธ Always scale data after splitting to avoid leakage.
    """)

elif option == "๐งฎ Weighted KNN":
    st.write("""
    Weighted KNN assigns higher importance to closer neighbors.
    - Use `weights='distance'` to apply this logic in libraries like scikit-learn.
    - Helps in noisy datasets or when closer points are more meaningful.
    """)

elif option == "๐บ๏ธ Decision Regions":
    st.write("""
    - Small `k` values create complex, wiggly decision boundaries (overfitting).
    - Larger `k` smooths the boundary (better generalization).
    - Visualizing decision regions helps understand the algorithmโs behavior.
    """)

elif option == "๐ Cross-Validation Explained":
    st.write("""
    - **K-Fold Cross-Validation** splits data into `K` parts.
    - The model trains on K-1 parts and tests on the remaining part.
    - Helps evaluate model stability and avoid overfitting.
    """)

# Footer: external notebook link (rendered as raw HTML) and a closing banner.
st.markdown("<h2 style='color: #58a6ff;'>๐ Try KNN in Colab:</h2>", unsafe_allow_html=True)
st.markdown("""
<a href='https://colab.research.google.com/drive/11wk6wt7sZImXhTqzYrre3ic4oj3KFC4M?usp=sharing' target='_blank'>
๐ Open Jupyter Notebook on Colab
</a>
""", unsafe_allow_html=True)

st.success("KNN is easy to understand and surprisingly powerful! Tune it well, scale your data, and validate your model to get the best results.")