# Source: pages/9_KNN.py by Harika22 (commit fcf7f83, verified).
# The three lines above this file's code in the original paste were
# file-viewer page chrome, not Python; they are preserved here as a comment
# so the module parses.
import streamlit as st

# --- Page chrome: tab config, centered banner, and sidebar intro. ---
st.set_page_config(page_title="KNN", page_icon="๐Ÿค–", layout="wide")
st.markdown(
    "<h1 style='text-align: center; color: #FF4C60;'>๐Ÿ” K-Nearest Neighbors (KNN) Algorithm</h1>",
    unsafe_allow_html=True,
)
st.sidebar.title("๐Ÿค– KNN App")
st.sidebar.markdown("Explore KNN concepts step-by-step using the sections below.")

# Learning sections offered to the user; the selected label drives which
# explanation is rendered further down the page.
_CONCEPT_LABELS = (
    "๐Ÿ“˜ What is KNN?",
    "โš™๏ธ How Does KNN Work?",
    "๐ŸŽฏ Underfitting vs Overfitting",
    "๐Ÿ“‰ Training vs Cross-Validation Error",
    "๐Ÿ› ๏ธ Hyperparameter Tuning",
    "โš–๏ธ Feature Scaling",
    "๐Ÿงฎ Weighted KNN",
    "๐Ÿ—บ๏ธ Decision Regions",
    "๐Ÿ” Cross-Validation Explained",
)
option = st.radio("Select a concept to learn:", _CONCEPT_LABELS)
# Explanatory markdown for each concept, keyed by the exact radio label.
# A dispatch dict replaces the original if/elif chain, whose bodies had lost
# their indentation (a SyntaxError as pasted) — the keys must stay
# byte-identical to the labels passed to st.radio above.
_CONCEPT_CONTENT = {
    "๐Ÿ“˜ What is KNN?": """
K-Nearest Neighbors (KNN) is a **non-parametric**, **lazy learning** algorithm used for both classification and regression.
โœ… It stores all training data instead of learning a function.
โœ… It uses distance metrics (e.g., Euclidean, Manhattan) to make predictions.
โœ… Suitable for small to moderately sized datasets.
""",
    "โš™๏ธ How Does KNN Work?": """
**Training Phase:**
- No actual training occurs. KNN memorizes the training dataset.
**Prediction Phase (Classification):**
1. Choose a value of **K**
2. Calculate distances from the new point to all others
3. Pick **K closest** points
4. Use majority vote to classify
**Prediction Phase (Regression):**
- Average the values of the K nearest neighbors.
""",
    "๐ŸŽฏ Underfitting vs Overfitting": """
- **Overfitting**: The model is too specific to the training data. Poor on unseen data.
- **Underfitting**: The model is too simple. Poor even on training data.
- **Ideal Model**: A balance that performs well on both seen and unseen data.
""",
    "๐Ÿ“‰ Training vs Cross-Validation Error": """
- **Training Error** is the error on the known training data.
- **Cross-Validation Error** is from unseen validation data.
โœ… Use cross-validation to pick the best value of `K`.
๐Ÿ” Big gap = Overfitting; Both high = Underfitting.
""",
    "๐Ÿ› ๏ธ Hyperparameter Tuning": """
- **K**: Number of neighbors โ€” test multiple values.
- **Weights**: Equal (`uniform`) or based on distance (`distance`).
- **Metric**: How distance is measured (Euclidean, Manhattan).
- Use **Grid Search**, **Random Search**, or **Optuna** for best tuning.
""",
    "โš–๏ธ Feature Scaling": """
KNN uses distances โ€” so features must be on the same scale.
- **Normalization** scales data between 0 and 1.
- **Standardization** centers data around mean 0.
โš ๏ธ Always scale data after splitting to avoid leakage.
""",
    "๐Ÿงฎ Weighted KNN": """
Weighted KNN assigns higher importance to closer neighbors.
- Use `weights='distance'` to apply this logic in libraries like scikit-learn.
- Helps in noisy datasets or when closer points are more meaningful.
""",
    "๐Ÿ—บ๏ธ Decision Regions": """
- Small `k` values create complex, wiggly decision boundaries (overfitting).
- Larger `k` smooths the boundary (better generalization).
- Visualizing decision regions helps understand the algorithmโ€™s behavior.
""",
    "๐Ÿ” Cross-Validation Explained": """
- **K-Fold Cross-Validation** splits data into `K` parts.
- The model trains on K-1 parts and tests on the remaining part.
- Helps evaluate model stability and avoid overfitting.
""",
}

# Render the selected section; like the original chain, render nothing if
# the label is somehow unknown.
if option in _CONCEPT_CONTENT:
    st.write(_CONCEPT_CONTENT[option])
# --- Footer: link to a runnable Colab notebook, then a closing summary. ---
st.markdown(
    "<h2 style='color: #58a6ff;'>๐Ÿ““ Try KNN in Colab:</h2>",
    unsafe_allow_html=True,
)
_colab_anchor = """
<a href='https://colab.research.google.com/drive/11wk6wt7sZImXhTqzYrre3ic4oj3KFC4M?usp=sharing' target='_blank'>
๐Ÿ”— Open Jupyter Notebook on Colab
</a>
"""
st.markdown(_colab_anchor, unsafe_allow_html=True)
st.success("KNN is easy to understand and surprisingly powerful! Tune it well, scale your data, and validate your model to get the best results.")