import streamlit as st

st.set_page_config(page_title="KNN", page_icon="🤖", layout="wide")

st.markdown("<h1>🔍 K-Nearest Neighbors (KNN) Algorithm</h1>", unsafe_allow_html=True)

st.sidebar.title("🤖 KNN App")
st.sidebar.markdown("Explore KNN concepts step-by-step using the sections below.")

option = st.radio(
    "Select a concept to learn:",
    (
        "📘 What is KNN?",
        "⚙️ How Does KNN Work?",
        "🎯 Underfitting vs Overfitting",
        "📉 Training vs Cross-Validation Error",
        "🛠️ Hyperparameter Tuning",
        "⚖️ Feature Scaling",
        "🧮 Weighted KNN",
        "🗺️ Decision Regions",
        "🔁 Cross-Validation Explained",
    ),
)

if option == "📘 What is KNN?":
    st.write("""
    K-Nearest Neighbors (KNN) is a **non-parametric**, **lazy learning** algorithm used for both classification and regression.

    ✅ It stores all training data instead of learning an explicit function.
    ✅ It uses distance metrics (e.g., Euclidean, Manhattan) to make predictions.
    ✅ Suitable for small to moderately sized datasets.
    """)

elif option == "⚙️ How Does KNN Work?":
    st.write("""
    **Training Phase:**
    - No actual training occurs. KNN memorizes the training dataset.

    **Prediction Phase (Classification):**
    1. Choose a value of **K**
    2. Calculate distances from the new point to all training points
    3. Pick the **K closest** points
    4. Use a majority vote to classify

    **Prediction Phase (Regression):**
    - Average the values of the K nearest neighbors.
    """)

elif option == "🎯 Underfitting vs Overfitting":
    st.write("""
    - **Overfitting**: The model is too specific to the training data and performs poorly on unseen data.
    - **Underfitting**: The model is too simple and performs poorly even on the training data.
    - **Ideal Model**: A balance that performs well on both seen and unseen data.
    """)

elif option == "📉 Training vs Cross-Validation Error":
    st.write("""
    - **Training Error** is the error on the known training data.
    - **Cross-Validation Error** is measured on unseen validation data.

    ✅ Use cross-validation to pick the best value of `K`.
    🔍 A big gap between the two means overfitting; both high means underfitting.
    """)

elif option == "🛠️ Hyperparameter Tuning":
    st.write("""
    - **K**: Number of neighbors; test multiple values.
    - **Weights**: Equal (`uniform`) or based on distance (`distance`).
    - **Metric**: How distance is measured (Euclidean, Manhattan).
    - Use **Grid Search**, **Random Search**, or **Optuna** to tune these together.
    """)

elif option == "⚖️ Feature Scaling":
    st.write("""
    KNN relies on distances, so features must be on the same scale.

    - **Normalization** rescales data to the range 0 to 1.
    - **Standardization** centers data around mean 0 with unit variance.

    ⚠️ Always scale after splitting (fit the scaler on the training set only) to avoid leakage.
    """)

elif option == "🧮 Weighted KNN":
    st.write("""
    Weighted KNN assigns higher importance to closer neighbors.

    - Use `weights='distance'` to apply this logic in libraries like scikit-learn.
    - Helps on noisy datasets or when closer points are more meaningful.
    """)

elif option == "🗺️ Decision Regions":
    st.write("""
    - Small `k` values create complex, wiggly decision boundaries (overfitting).
    - Larger `k` values smooth the boundary (better generalization, though a very large `k` can underfit).
    - Visualizing decision regions helps you understand the algorithm's behavior.
    """)

elif option == "🔁 Cross-Validation Explained":
    st.write("""
    - **K-Fold Cross-Validation** splits the data into `K` parts (folds).
    - The model trains on K-1 folds and tests on the remaining fold, rotating so each fold is used for testing once.
    - Helps evaluate model stability and avoid overfitting.
    """)

st.markdown("<h3>📓 Try KNN in Colab:</h3>", unsafe_allow_html=True)

st.markdown("""
🔗 Open Jupyter Notebook on Colab
""", unsafe_allow_html=True)

st.success(
    "KNN is easy to understand and surprisingly powerful! "
    "Tune it well, scale your data, and validate your model to get the best results."
)