"""Streamlit reference page on ML evaluation metrics.

Renders two tabs — classification metrics (accuracy, precision, recall,
specificity, F1, ROC-AUC) and regression metrics (MAE, MSE, RMSE, R²) —
each metric shown in an expander with its LaTeX formula, a one-line
interpretation, and a usage tip.
"""
import streamlit as st

st.set_page_config(page_title="ML Performance Metrics", layout="wide")

# ---- Page header ----
st.title("📊 Machine Learning Performance Metrics")
st.markdown(
    "Learn how to evaluate ML models for both **classification** and "
    "**regression** problems with detailed explanations, formulas, and tips."
)
st.markdown("---")

# One tab per problem type.
classification_tab, regression_tab = st.tabs(
    ["🧠 Classification Metrics", "📈 Regression Metrics"]
)

# ======================== CLASSIFICATION ========================
with classification_tab:
    st.header("🧠 Classification Metrics")
    st.markdown("Classification metrics help evaluate how well your model predicts categories or labels.")

    with st.expander("🔍 Understanding TP, TN, FP, FN"):
        st.markdown("""
        - **True Positive (TP)**: Model predicted Positive and it was actually Positive
        - **True Negative (TN)**: Model predicted Negative and it was actually Negative
        - **False Positive (FP)**: Model predicted Positive but it was actually Negative (Type I Error)
        - **False Negative (FN)**: Model predicted Negative but it was actually Positive (Type II Error)
        """)
        # NOTE(review): this HTML block appears to have lost its markup
        # (presumably an <img> overview figure) — only the caption text
        # remains. Confirm against the original source and restore the tag.
        st.markdown(
            """

TP, FP, TN, FN Overview

""",
            unsafe_allow_html=True,
        )

    # (expander title, LaTeX formula, interpretation, usage tip) — rendered
    # in order so the page layout matches the original hand-written version.
    classification_metrics = [
        (
            "🎯 Accuracy",
            r"Accuracy = \frac{TP + TN}{TP + TN + FP + FN}",
            "**How often the model is correct overall.**",
            "✔️ Best when classes are balanced. Can be misleading for imbalanced datasets.",
        ),
        (
            "🎯 Precision",
            r"Precision = \frac{TP}{TP + FP}",
            "**Out of all predicted positives, how many were actually positive?**",
            "✔️ Use when False Positives are costly (e.g., spam detection).",
        ),
        (
            "🎯 Recall (Sensitivity / True Positive Rate)",
            r"Recall = \frac{TP}{TP + FN}",
            "**Out of all actual positives, how many were correctly predicted?**",
            "✔️ Use when False Negatives are costly (e.g., disease diagnosis).",
        ),
        (
            "🎯 Specificity (True Negative Rate)",
            r"Specificity = \frac{TN}{TN + FP}",
            "**How many actual negatives were correctly predicted as negative?**",
            "✔️ Important when you also care about negatives being classified correctly.",
        ),
        (
            "🎯 F1 Score",
            r"F1 = 2 \cdot \frac{Precision \cdot Recall}{Precision + Recall}",
            "**Balances both Precision and Recall.**",
            "✔️ Useful when there's an uneven class distribution.",
        ),
    ]
    for title, formula, meaning, tip in classification_metrics:
        with st.expander(title):
            st.latex(formula)
            st.markdown(meaning)
            st.info(tip)

    # ROC/AUC gets its own expander: it has an illustration, not a formula.
    with st.expander("🎯 ROC Curve & AUC"):
        st.markdown("""
        - **ROC Curve** plots the **True Positive Rate (Recall)** vs **False Positive Rate**.
        - **AUC** (Area Under the Curve) measures how well the model separates the classes.
        - AUC ranges from 0 to 1. Closer to 1 = better.
        """)
        st.image(
            "https://upload.wikimedia.org/wikipedia/commons/6/6b/Roccurves.png",
            caption="ROC Curves Example",
            width=400,  # 👈 adjust as needed (300–600 works well)
        )
        st.info("✔️ Best for evaluating probabilistic classifiers.")

    st.markdown("---")
    st.success("📌 Tip: Use **F1 Score** or **ROC-AUC** in imbalanced classification problems!")

# ======================== REGRESSION ========================
with regression_tab:
    st.header("📈 Regression Metrics")
    st.markdown("Regression metrics help evaluate how well your model predicts **continuous numeric values**.")

    # Same (title, formula, interpretation, tip) shape as the classification
    # table above; rendered in the original top-to-bottom order.
    regression_metrics = [
        (
            "🎯 Mean Absolute Error (MAE)",
            r"MAE = \frac{1}{n} \sum_{i=1}^{n} |y_i - \hat{y}_i|",
            "**Average absolute difference** between predicted and actual values.",
            "✔️ Easy to understand. Less sensitive to outliers.",
        ),
        (
            "🎯 Mean Squared Error (MSE)",
            r"MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2",
            "**Average of squared differences** between predicted and actual values.",
            "✔️ Penalizes large errors more than MAE.",
        ),
        (
            "🎯 Root Mean Squared Error (RMSE)",
            r"RMSE = \sqrt{MSE}",
            "**Square root of MSE**. Same unit as the target variable.",
            "✔️ More interpretable. Heavily penalizes large errors.",
        ),
        (
            "🎯 R² Score (Coefficient of Determination)",
            r"R^2 = 1 - \frac{\sum (y_i - \hat{y}_i)^2}{\sum (y_i - \bar{y})^2}",
            "**Proportion of variance in the target explained by the model.**",
            "✔️ Closer to 1 = better fit. Can be negative if model is worse than a mean predictor.",
        ),
    ]
    for title, formula, meaning, tip in regression_metrics:
        with st.expander(title):
            st.latex(formula)
            st.markdown(meaning)
            st.info(tip)

    st.markdown("---")
    st.success("📌 Tip: Use **MAE** for average error insights, **RMSE** for large error sensitivity, and **R²** for overall model fit.")