import streamlit as st

# Page config must be the first Streamlit call in the script.
st.set_page_config(page_title="ML Performance Metrics", layout="wide")

# Page heading and introductory blurb.
st.title("📊 Machine Learning Performance Metrics")
st.markdown(
    "Learn how to evaluate ML models for both **classification** and "
    "**regression** problems with detailed explanations, formulas, and tips."
)
st.markdown("---")

# Two top-level tabs: one per problem family.
tab1, tab2 = st.tabs(
    ["🧠 Classification Metrics", "📈 Regression Metrics"]
)
# ======================== CLASSIFICATION ========================
with tab1:
    # Classification tab: one expander per metric, each with a LaTeX formula,
    # a one-line interpretation, and a usage tip.
    # NOTE: the original source had lost all indentation under this `with`
    # block (a SyntaxError); the nesting below restores the intended structure.
    st.header("🧠 Classification Metrics")
    st.markdown("Classification metrics help evaluate how well your model predicts categories or labels.")

    with st.expander("🔍 Understanding TP, TN, FP, FN"):
        st.markdown("""
        - **True Positive (TP)**: Model predicted Positive and it was actually Positive
        - **True Negative (TN)**: Model predicted Negative and it was actually Negative
        - **False Positive (FP)**: Model predicted Positive but it was actually Negative (Type I Error)
        - **False Negative (FN)**: Model predicted Negative but it was actually Positive (Type II Error)
        """)
        # NOTE(review): unsafe_allow_html is enabled but the payload contains
        # no HTML — kept for behavior parity, but it looks removable; confirm.
        st.markdown(
            """
            TP, FP, TN, FN Overview
            """,
            unsafe_allow_html=True,
        )

    with st.expander("🎯 Accuracy"):
        st.latex(r"Accuracy = \frac{TP + TN}{TP + TN + FP + FN}")
        st.markdown("**How often the model is correct overall.**")
        st.info("✔️ Best when classes are balanced. Can be misleading for imbalanced datasets.")

    with st.expander("🎯 Precision"):
        st.latex(r"Precision = \frac{TP}{TP + FP}")
        st.markdown("**Out of all predicted positives, how many were actually positive?**")
        st.info("✔️ Use when False Positives are costly (e.g., spam detection).")

    with st.expander("🎯 Recall (Sensitivity / True Positive Rate)"):
        st.latex(r"Recall = \frac{TP}{TP + FN}")
        st.markdown("**Out of all actual positives, how many were correctly predicted?**")
        st.info("✔️ Use when False Negatives are costly (e.g., disease diagnosis).")

    with st.expander("🎯 Specificity (True Negative Rate)"):
        st.latex(r"Specificity = \frac{TN}{TN + FP}")
        st.markdown("**How many actual negatives were correctly predicted as negative?**")
        st.info("✔️ Important when you also care about negatives being classified correctly.")

    with st.expander("🎯 F1 Score"):
        st.latex(r"F1 = 2 \cdot \frac{Precision \cdot Recall}{Precision + Recall}")
        st.markdown("**Balances both Precision and Recall.**")
        st.info("✔️ Useful when there's an uneven class distribution.")

    with st.expander("🎯 ROC Curve & AUC"):
        st.markdown("""
        - **ROC Curve** plots the **True Positive Rate (Recall)** vs **False Positive Rate**.
        - **AUC** (Area Under the Curve) measures how well the model separates the classes.
        - AUC ranges from 0 to 1. Closer to 1 = better.
        """)
        st.image(
            "https://upload.wikimedia.org/wikipedia/commons/6/6b/Roccurves.png",
            caption="ROC Curves Example",
            width=400,  # 👈 adjust as needed (300–600 works well)
        )
        st.info("✔️ Best for evaluating probabilistic classifiers.")

    # Closing tip shown below all expanders.
    st.markdown("---")
    st.success("📌 Tip: Use **F1 Score** or **ROC-AUC** in imbalanced classification problems!")
# ======================== REGRESSION ========================
with tab2:
    # Regression tab: mirrors the classification tab's layout — one expander
    # per metric with formula, interpretation, and tip.
    # NOTE: the original source had lost all indentation under this `with`
    # block (a SyntaxError); the nesting below restores the intended structure.
    st.header("📈 Regression Metrics")
    st.markdown("Regression metrics help evaluate how well your model predicts **continuous numeric values**.")

    with st.expander("🎯 Mean Absolute Error (MAE)"):
        st.latex(r"MAE = \frac{1}{n} \sum_{i=1}^{n} |y_i - \hat{y}_i|")
        st.markdown("**Average absolute difference** between predicted and actual values.")
        st.info("✔️ Easy to understand. Less sensitive to outliers.")

    with st.expander("🎯 Mean Squared Error (MSE)"):
        st.latex(r"MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2")
        st.markdown("**Average of squared differences** between predicted and actual values.")
        st.info("✔️ Penalizes large errors more than MAE.")

    with st.expander("🎯 Root Mean Squared Error (RMSE)"):
        st.latex(r"RMSE = \sqrt{MSE}")
        st.markdown("**Square root of MSE**. Same unit as the target variable.")
        st.info("✔️ More interpretable. Heavily penalizes large errors.")

    with st.expander("🎯 R² Score (Coefficient of Determination)"):
        st.latex(r"R^2 = 1 - \frac{\sum (y_i - \hat{y}_i)^2}{\sum (y_i - \bar{y})^2}")
        st.markdown("**Proportion of variance in the target explained by the model.**")
        st.info("✔️ Closer to 1 = better fit. Can be negative if model is worse than a mean predictor.")

    # Closing tip shown below all expanders.
    st.markdown("---")
    st.success("📌 Tip: Use **MAE** for average error insights, **RMSE** for large error sensitivity, and **R²** for overall model fit.")