File size: 5,219 Bytes
1ca4934
 
 
 
 
 
9dd58c8
1ca4934
 
 
 
 
 
 
 
 
9dd58c8
 
 
 
 
 
 
 
 
afe8b39
2665afe
afe8b39
aa9f86b
2665afe
 
 
 
 
1ca4934
afe8b39
1ca4934
 
9dd58c8
 
1ca4934
 
 
9dd58c8
 
1ca4934
9dd58c8
1ca4934
9dd58c8
 
 
 
 
 
 
1ca4934
 
 
9dd58c8
 
 
 
 
 
 
 
 
4d17abd
 
 
 
 
aa9f86b
4d17abd
1ca4934
 
9dd58c8
1ca4934
 
 
 
9dd58c8
1ca4934
 
 
9dd58c8
 
1ca4934
 
 
9dd58c8
 
1ca4934
 
 
9dd58c8
 
1ca4934
 
 
9dd58c8
 
1ca4934
 
9dd58c8
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import streamlit as st

# Page-level configuration; must be the first Streamlit call in the script.
st.set_page_config(page_title="ML Performance Metrics", layout="wide")

# Page header and one-line course description.
st.title("πŸ“Š Machine Learning Performance Metrics")
st.markdown(
    "Learn how to evaluate ML models for both **classification** and "
    "**regression** problems with detailed explanations, formulas, and tips."
)

st.markdown("---")

# Two top-level tabs, one per problem family. The `tab1`/`tab2` names are
# referenced by the section bodies further down, so they stay unchanged.
_tab_labels = ["🧠 Classification Metrics", "πŸ“ˆ Regression Metrics"]
tab1, tab2 = st.tabs(_tab_labels)

# ======================== CLASSIFICATION ========================
with tab1:
    st.header("🧠 Classification Metrics")
    st.markdown("Classification metrics help evaluate how well your model predicts categories or labels.")

    # Confusion-matrix vocabulary used by every formula below.
    with st.expander("πŸ” Understanding TP, TN, FP, FN"):
        st.markdown("""
        - **True Positive (TP)**: Model predicted Positive and it was actually Positive  
        - **True Negative (TN)**: Model predicted Negative and it was actually Negative  
        - **False Positive (FP)**: Model predicted Positive but it was actually Negative (Type I Error)  
        - **False Negative (FN)**: Model predicted Negative but it was actually Positive (Type II Error)
        """)

    # Illustration kept outside the expander so it is always visible.
    # Fixed: the call's continuation lines and closing paren were dedented to
    # column 0, which misleadingly suggested the statement escaped the
    # `with tab1:` block. The string literal is byte-identical to before, so
    # the rendered HTML is unchanged.
    st.markdown(
        """
    <div style='text-align: center;'>
        <img src='https://upload.wikimedia.org/wikipedia/commons/thumb/2/26/Precisionrecall.svg/1200px-Precisionrecall.svg.png' width='400' height='400'>
        <p><i>TP, FP, TN, FN Overview</i></p>
    </div>
    """,
        unsafe_allow_html=True,
    )

    # One expander per metric: formula, plain-language meaning, usage tip.
    with st.expander("🎯 Accuracy"):
        st.latex(r"Accuracy = \frac{TP + TN}{TP + TN + FP + FN}")
        st.markdown("**How often the model is correct overall.**")
        st.info("βœ”οΈ Best when classes are balanced. Can be misleading for imbalanced datasets.")

    with st.expander("🎯 Precision"):
        st.latex(r"Precision = \frac{TP}{TP + FP}")
        st.markdown("**Out of all predicted positives, how many were actually positive?**")
        st.info("βœ”οΈ Use when False Positives are costly (e.g., spam detection).")

    with st.expander("🎯 Recall (Sensitivity / True Positive Rate)"):
        st.latex(r"Recall = \frac{TP}{TP + FN}")
        st.markdown("**Out of all actual positives, how many were correctly predicted?**")
        st.info("βœ”οΈ Use when False Negatives are costly (e.g., disease diagnosis).")

    with st.expander("🎯 Specificity (True Negative Rate)"):
        st.latex(r"Specificity = \frac{TN}{TN + FP}")
        st.markdown("**How many actual negatives were correctly predicted as negative?**")
        st.info("βœ”οΈ Important when you also care about negatives being classified correctly.")

    with st.expander("🎯 F1 Score"):
        st.latex(r"F1 = 2 \cdot \frac{Precision \cdot Recall}{Precision + Recall}")
        st.markdown("**Balances both Precision and Recall.**")
        st.info("βœ”οΈ Useful when there's an uneven class distribution.")

    with st.expander("🎯 ROC Curve & AUC"):
        st.markdown("""
        - **ROC Curve** plots the **True Positive Rate (Recall)** vs **False Positive Rate**.  
        - **AUC** (Area Under the Curve) measures how well the model separates the classes.  
        - AUC ranges from 0 to 1. Closer to 1 = better.
        """)

    # Example ROC curves, rendered below (not inside) the expander above.
    st.image(
        "https://upload.wikimedia.org/wikipedia/commons/6/6b/Roccurves.png",
        caption="ROC Curves Example",
        width=400,  # ~300-600 keeps the axes readable
    )

    # NOTE(review): this tip renders outside the ROC expander, mirroring the
    # image above; if it was meant to live inside the expander, indent it one
    # level -- confirm intent before moving it.
    st.info("βœ”οΈ Best for evaluating probabilistic classifiers.")

    st.markdown("---")
    st.success("πŸ“Œ Tip: Use **F1 Score** or **ROC-AUC** in imbalanced classification problems!")

# ======================== REGRESSION ========================
with tab2:
    st.header("πŸ“ˆ Regression Metrics")
    st.markdown("Regression metrics help evaluate how well your model predicts **continuous numeric values**.")

    # Each entry: (expander label, LaTeX formula, meaning, usage tip).
    # Rendered in order below, one expander per metric.
    regression_metrics = (
        (
            "🎯 Mean Absolute Error (MAE)",
            r"MAE = \frac{1}{n} \sum_{i=1}^{n} |y_i - \hat{y}_i|",
            "**Average absolute difference** between predicted and actual values.",
            "βœ”οΈ Easy to understand. Less sensitive to outliers.",
        ),
        (
            "🎯 Mean Squared Error (MSE)",
            r"MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2",
            "**Average of squared differences** between predicted and actual values.",
            "βœ”οΈ Penalizes large errors more than MAE.",
        ),
        (
            "🎯 Root Mean Squared Error (RMSE)",
            r"RMSE = \sqrt{MSE}",
            "**Square root of MSE**. Same unit as the target variable.",
            "βœ”οΈ More interpretable. Heavily penalizes large errors.",
        ),
        (
            "🎯 R² Score (Coefficient of Determination)",
            r"R^2 = 1 - \frac{\sum (y_i - \hat{y}_i)^2}{\sum (y_i - \bar{y})^2}",
            "**Proportion of variance in the target explained by the model.**",
            "βœ”οΈ Closer to 1 = better fit. Can be negative if model is worse than a mean predictor.",
        ),
    )

    for label, formula, meaning, tip in regression_metrics:
        with st.expander(label):
            st.latex(formula)
            st.markdown(meaning)
            st.info(tip)

    st.markdown("---")
    st.success("πŸ“Œ Tip: Use **MAE** for average error insights, **RMSE** for large error sensitivity, and **RΒ²** for overall model fit.")