import streamlit as st

# Page config must be the first Streamlit call in the script.
st.set_page_config(page_title="ML Performance Metrics", layout="wide")

# Page heading and introductory blurb.
st.title("📊 Machine Learning Performance Metrics")
st.markdown(
    "Learn how to evaluate ML models for both **classification** and "
    "**regression** problems with detailed explanations, formulas, and tips."
)
st.markdown("---")

# Two top-level tabs: one per problem family.
tab1, tab2 = st.tabs(
    ["🧠 Classification Metrics", "📈 Regression Metrics"]
)
# ======================== CLASSIFICATION ========================
with tab1:
    # Classification tab: one expander per metric, each with a LaTeX formula,
    # a one-line interpretation, and a usage tip.
    # NOTE: the original source had lost all indentation under this `with`
    # block (a SyntaxError); the nesting below restores the intended structure.
    st.header("🧠 Classification Metrics")
    st.markdown("Classification metrics help evaluate how well your model predicts categories or labels.")

    with st.expander("🔍 Understanding TP, TN, FP, FN"):
        st.markdown("""
        - **True Positive (TP)**: Model predicted Positive and it was actually Positive
        - **True Negative (TN)**: Model predicted Negative and it was actually Negative
        - **False Positive (FP)**: Model predicted Positive but it was actually Negative (Type I Error)
        - **False Negative (FN)**: Model predicted Negative but it was actually Positive (Type II Error)
        """)
        # NOTE(review): unsafe_allow_html is enabled but the payload contains
        # no HTML — kept for behavior parity, but it looks removable; confirm.
        st.markdown(
            """
            TP, FP, TN, FN Overview
            """,
            unsafe_allow_html=True,
        )

    with st.expander("🎯 Accuracy"):
        st.latex(r"Accuracy = \frac{TP + TN}{TP + TN + FP + FN}")
        st.markdown("**How often the model is correct overall.**")
        st.info("✔️ Best when classes are balanced. Can be misleading for imbalanced datasets.")

    with st.expander("🎯 Precision"):
        st.latex(r"Precision = \frac{TP}{TP + FP}")
        st.markdown("**Out of all predicted positives, how many were actually positive?**")
        st.info("✔️ Use when False Positives are costly (e.g., spam detection).")

    with st.expander("🎯 Recall (Sensitivity / True Positive Rate)"):
        st.latex(r"Recall = \frac{TP}{TP + FN}")
        st.markdown("**Out of all actual positives, how many were correctly predicted?**")
        st.info("✔️ Use when False Negatives are costly (e.g., disease diagnosis).")

    with st.expander("🎯 Specificity (True Negative Rate)"):
        st.latex(r"Specificity = \frac{TN}{TN + FP}")
        st.markdown("**How many actual negatives were correctly predicted as negative?**")
        st.info("✔️ Important when you also care about negatives being classified correctly.")

    with st.expander("🎯 F1 Score"):
        st.latex(r"F1 = 2 \cdot \frac{Precision \cdot Recall}{Precision + Recall}")
        st.markdown("**Balances both Precision and Recall.**")
        st.info("✔️ Useful when there's an uneven class distribution.")

    with st.expander("🎯 ROC Curve & AUC"):
        st.markdown("""
        - **ROC Curve** plots the **True Positive Rate (Recall)** vs **False Positive Rate**.
        - **AUC** (Area Under the Curve) measures how well the model separates the classes.
        - AUC ranges from 0 to 1. Closer to 1 = better.
        """)
        st.image(
            "https://upload.wikimedia.org/wikipedia/commons/6/6b/Roccurves.png",
            caption="ROC Curves Example",
            width=400,  # 👈 adjust as needed (300–600 works well)
        )
        st.info("✔️ Best for evaluating probabilistic classifiers.")

    # Closing tip shown below all expanders.
    st.markdown("---")
    st.success("📌 Tip: Use **F1 Score** or **ROC-AUC** in imbalanced classification problems!")
# ======================== REGRESSION ========================
with tab2:
    # Regression tab: mirrors the classification tab's layout — one expander
    # per metric with formula, interpretation, and tip.
    # NOTE: the original source had lost all indentation under this `with`
    # block (a SyntaxError); the nesting below restores the intended structure.
    st.header("📈 Regression Metrics")
    st.markdown("Regression metrics help evaluate how well your model predicts **continuous numeric values**.")

    with st.expander("🎯 Mean Absolute Error (MAE)"):
        st.latex(r"MAE = \frac{1}{n} \sum_{i=1}^{n} |y_i - \hat{y}_i|")
        st.markdown("**Average absolute difference** between predicted and actual values.")
        st.info("✔️ Easy to understand. Less sensitive to outliers.")

    with st.expander("🎯 Mean Squared Error (MSE)"):
        st.latex(r"MSE = \frac{1}{n} \sum_{i=1}^{n} (y_i - \hat{y}_i)^2")
        st.markdown("**Average of squared differences** between predicted and actual values.")
        st.info("✔️ Penalizes large errors more than MAE.")

    with st.expander("🎯 Root Mean Squared Error (RMSE)"):
        st.latex(r"RMSE = \sqrt{MSE}")
        st.markdown("**Square root of MSE**. Same unit as the target variable.")
        st.info("✔️ More interpretable. Heavily penalizes large errors.")

    with st.expander("🎯 R² Score (Coefficient of Determination)"):
        st.latex(r"R^2 = 1 - \frac{\sum (y_i - \hat{y}_i)^2}{\sum (y_i - \bar{y})^2}")
        st.markdown("**Proportion of variance in the target explained by the model.**")
        st.info("✔️ Closer to 1 = better fit. Can be negative if model is worse than a mean predictor.")

    # Closing tip shown below all expanders.
    st.markdown("---")
    st.success("📌 Tip: Use **MAE** for average error insights, **RMSE** for large error sensitivity, and **R²** for overall model fit.")