Spaces:

kgdrathan
/

explainer-env

Running

File size: 5,769 Bytes

ac7572a
1eaaf1d
ac7572a
 
 
 
 
 
 
 
 
 
 
 
1eaaf1d
 
 
 
 
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
 
1eaaf1d
ac7572a
 
 
 
 
1eaaf1d
 
 
 
 
 
 
ac7572a
1eaaf1d
ac7572a
 
 
 
 
1eaaf1d
ac7572a
1eaaf1d
 
 
 
 
 
ac7572a
 
 
 
1eaaf1d
ac7572a
 
 
1eaaf1d
ac7572a
 
1eaaf1d
ac7572a
 
 
 
 
 
1eaaf1d
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
 
 
1eaaf1d
 
ac7572a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1eaaf1d
 
 
 
 
 
ac7572a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1eaaf1d
 
 
 
 
ac7572a
 
1eaaf1d
 
ac7572a
 
1eaaf1d

import marimo

# NOTE(review): presumably the marimo version that wrote this file — confirm.
__generated_with = "0.23.3"
# Application object: every function below is decorated with @app.cell, and
# app.run() is called when the file is executed as a script.
app = marimo.App()


@app.cell
def _():
    # Import cell: loads the libraries and returns the names that the other
    # cells receive as parameters (Rectangle, mo, plt).
    import marimo as mo
    import numpy as np  # NOTE(review): not used here and not returned — confirm it is still needed
    import matplotlib.pyplot as plt
    from matplotlib.patches import Rectangle

    return Rectangle, mo, plt


@app.cell
def _(mo):
    # Introductory cell: renders the statement of Bayes' theorem and names
    # each term of the formula.
    #
    # The literal is a raw string on purpose. In a normal string "\f" is the
    # form-feed escape, so "\frac" loses its backslash and the formula shows
    # up as a stray "rac{...}" instead of a fraction.
    mo.md(r"""
    # Bayes' Theorem: Updating Beliefs with Evidence

    Bayes' Theorem is a fundamental concept in probability theory that helps us update our beliefs based on new evidence.

    The formula is:
    $$P(A|B) = \frac{P(B|A)P(A)}{P(B)}$$

    Where:
    - $P(A|B)$ is the **posterior probability**: the probability of event A given that B occurred
    - $P(B|A)$ is the **likelihood**: the probability of observing B given that A is true
    - $P(A)$ is the **prior probability**: our initial belief about A before seeing B
    - $P(B)$ is the **evidence**: the overall probability of observing B
    """)
    return


@app.cell
def _(mo):
    # Scenario cell: states the rare-disease screening problem (prevalence,
    # true positive rate, false positive rate) and poses the question.
    _scenario_markdown = """
    ## Medical Testing Example

    Let's say we're testing for a rare disease:
    - Prevalence (prior probability): 0.1% of the population has the disease
    - Test accuracy:
      - If you have the disease, the test correctly identifies it 99% of the time (true positive rate)
      - If you don't have the disease, the test incorrectly says you do 5% of the time (false positive rate)

    What's the probability that someone who tests positive actually has the disease?
    """
    mo.md(_scenario_markdown)
    return


@app.cell
def _(mo):
    # Worked example: plugs the screening-test numbers into Bayes' theorem
    # and renders every intermediate step as markdown.

    # Inputs: P(Disease), P(Test+ | Disease), P(Test+ | No Disease).
    prior_disease, sensitivity, false_positive_rate = 0.001, 0.99, 0.05

    # Joint probabilities of a positive test with and without the disease.
    _positive_and_sick = sensitivity * prior_disease
    _positive_and_healthy = false_positive_rate * (1 - prior_disease)

    # Evidence term P(Test+): sum over both ways of testing positive.
    p_test_positive = _positive_and_sick + _positive_and_healthy

    # Posterior P(Disease | Test+) from Bayes' theorem.
    posterior_disease = _positive_and_sick / p_test_positive

    _report = f"""
    Given:
    - Prior probability of disease: {prior_disease * 100:.1f}%
    - Sensitivity (true positive rate): {sensitivity * 100}%
    - False positive rate: {false_positive_rate * 100}%

    Using Bayes' theorem:

    P(Disease|Test+) = [P(Test+|Disease) × P(Disease)] / P(Test+)

    P(Test+) = P(Test+|Disease) × P(Disease) + P(Test+|No Disease) × P(No Disease)

    P(Test+) = {sensitivity:.2f} × {prior_disease:.3f} + {false_positive_rate:.2f} × {(1 - prior_disease):.3f}

    P(Test+) = {p_test_positive:.4f}

    P(Disease|Test+) = ({sensitivity:.2f} × {prior_disease:.3f}) / {p_test_positive:.4f} = {posterior_disease:.3f}

    **Only {posterior_disease * 100:.1f}% of people who test positive actually have the disease!**
    """
    mo.md(_report)
    return


@app.cell
def _(Rectangle, mo, plt):
    # Diagram cell: sketches a 1000-person population split into a diseased
    # and a non-diseased band, then overlays boxes carrying the true-positive,
    # false-positive and total-positive counts.
    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 8)

    # Outer frame standing for the whole population.
    _frame = Rectangle((1, 1), 8, 6, fill=False, edgecolor="black", linewidth=2)
    ax.add_patch(_frame)
    ax.text(5, 7.2, "Population (1000 people)", ha="center", va="bottom")

    # Head counts per group (0.1% prevalence).
    disease_count = 1000 * 0.001
    no_disease_count = 1000 - disease_count

    # Each band's height is its share of the 6-unit-tall frame.
    _disease_height = disease_count / 1000 * 6
    _no_disease_height = no_disease_count / 1000 * 6
    _band_label_style = {
        "ha": "center",
        "va": "center",
        "color": "white",
        "weight": "bold",
    }

    # Diseased band (red) at the bottom of the frame.
    ax.add_patch(Rectangle((1, 1), 8, _disease_height, facecolor="red", alpha=0.5))
    ax.text(
        5,
        1.5 + disease_count / 1000 * 3,
        f"Disease ({disease_count:.0f} people)",
        **_band_label_style,
    )

    # Non-diseased band (blue) stacked directly on top of it.
    ax.add_patch(
        Rectangle((1, 1 + _disease_height), 8, _no_disease_height, facecolor="blue", alpha=0.5)
    )
    ax.text(
        5,
        1.5 + _disease_height + no_disease_count / 1000 * 3,
        f"No Disease ({no_disease_count:.0f} people)",
        **_band_label_style,
    )

    # Right-aligned captions placed to the left of the frame.
    _side_captions = (
        (4, "True Positive\n(99% of Diseased)"),
        (2, "False Positive\n(5% of Non-Diseased)"),
    )
    for _y, _caption in _side_captions:
        ax.text(0.5, _y, _caption, ha="right", va="center")

    # Expected positive test results in each group, and their sum.
    tp = disease_count * 0.99  # True positives
    fp = no_disease_count * 0.05  # False positives
    total_positives = tp + fp

    # One row per result box:
    # (corner, (width, height), colour, alpha, label position, label text).
    _result_boxes = (
        ((2, 3.5), (2, 1), "green", 0.7, (3, 4), f"True Positive\n({tp:.0f})"),
        ((2, 1.5), (2, 1), "orange", 0.7, (3, 2), f"False Positive\n({fp:.0f})"),
        (
            (5, 1.5),
            (3, 2),
            "yellow",
            0.5,
            (6.5, 2.5),
            f"Total Positive\nTests ({total_positives:.0f})",
        ),
    )
    for _corner, _size, _colour, _alpha, _label_xy, _caption in _result_boxes:
        ax.add_patch(Rectangle(_corner, _size[0], _size[1], facecolor=_colour, alpha=_alpha))
        ax.text(_label_xy[0], _label_xy[1], _caption, ha="center", va="center")

    # Pure schematic: hide the tick marks and add a title.
    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title("Bayes' Theorem Visualization: Medical Testing")

    mo.ui.matplotlib(plt.gca())
    return


@app.cell
def _(mo):
    # Closing cell: summarises the takeaways from the example and lists other
    # areas where the same reasoning applies.
    _takeaways_markdown = """
    ## Why This Matters

    This example shows why Bayes' Theorem is important:

    1. **High false positive rate** combined with **low prevalence** leads to counterintuitive results
    2. **95% accurate tests** can still give misleading results when the condition is rare
    3. **Bayes' Theorem forces us to think about**:
       - Our initial beliefs (prior probability)
       - How likely we are to observe evidence given our beliefs
       - How to update our beliefs in light of new evidence

    This same logic applies to:
    - Spam detection
    - Financial risk assessment
    - Scientific hypothesis testing
    - Machine learning classification
    """
    mo.md(_takeaways_markdown)
    return


@app.cell
def _():
    # Empty cell: contains no code and returns nothing.
    return


@app.cell
def _():
    # Empty cell: contains no code and returns nothing.
    return


# Start the app only when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()