File size: 5,769 Bytes
ac7572a
1eaaf1d
ac7572a
 
 
 
 
 
 
 
 
 
 
 
1eaaf1d
 
 
 
 
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
 
1eaaf1d
ac7572a
 
 
 
 
1eaaf1d
 
 
 
 
 
 
ac7572a
1eaaf1d
ac7572a
 
 
 
 
1eaaf1d
ac7572a
1eaaf1d
 
 
 
 
 
ac7572a
 
 
 
1eaaf1d
ac7572a
 
 
1eaaf1d
ac7572a
 
1eaaf1d
ac7572a
 
 
 
 
 
1eaaf1d
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
1eaaf1d
ac7572a
 
 
1eaaf1d
 
ac7572a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1eaaf1d
 
 
 
 
 
ac7572a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1eaaf1d
 
 
 
 
ac7572a
 
1eaaf1d
 
ac7572a
 
1eaaf1d
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import marimo

# Version string recorded in this file (presumably the marimo release that generated it).
__generated_with = "0.23.3"
# App object whose `cell` decorator wraps every notebook cell function below.
app = marimo.App()


@app.cell
def _():
    # Shared imports for the notebook. The cells below receive `mo`, `plt`
    # and `Rectangle` as parameters; numpy is imported here but is not among
    # the returned names.
    import marimo as mo
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.patches import Rectangle

    return Rectangle, mo, plt


@app.cell
def _(mo):
    # Introductory markdown: statement of Bayes' theorem and its four terms.
    # The literal is a raw string so that LaTeX backslashes reach the renderer
    # untouched; in a normal string "\f" (as in \frac) is a form-feed escape,
    # which is what broke the fraction in the formula.
    mo.md(r"""
    # Bayes' Theorem: Updating Beliefs with Evidence

    Bayes' Theorem is a fundamental concept in probability theory that helps us update our beliefs based on new evidence.

    The formula is:
    $$P(A|B) = \frac{P(B|A)P(A)}{P(B)}$$

    Where:
    - $P(A|B)$ is the **posterior probability**: the probability of event A given that B occurred
    - $P(B|A)$ is the **likelihood**: the probability of observing B given that A is true
    - $P(A)$ is the **prior probability**: our initial belief about A before seeing B
    - $P(B)$ is the **evidence**: the overall probability of observing B
    """)
    return


@app.cell
def _(mo):
    # Markdown cell: states the medical-testing scenario (0.1% prevalence,
    # 99% true positive rate, 5% false positive rate) and poses the question
    # of how likely a positive tester is to actually have the disease.
    mo.md("""
    ## Medical Testing Example

    Let's say we're testing for a rare disease:
    - Prevalence (prior probability): 0.1% of the population has the disease
    - Test accuracy:
      - If you have the disease, the test correctly identifies it 99% of the time (true positive rate)
      - If you don't have the disease, the test incorrectly says you do 5% of the time (false positive rate)

    What's the probability that someone who tests positive actually has the disease?
    """)
    return


@app.cell
def _(mo):
    # Worked example: probability of disease given a positive test, computed
    # with Bayes' theorem and rendered step by step as markdown.

    # Scenario parameters
    prior_disease = 0.001  # P(Disease)
    sensitivity = 0.99  # P(Test+ | Disease)
    false_positive_rate = 0.05  # P(Test+ | No Disease)

    # P(Test+): positives from the diseased group plus positives from the
    # non-diseased group
    p_test_positive = sensitivity * prior_disease + false_positive_rate * (1 - prior_disease)

    # Bayes' theorem: P(Disease | Test+)
    posterior_disease = (sensitivity * prior_disease) / p_test_positive

    # Every percentage goes through the `%` format type so all rates are
    # rounded the same way. Interpolating `rate * 100` without a format spec
    # exposes floating-point noise for some inputs
    # (e.g. 0.07 * 100 == 7.000000000000001).
    mo.md(f"""
    Given:
    - Prior probability of disease: {prior_disease:.1%}
    - Sensitivity (true positive rate): {sensitivity:.1%}
    - False positive rate: {false_positive_rate:.1%}

    Using Bayes' theorem:

    P(Disease|Test+) = [P(Test+|Disease) × P(Disease)] / P(Test+)

    P(Test+) = P(Test+|Disease) × P(Disease) + P(Test+|No Disease) × P(No Disease)

    P(Test+) = {sensitivity:.2f} × {prior_disease:.3f} + {false_positive_rate:.2f} × {(1 - prior_disease):.3f}

    P(Test+) = {p_test_positive:.4f}

    P(Disease|Test+) = ({sensitivity:.2f} × {prior_disease:.3f}) / {p_test_positive:.4f} = {posterior_disease:.3f}

    **Only {posterior_disease:.1%} of people who test positive actually have the disease!**
    """)
    return


@app.cell
def _(Rectangle, mo, plt):
    # Schematic of the medical-testing example for a population of 1000:
    # a frame split into diseased / non-diseased bands, plus boxes for the
    # true positives, false positives and all positive tests.
    # Underscore-prefixed names are helpers meant to stay private to this cell.
    _population = 1000
    _left, _bottom, _width, _height = 1, 1, 8, 6

    fig, ax = plt.subplots(figsize=(10, 6))
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 8)

    # Outline of the whole population, captioned above the frame
    ax.add_patch(
        Rectangle((_left, _bottom), _width, _height, fill=False, edgecolor="black", linewidth=2)
    )
    ax.text(5, 7.2, f"Population ({_population} people)", ha="center", va="bottom")

    # Head counts at 0.1% prevalence
    disease_count = _population * 0.001
    no_disease_count = _population - disease_count

    # Each band's height is its share of the population times the frame height
    _disease_height = disease_count / _population * _height
    _no_disease_height = no_disease_count / _population * _height
    _band_caption = {"ha": "center", "va": "center", "color": "white", "weight": "bold"}

    # Diseased band, drawn from the bottom edge of the frame
    ax.add_patch(Rectangle((_left, _bottom), _width, _disease_height, facecolor="red", alpha=0.5))
    ax.text(
        5,
        1.5 + disease_count / _population * 3,
        f"Disease ({disease_count:.0f} people)",
        **_band_caption,
    )

    # Non-diseased band, stacked directly on top of the diseased band
    ax.add_patch(
        Rectangle((_left, _bottom + _disease_height), _width, _no_disease_height, facecolor="blue", alpha=0.5)
    )
    ax.text(
        5,
        1.5 + _disease_height + no_disease_count / _population * 3,
        f"No Disease ({no_disease_count:.0f} people)",
        **_band_caption,
    )

    # Row labels, right-aligned at x = 0.5
    for _y, _text in (
        (4, "True Positive\n(99% of Diseased)"),
        (2, "False Positive\n(5% of Non-Diseased)"),
    ):
        ax.text(0.5, _y, _text, ha="right", va="center")

    # Expected test outcomes: 99% of the diseased and 5% of the non-diseased
    # test positive
    true_positives = disease_count * 0.99
    false_positives = no_disease_count * 0.05
    positive_tests = true_positives + false_positives

    # One filled box per outcome, with its caption centred inside the box
    for _xy, _w, _h, _color, _alpha, _caption in (
        ((2, 3.5), 2, 1, "green", 0.7, f"True Positive\n({true_positives:.0f})"),
        ((2, 1.5), 2, 1, "orange", 0.7, f"False Positive\n({false_positives:.0f})"),
        ((5, 1.5), 3, 2, "yellow", 0.5, f"Total Positive\nTests ({positive_tests:.0f})"),
    ):
        ax.add_patch(Rectangle(_xy, _w, _h, facecolor=_color, alpha=_alpha))
        ax.text(_xy[0] + _w / 2, _xy[1] + _h / 2, _caption, ha="center", va="center")

    # Title the figure and remove the tick marks on both axes
    ax.set_title("Bayes' Theorem Visualization: Medical Testing")
    ax.set_xticks([])
    ax.set_yticks([])

    mo.ui.matplotlib(plt.gca())
    return


@app.cell
def _(mo):
    # Markdown cell: closing takeaways from the example and a list of other
    # areas where the same reasoning applies.
    mo.md("""
    ## Why This Matters

    This example shows why Bayes' Theorem is important:

    1. **High false positive rate** combined with **low prevalence** leads to counterintuitive results
    2. **95% accurate tests** can still give misleading results when the condition is rare
    3. **Bayes' Theorem forces us to think about**:
       - Our initial beliefs (prior probability)
       - How likely we are to observe evidence given our beliefs
       - How to update our beliefs in light of new evidence

    This same logic applies to:
    - Spam detection
    - Financial risk assessment
    - Scientific hypothesis testing
    - Machine learning classification
    """)
    return


@app.cell
def _():
    # Empty cell: takes no inputs, runs no code and returns nothing.
    return


@app.cell
def _():
    # Empty cell: takes no inputs, runs no code and returns nothing.
    return


# Run the app when this file is executed directly rather than imported.
if __name__ == "__main__":
    app.run()