import marimo
# Version string stored with the notebook; the name suggests it records the
# marimo release that wrote this file (assumption, not verifiable from here).
__generated_with = "0.23.3"
# Application object; every cell below registers itself through @app.cell.
app = marimo.App()
@app.cell
def _():
    # Import cell: loads the libraries and returns Rectangle, mo and plt,
    # the same names that later cells declare as parameters.
    import marimo as mo
    # NOTE(review): np is imported but neither used in this cell nor returned;
    # confirm whether the import is still needed.
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.patches import Rectangle
    return Rectangle, mo, plt
@app.cell
def _(mo):
    # Introductory cell: states Bayes' theorem and names each of its terms.
    #
    # The literal is a raw string so that LaTeX commands reach the Markdown
    # renderer untouched; in a plain string the "\f" of "\frac" is read as a
    # form-feed escape and the fraction is corrupted. Blank lines keep the
    # display equation in a block of its own.
    mo.md(r"""
    # Bayes' Theorem: Updating Beliefs with Evidence

    Bayes' Theorem is a fundamental concept in probability theory that helps us update our beliefs based on new evidence.

    The formula is:

    $$P(A|B) = \frac{P(B|A)P(A)}{P(B)}$$

    Where:

    - $P(A|B)$ is the **posterior probability**: the probability of event A given that B occurred
    - $P(B|A)$ is the **likelihood**: the probability of observing B given that A is true
    - $P(A)$ is the **prior probability**: our initial belief about A before seeing B
    - $P(B)$ is the **evidence**: the overall probability of observing B
    """)
    return
@app.cell
def _(mo):
    # Problem statement for the worked example: a rare disease (0.1%
    # prevalence) and a test with a 99% true positive rate and a 5% false
    # positive rate.
    #
    # The two accuracy figures are nested under "Test accuracy:" and the
    # closing question is separated by a blank line, so it renders as its own
    # paragraph rather than as a continuation of the last bullet.
    mo.md("""
    ## Medical Testing Example

    Let's say we're testing for a rare disease:

    - Prevalence (prior probability): 0.1% of the population has the disease
    - Test accuracy:
        - If you have the disease, the test correctly identifies it 99% of the time (true positive rate)
        - If you don't have the disease, the test incorrectly says you do 5% of the time (false positive rate)

    What's the probability that someone who tests positive actually has the disease?
    """)
    return
@app.cell
def _(mo):
    # Worked example: apply Bayes' theorem to the medical test and render the
    # derivation, step by step, as Markdown.

    # Model parameters
    prior_disease = 0.001  # P(Disease)
    sensitivity = 0.99  # P(Test+ | Disease)
    false_positive_rate = 0.05  # P(Test+ | No Disease)

    # Evidence term, by the law of total probability:
    # P(Test+) = P(Test+|Disease) P(Disease) + P(Test+|No Disease) P(No Disease)
    p_test_positive = sensitivity * prior_disease + false_positive_rate * (1 - prior_disease)

    # Posterior P(Disease | Test+)
    posterior_disease = (sensitivity * prior_disease) / p_test_positive

    # Every percentage carries an explicit format spec so a float product
    # (e.g. 0.07 * 100) cannot leak rounding noise into the text. Blank lines
    # keep each derivation step in its own paragraph instead of being folded
    # into the preceding bullet.
    mo.md(f"""
    Given:

    - Prior probability of disease: {prior_disease * 100:.1f}%
    - Sensitivity (true positive rate): {sensitivity * 100:.1f}%
    - False positive rate: {false_positive_rate * 100:.1f}%

    Using Bayes' theorem:

    P(Disease|Test+) = [P(Test+|Disease) × P(Disease)] / P(Test+)

    P(Test+) = P(Test+|Disease) × P(Disease) + P(Test+|No Disease) × P(No Disease)

    P(Test+) = {sensitivity:.2f} × {prior_disease:.3f} + {false_positive_rate:.2f} × {(1 - prior_disease):.3f}

    P(Test+) = {p_test_positive:.4f}

    P(Disease|Test+) = ({sensitivity:.2f} × {prior_disease:.3f}) / {p_test_positive:.4f} = {posterior_disease:.3f}

    **Only {posterior_disease * 100:.1f}% of people who test positive actually have the disease!**
    """)
    return
@app.cell
def _(Rectangle, mo, plt):
    # Schematic of the 1000-person example: an outlined population box split
    # into disease / no-disease bands, overlaid with boxes for true positives,
    # false positives and all positive tests.
    figure, axes = plt.subplots(figsize=(10, 6))
    axes.set_xlim(0, 10)
    axes.set_ylim(0, 8)

    # Keyword sets shared by several labels
    centered = {"ha": "center", "va": "center"}
    band_label = {**centered, "color": "white", "weight": "bold"}

    # Outline standing for the whole population
    outline = Rectangle((1, 1), 8, 6, fill=False, edgecolor="black", linewidth=2)
    axes.add_patch(outline)
    axes.text(5, 7.2, "Population (1000 people)", ha="center", va="bottom")

    # Head counts at 0.1% prevalence
    disease_count = 1000 * 0.001
    no_disease_count = 1000 - disease_count

    # Band heights: each group's share of the 6-unit-tall outline
    disease_band = disease_count / 1000 * 6
    no_disease_band = no_disease_count / 1000 * 6

    # Red band for the diseased group, starting at the bottom of the outline
    axes.add_patch(Rectangle((1, 1), 8, disease_band, facecolor="red", alpha=0.5))
    axes.text(
        5,
        1.5 + disease_count / 1000 * 3,
        f"Disease ({disease_count:.0f} people)",
        **band_label,
    )

    # Blue band for everyone else, stacked directly on top of the red band
    axes.add_patch(
        Rectangle((1, 1 + disease_band), 8, no_disease_band, facecolor="blue", alpha=0.5)
    )
    axes.text(
        5,
        1.5 + disease_band + no_disease_count / 1000 * 3,
        f"No Disease ({no_disease_count:.0f} people)",
        **band_label,
    )

    # Row captions placed to the left of the result boxes
    axes.text(0.5, 4, "True Positive\n(99% of Diseased)", ha="right", va="center")
    axes.text(0.5, 2, "False Positive\n(5% of Non-Diseased)", ha="right", va="center")

    # Expected numbers of positive results in each group
    tp = disease_count * 0.99  # true positives
    fp = no_disease_count * 0.05  # false positives

    true_positive_box = Rectangle((2, 3.5), 2, 1, facecolor="green", alpha=0.7)
    axes.add_patch(true_positive_box)
    axes.text(3, 4, f"True Positive\n({tp:.0f})", **centered)

    false_positive_box = Rectangle((2, 1.5), 2, 1, facecolor="orange", alpha=0.7)
    axes.add_patch(false_positive_box)
    axes.text(3, 2, f"False Positive\n({fp:.0f})", **centered)

    # All positive tests, regardless of true disease status
    total_positives = tp + fp
    all_positive_box = Rectangle((5, 1.5), 3, 2, facecolor="yellow", alpha=0.5)
    axes.add_patch(all_positive_box)
    axes.text(6.5, 2.5, f"Total Positive\nTests ({total_positives:.0f})", **centered)

    # The coordinates are schematic, so hide the tick marks
    axes.set_xticks([])
    axes.set_yticks([])
    axes.set_title("Bayes' Theorem Visualization: Medical Testing")

    # Hand the current axes to mo.ui.matplotlib
    mo.ui.matplotlib(plt.gca())
    return
@app.cell
def _(mo):
    # Closing cell: summarizes the takeaways and lists other areas where the
    # same reasoning is used.
    #
    # The sub-points of item 3 are nested beneath it, and the trailing
    # "applies to" list is set off by blank lines so its lead-in sentence is
    # not absorbed into the preceding bullet.
    mo.md("""
    ## Why This Matters

    This example shows why Bayes' Theorem is important:

    1. **High false positive rate** combined with **low prevalence** leads to counterintuitive results
    2. **95% accurate tests** can still give misleading results when the condition is rare
    3. **Bayes' Theorem forces us to think about**:
        - Our initial beliefs (prior probability)
        - How likely we are to observe evidence given our beliefs
        - How to update our beliefs in light of new evidence

    This same logic applies to:

    - Spam detection
    - Financial risk assessment
    - Scientific hypothesis testing
    - Machine learning classification
    """)
    return
@app.cell
def _():
    # Empty cell: takes no inputs and its body only returns.
    return
@app.cell
def _():
    # Empty cell: takes no inputs and its body only returns.
    return
# Run the app only when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()
|