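# Header text carried over from the hosting page (kept verbatim, commented out so the file parses):
# kgdrathan's picture
# Upload folder using huggingface_hub
# ac7572a verified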
import marimo
# Version string stored with the notebook (presumably the marimo release that wrote this file).
__generated_with = "0.23.3"
# Application object; every function decorated with `@app.cell` below is attached to it.
app = marimo.App()
@app.cell
def _():
    # Shared imports. Only the names returned here are handed to the other
    # cells (they receive them as function parameters). The previous
    # `import numpy as np` was dropped: nothing in the notebook referenced it
    # and it was not part of the returned names.
    import marimo as mo
    import matplotlib.pyplot as plt
    from matplotlib.patches import Rectangle

    return Rectangle, mo, plt
@app.cell
def _(mo):
    # Introductory text for the notebook.
    # The string is raw so the backslash in `\frac` reaches the math renderer
    # intact; in a normal string "\f" is the form-feed escape, which is what
    # left a bare "rac{...}" in the formula. Blank lines keep the heading,
    # paragraphs, display formula and bullet list as separate Markdown blocks.
    mo.md(r"""
    # Bayes' Theorem: Updating Beliefs with Evidence

    Bayes' Theorem is a fundamental concept in probability theory that helps us update our beliefs based on new evidence.

    The formula is:

    $$P(A|B) = \frac{P(B|A)P(A)}{P(B)}$$

    Where:

    - $P(A|B)$ is the **posterior probability**: the probability of event A given that B occurred
    - $P(B|A)$ is the **likelihood**: the probability of observing B given that A is true
    - $P(A)$ is the **prior probability**: our initial belief about A before seeing B
    - $P(B)$ is the **evidence**: the overall probability of observing B
    """)
    return
@app.cell
def _(mo):
    # Statement of the worked example.
    # The two accuracy figures are nested under "Test accuracy:" (four-space
    # indent) and blank lines separate the paragraphs from the list, so the
    # closing question is not absorbed into the last bullet.
    mo.md("""
    ## Medical Testing Example

    Let's say we're testing for a rare disease:

    - Prevalence (prior probability): 0.1% of the population has the disease
    - Test accuracy:
        - If you have the disease, the test correctly identifies it 99% of the time (true positive rate)
        - If you don't have the disease, the test incorrectly says you do 5% of the time (false positive rate)

    What's the probability that someone who tests positive actually has the disease?
    """)
    return
@app.cell
def _(mo):
    # Work the medical-testing example through Bayes' theorem and render the
    # derivation as Markdown.

    # Define the parameters
    prior_disease = 0.001  # P(Disease)
    sensitivity = 0.99  # P(Test+ | Disease)
    false_positive_rate = 0.05  # P(Test+ | No Disease)

    # P(Test+) by the law of total probability: positives among the diseased
    # plus positives among the healthy.
    p_test_positive = sensitivity * prior_disease + false_positive_rate * (1 - prior_disease)

    # Apply Bayes' Theorem: P(Disease | Test+)
    posterior_disease = (sensitivity * prior_disease) / p_test_positive

    # Display results. Every percentage carries an explicit format spec so a
    # product such as 0.07 * 100 can never print as 7.000000000000001, and
    # each derivation step sits in its own paragraph (blank-line separated)
    # instead of being merged into a single run-on line.
    mo.md(f"""
    Given:

    - Prior probability of disease: {prior_disease * 100:.1f}%
    - Sensitivity (true positive rate): {sensitivity * 100:.1f}%
    - False positive rate: {false_positive_rate * 100:.1f}%

    Using Bayes' theorem:

    P(Disease|Test+) = [P(Test+|Disease) × P(Disease)] / P(Test+)

    P(Test+) = P(Test+|Disease) × P(Disease) + P(Test+|No Disease) × P(No Disease)

    P(Test+) = {sensitivity:.2f} × {prior_disease:.3f} + {false_positive_rate:.2f} × {(1 - prior_disease):.3f}

    P(Test+) = {p_test_positive:.4f}

    P(Disease|Test+) = ({sensitivity:.2f} × {prior_disease:.3f}) / {p_test_positive:.4f} = {posterior_disease:.3f}

    **Only {posterior_disease * 100:.1f}% of people who test positive actually have the disease!**
    """)
    return
@app.cell
def _(Rectangle, mo, plt):
    # Draw a schematic of the medical-testing example: the population box,
    # its diseased / non-diseased split, and the positive-test counts.

    # Scenario constants, stated once so the arithmetic and the label text
    # cannot drift apart. The names are underscore-prefixed to keep them
    # private to this cell, so they do not clash with the similarly named
    # values defined in the calculation cell.
    _population = 1000
    _prevalence = 0.001  # P(Disease)
    _sensitivity = 0.99  # P(Test+ | Disease)
    _false_positive_rate = 0.05  # P(Test+ | No Disease)
    _box_height = 6  # height of the population rectangle, in data units
    _half_height = _box_height / 2

    def _people(count):
        # Head-count label that uses the singular when the count rounds to one.
        shown = f"{count:.0f}"
        return f"{shown} person" if shown == "1" else f"{shown} people"

    fig, ax = plt.subplots(figsize=(10, 6))

    # Set up the grid
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 8)

    # Population outline
    ax.add_patch(Rectangle((1, 1), 8, _box_height, fill=False, edgecolor="black", linewidth=2))
    ax.text(5, 7.2, f"Population ({_population} people)", ha="center", va="bottom")

    # Split of the population by disease status
    disease_count = _population * _prevalence
    no_disease_count = _population - disease_count
    _disease_share = disease_count / _population
    _no_disease_share = no_disease_count / _population

    # Diseased band: starts at the bottom of the box, height proportional to its share
    ax.add_patch(Rectangle((1, 1), 8, _disease_share * _box_height, facecolor="red", alpha=0.5))
    ax.text(
        5,
        1.5 + _disease_share * _half_height,
        f"Disease ({_people(disease_count)})",
        ha="center",
        va="center",
        color="white",
        weight="bold",
    )

    # Non-diseased band: stacked directly on top of the diseased band
    ax.add_patch(
        Rectangle(
            (1, 1 + _disease_share * _box_height),
            8,
            _no_disease_share * _box_height,
            facecolor="blue",
            alpha=0.5,
        )
    )
    ax.text(
        5,
        1.5 + _disease_share * _box_height + _no_disease_share * _half_height,
        f"No Disease ({_people(no_disease_count)})",
        ha="center",
        va="center",
        color="white",
        weight="bold",
    )

    # Side labels explaining the two kinds of positive result
    ax.text(0.5, 4, f"True Positive\n({_sensitivity:.0%} of Diseased)", ha="right", va="center")
    ax.text(
        0.5,
        2,
        f"False Positive\n({_false_positive_rate:.0%} of Non-Diseased)",
        ha="right",
        va="center",
    )

    # Expected number of positive tests in each group
    tp = disease_count * _sensitivity  # True positives
    fp = no_disease_count * _false_positive_rate  # False positives
    ax.add_patch(Rectangle((2, 3.5), 2, 1, facecolor="green", alpha=0.7))
    ax.text(3, 4, f"True Positive\n({tp:.0f})", ha="center", va="center")
    ax.add_patch(Rectangle((2, 1.5), 2, 1, facecolor="orange", alpha=0.7))
    ax.text(3, 2, f"False Positive\n({fp:.0f})", ha="center", va="center")

    # Total positive tests
    total_positives = tp + fp
    ax.add_patch(Rectangle((5, 1.5), 3, 2, facecolor="yellow", alpha=0.5))
    ax.text(6.5, 2.5, f"Total Positive\nTests ({total_positives:.0f})", ha="center", va="center")

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title("Bayes' Theorem Visualization: Medical Testing")

    # `ax` is the current axes created by plt.subplots above, i.e. the same
    # object plt.gca() would return here.
    mo.ui.matplotlib(ax)
    return
@app.cell
def _(mo):
    # Closing discussion.
    # The three "think about" points are nested under item 3 (four-space
    # indent), and blank lines separate the paragraphs from the lists so the
    # numbered list and the applications list render as two distinct lists.
    mo.md("""
    ## Why This Matters

    This example shows why Bayes' Theorem is important:

    1. **High false positive rate** combined with **low prevalence** leads to counterintuitive results
    2. **95% accurate tests** can still give misleading results when the condition is rare
    3. **Bayes' Theorem forces us to think about**:
        - Our initial beliefs (prior probability)
        - How likely we are to observe evidence given our beliefs
        - How to update our beliefs in light of new evidence

    This same logic applies to:

    - Spam detection
    - Financial risk assessment
    - Scientific hypothesis testing
    - Machine learning classification
    """)
    return
@app.cell
def _():
    # Empty cell: no statements other than the bare return.
    return
@app.cell
def _():
    # Empty cell: no statements other than the bare return.
    return
# Run the notebook application when this file is executed directly as a script.
if __name__ == "__main__":
    app.run()