import marimo

__generated_with = "0.23.3"
app = marimo.App()


@app.cell
def _():
    # Shared imports; the names returned below are made available to the
    # cells that list them as parameters.
    import marimo as mo
    import numpy as np
    import matplotlib.pyplot as plt
    from matplotlib.patches import Rectangle

    return Rectangle, mo, plt


@app.cell
def _(mo):
    # Raw string: LaTeX backslashes must reach the renderer untouched.
    # In a regular string "\f" is the form-feed escape, which silently
    # turns "\frac" into "<FF>rac" and breaks the formula.
    mo.md(r"""
    # Bayes' Theorem: Updating Beliefs with Evidence

    Bayes' Theorem is a fundamental concept in probability theory that helps us update our beliefs based on new evidence. The formula is:

    $$P(A|B) = \frac{P(B|A)P(A)}{P(B)}$$

    Where:

    - $P(A|B)$ is the **posterior probability**: the probability of event A given that B occurred
    - $P(B|A)$ is the **likelihood**: the probability of observing B given that A is true
    - $P(A)$ is the **prior probability**: our initial belief about A before seeing B
    - $P(B)$ is the **evidence**: the overall probability of observing B
    """)
    return


@app.cell
def _(mo):
    mo.md("""
    ## Medical Testing Example

    Let's say we're testing for a rare disease:

    - Prevalence (prior probability): 0.1% of the population has the disease
    - Test accuracy:
        - If you have the disease, the test correctly identifies it 99% of the time (true positive rate)
        - If you don't have the disease, the test incorrectly says you do 5% of the time (false positive rate)

    What's the probability that someone who tests positive actually has the disease?
    """)
    return


@app.cell
def _(mo):
    # Define the parameters
    prior_disease = 0.001  # P(Disease)
    sensitivity = 0.99  # P(Test+ | Disease)
    false_positive_rate = 0.05  # P(Test+ | No Disease)

    # Calculate the components
    # P(Test+), by the law of total probability over Disease / No Disease
    p_test_positive = (
        sensitivity * prior_disease
        + false_positive_rate * (1 - prior_disease)
    )

    # Apply Bayes' Theorem
    posterior_disease = (sensitivity * prior_disease) / p_test_positive

    # Display results.
    # Percentages carry an explicit ".1f" so that binary floating-point
    # noise (e.g. 7.000000000000001) can never leak into the rendered text
    # if the parameters above are edited.
    mo.md(f"""
    Given:

    - Prior probability of disease: {prior_disease * 100:.1f}%
    - Sensitivity (true positive rate): {sensitivity * 100:.1f}%
    - False positive rate: {false_positive_rate * 100:.1f}%

    Using Bayes' theorem:

    P(Disease|Test+) = [P(Test+|Disease) × P(Disease)] / P(Test+)

    P(Test+) = P(Test+|Disease) × P(Disease) + P(Test+|No Disease) × P(No Disease)

    P(Test+) = {sensitivity:.2f} × {prior_disease:.3f} + {false_positive_rate:.2f} × {(1 - prior_disease):.3f}

    P(Test+) = {p_test_positive:.4f}

    P(Disease|Test+) = ({sensitivity:.2f} × {prior_disease:.3f}) / {p_test_positive:.4f} = {posterior_disease:.3f}

    **Only {posterior_disease * 100:.1f}% of people who test positive actually have the disease!**
    """)
    return


@app.cell
def _(Rectangle, mo, plt):
    # Create a visualization showing the four categories.
    # NOTE: the rates used here (0.1% prevalence, 99% sensitivity, 5% false
    # positive rate) are hard-coded and mirror the worked example; they are
    # not read from the calculation cell.
    fig, ax = plt.subplots(figsize=(10, 6))

    # Set up the grid
    ax.set_xlim(0, 10)
    ax.set_ylim(0, 8)

    # Draw rectangles for different categories
    # Population (1000 people)
    ax.add_patch(
        Rectangle((1, 1), 8, 6, fill=False, edgecolor="black", linewidth=2)
    )
    ax.text(5, 7.2, "Population (1000 people)", ha="center", va="bottom")

    # Disease (0.1%)
    disease_count = 1000 * 0.001
    no_disease_count = 1000 - disease_count

    # Draw disease area (height is proportional to its population share)
    ax.add_patch(
        Rectangle(
            (1, 1),
            8,
            disease_count / 1000 * 6,
            facecolor="red",
            alpha=0.5,
        )
    )
    ax.text(
        5,
        1.5 + disease_count / 1000 * 3,
        f"Disease ({disease_count:.0f} people)",
        ha="center",
        va="center",
        color="white",
        weight="bold",
    )

    # Draw non-disease area, stacked directly above the disease area
    ax.add_patch(
        Rectangle(
            (1, 1 + disease_count / 1000 * 6),
            8,
            no_disease_count / 1000 * 6,
            facecolor="blue",
            alpha=0.5,
        )
    )
    ax.text(
        5,
        1.5 + disease_count / 1000 * 6 + no_disease_count / 1000 * 3,
        f"No Disease ({no_disease_count:.0f} people)",
        ha="center",
        va="center",
        color="white",
        weight="bold",
    )

    # Add labels
    ax.text(0.5, 4, "True Positive\n(99% of Diseased)", ha="right", va="center")
    ax.text(0.5, 2, "False Positive\n(5% of Non-Diseased)", ha="right", va="center")

    # Add test results
    tp = disease_count * 0.99  # True positives
    fp = no_disease_count * 0.05  # False positives

    ax.add_patch(Rectangle((2, 3.5), 2, 1, facecolor="green", alpha=0.7))
    ax.text(3, 4, f"True Positive\n({tp:.0f})", ha="center", va="center")

    ax.add_patch(Rectangle((2, 1.5), 2, 1, facecolor="orange", alpha=0.7))
    ax.text(3, 2, f"False Positive\n({fp:.0f})", ha="center", va="center")

    # Add total positive tests
    total_positives = tp + fp
    ax.add_patch(Rectangle((5, 1.5), 3, 2, facecolor="yellow", alpha=0.5))
    ax.text(
        6.5,
        2.5,
        f"Total Positive\nTests ({total_positives:.0f})",
        ha="center",
        va="center",
    )

    ax.set_xticks([])
    ax.set_yticks([])
    ax.set_title("Bayes' Theorem Visualization: Medical Testing")

    # Last expression of the cell: this is what the notebook displays.
    mo.ui.matplotlib(plt.gca())
    return


@app.cell
def _(mo):
    mo.md("""
    ## Why This Matters

    This example shows why Bayes' Theorem is important:

    1. **High false positive rate** combined with **low prevalence** leads to counterintuitive results
    2. **95% accurate tests** can still give misleading results when the condition is rare
    3. **Bayes' Theorem forces us to think about**:
        - Our initial beliefs (prior probability)
        - How likely we are to observe evidence given our beliefs
        - How to update our beliefs in light of new evidence

    This same logic applies to:

    - Spam detection
    - Financial risk assessment
    - Scientific hypothesis testing
    - Machine learning classification
    """)
    return


@app.cell
def _():
    return


@app.cell
def _():
    return


if __name__ == "__main__":
    app.run()