"""Streamlit app visualizing a two-tailed one-sample z-test for a proportion.

The sidebar sets the null proportion p₀, the true proportion p₁, the sample
size n and the significance level α.  "Draw Sample" draws one binomial sample
from p₁ and plots its z-statistic against the standard-normal null
distribution; "Simulate Many Samples" draws many samples under H₀ to show the
empirical Type I error rate.
"""
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
from scipy.stats import norm

# Number of samples drawn under H₀ for the Type I error simulation.
NUM_SIMULATIONS = 1000

REJECT_LABEL = 'Reject H₀'
FAIL_TO_REJECT_LABEL = 'Fail to Reject H₀'


def _decide(z_value, critical_value):
    """Return the two-tailed decision label for a z-statistic."""
    if abs(z_value) > critical_value:
        return REJECT_LABEL
    return FAIL_TO_REJECT_LABEL


# Set page config (must be the first Streamlit command in the script).
st.set_page_config(page_title="Proportion Testing Visualizer", layout="wide")

# ---------------------------------------------------------------------------
# Sidebar controls
# ---------------------------------------------------------------------------
st.sidebar.header("Parameters")
p_0 = st.sidebar.number_input(
    "Null Hypothesis Proportion (p₀)", min_value=0.0, max_value=1.0, value=0.50
)
p_1 = st.sidebar.number_input(
    "True Proportion (p₁)", min_value=0.0, max_value=1.0, value=0.60
)
n = st.sidebar.number_input("Sample Size (n)", min_value=1, value=30)
alpha = st.sidebar.slider(
    "Significance Level (α)", min_value=0.01, max_value=0.1, step=0.01, value=0.05
)
show_alt = st.sidebar.checkbox("Show Alternative Distribution", value=False)
simulate_many = st.sidebar.checkbox("Simulate Many Samples", value=False)

# The redraw button only exists while the simulation checkbox is ticked.
run_simulation = False
if simulate_many:
    run_simulation = st.sidebar.button("Redraw Simulations")

draw_sample = st.sidebar.button("Draw Sample")

# ---------------------------------------------------------------------------
# Session state: history of drawn samples survives Streamlit reruns
# ---------------------------------------------------------------------------
if 'sample_proportions' not in st.session_state:
    st.session_state.sample_proportions = []
    st.session_state.z_scores = []
    st.session_state.decisions = []

# ---------------------------------------------------------------------------
# Standard error under H₀ and two-tailed critical value
# ---------------------------------------------------------------------------
se = np.sqrt(p_0 * (1 - p_0) / n)
if se == 0:
    # p₀ of exactly 0 or 1 gives a zero standard error; every z-statistic
    # below would be a division by zero, so stop before computing anything.
    st.error(
        "The null proportion p₀ must be strictly between 0 and 1: "
        "the standard error is zero otherwise and the Z-statistic is undefined."
    )
    st.stop()

z_critical = norm.ppf(1 - alpha / 2)

# Distribution of Z = (p̂ - p₀) / SE₀ when the true proportion is p₁:
# mean (p₁ - p₀) / SE₀ and standard deviation sqrt(p₁(1 - p₁)/n) / SE₀.
alt_shift = (p_1 - p_0) / se
alt_scale = np.sqrt(p_1 * (1 - p_1) / n) / se

# ---------------------------------------------------------------------------
# Draw a single sample from the true proportion p₁
# ---------------------------------------------------------------------------
if draw_sample:
    successes = np.random.binomial(n, p_1)
    p_hat = successes / n
    z_stat = (p_hat - p_0) / se  # Z statistic under H₀

    st.session_state.sample_proportions.append(p_hat)
    st.session_state.z_scores.append(z_stat)
    st.session_state.decisions.append(_decide(z_stat, z_critical))

has_sample = bool(st.session_state.z_scores)
latest_z = st.session_state.z_scores[-1] if has_sample else None

# ---------------------------------------------------------------------------
# Main plot with dynamic x-axis
# ---------------------------------------------------------------------------
x_min, x_max = -4.0, 4.0

# 1) Widen the axis to contain the alternative distribution when shown.
if show_alt:
    # A degenerate alternative (p₁ of 0 or 1) has no spread; pad by 1 instead.
    alt_half_width = 4 * alt_scale if alt_scale > 0 else 1.0
    x_min = min(x_min, alt_shift - alt_half_width)
    x_max = max(x_max, alt_shift + alt_half_width)

# 2) Widen the axis if the latest z-statistic falls outside the range.
if has_sample:
    pad = 1.0  # a little breathing room around the marker
    x_min = min(x_min, latest_z - pad)
    x_max = max(x_max, latest_z + pad)

x = np.linspace(x_min, x_max, 1000)
y = norm.pdf(x, loc=0, scale=1)  # null distribution: standard normal

fig, ax = plt.subplots(figsize=(10, 5))

# Null distribution with shaded two-tailed rejection regions.
ax.plot(x, y, label='Null Distribution (Z)', color='black')
ax.fill_between(
    x,
    y,
    where=(x <= -z_critical) | (x >= z_critical),
    color='red',
    alpha=0.3,
    label='Rejection Regions',
)
ax.axvline(-z_critical, linestyle='--', color='red')
ax.axvline(z_critical, linestyle='--', color='red')

# Optional alternative distribution.
if show_alt:
    if alt_scale > 0:
        ax.plot(
            x,
            norm.pdf(x, loc=alt_shift, scale=alt_scale),
            label='Alternative Distribution (Z)',
            color='blue',
            linestyle='--',
        )
    else:
        # All probability mass sits at a single z value; mark it with a line
        # because a normal density with zero scale is undefined.
        ax.axvline(
            alt_shift,
            label='Alternative Distribution (Z)',
            color='blue',
            linestyle='--',
        )

# Marker for the latest sample's z-statistic.
if has_sample:
    color_choice = 'blue' if abs(latest_z) <= z_critical else 'red'
    ax.axvline(
        latest_z,
        color=color_choice,
        linestyle='-',
        linewidth=2,
        label=f'Z = {latest_z:.2f}',
    )
    ax.annotate(
        f"Z = {latest_z:.2f}",
        xy=(latest_z, 0.05),  # arrow points here
        xytext=(latest_z + 0.4, 0.15),  # position of text box
        ha='center',
        arrowprops=dict(facecolor='black', arrowstyle='->'),
        bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8),
    )

ax.set_title("Z Sampling Distribution Under H₀")
ax.set_xlabel("Z-Score")
ax.set_ylabel("Probability Density")
ax.legend()
st.pyplot(fig)
# Streamlit reruns this script on every interaction; close the figure so
# pyplot's global registry does not accumulate one figure per rerun.
plt.close(fig)

# ---------------------------------------------------------------------------
# Latest sample summary
# ---------------------------------------------------------------------------
if has_sample:
    st.subheader("Latest Sample Analysis")
    st.write(
        f"Sample Proportion (p̂): **{st.session_state.sample_proportions[-1]:.3f}**"
    )
    st.write(f"Z-Statistic: **{latest_z:.2f}**")
    # Re-derive the decision from the current critical value so the text
    # agrees with the α shown next to it and with the marker colour above,
    # even if α was changed after the sample was drawn.
    st.write(f"Decision: **{_decide(latest_z, z_critical)}** at α = {alpha}")

# ---------------------------------------------------------------------------
# Formulas in four columns
# ---------------------------------------------------------------------------
st.markdown("---")
st.subheader("📐 Formulas Used")

col1, col2, col3, col4 = st.columns(4)

with col1:
    st.markdown(r"""
    **Standard Error (SE):**
    $$
    SE(\hat{p}) = \sqrt{\dfrac{p_0(1 - p_0)}{n}}
    $$
    """)

with col2:
    st.markdown(r"""
    **Z-Statistic:**
    $$
    Z = \dfrac{\hat{p} - p_0}{\sqrt{\dfrac{p_0(1 - p_0)}{n}}}
    $$
    """)

with col3:
    st.markdown(r"""
    **Critical Values (Two-Tailed):**
    $$
    \pm Z_{\alpha/2}
    $$
    """)

with col4:
    st.markdown(r"""
    **Decision Rule:**
    Reject $ H_0 $ if $ |Z| > |Z_{\alpha/2}| $
    """)

# ---------------------------------------------------------------------------
# Simulate many samples (Type I error check: H₀ is actually true)
# ---------------------------------------------------------------------------
if simulate_many and run_simulation:
    st.subheader("Simulation of Many Samples")

    # Under H₀ the true proportion is p₀: draw binomial counts of size n.
    binomial_draws = np.random.binomial(n, p_0, size=NUM_SIMULATIONS)
    z_vals = (binomial_draws / n - p_0) / se
    type_I_errors = np.mean(np.abs(z_vals) > z_critical)

    fig2, ax2 = plt.subplots(figsize=(10, 4))
    ax2.hist(z_vals, bins=50, density=True, alpha=0.6)
    ax2.axvline(-z_critical, linestyle='--', color='red')
    ax2.axvline(z_critical, linestyle='--', color='red')
    ax2.set_title("Histogram of Z-Statistics from Simulated Samples (Under H₀)")
    ax2.set_xlabel("Z-Statistic")
    ax2.set_ylabel("Density")
    st.pyplot(fig2)
    plt.close(fig2)

    st.write(
        f"Empirical Rejection Rate (Type I error, H₀ true): **{type_I_errors:.3f}**"
    )