# Spaces: Sleeping  (page-status text captured with this listing; not part of the script)
import matplotlib.pyplot as plt
import numpy as np
import streamlit as st
from scipy.stats import norm
# Page setup: browser-tab title and wide layout
st.set_page_config(
    page_title="Proportion Testing Visualizer",
    layout="wide",
)
# Sidebar widgets: hypothesis-test parameters and display toggles
with st.sidebar:
    st.header("Parameters")
    p_0 = st.number_input(
        "Null Hypothesis Proportion (p₀)", min_value=0.0, max_value=1.0, value=0.50
    )
    p_1 = st.number_input(
        "True Proportion (p₁)", min_value=0.0, max_value=1.0, value=0.60
    )
    n = st.number_input("Sample Size (n)", min_value=1, value=30)
    alpha = st.slider(
        "Significance Level (α)", min_value=0.01, max_value=0.1, step=0.01, value=0.05
    )
    show_alt = st.checkbox("Show Alternative Distribution", value=False)
    simulate_many = st.checkbox("Simulate Many Samples", value=False)
    # The redraw button is only offered while the simulation option is ticked;
    # when it is not ticked, no simulation is requested.
    run_simulation = st.button("Redraw Simulations") if simulate_many else False
# First run of a session: create the three empty result histories together
if 'sample_proportions' not in st.session_state:
    for history_key in ('sample_proportions', 'z_scores', 'decisions'):
        st.session_state[history_key] = []
# Standard error of p̂ under H₀ and the two-tailed critical value.
# The p₀ input accepts exactly 0 and 1; at either endpoint p₀(1 - p₀) is 0, so
# the standard error is 0 and every Z-statistic computed from it would be a
# division by zero (inf/NaN). Report the problem and halt this run instead of
# letting non-finite values reach the plots.
if not 0 < p_0 < 1:
    st.error(
        "The null proportion p₀ must be strictly between 0 and 1; "
        "otherwise the standard error is zero and the Z-statistic is undefined."
    )
    st.stop()
se = np.sqrt(p_0 * (1 - p_0) / n)
# Upper critical value; the lower one is its negative
z_critical = norm.ppf(1 - alpha / 2)
# "Draw Sample" button: simulate one sample and record its test result
if st.sidebar.button("Draw Sample"):
    # Number of successes in n trials at the true proportion p_1
    successes = np.random.binomial(n, p_1)
    p_hat = successes / n
    # Standardize p̂ against the null proportion
    z_stat = (p_hat - p_0) / se
    if abs(z_stat) > z_critical:
        decision = 'Reject H₀'
    else:
        decision = 'Fail to Reject H₀'
    # Append to the three parallel histories kept in session state
    for history_key, value in (
        ('sample_proportions', p_hat),
        ('z_scores', z_stat),
        ('decisions', decision),
    ):
        st.session_state[history_key].append(value)
#####################################
# Main plot with dynamic x-axis
#####################################
# Draws the standard-normal null distribution of Z with its two-tailed
# rejection regions, optionally the distribution of Z when the true proportion
# is p_1, and a marker for the most recently drawn sample.

# Default window; widened below when something of interest falls outside it.
x_min, x_max = -4.0, 4.0

# 1) Alternative distribution. With true proportion p_1,
#    Z = (p̂ - p_0) / se is centered at (p_1 - p_0) / se and its spread is
#    sqrt(p_1 (1 - p_1) / n) / se, which equals 1 only when p_1 == p_0.
if show_alt:
    shift = (p_1 - p_0) / se
    alt_sd = np.sqrt(p_1 * (1 - p_1) / n) / se
    x_min = min(x_min, shift - 4 * alt_sd)
    x_max = max(x_max, shift + 4 * alt_sd)

# 2) Latest sample: keep its Z value visible, with a little padding around it.
latest_z = st.session_state.z_scores[-1] if st.session_state.z_scores else None
if latest_z is not None:
    pad = 1.0
    x_min = min(x_min, latest_z - pad)
    x_max = max(x_max, latest_z + pad)

x = np.linspace(x_min, x_max, 1000)
# Null distribution (centered at 0, stdev=1)
y = norm.pdf(x, loc=0, scale=1)

fig, ax = plt.subplots(figsize=(10, 5))

# Null curve, shaded rejection tails, and dashed critical-value lines
ax.plot(x, y, label='Null Distribution (Z)', color='black')
ax.fill_between(x, y, where=(x <= -z_critical) | (x >= z_critical),
                color='red', alpha=0.3, label='Rejection Regions')
ax.axvline(-z_critical, linestyle='--', color='red')
ax.axvline(z_critical, linestyle='--', color='red')

# Optional alternative distribution
if show_alt:
    if alt_sd > 0:
        alt_y = norm.pdf(x, loc=shift, scale=alt_sd)
        ax.plot(x, alt_y, label='Alternative Distribution (Z)', color='blue', linestyle='--')
    else:
        # p_1 is exactly 0 or 1: every sample yields the same p̂, so Z has no
        # spread and a density curve is undefined; mark its single value.
        ax.axvline(shift, label='Alternative Distribution (Z)', color='blue', linestyle='--')

# Marker for the latest sample: red inside the rejection region, blue otherwise
if latest_z is not None:
    z_stat = latest_z
    color_choice = 'blue' if abs(z_stat) <= z_critical else 'red'
    ax.axvline(z_stat, color=color_choice, linestyle='-', linewidth=2,
               label=f'Z = {z_stat:.2f}')
    ax.annotate(f"Z = {z_stat:.2f}",
                xy=(z_stat, 0.05),  # arrow points here
                xytext=(z_stat + 0.4, 0.15),  # position of text box
                ha='center',
                arrowprops=dict(facecolor='black', arrowstyle='->'),
                bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8))

ax.set_title("Z Sampling Distribution Under H₀")
ax.set_xlabel("Z-Score")
ax.set_ylabel("Probability Density")
ax.legend()
st.pyplot(fig)
# pyplot keeps every figure it creates registered until it is closed, and this
# script re-executes on each interaction, so close the figure once rendered
# to avoid accumulating one open figure per rerun.
plt.close(fig)
# Text summary of the most recently drawn sample.
# The decision is re-derived from the stored Z-statistic and the *current*
# critical value rather than read back from the stored decision: the stored
# text was fixed at draw time, so after the α slider moves it could contradict
# both the α printed beside it and the marker color on the plot above, which
# is based on the current critical value.
if st.session_state.z_scores:
    latest_p_hat = st.session_state.sample_proportions[-1]
    latest_z_stat = st.session_state.z_scores[-1]
    latest_decision = (
        'Reject H₀' if abs(latest_z_stat) > z_critical else 'Fail to Reject H₀'
    )
    st.subheader("Latest Sample Analysis")
    st.write(f"Sample Proportion (p̂): **{latest_p_hat:.3f}**")
    st.write(f"Z-Statistic: **{latest_z_stat:.2f}**")
    st.write(f"Decision: **{latest_decision}** at α = {alpha}")
# Formula reference: one markdown/LaTeX panel per column, four columns wide
st.markdown("---")
st.subheader("📐 Formulas Used")
formula_panels = (
    r"""
**Standard Error (SE):**
$$ SE(\hat{p}) = \sqrt{\dfrac{p_0(1 - p_0)}{n}} $$
""",
    r"""
**Z-Statistic:**
$$ Z = \dfrac{\hat{p} - p_0}{\sqrt{\dfrac{p_0(1 - p_0)}{n}}} $$
""",
    r"""
**Critical Values (Two-Tailed):**
$$ \pm Z_{\alpha/2} $$
""",
    r"""
**Decision Rule:**
Reject $ H_0 $ if $ |Z| > |Z_{\alpha/2}| $
""",
)
# Pair each panel with its column, left to right
for column, panel_text in zip(st.columns(4), formula_panels):
    with column:
        st.markdown(panel_text)
# Simulate many samples with H₀ actually true and report how often the test
# rejects anyway (the empirical Type I error rate). Runs only on the rerun
# triggered by the "Redraw Simulations" button.
if simulate_many and run_simulation:
    st.subheader("Simulation of Many Samples")
    num_sim = 1000
    # Under H₀ the true proportion is p_0: draw num_sim success counts,
    # each from n trials, and convert them to sample proportions.
    binomial_draws = np.random.binomial(n, p_0, size=num_sim)
    p_hats = binomial_draws / n
    z_vals = (p_hats - p_0) / se
    # Fraction of simulated samples whose |Z| exceeds the critical value
    type_I_errors = np.mean(np.abs(z_vals) > z_critical)

    fig2, ax2 = plt.subplots(figsize=(10, 4))
    ax2.hist(z_vals, bins=50, density=True, alpha=0.6)
    ax2.axvline(-z_critical, linestyle='--', color='red')
    ax2.axvline(z_critical, linestyle='--', color='red')
    ax2.set_title("Histogram of Z-Statistics from Simulated Samples (Under H₀)")
    ax2.set_xlabel("Z-Statistic")
    ax2.set_ylabel("Density")
    st.pyplot(fig2)
    # pyplot keeps figures registered until closed; close this one after
    # rendering so repeated redraws do not pile up open figures.
    plt.close(fig2)

    st.write(f"Empirical Rejection Rate (Type I error, H₀ true): **{type_I_errors:.3f}**")