# iurbinah's picture
# Update app.py
# a5141dd verified
import streamlit as st
import numpy as np
import matplotlib.pyplot as plt
from scipy.stats import norm
# Configure the browser tab title and use the wide page layout
st.set_page_config(
    page_title="Proportion Testing Visualizer",
    layout="wide",
)

# Sidebar controls: every widget created inside this block lands in the sidebar
with st.sidebar:
    st.header("Parameters")
    # Proportion assumed by the null hypothesis
    p_0 = st.number_input(
        "Null Hypothesis Proportion (p₀)",
        min_value=0.0,
        max_value=1.0,
        value=0.50,
    )
    # Proportion that single samples are actually drawn from
    p_1 = st.number_input(
        "True Proportion (p₁)",
        min_value=0.0,
        max_value=1.0,
        value=0.60,
    )
    # Number of trials per sample
    n = st.number_input("Sample Size (n)", min_value=1, value=30)
    # Two-tailed significance level
    alpha = st.slider(
        "Significance Level (α)",
        min_value=0.01,
        max_value=0.1,
        step=0.01,
        value=0.05,
    )
    # Optional overlays / extra sections
    show_alt = st.checkbox("Show Alternative Distribution", value=False)
    simulate_many = st.checkbox("Simulate Many Samples", value=False)
# The "Redraw Simulations" button is only rendered while the simulation
# checkbox is ticked; short-circuiting keeps run_simulation False otherwise.
run_simulation = simulate_many and st.sidebar.button("Redraw Simulations")
# On the first run of a session, create the three parallel history lists
# (one entry per drawn sample) that persist across script reruns.
if 'sample_proportions' not in st.session_state:
    for history_key in ('sample_proportions', 'z_scores', 'decisions'):
        st.session_state[history_key] = []
# Validate the null proportion before standardizing anything.
# The sidebar widget allows p₀ = 0 and p₀ = 1, but then the null standard
# error is exactly 0 and every later division by `se` yields inf/nan
# z-scores (and a non-finite plot range). Report the problem and halt this
# script run instead of rendering a broken page.
if not 0.0 < p_0 < 1.0:
    st.error("Null Hypothesis Proportion (p₀) must be strictly between 0 and 1.")
    st.stop()

# Standard error of the sample proportion under H₀
se = np.sqrt(p_0 * (1 - p_0) / n)
# Two-tailed critical value: reject H₀ when |Z| exceeds it
z_critical = norm.ppf(1 - alpha / 2)
# "Draw Sample": simulate one sample from the true proportion and record it
if st.sidebar.button("Draw Sample"):
    # Number of successes in n trials with success probability p_1
    successes = np.random.binomial(n, p_1)
    p_hat = successes / n

    # Standardize the observed proportion using the null mean and null SE
    z_stat = (p_hat - p_0) / se

    # Two-tailed test against the critical value in effect right now
    if abs(z_stat) > z_critical:
        decision = 'Reject H₀'
    else:
        decision = 'Fail to Reject H₀'

    # Append to the parallel per-sample histories kept in session state
    for history_key, value in (
        ('sample_proportions', p_hat),
        ('z_scores', z_stat),
        ('decisions', decision),
    ):
        st.session_state[history_key].append(value)
#####################################
# Main Plot with dynamic x-axis
#####################################
# Everything is drawn on the standardized (Z) scale. The axis starts at
# [-4, 4] and is widened so the optional alternative curve and the most
# recent sample's z-score are always visible.
x_min, x_max = -4.0, 4.0

# 1) If the alternative curve is shown, widen the axis to cover it
if show_alt:
    # Center of the alternative curve, in null standard-error units.
    # Computed once here and reused when the curve is drawn below.
    shift = (p_1 - p_0) / se
    x_min = min(x_min, shift - 4)
    x_max = max(x_max, shift + 4)

# 2) Also widen if the latest sample's z-score falls outside the range
latest_z = st.session_state.z_scores[-1] if st.session_state.z_scores else None
if latest_z is not None:
    pad = 1.0  # a little breathing room around the marker
    x_min = min(x_min, latest_z - pad)
    x_max = max(x_max, latest_z + pad)

x = np.linspace(x_min, x_max, 1000)
# Null distribution (centered at 0, stdev=1)
y = norm.pdf(x, loc=0, scale=1)

fig, ax = plt.subplots(figsize=(10, 5))

# Null curve with both rejection tails shaded and the critical values marked
ax.plot(x, y, label='Null Distribution (Z)', color='black')
ax.fill_between(
    x,
    y,
    where=(x <= -z_critical) | (x >= z_critical),
    color='red',
    alpha=0.3,
    label='Rejection Regions',
)
ax.axvline(-z_critical, linestyle='--', color='red')
ax.axvline(z_critical, linestyle='--', color='red')

# Optional alternative distribution
if show_alt:
    # NOTE(review): the curve reuses scale=1, i.e. the same spread as the
    # null. Under p₁ the spread of Z is sqrt(p₁(1-p₁) / (p₀(1-p₀))), which
    # equals 1 only when both variances match — confirm this simplification
    # is intended.
    alt_y = norm.pdf(x, loc=shift, scale=1)
    ax.plot(x, alt_y, label='Alternative Distribution (Z)', color='blue', linestyle='--')

# Mark the most recent sample's z-score
if latest_z is not None:
    # Red when the z-score lies in the rejection region for the current
    # critical value, blue otherwise
    color_choice = 'blue' if abs(latest_z) <= z_critical else 'red'
    ax.axvline(latest_z, color=color_choice, linestyle='-', linewidth=2,
               label=f'Z = {latest_z:.2f}')
    ax.annotate(f"Z = {latest_z:.2f}",
                xy=(latest_z, 0.05),  # arrow points here
                xytext=(latest_z + 0.4, 0.15),  # position of text box
                ha='center',
                arrowprops=dict(facecolor='black', arrowstyle='->'),
                bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8))

ax.set_title("Z Sampling Distribution Under H₀")
ax.set_xlabel("Z-Score")
ax.set_ylabel("Probability Density")
ax.legend()
st.pyplot(fig)
# Figures made with plt.subplots stay registered in pyplot until closed.
# The script reruns on every interaction, so close the figure once it has
# been rendered to avoid accumulating one open figure per rerun.
plt.close(fig)
# Show the analysis of the most recent sample (if any has been drawn)
if st.session_state.z_scores:
    latest_p_hat = st.session_state.sample_proportions[-1]
    latest_z = st.session_state.z_scores[-1]
    # Re-evaluate the decision against the CURRENT critical value. The entry
    # stored in st.session_state.decisions was made with the α in effect when
    # the sample was drawn; showing it next to the current α (and next to a
    # plot that colors the z line using the current critical value) could
    # display a contradictory verdict after the slider is moved.
    latest_decision = 'Reject H₀' if abs(latest_z) > z_critical else 'Fail to Reject H₀'

    st.subheader("Latest Sample Analysis")
    st.write(f"Sample Proportion (p̂): **{latest_p_hat:.3f}**")
    st.write(f"Z-Statistic: **{latest_z:.2f}**")
    st.write(f"Decision: **{latest_decision}** at α = {alpha}")
# Formula reference: a divider, a heading, then one formula per column
st.markdown("---")
st.subheader("📐 Formulas Used")

# Markdown/LaTeX snippets in left-to-right display order. The text inside
# the raw strings is kept flush-left so the rendered markdown is unchanged.
formula_panels = (
    r"""
**Standard Error (SE):**
$$ SE(\hat{p}) = \sqrt{\dfrac{p_0(1 - p_0)}{n}} $$
""",
    r"""
**Z-Statistic:**
$$ Z = \dfrac{\hat{p} - p_0}{\sqrt{\dfrac{p_0(1 - p_0)}{n}}} $$
""",
    r"""
**Critical Values (Two-Tailed):**
$$ \pm Z_{\alpha/2} $$
""",
    r"""
**Decision Rule:**
Reject $ H_0 $ if $ |Z| > |Z_{\alpha/2}| $
""",
)

# Pair each snippet with its own column of a four-column row
for column, panel in zip(st.columns(4), formula_panels):
    column.markdown(panel)
# Simulate many samples (Type I Error check if H₀ is actually true).
# Runs only on the rerun triggered by the "Redraw Simulations" button while
# the simulation checkbox is ticked.
if simulate_many and run_simulation:
    st.subheader("Simulation of Many Samples")
    num_sim = 1000

    # Under H₀ the true proportion is p_0: draw num_sim success counts,
    # each from n trials with success probability p_0
    binomial_draws = np.random.binomial(n, p_0, size=num_sim)
    p_hats = binomial_draws / n
    z_vals = (p_hats - p_0) / se

    # Share of simulated samples landing in the two-tailed rejection region;
    # the mean of a boolean array is the fraction of True entries
    type_I_errors = np.mean(np.abs(z_vals) > z_critical)

    fig2, ax2 = plt.subplots(figsize=(10, 4))
    ax2.hist(z_vals, bins=50, density=True, alpha=0.6)
    ax2.axvline(-z_critical, linestyle='--', color='red')
    ax2.axvline(z_critical, linestyle='--', color='red')
    ax2.set_title("Histogram of Z-Statistics from Simulated Samples (Under H₀)")
    ax2.set_xlabel("Z-Statistic")
    ax2.set_ylabel("Density")
    st.pyplot(fig2)
    # Close the figure after rendering so repeated redraws do not leave
    # open figures registered in pyplot
    plt.close(fig2)

    st.write(f"Empirical Rejection Rate (Type I error, H₀ true): **{type_I_errors:.3f}**")