Spaces:

iurbinah
/

hypothesis-testing

Sleeping

App Files Files Community

hypothesis-testing / app.py

iurbinah

Update app.py

a5141dd verified 12 months ago

raw

history blame contribute delete

5.59 kB

	import streamlit as st
	import numpy as np
	import matplotlib.pyplot as plt
	from scipy.stats import norm

	# Page-level configuration: browser tab title and wide layout
	st.set_page_config(
	    page_title="Proportion Testing Visualizer",
	    layout="wide",
	)

	# Sidebar inputs that define the hypothesis test
	sidebar = st.sidebar
	sidebar.header("Parameters")
	p_0 = sidebar.number_input(
	    "Null Hypothesis Proportion (p₀)",
	    min_value=0.0,
	    max_value=1.0,
	    value=0.50,
	)
	p_1 = sidebar.number_input(
	    "True Proportion (p₁)",
	    min_value=0.0,
	    max_value=1.0,
	    value=0.60,
	)
	n = sidebar.number_input("Sample Size (n)", min_value=1, value=30)
	alpha = sidebar.slider(
	    "Significance Level (α)",
	    min_value=0.01,
	    max_value=0.1,
	    step=0.01,
	    value=0.05,
	)

	# Display toggles
	show_alt = sidebar.checkbox("Show Alternative Distribution", value=False)
	simulate_many = sidebar.checkbox("Simulate Many Samples", value=False)

	# The redraw button is only rendered while the simulation toggle is on;
	# short-circuiting leaves run_simulation False otherwise.
	run_simulation = simulate_many and sidebar.button("Redraw Simulations")

	# Create the per-session history lists the first time the script runs.
	# The presence of 'sample_proportions' is used as the marker for all three.
	if 'sample_proportions' not in st.session_state:
	    for history_key in ('sample_proportions', 'z_scores', 'decisions'):
	        st.session_state[history_key] = []

	# Standard error of the sample proportion under H₀, and the two-tailed
	# critical value for the chosen significance level.
	se = np.sqrt(p_0 * (1 - p_0) / n)

	# p_0 of exactly 0 or 1 gives a standard error of zero, which would turn
	# every Z computation further down into a division by zero. Report the
	# problem and halt this script run instead of plotting inf/NaN values.
	if se == 0:
	    st.error(
	        "The null proportion p₀ must be strictly between 0 and 1; "
	        "otherwise the standard error is zero and the Z-statistic is undefined."
	    )
	    st.stop()

	z_critical = norm.ppf(1 - alpha / 2)

	# "Draw Sample" button: simulate one sample and record its test result
	if st.sidebar.button("Draw Sample"):
	    # Number of successes in n trials with success probability p_1
	    successes = np.random.binomial(n, p_1)
	    observed_prop = successes / n

	    # Standardise the observed proportion against the null value p_0
	    observed_z = (observed_prop - p_0) / se
	    if abs(observed_z) > z_critical:
	        verdict = 'Reject H₀'
	    else:
	        verdict = 'Fail to Reject H₀'

	    # Append to the per-session history
	    history = st.session_state
	    history.sample_proportions.append(observed_prop)
	    history.z_scores.append(observed_z)
	    history.decisions.append(verdict)

	#####################################
	# Main Plot with dynamic x-axis
	#####################################

	# Centre of the alternative curve on the Z scale (only needed when shown),
	# and the most recent sample's Z-score if any sample has been drawn.
	shift = (p_1 - p_0) / se if show_alt else None
	last_z = st.session_state.z_scores[-1] if st.session_state.z_scores else None

	# Start from [-4, 4] and widen so that the alternative curve and the
	# latest Z marker are never cut off.
	x_min, x_max = -4.0, 4.0
	if show_alt:
	    x_min = min(x_min, shift - 4)
	    x_max = max(x_max, shift + 4)
	if last_z is not None:
	    pad = 1.0  # a little padding around the marker
	    x_min = min(x_min, last_z - pad)
	    x_max = max(x_max, last_z + pad)

	x = np.linspace(x_min, x_max, 1000)

	# Null distribution (centered at 0, stdev=1)
	y = norm.pdf(x, loc=0, scale=1)

	fig, ax = plt.subplots(figsize=(10, 5))

	# Null curve with both tails beyond the critical values shaded
	ax.plot(x, y, label='Null Distribution (Z)', color='black')
	ax.fill_between(
	    x,
	    y,
	    where=(x <= -z_critical) | (x >= z_critical),
	    color='red',
	    alpha=0.3,
	    label='Rejection Regions',
	)
	ax.axvline(-z_critical, linestyle='--', color='red')
	ax.axvline(z_critical, linestyle='--', color='red')

	# Optional alternative distribution: unit-variance normal centred at shift
	if show_alt:
	    alt_y = norm.pdf(x, loc=shift, scale=1)
	    ax.plot(x, alt_y, label='Alternative Distribution (Z)', color='blue', linestyle='--')

	# Mark the latest sample's Z-score; red when it falls in a rejection region
	if last_z is not None:
	    color_choice = 'blue' if abs(last_z) <= z_critical else 'red'
	    ax.axvline(last_z, color=color_choice, linestyle='-', linewidth=2,
	               label=f'Z = {last_z:.2f}')
	    ax.annotate(
	        f"Z = {last_z:.2f}",
	        xy=(last_z, 0.05),  # arrow points here
	        xytext=(last_z + 0.4, 0.15),  # position of text box
	        ha='center',
	        arrowprops=dict(facecolor='black', arrowstyle='->'),
	        bbox=dict(boxstyle='round,pad=0.3', facecolor='white', alpha=0.8),
	    )

	ax.set_title("Z Sampling Distribution Under H₀")
	ax.set_xlabel("Z-Score")
	ax.set_ylabel("Probability Density")
	ax.legend()
	st.pyplot(fig)
	# The script body runs again on every interaction; close the figure so
	# pyplot does not keep one open figure per run.
	plt.close(fig)

	# Summarise the most recently drawn sample
	if st.session_state.z_scores:
	    latest_p_hat = st.session_state.sample_proportions[-1]
	    latest_z = st.session_state.z_scores[-1]
	    # Re-evaluate the decision against the current critical value. The
	    # stored decision was made with the alpha in effect when the sample was
	    # drawn, so it can disagree with the alpha printed below (and with the
	    # marker colour in the plot) once the slider is moved.
	    latest_decision = 'Reject H₀' if abs(latest_z) > z_critical else 'Fail to Reject H₀'

	    st.subheader("Latest Sample Analysis")
	    st.write(f"Sample Proportion (p̂): {latest_p_hat:.3f}")
	    st.write(f"Z-Statistic: {latest_z:.2f}")
	    # Two decimals match the slider's 0.01 step and hide float noise.
	    st.write(f"Decision: {latest_decision} at α = {alpha:.2f}")

	# Show formulas in four columns
	st.markdown("---")
	st.subheader("📐 Formulas Used")

	col1, col2, col3, col4 = st.columns(4)

	# One Markdown/LaTeX snippet per column, in display order.
	# Absolute values use \lvert ... \rvert: in LaTeX, "\|" is the double-bar
	# norm delimiter, not an absolute-value bar.
	formula_notes = (
	    "Standard Error (SE):\n"
	    r"$$ SE(\hat{p}) = \sqrt{\dfrac{p_0(1 - p_0)}{n}} $$",
	    "Z-Statistic:\n"
	    r"$$ Z = \dfrac{\hat{p} - p_0}{\sqrt{\dfrac{p_0(1 - p_0)}{n}}} $$",
	    "Critical Values (Two-Tailed):\n"
	    r"$$ \pm Z_{\alpha/2} $$",
	    "Decision Rule:\n"
	    r"Reject $ H_0 $ if $ \lvert Z \rvert > \lvert Z_{\alpha/2} \rvert $",
	)

	for column, note in zip((col1, col2, col3, col4), formula_notes):
	    with column:
	        st.markdown(note)

	# Simulate many samples with H₀ actually true to check the Type I error
	# rate. Runs only when the toggle is on and "Redraw Simulations" was clicked.
	if simulate_many and run_simulation:
	    st.subheader("Simulation of Many Samples")
	    num_sim = 1000

	    # Each draw is the number of successes in n trials with success
	    # probability p_0, i.e. one full sample of size n under H₀.
	    binomial_draws = np.random.binomial(n, p_0, size=num_sim)
	    p_hats = binomial_draws / n
	    z_vals = (p_hats - p_0) / se

	    # Fraction of simulated samples that land in the rejection region
	    type_i_error_rate = np.mean(np.abs(z_vals) > z_critical)

	    fig2, ax2 = plt.subplots(figsize=(10, 4))
	    ax2.hist(z_vals, bins=50, density=True, alpha=0.6)
	    ax2.axvline(-z_critical, linestyle='--', color='red')
	    ax2.axvline(z_critical, linestyle='--', color='red')
	    ax2.set_title("Histogram of Z-Statistics from Simulated Samples (Under H₀)")
	    ax2.set_xlabel("Z-Statistic")
	    ax2.set_ylabel("Density")
	    st.pyplot(fig2)
	    # Close the figure so repeated redraws do not accumulate open figures.
	    plt.close(fig2)

	    st.write(f"Empirical Rejection Rate (Type I error, H₀ true): {type_i_error_rate:.3f}")