Spaces:
Sleeping
Sleeping
| # proportion_inference_app.py | |
| # Streamlit ≥1.32 — Accessible, minimal color design | |
| import streamlit as st | |
| import pandas as pd | |
| import numpy as np | |
| from scipy.stats import norm | |
| import io | |
# ---------- Page Config ----------
# Browser-tab title/icon plus a centered layout with the sidebar collapsed.
_PAGE_SETTINGS = {
    "page_title": "Inference for Proportions",
    "page_icon": "📊",
    "layout": "centered",
    "initial_sidebar_state": "collapsed",
}
st.set_page_config(**_PAGE_SETTINGS)
# ---------- Accessible CSS - Compact for embedding ----------
# One stylesheet, injected as raw HTML. It tightens Streamlit's default
# spacing, defines the custom .result-box / .decision-* classes, and hides
# the menu, footer and header elements.
_EMBED_CSS = """
<style>
/* Maximize vertical space usage */
.block-container {
padding-top: 0.5rem !important;
padding-bottom: 0.5rem !important;
max-width: 100% !important;
}
/* Tighter element spacing */
.element-container { margin-bottom: 0.3rem !important; }
.stRadio > div { margin-bottom: 0 !important; }
.stSelectSlider { padding-top: 0 !important; padding-bottom: 0 !important; }
h2 { margin-top: 0 !important; margin-bottom: 0.3rem !important; font-size: 1.4rem !important; }
h3 { margin-top: 0.3rem !important; margin-bottom: 0.2rem !important; font-size: 1.1rem !important; }
p { margin-bottom: 0.3rem !important; }
.stNumberInput { margin-bottom: 0 !important; }
/* Compact info boxes */
.stAlert { padding: 0.4rem 0.7rem !important; margin: 0.2rem 0 !important; }
/* Clean result box - compact */
.result-box {
background: #f8f9fa;
border: 2px solid #dee2e6;
padding: 0.6rem;
border-radius: 8px;
text-align: center;
margin: 0.2rem 0;
}
.result-box .label {
font-size: 0.75rem;
color: #6c757d;
margin-bottom: 0.1rem;
}
.result-box .value {
font-size: 1.2rem;
font-weight: 600;
color: #212529;
}
/* Decision boxes - compact */
.decision-box {
padding: 0.6rem;
border-radius: 8px;
text-align: center;
margin: 0.3rem 0;
font-weight: 600;
font-size: 1rem;
}
.decision-reject {
background: #fff;
border: 3px solid #212529;
}
.decision-accept {
background: #fff;
border: 3px dashed #6c757d;
}
/* Compact dividers */
hr { margin: 0.5rem 0 !important; border: none; border-top: 1px solid #dee2e6; }
/* Compact metrics */
[data-testid="stMetricValue"] { font-size: 1.1rem !important; }
[data-testid="stMetricLabel"] { font-size: 0.75rem !important; }
/* Compact expander */
.streamlit-expanderHeader { padding: 0.3rem 0 !important; font-size: 0.9rem !important; }
/* Hide Streamlit branding for cleaner embed */
#MainMenu {visibility: hidden;}
footer {visibility: hidden;}
header {visibility: hidden;}
</style>
"""
st.markdown(_EMBED_CSS, unsafe_allow_html=True)
# ---------- Title ----------
st.markdown("## 📊 Inference for Proportions")

# ---------- INPUTS ----------
# Two side-by-side radios choose the study design and the kind of analysis;
# the rest of the script branches on the selected labels.
_INFERENCE_CHOICES = ["One-Sample Proportion", "Two-Sample Proportions (independent)"]
_ANALYSIS_CHOICES = ["Confidence Interval", "Hypothesis Test"]

col1, col2 = st.columns(2)
inf_type = col1.radio("Inference type:", _INFERENCE_CHOICES, key="inf_type")
analysis_type = col2.radio("Analysis:", _ANALYSIS_CHOICES, key="analysis_type")

st.markdown("---")
# ---------- Sample Data Inputs ----------
# Each sample can be described either by a success count or by a sample
# proportion; in both cases the script ends up with n, x and p̂ for the sample.
_INPUT_METHODS = ["Successes and sample size", "Sample proportion and sample size"]
_BY_COUNT = _INPUT_METHODS[0]


def _default_successes(sample_size):
    """Return the initial success count, capped at the sample size.

    The success-count widget uses ``max_value=sample_size``. A fixed default
    of 50 would exceed that maximum whenever the sample size is below 50,
    which makes ``st.number_input`` raise instead of rendering.
    """
    return min(50, int(sample_size))


def _group_inputs(container, index, subscript):
    """Render the inputs for one group of the two-sample design.

    Args:
        container: Streamlit column the widgets are drawn in.
        index: Group number (1 or 2); used in the subheader and widget keys.
        subscript: Unicode subscript appended to the symbols in the labels.

    Returns:
        Tuple ``(n, x, phat, input_method)`` for the group.
    """
    with container:
        st.subheader(f"Group {index}")
        method = st.radio("Input method:", _INPUT_METHODS, key=f"input_method{index}")
        size = st.number_input(f"Sample size n{subscript}", min_value=2, step=1,
                               value=100, key=f"n{index}")
        if method == _BY_COUNT:
            successes = st.number_input(f"Number of successes x{subscript}", min_value=0,
                                        max_value=int(size), step=1,
                                        value=_default_successes(size), key=f"x{index}")
            # size >= 2 is enforced by min_value, so the division is safe.
            proportion = successes / size
        else:
            proportion = st.number_input(f"Sample proportion p̂{subscript}", min_value=0.0,
                                         max_value=1.0, format="%.4f", value=0.5,
                                         key=f"phat{index}")
            # Nearest whole number of successes implied by the proportion.
            successes = round(proportion * size)
        st.info(f"p̂{subscript} = {proportion:.4f}")
    return size, successes, proportion, method


if inf_type == "One-Sample Proportion":
    input_method = st.radio(
        "Input method:",
        _INPUT_METHODS,
        key="input_method",
        horizontal=True
    )
    col1, col2 = st.columns(2)
    with col1:
        n = st.number_input("Sample size (n)", min_value=2, step=1, value=100, key="n")
    with col2:
        if input_method == _BY_COUNT:
            x = st.number_input("Number of successes (x)", min_value=0, max_value=int(n),
                                step=1, value=_default_successes(n), key="x")
            # n >= 2 is enforced by min_value, so the division is safe.
            phat = x / n
        else:
            phat = st.number_input("Sample proportion (p̂)", min_value=0.0, max_value=1.0,
                                   format="%.4f", value=0.5, key="phat")
            # Nearest whole number of successes implied by the proportion.
            x = round(phat * n)
    st.info(f"Sample proportion: **p̂ = {phat:.4f}**")
else:  # Two-Sample
    col1, col2 = st.columns(2)
    n1, x1, phat1, input_method1 = _group_inputs(col1, 1, "₁")
    n2, x2, phat2, input_method2 = _group_inputs(col2, 2, "₂")

st.markdown("---")
# ---------- CI / HT specific controls ----------
def _percent_label(level):
    """Format a confidence level such as 0.95 as the slider label "95%"."""
    return f"{level*100:.0f}%"


if analysis_type == "Confidence Interval":
    conf_level = st.select_slider(
        "Confidence level:",
        options=[0.90, 0.95, 0.99],
        value=0.95,
        format_func=_percent_label,
        key="conf_level",
    )
else:
    testing_one_sample = inf_type == "One-Sample Proportion"
    col1, col2 = st.columns(2)
    alpha = col1.select_slider(
        "Significance level (α):",
        options=[0.01, 0.05, 0.10],
        value=0.05,
        key="alpha",
    )
    if testing_one_sample:
        # Only the one-sample test has a null value to enter; for the
        # two-sample test the second column stays empty.
        p0 = col2.number_input(
            "Null proportion (p₀):",
            min_value=0.0001,
            max_value=0.9999,
            format="%.4f",
            value=0.5,
            key="p0",
        )
        alt = st.radio(
            "Alternative hypothesis (H₁):",
            ["p ≠ p₀ (Two-sided)", "p > p₀ (Right-sided)", "p < p₀ (Left-sided)"],
            horizontal=True,
            key="alt_one",
        )
    else:
        alt = st.radio(
            "Alternative hypothesis (H₁):",
            ["p₁ ≠ p₂ (Two-sided)", "p₁ > p₂ (Right-sided)", "p₁ < p₂ (Left-sided)"],
            horizontal=True,
            key="alt_two",
        )

# ---------- RUN ----------
run = st.button("▶ Run Analysis", type="primary", use_container_width=True)
# ---------- Helper function ----------
def result_box(label, value):
    """Build the HTML snippet for one labelled result tile.

    Args:
        label: Caption placed in the ``label`` div.
        value: Content placed in the ``value`` div.

    Returns:
        A single-line HTML string: a ``result-box`` div wrapping the two
        inner divs. Both arguments are inserted as-is, without escaping.
    """
    caption_div = f'<div class="label">{label}</div>'
    content_div = f'<div class="value">{value}</div>'
    return f'<div class="result-box">{caption_div}{content_div}</div>'
# ===== RESULTS =====
def _z_test_summary(z, alt, alpha):
    """Return ``(p_value, critical_value_text)`` for a z statistic.

    The tail is chosen from the alternative-hypothesis label: a label
    containing "≠" is two-sided, one containing ">" is right-tailed, and
    anything else is treated as left-tailed.

    Upper-tail probabilities use ``norm.sf`` rather than ``1 - norm.cdf``:
    for large |z| the subtraction rounds to exactly 0, whereas the survival
    function still returns the small tail probability.
    """
    if "≠" in alt:
        return 2 * norm.sf(abs(z)), f"±{norm.ppf(1 - alpha/2):.4f}"
    if ">" in alt:
        return norm.sf(z), f"{norm.ppf(1 - alpha):.4f}"
    return norm.cdf(z), f"{-norm.ppf(1 - alpha):.4f}"


def _show_interval(estimate_label, estimate, conf_level, lower, upper):
    """Render the point estimate and the confidence interval as two tiles."""
    cols = st.columns(2)
    with cols[0]:
        st.markdown(result_box(estimate_label, f"{estimate:.4f}"), unsafe_allow_html=True)
    with cols[1]:
        st.markdown(result_box(f"{conf_level*100:.0f}% Confidence Interval",
                               f"[{lower:.4f}, {upper:.4f}]"), unsafe_allow_html=True)


def _show_ci_metrics(se_label, se, crit, margin):
    """Render standard error, critical value and margin of error in a row."""
    col1, col2, col3 = st.columns(3)
    with col1:
        st.metric(se_label, f"{se:.4f}")
    with col2:
        st.metric("Critical Value (z)", f"±{crit:.4f}")
    with col3:
        st.metric("Margin of Error", f"{margin:.4f}")


def _show_test_outcome(z, p_val, alpha, reject):
    """Render the z statistic, the p-value and the decision banner."""
    cols = st.columns(2)
    with cols[0]:
        st.markdown(result_box("z-statistic", f"{z:.4f}"), unsafe_allow_html=True)
    with cols[1]:
        st.markdown(result_box("p-value", f"{p_val:.4g}"), unsafe_allow_html=True)
    # Decision with pattern, not color
    if reject:
        st.markdown(f'<div class="decision-box decision-reject">✗ REJECT H₀ at α = {alpha}</div>',
                    unsafe_allow_html=True)
    else:
        st.markdown(f'<div class="decision-box decision-accept">— Fail to reject H₀ at α = {alpha}</div>',
                    unsafe_allow_html=True)


# Stays None unless one of the analyses below completes; gates the download.
results = None

if run:
    st.markdown("---")

    # ===== ONE-SAMPLE =====
    if inf_type == "One-Sample Proportion" and n >= 2:
        if analysis_type == "Confidence Interval":
            # Interval built from the sample proportion's own standard error,
            # clipped to the valid proportion range [0, 1].
            se = np.sqrt(phat * (1 - phat) / n)
            crit = norm.ppf(1 - (1 - conf_level) / 2)
            margin = crit * se
            lower, upper = max(0, phat - margin), min(1, phat + margin)
            check1, check2 = n * phat >= 10, n * (1 - phat) >= 10

            _show_interval("Sample Proportion (p̂)", phat, conf_level, lower, upper)
            st.markdown("---")
            st.subheader("Calculation Details")
            _show_ci_metrics("Standard Error", se, crit, margin)
            st.markdown("**Normal Approximation Check:**")
            st.write(f"• np̂ = {n*phat:.1f} {'✓ ≥ 10' if check1 else '✗ < 10'}")
            st.write(f"• n(1−p̂) = {n*(1-phat):.1f} {'✓ ≥ 10' if check2 else '✗ < 10'}")
            results = {"Analysis": ["CI"], "n": [n], "p̂": [phat], "SE": [se],
                       "Conf": [conf_level], "Lower": [lower], "Upper": [upper]}
        else:  # Hypothesis Test
            # The standard error uses p₀; the p₀ input is bounded to
            # [0.0001, 0.9999], so it cannot be zero.
            se = np.sqrt(p0 * (1 - p0) / n)
            z = (phat - p0) / se
            p_val, crit_display = _z_test_summary(z, alt, alpha)
            reject = p_val <= alpha

            _show_test_outcome(z, p_val, alpha, reject)
            st.markdown("---")
            st.subheader("Calculation Details")
            st.write(f"**Hypotheses:** H₀: p = {p0:.4f} vs H₁: {alt}")
            st.write(f"**Sample proportion:** p̂ = {phat:.4f}")
            st.write(f"**Standard Error (under H₀):** {se:.4f}")
            st.write(f"**Critical value(s):** {crit_display}")
            results = {"Analysis": ["HT"], "n": [n], "p̂": [phat], "p₀": [p0],
                       "z": [z], "p-value": [p_val], "Reject": [reject]}

    # ===== TWO-SAMPLE =====
    elif inf_type.startswith("Two-Sample") and n1 >= 2 and n2 >= 2:
        diff = phat1 - phat2
        if analysis_type == "Confidence Interval":
            # Unpooled standard error: each group contributes its own variance.
            se = np.sqrt(phat1*(1-phat1)/n1 + phat2*(1-phat2)/n2)
            crit = norm.ppf(1 - (1 - conf_level) / 2)
            margin = crit * se
            lower, upper = diff - margin, diff + margin

            _show_interval("Difference (p̂₁ − p̂₂)", diff, conf_level, lower, upper)
            st.markdown("---")
            st.subheader("Calculation Details")
            st.write(f"**p̂₁ = {phat1:.4f}**, **p̂₂ = {phat2:.4f}**")
            _show_ci_metrics("SE (unpooled)", se, crit, margin)
            results = {"Analysis": ["CI-2"], "p̂₁": [phat1], "p̂₂": [phat2],
                       "Diff": [diff], "Lower": [lower], "Upper": [upper]}
        else:  # Hypothesis Test
            # Pooled standard error, as H₀ assumes a common proportion.
            p_pooled = (x1 + x2) / (n1 + n2)
            se = np.sqrt(p_pooled * (1 - p_pooled) * (1/n1 + 1/n2))
            if se == 0:
                # Pooled proportion is exactly 0 or 1: dividing by the zero
                # standard error would yield nan/inf and a meaningless decision.
                st.warning(
                    f"The pooled proportion is {p_pooled:.0f}, so the pooled standard error "
                    "is 0 and the z-statistic is undefined. The test cannot be run on these data."
                )
            else:
                z = diff / se
                p_val, crit_display = _z_test_summary(z, alt, alpha)
                reject = p_val <= alpha

                _show_test_outcome(z, p_val, alpha, reject)
                st.markdown("---")
                st.subheader("Calculation Details")
                st.write(f"**Hypotheses:** H₀: p₁ = p₂ vs H₁: {alt}")
                st.write(f"**p̂₁ = {phat1:.4f}**, **p̂₂ = {phat2:.4f}**, **Difference = {diff:.4f}**")
                st.write(f"**Pooled proportion:** p̂ = {p_pooled:.4f}")
                st.write(f"**Standard Error (pooled):** {se:.4f}")
                st.write(f"**Critical value(s):** {crit_display}")
                results = {"Analysis": ["HT-2"], "p̂₁": [phat1], "p̂₂": [phat2],
                           "z": [z], "p-value": [p_val], "Reject": [reject]}

    # Download
    # NOTE(review): clicking the download button presumably triggers a rerun in
    # which `run` is False, so the results would disappear — confirm, and
    # consider keeping them in st.session_state.
    if results is not None:
        df_out = pd.DataFrame(results)
        buff = io.BytesIO()
        with pd.ExcelWriter(buff, engine="xlsxwriter") as writer:
            df_out.to_excel(writer, index=False)
        st.download_button("📥 Download Results", data=buff.getvalue(),
                           file_name="proportion_inference.xlsx",
                           mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet")
# ---------- Formulas (collapsed) ----------
# Reference sheet shown in a collapsed expander. Blank lines separate each
# bold heading from the bullet lists: without them Markdown treats a heading
# that directly follows a list item as a continuation of that item.
with st.expander("📚 Formulas & Theory"):
    st.markdown(r"""
**One-Sample Proportion**

- Sample proportion: $\hat p = x/n$
- SE for CI: $\sqrt{\hat p(1-\hat p)/n}$
- SE for HT: $\sqrt{p_0(1-p_0)/n}$
- CI: $\hat p \pm z_{\alpha/2} \cdot SE$
- Test stat: $z = (\hat p - p_0)/SE$

**Two-Sample Proportions**

- Pooled: $\hat p = (x_1+x_2)/(n_1+n_2)$
- SE for CI: $\sqrt{\frac{\hat p_1(1-\hat p_1)}{n_1} + \frac{\hat p_2(1-\hat p_2)}{n_2}}$
- SE for HT: $\sqrt{\hat p(1-\hat p)(1/n_1 + 1/n_2)}$

**Conditions:** $np \geq 10$ and $n(1-p) \geq 10$
""")