Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
File size: 7,004 Bytes
f9b644c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 |
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats
from ideal_poly_volume_toolkit.geometry import ideal_poly_volume_via_delaunay
import warnings
# Blanket-suppress all warnings (e.g. scipy fit convergence chatter during the
# bootstrap). NOTE(review): this also hides genuine problems — consider a
# narrower filter (category/module) instead.
warnings.filterwarnings('ignore')
def generate_volumes(n_samples, seed):
    """Sample ideal-polyhedron volumes from random directions on the sphere.

    For each of up to *n_samples* attempts, a uniform random unit vector is
    stereographically projected from the north pole onto the complex plane,
    and the volume of the ideal triangle with vertices (0, 1, w) is computed.
    Directions too close to the pole, or whose projection nearly coincides
    with the fixed vertices 0 or 1, are rejected — so the returned array may
    hold fewer than *n_samples* entries.

    Parameters:
        n_samples: number of candidate directions to draw.
        seed: seed for the global NumPy RNG (reproducibility).

    Returns:
        1-D ``np.ndarray`` of volume values.
    """
    np.random.seed(seed)
    results = []
    for _ in range(n_samples):
        direction = np.random.randn(3)
        direction /= np.linalg.norm(direction)
        x, y, z = direction
        # Reject points near the projection pole: 1/(1-z) blows up there.
        if z > 0.999:
            continue
        w = complex(x / (1 - z), y / (1 - z))
        # Reject projections nearly coincident with the fixed vertices 0 and 1.
        if abs(w) < 0.01 or abs(w - 1) < 0.01:
            continue
        triangle = np.array([0 + 0j, 1 + 0j, w])
        results.append(
            ideal_poly_volume_via_delaunay(triangle, mode='fast', series_terms=96)
        )
    return np.array(results)
# --- Sample generation and maximum-likelihood beta fit -----------------------
print("Beta Distribution Fit Analysis")
print("=" * 70)

print("Generating 100,000 samples for precise parameter estimation...")
volumes = generate_volumes(100000, seed=42)
print(f"Actual samples: {len(volumes)}")

print("\nFitting beta distribution...")
# Location and scale are pinned (floc/fscale), so only the two shape
# parameters α and β are estimated by maximum likelihood.
alpha, beta_param, _, _ = stats.beta.fit(volumes, floc=0, fscale=1.02)
print(f"\nBeta parameters:")
print(f" α = {alpha:.6f}")
print(f" β = {beta_param:.6f}")
print(f" α - β = {alpha - beta_param:.6f}")
# --- Bootstrap confidence intervals ------------------------------------------
# Percentile bootstrap: resample the data with replacement and refit the beta
# shape parameters, collecting the fitted (α, β) and their difference.
print("\nBootstrap confidence intervals (1000 resamples)...")
n_bootstrap = 1000
alpha_samples = []
beta_samples = []
asymmetry_samples = []
n_failed = 0  # count of resamples whose fit raised, instead of hiding them
for i in range(n_bootstrap):
    if i % 100 == 0:
        print(f" Progress: {i}/{n_bootstrap}")
    # Resample with replacement
    resample = np.random.choice(volumes, size=len(volumes), replace=True)
    # Fit beta; a bare `except: pass` here previously swallowed *everything*
    # (including KeyboardInterrupt) silently. Keep the best-effort behavior
    # but catch only Exception and report how many fits were skipped.
    try:
        params_boot = stats.beta.fit(resample, floc=0, fscale=1.02)
    except Exception:
        n_failed += 1
        continue
    alpha_boot, beta_boot = params_boot[0], params_boot[1]
    alpha_samples.append(alpha_boot)
    beta_samples.append(beta_boot)
    asymmetry_samples.append(alpha_boot - beta_boot)
if n_failed:
    print(f" Warning: {n_failed} bootstrap fits failed and were skipped")
alpha_samples = np.array(alpha_samples)
beta_samples = np.array(beta_samples)
asymmetry_samples = np.array(asymmetry_samples)
# --- Interval estimates and hypothesis tests ---------------------------------
def _ci95(samples):
    # 95% percentile bootstrap interval (2.5th and 97.5th percentiles).
    return np.percentile(samples, 2.5), np.percentile(samples, 97.5)

alpha_lo, alpha_hi = _ci95(alpha_samples)
beta_lo, beta_hi = _ci95(beta_samples)
asym_lo, asym_hi = _ci95(asymmetry_samples)

print(f"\n95% Confidence Intervals:")
print(f" α: [{alpha_lo:.6f}, {alpha_hi:.6f}]")
print(f" β: [{beta_lo:.6f}, {beta_hi:.6f}]")
print(f" α - β: [{asym_lo:.6f}, {asym_hi:.6f}]")

print("\n\nHypothesis Tests:")
print("-" * 50)

# Normal-approximation z-tests, using the bootstrap spread as the standard
# error of the point estimate.
print("Test 1: H0: α = 1")
t_alpha = (alpha - 1) / np.std(alpha_samples)
p_alpha = 2 * (1 - stats.norm.cdf(abs(t_alpha)))
print(f" t-statistic: {t_alpha:.4f}")
print(f" p-value: {p_alpha:.4f}")
print(f" Conclusion: {'Reject H0' if p_alpha < 0.05 else 'Fail to reject H0'}")

print("\nTest 2: H0: β = 1")
t_beta = (beta_param - 1) / np.std(beta_samples)
p_beta = 2 * (1 - stats.norm.cdf(abs(t_beta)))
print(f" t-statistic: {t_beta:.4f}")
print(f" p-value: {p_beta:.4f}")
print(f" Conclusion: {'Reject H0' if p_beta < 0.05 else 'Fail to reject H0'}")

# Symmetry test via the bootstrap CI of α - β: symmetric iff CI covers 0.
print("\nTest 3: H0: α = β (symmetric distribution)")
contains_zero = asym_lo <= 0 <= asym_hi
print(f" 95% CI for α - β: [{asym_lo:.6f}, {asym_hi:.6f}]")
print(f" Contains 0? {'Yes' if contains_zero else 'No'}")
print(f" Conclusion: {'Fail to reject H0 (no asymmetry)' if contains_zero else 'Reject H0 (significant asymmetry)'}")
# --- Diagnostic plots ---------------------------------------------------------
fig, axes = plt.subplots(2, 2, figsize=(12, 10))
ax_hist, ax_qq, ax_resid, ax_boot = axes.ravel()

# Panel 1: data histogram against the fitted beta pdf and a uniform pdf.
ax_hist.hist(volumes, bins=100, density=True, alpha=0.6, color='blue', label='Data')
grid = np.linspace(0.001, 1.019, 1000)
ax_hist.plot(grid, stats.beta.pdf(grid, alpha, beta_param, 0, 1.02), 'r-', linewidth=2,
             label=f'Beta({alpha:.3f}, {beta_param:.3f})')
ax_hist.plot(grid, stats.uniform.pdf(grid, 0, 1.02), 'g--', linewidth=2, label='Uniform(0, 1.02)')
ax_hist.set_xlabel('Volume')
ax_hist.set_ylabel('Density')
ax_hist.set_title('Data vs Beta Fit vs Uniform')
ax_hist.legend()
ax_hist.set_xlim(0, 1.05)

# Panel 2: Q-Q plot of empirical quantiles versus the fitted beta quantiles.
probs = np.linspace(0.001, 0.999, 999)
emp_q = np.percentile(volumes, probs * 100)
fit_q = stats.beta.ppf(probs, alpha, beta_param, 0, 1.02)
ax_qq.plot(fit_q, emp_q, 'b.', alpha=0.5, markersize=2)
ax_qq.plot([0, 1.02], [0, 1.02], 'r--', linewidth=2)
ax_qq.set_xlabel('Beta Distribution Quantiles')
ax_qq.set_ylabel('Empirical Quantiles')
ax_qq.set_title('Q-Q Plot: Data vs Beta')

# Panel 3: quantile residuals (empirical minus fitted) around zero.
ax_resid.plot(fit_q, emp_q - fit_q, 'b.', alpha=0.5, markersize=2)
ax_resid.axhline(0, color='r', linestyle='--', linewidth=2)
ax_resid.set_xlabel('Beta Quantiles')
ax_resid.set_ylabel('Residuals')
ax_resid.set_title('Residual Plot')
ax_resid.grid(True, alpha=0.3)

# Panel 4: bootstrap distribution of the asymmetry α - β.
ax_boot.hist(asymmetry_samples, bins=50, density=True, alpha=0.7, color='purple')
ax_boot.axvline(0, color='r', linestyle='--', linewidth=2, label='α = β')
ax_boot.axvline(alpha - beta_param, color='g', linestyle='-', linewidth=2,
                label=f'Observed: {alpha - beta_param:.4f}')
ax_boot.set_xlabel('α - β')
ax_boot.set_ylabel('Density')
ax_boot.set_title('Bootstrap Distribution of Asymmetry')
ax_boot.legend()

plt.tight_layout()
plt.savefig('beta_fit_analysis.png', dpi=150)
print("\nSaved plots to beta_fit_analysis.png")
# --- Goodness-of-fit: Kolmogorov-Smirnov tests --------------------------------
print("\n\nAlternative Distribution Fits:")
print("-" * 50)
# NOTE: a dead `distributions` list (uniform/beta plus a `None` placeholder
# for kumaraswamy) sat here unused; only the two KS tests below ever ran, so
# the list has been removed.
print("\nKolmogorov-Smirnov Tests:")
# Pass the candidate CDFs to kstest by distribution name with frozen shape/
# loc/scale args — equivalent to the previous ad-hoc lambdas, but idiomatic.
ks_uniform = stats.kstest(volumes, 'uniform', args=(0, 1.02))
print(f" Uniform(0, 1.02): D={ks_uniform.statistic:.6f}, p={ks_uniform.pvalue:.6f}")
ks_beta = stats.kstest(volumes, 'beta', args=(alpha, beta_param, 0, 1.02))
print(f" Beta({alpha:.3f}, {beta_param:.3f}): D={ks_beta.statistic:.6f}, p={ks_beta.pvalue:.6f}")
# Final summary
print("\n" + "="*70)
print("SUMMARY:")
print(f" With {len(volumes)} samples, the fitted parameters are:")
print(f" α = {alpha:.4f} ± {np.std(alpha_samples):.4f}")
print(f" β = {beta_param:.4f} ± {np.std(beta_samples):.4f}")
print(f"\n The asymmetry α - β = {alpha - beta_param:.4f} is "
f"{'statistically significant' if not contains_zero else 'NOT statistically significant'}")
print(f"\n The distribution is {'very close to' if abs(alpha-1) < 0.1 and abs(beta_param-1) < 0.1 else 'significantly different from'} uniform")
plt.close() |