# idealpolyhedra/examples/analysis/check_statistical_precision.py
# Last commit d7d27f0 (igriv): "Major reorganization and feature additions"
import numpy as np
from ideal_poly_volume_toolkit.geometry import ideal_poly_volume_via_delaunay
def compute_volume_statistics(n_samples, seed=42):
    """Sample random ideal tetrahedra and summarise their volumes.

    Each trial draws a Gaussian 3-vector, normalises it to a point on the
    unit sphere, stereographically projects that point from the north pole
    to a complex number ``w``, and passes the vertices ``0``, ``1``, ``w`` to
    ``ideal_poly_volume_via_delaunay``.
    NOTE(review): presumably the fourth ideal vertex is the point at
    infinity -- confirm against the toolkit.

    Trials are rejected (not retried) when the sphere point is too close to
    the projection pole (``z > 0.999``) or when ``w`` lies within 0.01 of the
    fixed vertices 0 or 1, so the number of volumes actually collected can
    be smaller than ``n_samples``.

    Args:
        n_samples: Number of trials to attempt.
        seed: Seed for NumPy's legacy global RNG. The global RNG state is
            restored on exit, so calling this function does not disturb the
            caller's random stream.

    Returns:
        dict with keys:
            'n'        -- number of accepted samples,
            'mean'     -- sample mean of the volumes,
            'std'      -- sample standard deviation (ddof=1),
            'se_mean'  -- standard error of the mean, std / sqrt(n),
            'se_std'   -- approximate standard error of the standard
                          deviation, std / sqrt(2 * (n - 1)),
            'volumes'  -- NumPy array of the accepted volumes.
        Statistics that are undefined for the sample size (mean with no
        samples; std and both standard errors with fewer than two samples)
        are returned as NaN.
    """
    # Seed the global RNG for reproducibility, but hand the caller's RNG
    # state back afterwards instead of leaving it silently reseeded.
    saved_state = np.random.get_state()
    np.random.seed(seed)
    try:
        volumes = []
        for _ in range(n_samples):
            # Uniform point on sphere
            vec = np.random.randn(3)
            vec = vec / np.linalg.norm(vec)
            x, y, z = vec
            if z > 0.999:  # Skip north pole: the projection divides by (1 - z)
                continue
            # Stereographic projection
            w = complex(x / (1 - z), y / (1 - z))
            # Skip configurations where w nearly coincides with vertex 0 or 1
            if abs(w) < 0.01 or abs(w - 1) < 0.01:
                continue
            vertices = np.array([0 + 0j, 1 + 0j, w])
            vol = ideal_poly_volume_via_delaunay(vertices, mode='fast', series_terms=96)
            volumes.append(vol)
    finally:
        np.random.set_state(saved_state)

    volumes = np.array(volumes)
    n = len(volumes)
    mean = np.mean(volumes) if n > 0 else np.nan
    if n > 1:
        std = np.std(volumes, ddof=1)  # Sample standard deviation
        se_mean = std / np.sqrt(n)  # Standard error of mean
        se_std = std / np.sqrt(2 * (n - 1))  # Standard error of std dev
    else:
        # ddof=1 statistics need at least two samples; report NaN explicitly
        # rather than triggering NumPy warnings and a division by zero.
        std = se_mean = se_std = np.nan
    return {
        'n': n,
        'mean': mean,
        'std': std,
        'se_mean': se_mean,
        'se_std': se_std,
        'volumes': volumes
    }
print("Statistical Precision Analysis for Random Ideal Tetrahedron Volumes")
print("="*70)

# Run the sampler at increasing sample counts and report each run as soon
# as it finishes; the per-size results are kept for the later sections.
sample_sizes = [1000, 5000, 10000, 25000, 50000]
results = {}
for n in sample_sizes:
    print(f"\nComputing {n} samples...")
    stats = results[n] = compute_volume_statistics(n)

    print(f" Actual samples: {stats['n']}")
    print(f" Mean: {stats['mean']:.6f} ± {stats['se_mean']:.6f}")
    print(f" Std: {stats['std']:.6f} ± {stats['se_std']:.6f}")

    # 1.96 standard errors on either side of the mean: the 95% interval
    ci_low = stats['mean'] - 1.96*stats['se_mean']
    ci_high = stats['mean'] + 1.96*stats['se_mean']
    print(f" 95% CI for mean: [{ci_low:.6f}, "
          f"{ci_high:.6f}]")

    # A decimal place counts as reliable while twice the standard error
    # stays below one unit of that place.
    mean_precision, std_precision = (
        -int(np.floor(np.log10(2 * stats[key])))
        for key in ('se_mean', 'se_std')
    )
    print(f" Reliable decimal places: mean={mean_precision}, std={std_precision}")
# Convergence analysis: list the estimates side by side so drift with
# growing sample size is visible at a glance.
print("\n" + "="*70)
print("Convergence Analysis:")
print("-"*70)

# One entry per sample size, in the same order as sample_sizes
means = [results[size]['mean'] for size in sample_sizes]
stds = [results[size]['std'] for size in sample_sizes]

print("\nMean convergence:")
for n, value in zip(sample_sizes, means):
    print(f" n={n:6d}: {value:.6f}")

print("\nStd convergence:")
for n, value in zip(sample_sizes, stds):
    print(f" n={n:6d}: {value:.6f}")
# Project how many samples each target precision would require
print("\n" + "="*70)
print("Samples needed for different precision levels:")
print("-"*70)

# The largest run stands in for the true distribution parameters
best_estimate = results[sample_sizes[-1]]
true_std = best_estimate['std']

# Mean: solve se = std / sqrt(n) for n, with se equal to half a unit of
# the last wanted decimal place.
for decimals in (2, 3, 4, 5):
    required_se = 0.5 * 10**(-decimals)
    ratio = true_std / required_se
    required_n = int(np.ceil(ratio**2))
    print(f" {decimals} decimal places in mean: {required_n:,} samples")

# Standard deviation: same target standard error, using the
# n = 2 * (std / (2 * se))**2 estimate.
print("\nFor standard deviation:")
for decimals in (2, 3, 4):
    required_se = 0.5 * 10**(-decimals)
    ratio = true_std / (2 * required_se)
    required_n = int(np.ceil(2 * ratio**2))
    print(f" {decimals} decimal places in std: {required_n:,} samples")
# Final recommendation: headline numbers taken from the largest run
print("\n" + "="*70)
print("SUMMARY:")
print(f" Standard deviation: ~{true_std:.4f}")
# CV = coefficient of variation, std relative to the mean
print(f" This means the distribution is quite broad (CV = {true_std/best_estimate['mean']:.2f})")
# 0.00005 is half a unit in the fourth decimal place, used as the target
# standard error. For the mean, se = std / sqrt(n) gives n = (std / 0.00005)**2;
# for the std, se ~ std / sqrt(2 n) gives n = 2 * (std / 0.0001)**2.
print("\nFor 4 significant digits:")
print(f" In mean: need ~{int(np.ceil((true_std / 0.00005)**2)):,} samples")
print(f" In std: need ~{int(np.ceil(2 * (true_std / 0.0001)**2)):,} samples")
print(f"\nCurrent precision with {sample_sizes[-1]} samples:")
# Show each estimate followed by its standard error in parentheses; the
# previous format ran the two numbers together with a stray ")".
print(f" Mean: {best_estimate['mean']:.4f} (±{best_estimate['se_mean']:.4f})")
print(f" Std: {best_estimate['std']:.4f} (±{best_estimate['se_std']:.4f})")