idealpolyhedra / examples /analyze_spanning_tree_distribution.py
igriv's picture
Add HuggingFace Spaces support
e0ef700
#!/usr/bin/env python3
"""
Analyze the distribution of spanning trees and identify what makes
some triangulations "vastly more forested" than others.
"""
import sys
from pathlib import Path
sys.path.insert(0, str(Path(__file__).parent.parent))
import json
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter
def analyze_distribution(data_file: str):
"""Analyze spanning tree distribution in detail."""
with open(data_file, 'r') as f:
data = json.load(f)
tris = data['raw_data']['triangulations']
print("="*70)
print("SPANNING TREE DISTRIBUTION ANALYSIS")
print("="*70)
print(f"\nTotal triangulations: {len(tris)}")
# Extract spanning tree counts
spanning_trees = np.array([t['n_spanning_trees'] for t in tris])
log_spanning_trees = np.log1p(spanning_trees) # log(1+x) to handle zeros
# Partition by realizability
standard_real = [t for t in tris if t['standard_realizable']]
standard_nonreal = [t for t in tris if not t['standard_realizable']]
strict_real = [t for t in tris if t['strict_realizable']]
strict_nonreal = [t for t in tris if not t['strict_realizable']]
st_standard_real = np.array([t['n_spanning_trees'] for t in standard_real])
st_standard_nonreal = np.array([t['n_spanning_trees'] for t in standard_nonreal])
st_strict_real = np.array([t['n_spanning_trees'] for t in strict_real])
st_strict_nonreal = np.array([t['n_spanning_trees'] for t in strict_nonreal])
# Log-transformed statistics
print("\n" + "="*70)
print("LOG-TRANSFORMED STATISTICS: log(1 + spanning_trees)")
print("="*70)
print("\n--- STANDARD REALIZABILITY ---")
print(f"Realizable:")
print(f" Log mean: {np.mean(np.log1p(st_standard_real)):.3f}")
print(f" Log median: {np.median(np.log1p(st_standard_real)):.3f}")
print(f" Log std: {np.std(np.log1p(st_standard_real)):.3f}")
print(f"\nNon-realizable:")
print(f" Log mean: {np.mean(np.log1p(st_standard_nonreal)):.3f}")
print(f" Log median: {np.median(np.log1p(st_standard_nonreal)):.3f}")
print(f" Log std: {np.std(np.log1p(st_standard_nonreal)):.3f}")
print("\n--- STRICT REALIZABILITY ---")
print(f"Strict realizable:")
print(f" Log mean: {np.mean(np.log1p(st_strict_real)):.3f}")
print(f" Log median: {np.median(np.log1p(st_strict_real)):.3f}")
print(f" Log std: {np.std(np.log1p(st_strict_real)):.3f}")
print(f"\nStrict non-realizable:")
print(f" Log mean: {np.mean(np.log1p(st_strict_nonreal)):.3f}")
print(f" Log median: {np.median(np.log1p(st_strict_nonreal)):.3f}")
print(f" Log std: {np.std(np.log1p(st_strict_nonreal)):.3f}")
# Identify extreme cases
print("\n" + "="*70)
print("EXTREME CASES: Most Forested Triangulations")
print("="*70)
# Sort by spanning trees
tris_sorted = sorted(tris, key=lambda t: t['n_spanning_trees'], reverse=True)
print("\nTop 20 most forested triangulations:")
print(f"{'Rank':<6} {'Index':<8} {'Spanning':<10} {'Vertices':<10} {'Edges':<8} {'Std Real':<10} {'Strict Real':<12}")
print("-"*70)
for rank, t in enumerate(tris_sorted[:20], 1):
print(f"{rank:<6} {t['index']:<8} {t['n_spanning_trees']:<10} "
f"{t['n_vertices']:<10} {t['n_edges']:<8} "
f"{'Yes' if t['standard_realizable'] else 'No':<10} "
f"{'Yes' if t['strict_realizable'] else 'No':<12}")
# Analyze bottom cases
print("\n" + "="*70)
print("EXTREME CASES: Least Forested Triangulations")
print("="*70)
# Count zeros
n_zero = sum(1 for t in tris if t['n_spanning_trees'] == 0)
print(f"\nTriangulations with ZERO spanning trees: {n_zero} ({100*n_zero/len(tris):.2f}%)")
if n_zero > 0:
zero_tris = [t for t in tris if t['n_spanning_trees'] == 0]
n_real = sum(1 for t in zero_tris if t['standard_realizable'])
n_strict = sum(1 for t in zero_tris if t['strict_realizable'])
print(f" Standard realizable: {n_real} ({100*n_real/n_zero:.1f}%)")
print(f" Strict realizable: {n_strict} ({100*n_strict/n_zero:.1f}%)")
print("\nBottom 20 least forested (non-zero):")
nonzero_tris = [t for t in tris if t['n_spanning_trees'] > 0]
tris_sorted_bottom = sorted(nonzero_tris, key=lambda t: t['n_spanning_trees'])
print(f"{'Rank':<6} {'Index':<8} {'Spanning':<10} {'Vertices':<10} {'Edges':<8} {'Std Real':<10} {'Strict Real':<12}")
print("-"*70)
for rank, t in enumerate(tris_sorted_bottom[:20], 1):
print(f"{rank:<6} {t['index']:<8} {t['n_spanning_trees']:<10} "
f"{t['n_vertices']:<10} {t['n_edges']:<8} "
f"{'Yes' if t['standard_realizable'] else 'No':<10} "
f"{'Yes' if t['strict_realizable'] else 'No':<12}")
# Analyze correlation with graph properties
print("\n" + "="*70)
print("CORRELATION WITH GRAPH PROPERTIES")
print("="*70)
vertices = np.array([t['n_vertices'] for t in tris])
edges = np.array([t['n_edges'] for t in tris])
# Compute correlations
corr_vertices = np.corrcoef(spanning_trees, vertices)[0, 1]
corr_edges = np.corrcoef(spanning_trees, edges)[0, 1]
corr_log_vertices = np.corrcoef(log_spanning_trees, vertices)[0, 1]
corr_log_edges = np.corrcoef(log_spanning_trees, edges)[0, 1]
print(f"\nPearson correlation (raw):")
print(f" Spanning trees vs vertices: {corr_vertices:.4f}")
print(f" Spanning trees vs edges: {corr_edges:.4f}")
print(f"\nPearson correlation (log-transformed):")
print(f" log(spanning trees) vs vertices: {corr_log_vertices:.4f}")
print(f" log(spanning trees) vs edges: {corr_log_edges:.4f}")
# Percentile analysis
print("\n" + "="*70)
print("PERCENTILE ANALYSIS")
print("="*70)
percentiles = [0, 1, 5, 10, 25, 50, 75, 90, 95, 99, 100]
values = np.percentile(spanning_trees, percentiles)
print(f"\n{'Percentile':<12} {'Value':<12} {'log(1+value)':<15}")
print("-"*40)
for p, v in zip(percentiles, values):
print(f"{p:<12} {int(v):<12} {np.log1p(v):<15.3f}")
# Create visualizations
create_visualizations(data, tris, spanning_trees, log_spanning_trees,
st_standard_real, st_standard_nonreal,
st_strict_real, st_strict_nonreal)
def create_visualizations(data, tris, spanning_trees, log_spanning_trees,
st_standard_real, st_standard_nonreal,
st_strict_real, st_strict_nonreal):
"""Create distribution plots."""
fig, axes = plt.subplots(2, 3, figsize=(18, 12))
# 1. Overall distribution (linear)
ax = axes[0, 0]
ax.hist(spanning_trees, bins=100, alpha=0.7, edgecolor='black')
ax.set_xlabel('Number of spanning trees')
ax.set_ylabel('Frequency')
ax.set_title('Overall Distribution (Linear Scale)')
ax.axvline(np.mean(spanning_trees), color='red', linestyle='--',
label=f'Mean: {np.mean(spanning_trees):.1f}')
ax.axvline(np.median(spanning_trees), color='blue', linestyle='--',
label=f'Median: {np.median(spanning_trees):.1f}')
ax.legend()
ax.grid(True, alpha=0.3)
# 2. Overall distribution (log)
ax = axes[0, 1]
ax.hist(log_spanning_trees, bins=100, alpha=0.7, edgecolor='black')
ax.set_xlabel('log(1 + spanning trees)')
ax.set_ylabel('Frequency')
ax.set_title('Overall Distribution (Log-Transformed)')
ax.axvline(np.mean(log_spanning_trees), color='red', linestyle='--',
label=f'Mean: {np.mean(log_spanning_trees):.2f}')
ax.axvline(np.median(log_spanning_trees), color='blue', linestyle='--',
label=f'Median: {np.median(log_spanning_trees):.2f}')
ax.legend()
ax.grid(True, alpha=0.3)
# 3. Standard realizability comparison (log)
ax = axes[0, 2]
ax.hist(np.log1p(st_standard_real), bins=50, alpha=0.5,
label=f'Realizable (n={len(st_standard_real)})', color='green', edgecolor='black')
ax.hist(np.log1p(st_standard_nonreal), bins=50, alpha=0.5,
label=f'Non-realizable (n={len(st_standard_nonreal)})', color='red', edgecolor='black')
ax.set_xlabel('log(1 + spanning trees)')
ax.set_ylabel('Frequency')
ax.set_title('Standard Realizability (Log Scale)')
ax.legend()
ax.grid(True, alpha=0.3)
# 4. Strict realizability comparison (log)
ax = axes[1, 0]
ax.hist(np.log1p(st_strict_real), bins=50, alpha=0.5,
label=f'Strict (n={len(st_strict_real)})', color='blue', edgecolor='black')
ax.hist(np.log1p(st_strict_nonreal), bins=50, alpha=0.5,
label=f'Non-strict (n={len(st_strict_nonreal)})', color='orange', edgecolor='black')
ax.set_xlabel('log(1 + spanning trees)')
ax.set_ylabel('Frequency')
ax.set_title('Strict Realizability (Log Scale)')
ax.legend()
ax.grid(True, alpha=0.3)
# 5. Scatter: vertices vs log(spanning trees)
ax = axes[1, 1]
vertices = np.array([t['n_vertices'] for t in tris])
ax.scatter(vertices, log_spanning_trees, alpha=0.3, s=10)
ax.set_xlabel('Number of vertices')
ax.set_ylabel('log(1 + spanning trees)')
ax.set_title('Vertices vs Log(Spanning Trees)')
ax.grid(True, alpha=0.3)
# 6. Scatter: edges vs log(spanning trees)
ax = axes[1, 2]
edges = np.array([t['n_edges'] for t in tris])
ax.scatter(edges, log_spanning_trees, alpha=0.3, s=10)
ax.set_xlabel('Number of edges')
ax.set_ylabel('log(1 + spanning trees)')
ax.set_title('Edges vs Log(Spanning Trees)')
ax.grid(True, alpha=0.3)
plt.tight_layout()
# Save figure
n = data['parameters']['n_vertices']
output_path = f'results/plots/spanning_tree_analysis_n{n}.png'
Path(output_path).parent.mkdir(parents=True, exist_ok=True)
plt.savefig(output_path, dpi=150, bbox_inches='tight')
print(f"\n{'='*70}")
print(f"Plots saved to: {output_path}")
print(f"{'='*70}")
plt.close()
if __name__ == '__main__':
import argparse
parser = argparse.ArgumentParser(description='Analyze spanning tree distribution')
parser.add_argument('--data', type=str,
default='results/spanning_trees_n10.json',
help='Path to spanning trees data JSON')
args = parser.parse_args()
analyze_distribution(args.data)