Spaces:

igriv
/

idealpolyhedra

Sleeping

App Files Files Community

idealpolyhedra / examples /analyze_spanning_tree_distribution.py

igriv

Add HuggingFace Spaces support

e0ef700 about 1 month ago

raw

history blame contribute delete

10.4 kB

	#!/usr/bin/env python3
	"""
	Analyze the distribution of spanning trees and identify what makes
	some triangulations "vastly more forested" than others.
	"""

	import sys
	from pathlib import Path
	sys.path.insert(0, str(Path(__file__).parent.parent))

	import json
	import numpy as np
	import matplotlib.pyplot as plt
	from collections import Counter

	def analyze_distribution(data_file: str):
	"""Analyze spanning tree distribution in detail."""

	with open(data_file, 'r') as f:
	data = json.load(f)

	tris = data['raw_data']['triangulations']

	print("="*70)
	print("SPANNING TREE DISTRIBUTION ANALYSIS")
	print("="*70)
	print(f"\nTotal triangulations: {len(tris)}")

	# Extract spanning tree counts
	spanning_trees = np.array([t['n_spanning_trees'] for t in tris])
	log_spanning_trees = np.log1p(spanning_trees) # log(1+x) to handle zeros

	# Partition by realizability
	standard_real = [t for t in tris if t['standard_realizable']]
	standard_nonreal = [t for t in tris if not t['standard_realizable']]
	strict_real = [t for t in tris if t['strict_realizable']]
	strict_nonreal = [t for t in tris if not t['strict_realizable']]

	st_standard_real = np.array([t['n_spanning_trees'] for t in standard_real])
	st_standard_nonreal = np.array([t['n_spanning_trees'] for t in standard_nonreal])
	st_strict_real = np.array([t['n_spanning_trees'] for t in strict_real])
	st_strict_nonreal = np.array([t['n_spanning_trees'] for t in strict_nonreal])

	# Log-transformed statistics
	print("\n" + "="*70)
	print("LOG-TRANSFORMED STATISTICS: log(1 + spanning_trees)")
	print("="*70)

	print("\n--- STANDARD REALIZABILITY ---")
	print(f"Realizable:")
	print(f" Log mean: {np.mean(np.log1p(st_standard_real)):.3f}")
	print(f" Log median: {np.median(np.log1p(st_standard_real)):.3f}")
	print(f" Log std: {np.std(np.log1p(st_standard_real)):.3f}")

	print(f"\nNon-realizable:")
	print(f" Log mean: {np.mean(np.log1p(st_standard_nonreal)):.3f}")
	print(f" Log median: {np.median(np.log1p(st_standard_nonreal)):.3f}")
	print(f" Log std: {np.std(np.log1p(st_standard_nonreal)):.3f}")

	print("\n--- STRICT REALIZABILITY ---")
	print(f"Strict realizable:")
	print(f" Log mean: {np.mean(np.log1p(st_strict_real)):.3f}")
	print(f" Log median: {np.median(np.log1p(st_strict_real)):.3f}")
	print(f" Log std: {np.std(np.log1p(st_strict_real)):.3f}")

	print(f"\nStrict non-realizable:")
	print(f" Log mean: {np.mean(np.log1p(st_strict_nonreal)):.3f}")
	print(f" Log median: {np.median(np.log1p(st_strict_nonreal)):.3f}")
	print(f" Log std: {np.std(np.log1p(st_strict_nonreal)):.3f}")

	# Identify extreme cases
	print("\n" + "="*70)
	print("EXTREME CASES: Most Forested Triangulations")
	print("="*70)

	# Sort by spanning trees
	tris_sorted = sorted(tris, key=lambda t: t['n_spanning_trees'], reverse=True)

	print("\nTop 20 most forested triangulations:")
	print(f"{'Rank':<6} {'Index':<8} {'Spanning':<10} {'Vertices':<10} {'Edges':<8} {'Std Real':<10} {'Strict Real':<12}")
	print("-"*70)

	for rank, t in enumerate(tris_sorted[:20], 1):
	print(f"{rank:<6} {t['index']:<8} {t['n_spanning_trees']:<10} "
	f"{t['n_vertices']:<10} {t['n_edges']:<8} "
	f"{'Yes' if t['standard_realizable'] else 'No':<10} "
	f"{'Yes' if t['strict_realizable'] else 'No':<12}")

	# Analyze bottom cases
	print("\n" + "="*70)
	print("EXTREME CASES: Least Forested Triangulations")
	print("="*70)

	# Count zeros
	n_zero = sum(1 for t in tris if t['n_spanning_trees'] == 0)
	print(f"\nTriangulations with ZERO spanning trees: {n_zero} ({100*n_zero/len(tris):.2f}%)")

	if n_zero > 0:
	zero_tris = [t for t in tris if t['n_spanning_trees'] == 0]
	n_real = sum(1 for t in zero_tris if t['standard_realizable'])
	n_strict = sum(1 for t in zero_tris if t['strict_realizable'])
	print(f" Standard realizable: {n_real} ({100*n_real/n_zero:.1f}%)")
	print(f" Strict realizable: {n_strict} ({100*n_strict/n_zero:.1f}%)")

	print("\nBottom 20 least forested (non-zero):")
	nonzero_tris = [t for t in tris if t['n_spanning_trees'] > 0]
	tris_sorted_bottom = sorted(nonzero_tris, key=lambda t: t['n_spanning_trees'])

	print(f"{'Rank':<6} {'Index':<8} {'Spanning':<10} {'Vertices':<10} {'Edges':<8} {'Std Real':<10} {'Strict Real':<12}")
	print("-"*70)

	for rank, t in enumerate(tris_sorted_bottom[:20], 1):
	print(f"{rank:<6} {t['index']:<8} {t['n_spanning_trees']:<10} "
	f"{t['n_vertices']:<10} {t['n_edges']:<8} "
	f"{'Yes' if t['standard_realizable'] else 'No':<10} "
	f"{'Yes' if t['strict_realizable'] else 'No':<12}")

	# Analyze correlation with graph properties
	print("\n" + "="*70)
	print("CORRELATION WITH GRAPH PROPERTIES")
	print("="*70)

	vertices = np.array([t['n_vertices'] for t in tris])
	edges = np.array([t['n_edges'] for t in tris])

	# Compute correlations
	corr_vertices = np.corrcoef(spanning_trees, vertices)[0, 1]
	corr_edges = np.corrcoef(spanning_trees, edges)[0, 1]
	corr_log_vertices = np.corrcoef(log_spanning_trees, vertices)[0, 1]
	corr_log_edges = np.corrcoef(log_spanning_trees, edges)[0, 1]

	print(f"\nPearson correlation (raw):")
	print(f" Spanning trees vs vertices: {corr_vertices:.4f}")
	print(f" Spanning trees vs edges: {corr_edges:.4f}")

	print(f"\nPearson correlation (log-transformed):")
	print(f" log(spanning trees) vs vertices: {corr_log_vertices:.4f}")
	print(f" log(spanning trees) vs edges: {corr_log_edges:.4f}")

	# Percentile analysis
	print("\n" + "="*70)
	print("PERCENTILE ANALYSIS")
	print("="*70)

	percentiles = [0, 1, 5, 10, 25, 50, 75, 90, 95, 99, 100]
	values = np.percentile(spanning_trees, percentiles)

	print(f"\n{'Percentile':<12} {'Value':<12} {'log(1+value)':<15}")
	print("-"*40)
	for p, v in zip(percentiles, values):
	print(f"{p:<12} {int(v):<12} {np.log1p(v):<15.3f}")

	# Create visualizations
	create_visualizations(data, tris, spanning_trees, log_spanning_trees,
	st_standard_real, st_standard_nonreal,
	st_strict_real, st_strict_nonreal)


	def create_visualizations(data, tris, spanning_trees, log_spanning_trees,
	st_standard_real, st_standard_nonreal,
	st_strict_real, st_strict_nonreal):
	"""Create distribution plots."""

	fig, axes = plt.subplots(2, 3, figsize=(18, 12))

	# 1. Overall distribution (linear)
	ax = axes[0, 0]
	ax.hist(spanning_trees, bins=100, alpha=0.7, edgecolor='black')
	ax.set_xlabel('Number of spanning trees')
	ax.set_ylabel('Frequency')
	ax.set_title('Overall Distribution (Linear Scale)')
	ax.axvline(np.mean(spanning_trees), color='red', linestyle='--',
	label=f'Mean: {np.mean(spanning_trees):.1f}')
	ax.axvline(np.median(spanning_trees), color='blue', linestyle='--',
	label=f'Median: {np.median(spanning_trees):.1f}')
	ax.legend()
	ax.grid(True, alpha=0.3)

	# 2. Overall distribution (log)
	ax = axes[0, 1]
	ax.hist(log_spanning_trees, bins=100, alpha=0.7, edgecolor='black')
	ax.set_xlabel('log(1 + spanning trees)')
	ax.set_ylabel('Frequency')
	ax.set_title('Overall Distribution (Log-Transformed)')
	ax.axvline(np.mean(log_spanning_trees), color='red', linestyle='--',
	label=f'Mean: {np.mean(log_spanning_trees):.2f}')
	ax.axvline(np.median(log_spanning_trees), color='blue', linestyle='--',
	label=f'Median: {np.median(log_spanning_trees):.2f}')
	ax.legend()
	ax.grid(True, alpha=0.3)

	# 3. Standard realizability comparison (log)
	ax = axes[0, 2]
	ax.hist(np.log1p(st_standard_real), bins=50, alpha=0.5,
	label=f'Realizable (n={len(st_standard_real)})', color='green', edgecolor='black')
	ax.hist(np.log1p(st_standard_nonreal), bins=50, alpha=0.5,
	label=f'Non-realizable (n={len(st_standard_nonreal)})', color='red', edgecolor='black')
	ax.set_xlabel('log(1 + spanning trees)')
	ax.set_ylabel('Frequency')
	ax.set_title('Standard Realizability (Log Scale)')
	ax.legend()
	ax.grid(True, alpha=0.3)

	# 4. Strict realizability comparison (log)
	ax = axes[1, 0]
	ax.hist(np.log1p(st_strict_real), bins=50, alpha=0.5,
	label=f'Strict (n={len(st_strict_real)})', color='blue', edgecolor='black')
	ax.hist(np.log1p(st_strict_nonreal), bins=50, alpha=0.5,
	label=f'Non-strict (n={len(st_strict_nonreal)})', color='orange', edgecolor='black')
	ax.set_xlabel('log(1 + spanning trees)')
	ax.set_ylabel('Frequency')
	ax.set_title('Strict Realizability (Log Scale)')
	ax.legend()
	ax.grid(True, alpha=0.3)

	# 5. Scatter: vertices vs log(spanning trees)
	ax = axes[1, 1]
	vertices = np.array([t['n_vertices'] for t in tris])
	ax.scatter(vertices, log_spanning_trees, alpha=0.3, s=10)
	ax.set_xlabel('Number of vertices')
	ax.set_ylabel('log(1 + spanning trees)')
	ax.set_title('Vertices vs Log(Spanning Trees)')
	ax.grid(True, alpha=0.3)

	# 6. Scatter: edges vs log(spanning trees)
	ax = axes[1, 2]
	edges = np.array([t['n_edges'] for t in tris])
	ax.scatter(edges, log_spanning_trees, alpha=0.3, s=10)
	ax.set_xlabel('Number of edges')
	ax.set_ylabel('log(1 + spanning trees)')
	ax.set_title('Edges vs Log(Spanning Trees)')
	ax.grid(True, alpha=0.3)

	plt.tight_layout()

	# Save figure
	n = data['parameters']['n_vertices']
	output_path = f'results/plots/spanning_tree_analysis_n{n}.png'
	Path(output_path).parent.mkdir(parents=True, exist_ok=True)
	plt.savefig(output_path, dpi=150, bbox_inches='tight')
	print(f"\n{'='*70}")
	print(f"Plots saved to: {output_path}")
	print(f"{'='*70}")

	plt.close()


	if __name__ == '__main__':
	import argparse

	parser = argparse.ArgumentParser(description='Analyze spanning tree distribution')
	parser.add_argument('--data', type=str,
	default='results/spanning_trees_n10.json',
	help='Path to spanning trees data JSON')

	args = parser.parse_args()

	analyze_distribution(args.data)