#!/usr/bin/env python3
"""
Analyze the distribution of spanning trees and identify what makes some
triangulations "vastly more forested" than others.

Reads a JSON results file produced by the spanning-tree enumeration step
(keys: ``raw_data.triangulations``, ``parameters.n_vertices``), prints
summary statistics and extreme cases, and saves distribution plots under
``results/plots/``.
"""
import sys
from pathlib import Path

# Make the project root importable when this script is run directly.
sys.path.insert(0, str(Path(__file__).parent.parent))

import json
import numpy as np
import matplotlib.pyplot as plt
from collections import Counter


def _print_log_stats(header: str, counts: np.ndarray) -> None:
    """Print log(1+x) mean/median/std for one partition of triangulations.

    Guards the empty-partition case (e.g. every triangulation realizable),
    where numpy would otherwise emit RuntimeWarnings and print ``nan``.
    """
    print(header)
    if counts.size == 0:
        print(" (no triangulations in this group)")
        return
    logs = np.log1p(counts)
    print(f" Log mean: {np.mean(logs):.3f}")
    print(f" Log median: {np.median(logs):.3f}")
    print(f" Log std: {np.std(logs):.3f}")


def _print_ranked_table(entries) -> None:
    """Print the ranked table shared by the top-20 / bottom-20 listings."""
    print(f"{'Rank':<6} {'Index':<8} {'Spanning':<10} {'Vertices':<10} {'Edges':<8} {'Std Real':<10} {'Strict Real':<12}")
    print("-"*70)
    for rank, t in enumerate(entries, 1):
        print(f"{rank:<6} {t['index']:<8} {t['n_spanning_trees']:<10} "
              f"{t['n_vertices']:<10} {t['n_edges']:<8} "
              f"{'Yes' if t['standard_realizable'] else 'No':<10} "
              f"{'Yes' if t['strict_realizable'] else 'No':<12}")


def analyze_distribution(data_file: str):
    """Analyze spanning tree distribution in detail.

    Parameters
    ----------
    data_file : str
        Path to the spanning-trees JSON file. Each triangulation record is
        expected to carry ``n_spanning_trees``, ``n_vertices``, ``n_edges``,
        ``index``, ``standard_realizable`` and ``strict_realizable``.
    """
    with open(data_file, 'r') as f:
        data = json.load(f)

    tris = data['raw_data']['triangulations']

    print("="*70)
    print("SPANNING TREE DISTRIBUTION ANALYSIS")
    print("="*70)
    print(f"\nTotal triangulations: {len(tris)}")

    # Extract spanning tree counts
    spanning_trees = np.array([t['n_spanning_trees'] for t in tris])
    log_spanning_trees = np.log1p(spanning_trees)  # log(1+x) to handle zeros

    # Partition by realizability (standard vs strict are independent flags)
    standard_real = [t for t in tris if t['standard_realizable']]
    standard_nonreal = [t for t in tris if not t['standard_realizable']]
    strict_real = [t for t in tris if t['strict_realizable']]
    strict_nonreal = [t for t in tris if not t['strict_realizable']]

    st_standard_real = np.array([t['n_spanning_trees'] for t in standard_real])
    st_standard_nonreal = np.array([t['n_spanning_trees'] for t in standard_nonreal])
    st_strict_real = np.array([t['n_spanning_trees'] for t in strict_real])
    st_strict_nonreal = np.array([t['n_spanning_trees'] for t in strict_nonreal])

    # Log-transformed statistics for each partition
    print("\n" + "="*70)
    print("LOG-TRANSFORMED STATISTICS: log(1 + spanning_trees)")
    print("="*70)

    print("\n--- STANDARD REALIZABILITY ---")
    _print_log_stats("Realizable:", st_standard_real)
    _print_log_stats("\nNon-realizable:", st_standard_nonreal)

    print("\n--- STRICT REALIZABILITY ---")
    _print_log_stats("Strict realizable:", st_strict_real)
    _print_log_stats("\nStrict non-realizable:", st_strict_nonreal)

    # Identify extreme cases
    print("\n" + "="*70)
    print("EXTREME CASES: Most Forested Triangulations")
    print("="*70)

    # Sort by spanning tree count, descending, for the "most forested" table
    tris_sorted = sorted(tris, key=lambda t: t['n_spanning_trees'], reverse=True)

    print("\nTop 20 most forested triangulations:")
    _print_ranked_table(tris_sorted[:20])

    # Analyze bottom cases
    print("\n" + "="*70)
    print("EXTREME CASES: Least Forested Triangulations")
    print("="*70)

    # Count zeros (disconnected graphs have no spanning tree at all)
    n_zero = sum(1 for t in tris if t['n_spanning_trees'] == 0)
    print(f"\nTriangulations with ZERO spanning trees: {n_zero} ({100*n_zero/len(tris):.2f}%)")

    if n_zero > 0:
        zero_tris = [t for t in tris if t['n_spanning_trees'] == 0]
        n_real = sum(1 for t in zero_tris if t['standard_realizable'])
        n_strict = sum(1 for t in zero_tris if t['strict_realizable'])
        print(f" Standard realizable: {n_real} ({100*n_real/n_zero:.1f}%)")
        print(f" Strict realizable: {n_strict} ({100*n_strict/n_zero:.1f}%)")

    print("\nBottom 20 least forested (non-zero):")
    nonzero_tris = [t for t in tris if t['n_spanning_trees'] > 0]
    tris_sorted_bottom = sorted(nonzero_tris, key=lambda t: t['n_spanning_trees'])
    _print_ranked_table(tris_sorted_bottom[:20])

    # Analyze correlation with graph properties
    print("\n" + "="*70)
    print("CORRELATION WITH GRAPH PROPERTIES")
    print("="*70)

    vertices = np.array([t['n_vertices'] for t in tris])
    edges = np.array([t['n_edges'] for t in tris])

    # Pearson correlations, both raw and against the log-transformed counts
    corr_vertices = np.corrcoef(spanning_trees, vertices)[0, 1]
    corr_edges = np.corrcoef(spanning_trees, edges)[0, 1]
    corr_log_vertices = np.corrcoef(log_spanning_trees, vertices)[0, 1]
    corr_log_edges = np.corrcoef(log_spanning_trees, edges)[0, 1]

    print(f"\nPearson correlation (raw):")
    print(f" Spanning trees vs vertices: {corr_vertices:.4f}")
    print(f" Spanning trees vs edges: {corr_edges:.4f}")
    print(f"\nPearson correlation (log-transformed):")
    print(f" log(spanning trees) vs vertices: {corr_log_vertices:.4f}")
    print(f" log(spanning trees) vs edges: {corr_log_edges:.4f}")

    # Percentile analysis
    print("\n" + "="*70)
    print("PERCENTILE ANALYSIS")
    print("="*70)

    percentiles = [0, 1, 5, 10, 25, 50, 75, 90, 95, 99, 100]
    values = np.percentile(spanning_trees, percentiles)

    print(f"\n{'Percentile':<12} {'Value':<12} {'log(1+value)':<15}")
    print("-"*40)
    for p, v in zip(percentiles, values):
        print(f"{p:<12} {int(v):<12} {np.log1p(v):<15.3f}")

    # Create visualizations
    create_visualizations(data, tris, spanning_trees, log_spanning_trees,
                          st_standard_real, st_standard_nonreal,
                          st_strict_real, st_strict_nonreal)


def create_visualizations(data, tris, spanning_trees, log_spanning_trees,
                          st_standard_real, st_standard_nonreal,
                          st_strict_real, st_strict_nonreal):
    """Create distribution plots.

    Produces a 2x3 grid: overall histograms (linear and log scale),
    realizability comparisons (standard and strict), and scatter plots of
    vertices/edges against log(1 + spanning trees). The figure is saved to
    ``results/plots/spanning_tree_analysis_n{n}.png``.
    """
    fig, axes = plt.subplots(2, 3, figsize=(18, 12))

    # 1. Overall distribution (linear)
    ax = axes[0, 0]
    ax.hist(spanning_trees, bins=100, alpha=0.7, edgecolor='black')
    ax.set_xlabel('Number of spanning trees')
    ax.set_ylabel('Frequency')
    ax.set_title('Overall Distribution (Linear Scale)')
    ax.axvline(np.mean(spanning_trees), color='red', linestyle='--',
               label=f'Mean: {np.mean(spanning_trees):.1f}')
    ax.axvline(np.median(spanning_trees), color='blue', linestyle='--',
               label=f'Median: {np.median(spanning_trees):.1f}')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 2. Overall distribution (log)
    ax = axes[0, 1]
    ax.hist(log_spanning_trees, bins=100, alpha=0.7, edgecolor='black')
    ax.set_xlabel('log(1 + spanning trees)')
    ax.set_ylabel('Frequency')
    ax.set_title('Overall Distribution (Log-Transformed)')
    ax.axvline(np.mean(log_spanning_trees), color='red', linestyle='--',
               label=f'Mean: {np.mean(log_spanning_trees):.2f}')
    ax.axvline(np.median(log_spanning_trees), color='blue', linestyle='--',
               label=f'Median: {np.median(log_spanning_trees):.2f}')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 3. Standard realizability comparison (log)
    ax = axes[0, 2]
    ax.hist(np.log1p(st_standard_real), bins=50, alpha=0.5,
            label=f'Realizable (n={len(st_standard_real)})',
            color='green', edgecolor='black')
    ax.hist(np.log1p(st_standard_nonreal), bins=50, alpha=0.5,
            label=f'Non-realizable (n={len(st_standard_nonreal)})',
            color='red', edgecolor='black')
    ax.set_xlabel('log(1 + spanning trees)')
    ax.set_ylabel('Frequency')
    ax.set_title('Standard Realizability (Log Scale)')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 4. Strict realizability comparison (log)
    ax = axes[1, 0]
    ax.hist(np.log1p(st_strict_real), bins=50, alpha=0.5,
            label=f'Strict (n={len(st_strict_real)})',
            color='blue', edgecolor='black')
    ax.hist(np.log1p(st_strict_nonreal), bins=50, alpha=0.5,
            label=f'Non-strict (n={len(st_strict_nonreal)})',
            color='orange', edgecolor='black')
    ax.set_xlabel('log(1 + spanning trees)')
    ax.set_ylabel('Frequency')
    ax.set_title('Strict Realizability (Log Scale)')
    ax.legend()
    ax.grid(True, alpha=0.3)

    # 5. Scatter: vertices vs log(spanning trees)
    ax = axes[1, 1]
    vertices = np.array([t['n_vertices'] for t in tris])
    ax.scatter(vertices, log_spanning_trees, alpha=0.3, s=10)
    ax.set_xlabel('Number of vertices')
    ax.set_ylabel('log(1 + spanning trees)')
    ax.set_title('Vertices vs Log(Spanning Trees)')
    ax.grid(True, alpha=0.3)

    # 6. Scatter: edges vs log(spanning trees)
    ax = axes[1, 2]
    edges = np.array([t['n_edges'] for t in tris])
    ax.scatter(edges, log_spanning_trees, alpha=0.3, s=10)
    ax.set_xlabel('Number of edges')
    ax.set_ylabel('log(1 + spanning trees)')
    ax.set_title('Edges vs Log(Spanning Trees)')
    ax.grid(True, alpha=0.3)

    plt.tight_layout()

    # Save figure, creating the output directory if needed
    n = data['parameters']['n_vertices']
    output_path = f'results/plots/spanning_tree_analysis_n{n}.png'
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(output_path, dpi=150, bbox_inches='tight')

    print(f"\n{'='*70}")
    print(f"Plots saved to: {output_path}")
    print(f"{'='*70}")

    plt.close()


if __name__ == '__main__':
    import argparse

    parser = argparse.ArgumentParser(description='Analyze spanning tree distribution')
    parser.add_argument('--data', type=str,
                        default='results/spanning_trees_n10.json',
                        help='Path to spanning trees data JSON')
    args = parser.parse_args()

    analyze_distribution(args.data)