# -*- coding: utf-8 -*-
"""Contrast a Euclidean layout with a Poincaré-disk layout of a toy hierarchy.

Three synthetic clusters (majority, minority, rare subgroup) are sampled twice:
once with the tail groups packed close together ("Euclidean" panel) and once
with the tail groups pushed towards the unit-circle boundary ("Hyperbolic"
panel). Running the module as a script draws both panels side by side and
saves the figure to ``assets/bias_collapse.png``.
"""
from pathlib import Path

import numpy as np


def _polar_to_cartesian(r, theta):
    """Return the ``(x, y)`` coordinates of the polar point(s) ``(r, theta)``."""
    return r * np.cos(theta), r * np.sin(theta)


def generate_data(n_samples, center_r, center_theta, spread, label):
    """Sample a noisy cluster in polar coordinates and return it as Cartesian.

    Independent zero-mean normal noise with standard deviation ``spread`` is
    added to both the radius and the angle of the cluster center. Radii are
    clipped to ``[0, 0.99]`` so every point stays inside the unit disk.

    Args:
        n_samples: Number of points to draw.
        center_r: Radial coordinate of the cluster center.
        center_theta: Angular coordinate of the cluster center, in radians.
        spread: Standard deviation of the noise (used for both radius and
            angle).
        label: Arbitrary tag returned unchanged alongside the points.

    Returns:
        A tuple ``(points, label)`` where ``points`` is an array of shape
        ``(n_samples, 2)`` holding the ``x`` and ``y`` coordinates.

    NOTE(review): because the radial noise is zero-mean and negative radii are
    clipped to 0, a cluster with ``center_r == 0`` places roughly half of its
    samples exactly at the origin, and the rest in a wedge around
    ``center_theta`` rather than in an isotropic blob.
    """
    # Draw the radial noise first, then the angular noise, so the sequence of
    # draws from NumPy's global random state matches the original ordering.
    r_noise = np.random.normal(0, spread, n_samples)
    theta_noise = np.random.normal(0, spread, n_samples)

    r = np.clip(center_r + r_noise, 0, 0.99)  # Keep within the unit disk
    theta = center_theta + theta_noise

    x, y = _polar_to_cartesian(r, theta)
    return np.column_stack([x, y]), label


def main():
    """Generate the synthetic clusters, draw both panels and save the figure.

    Side effects: prints progress messages, creates the ``assets`` directory
    if it is missing, and writes ``assets/bias_collapse.png``.
    """
    # Plotting and geometry dependencies are imported here so that importing
    # this module (for example to reuse ``generate_data``) only needs NumPy.
    import matplotlib.pyplot as plt
    import matplotlib.patches as mpatches
    import geomstats.backend as gs  # type: ignore
    from geomstats.geometry.poincare_ball import PoincareBall

    print("Generating synthetic hierarchical data...")

    # 1. Define the hierarchy
    #    Majority: center of the space
    #    Minority: periphery
    #    Rare:     deep periphery (sub-group of Minority)
    majority_n = 500
    minority_n = 50
    rare_n = 10

    # Angle shared by the minority group and its rare sub-group.
    theta_minority = np.pi / 4  # 45 degrees

    # Radial position of the tail clusters in each layout. Kept in one place
    # so the sampled data, the geodesic endpoints and the annotation arrows
    # all refer to the same cluster centers.
    minority_r_euc, rare_r_euc = 0.5, 0.52
    minority_r_hyp, rare_r_hyp = 0.8, 0.95

    # --- Euclidean simulation ---
    # "Crowding" is simulated by placing the tail groups close together: the
    # rare group (r=0.52) sits inside the minority cluster (r=0.5).
    maj_euc, _ = generate_data(majority_n, 0.0, 0.0, 0.1, "Majority")
    min_euc, _ = generate_data(
        minority_n, minority_r_euc, theta_minority, 0.08, "Minority"
    )
    rare_euc, _ = generate_data(
        rare_n, rare_r_euc, theta_minority, 0.04, "Rare"
    )

    # --- Hyperbolic simulation ---
    # The same conceptual groups are placed on the Poincaré disk, with the
    # minority at r=0.8 and the rare group pushed out to r=0.95.
    maj_hyp, _ = generate_data(majority_n, 0.0, 0.0, 0.1, "Majority")
    min_hyp, _ = generate_data(
        minority_n, minority_r_hyp, theta_minority, 0.05, "Minority"
    )
    rare_hyp, _ = generate_data(
        rare_n, rare_r_hyp, theta_minority, 0.01, "Rare"
    )

    # --- Visualization ---
    fig, axes = plt.subplots(1, 2, figsize=(16, 8))

    # Plot 1: Euclidean view
    ax = axes[0]
    ax.set_title("Euclidean Space (Standard AI)\n'Representation Collapse'",
                 fontsize=14, fontweight='bold')

    # Dashed unit circle, for reference only.
    circle = mpatches.Circle((0, 0), 1, color='black', fill=False,
                             linestyle='--', alpha=0.3)
    ax.add_artist(circle)

    ax.scatter(maj_euc[:, 0], maj_euc[:, 1], c='gray', alpha=0.3,
               label='Majority (Head)', s=20)
    ax.scatter(min_euc[:, 0], min_euc[:, 1], c='blue', alpha=0.6,
               label='Minority (Tail)', s=40)
    ax.scatter(rare_euc[:, 0], rare_euc[:, 1], c='red', alpha=0.9,
               label='Rare Subgroup (Long Tail)', s=40)

    # Point the arrow at the actual (Cartesian) center of the crowded tail
    # cluster; the cluster is defined in polar coordinates, so its x/y
    # position is r*cos(theta), r*sin(theta), not (r, r).
    ax.annotate('Indistinguishable\nCluster',
                xy=_polar_to_cartesian(rare_r_euc, theta_minority),
                xytext=(0.2, 0.7),
                arrowprops=dict(facecolor='black', shrink=0.05),
                fontsize=12)

    ax.set_xlim(-1.1, 1.1)
    ax.set_ylim(-1.1, 1.1)
    ax.set_aspect('equal')
    ax.legend(loc='lower right')
    ax.grid(True, alpha=0.2)

    # Plot 2: Hyperbolic view
    ax = axes[1]
    ax.set_title("Hyperbolic Space (HyperView)\n'Hierarchical Expansion'",
                 fontsize=14, fontweight='bold')

    # Solid boundary of the Poincaré disk.
    circle = mpatches.Circle((0, 0), 1, color='black', fill=False,
                             linewidth=2)
    ax.add_artist(circle)

    ax.scatter(maj_hyp[:, 0], maj_hyp[:, 1], c='gray', alpha=0.3,
               label='Majority', s=20)
    ax.scatter(min_hyp[:, 0], min_hyp[:, 1], c='blue', alpha=0.6,
               label='Minority', s=40)
    ax.scatter(rare_hyp[:, 0], rare_hyp[:, 1], c='red', alpha=0.9,
               label='Rare Subgroup', s=40)

    # Hyperbolic (geodesic) distance between the two tail-cluster centers,
    # computed with geomstats on the 2-D Poincaré ball.
    manifold = PoincareBall(2)
    p_min = gs.array(list(_polar_to_cartesian(minority_r_hyp, theta_minority)))
    p_rare = gs.array(list(_polar_to_cartesian(rare_r_hyp, theta_minority)))
    # Coerce to a plain float so the ``:.2f`` format spec below is applied to
    # a scalar whether the backend hands back a scalar or a size-1 array.
    dist = float(manifold.metric.dist(p_min, p_rare))

    # Arrow targets the rare cluster's center, which lies inside the disk.
    ax.annotate(f'Hyperbolic Dist: {dist:.2f}\n(Distinct & Separable)',
                xy=_polar_to_cartesian(rare_r_hyp, theta_minority),
                xytext=(0.2, 0.8),
                arrowprops=dict(facecolor='black', shrink=0.05),
                fontsize=12)

    ax.set_xlim(-1.1, 1.1)
    ax.set_ylim(-1.1, 1.1)
    ax.set_aspect('equal')
    ax.legend(loc='lower right')
    ax.axis('off')  # Hide axes and grid for a cleaner Poincaré look

    plt.tight_layout()

    output_path = 'assets/bias_collapse.png'
    # savefig does not create missing directories, so make sure the target
    # folder exists before writing.
    Path(output_path).parent.mkdir(parents=True, exist_ok=True)
    plt.savefig(output_path, dpi=300)
    plt.close(fig)  # Release the figure once it has been written to disk
    print(f"Visualization saved to {output_path}")


if __name__ == "__main__":
    main()