""" Auxiliary Evaluation System for Circle Packing =============================================== This module provides ADDITIONAL metrics and analysis beyond the ground truth evaluation. It is designed to be extensible and allow for future LLM-generated evaluation code. IMPORTANT: - Does NOT replace or modify the ground truth evaluation (evaluate.py) - Provides supplementary metrics that help guide evolution - Designed for easy extension with new metrics (manual or LLM-generated) Architecture: 1. MetricRegistry: Plugin system for registering metrics 2. AuxiliaryEvaluator: Orchestrates metric computation 3. Individual metric functions: Compute specific aspects 4. FeedbackGenerator: Generate actionable text feedback """ import numpy as np from typing import Dict, Any, List, Tuple, Callable, Optional from dataclasses import dataclass, field from pathlib import Path import json @dataclass class MetricResult: """Result of a single auxiliary metric computation.""" name: str value: float interpretation: str # "higher_better" or "lower_better" or "neutral" description: str details: Dict[str, Any] = field(default_factory=dict) @dataclass class AuxiliaryEvalConfig: """Configuration for auxiliary evaluation.""" enabled: bool = True enabled_metrics: List[str] = field(default_factory=lambda: [ "spatial_uniformity", "edge_utilization", "density_variance", "packing_efficiency", "radius_distribution", "gap_analysis", "geometric_quality" ]) generate_text_feedback: bool = True save_detailed_analysis: bool = True class MetricRegistry: """ Registry for auxiliary metrics. Supports plugin-style registration. This makes it easy to add new metrics (manually or LLM-generated). """ def __init__(self): self._metrics: Dict[str, Callable] = {} self._metric_metadata: Dict[str, Dict[str, str]] = {} def register( self, name: str, func: Callable, description: str, interpretation: str = "higher_better" ): """ Register a new metric function. Args: name: Unique metric identifier func: Function with signature (centers, radii) -> MetricResult description: Human-readable description interpretation: "higher_better", "lower_better", or "neutral" """ self._metrics[name] = func self._metric_metadata[name] = { "description": description, "interpretation": interpretation } print(f"[MetricRegistry] Registered metric: {name}") def get(self, name: str) -> Optional[Callable]: """Get a metric function by name.""" return self._metrics.get(name) def list_metrics(self) -> List[str]: """List all registered metric names.""" return list(self._metrics.keys()) def get_metadata(self, name: str) -> Dict[str, str]: """Get metadata for a metric.""" return self._metric_metadata.get(name, {}) # Global registry instance METRIC_REGISTRY = MetricRegistry() # ============================================================================ # MANUALLY DESIGNED AUXILIARY METRICS # ============================================================================ # These are hand-crafted metrics that provide useful signals for evolution. # Future versions will allow LLM to generate additional metrics. # ============================================================================ def compute_spatial_uniformity( centers: np.ndarray, radii: np.ndarray ) -> MetricResult: """ Measure spatial uniformity using Voronoi cell analysis. Higher score = more uniform distribution (desirable). """ try: from scipy.spatial import Voronoi # Compute Voronoi diagram vor = Voronoi(centers) # Compute "effective" cell sizes by measuring distance to nearest neighbors # (Voronoi cells can be infinite for boundary points) cell_sizes = [] for i in range(len(centers)): # Find distances to all other centers distances = np.linalg.norm(centers - centers[i], axis=1) distances = distances[distances > 0] # Exclude self if len(distances) > 0: # Use mean distance to 3 nearest neighbors as proxy for cell size k_nearest = min(3, len(distances)) avg_dist = np.mean(np.sort(distances)[:k_nearest]) cell_sizes.append(avg_dist) if len(cell_sizes) == 0: uniformity = 0.0 else: # Lower coefficient of variation = more uniform cv = np.std(cell_sizes) / (np.mean(cell_sizes) + 1e-9) uniformity = 1.0 / (1.0 + cv) # Transform to [0, 1], higher is better return MetricResult( name="spatial_uniformity", value=float(uniformity), interpretation="higher_better", description="Spatial distribution uniformity (Voronoi analysis)", details={ "cell_size_mean": float(np.mean(cell_sizes)) if cell_sizes else 0.0, "cell_size_std": float(np.std(cell_sizes)) if cell_sizes else 0.0, "coefficient_of_variation": float(cv) if cell_sizes else 0.0 } ) except Exception as e: return MetricResult( name="spatial_uniformity", value=0.0, interpretation="higher_better", description="Spatial distribution uniformity (failed to compute)", details={"error": str(e)} ) def compute_edge_utilization( centers: np.ndarray, radii: np.ndarray ) -> MetricResult: """ Measure how well the packing utilizes the boundary. Circles near edges/corners are often larger in optimal packings. """ boundary_threshold = 0.02 # Distance to be considered "touching" touching_edges = 0 touching_corners = 0 for i, (center, radius) in enumerate(zip(centers, radii)): x, y = center # Check which edges are touched touches_left = (x - radius) < boundary_threshold touches_right = (x + radius) > (1.0 - boundary_threshold) touches_bottom = (y - radius) < boundary_threshold touches_top = (y + radius) > (1.0 - boundary_threshold) # Count edge touches edge_count = sum([touches_left, touches_right, touches_bottom, touches_top]) if edge_count == 2: # Corner (touches two edges) touching_corners += 1 elif edge_count == 1: # Edge (touches one edge) touching_edges += 1 # Normalize: ideally want good corner and edge utilization corner_score = touching_corners / 4.0 # 4 corners max edge_score = touching_edges / float(len(centers)) # Fraction of circles on edges # Combined score (corners are more valuable) utilization = 0.6 * corner_score + 0.4 * edge_score return MetricResult( name="edge_utilization", value=float(utilization), interpretation="higher_better", description="Boundary and corner utilization", details={ "corners_touched": touching_corners, "edges_touched": touching_edges, "corner_score": float(corner_score), "edge_score": float(edge_score) } ) def compute_density_variance( centers: np.ndarray, radii: np.ndarray ) -> MetricResult: """ Measure density variance across a grid. Lower variance = more uniform density (desirable). """ grid_size = 10 grid = np.zeros((grid_size, grid_size)) # Compute density contribution in each grid cell for center, radius in zip(centers, radii): x, y = center # Find which grid cells this circle overlaps x_cells = np.arange(grid_size) y_cells = np.arange(grid_size) for i in x_cells: for j in y_cells: # Center of grid cell cell_x = (i + 0.5) / grid_size cell_y = (j + 0.5) / grid_size # Distance from circle center to cell center dist = np.sqrt((cell_x - x)**2 + (cell_y - y)**2) # Add contribution if circle overlaps cell (simplified) if dist < radius: grid[i, j] += np.pi * radius**2 # Area contribution # Compute variance (lower is better) variance = float(np.var(grid)) mean_density = float(np.mean(grid)) # Transform to score (higher is better) # Use inverse of coefficient of variation cv = np.sqrt(variance) / (mean_density + 1e-9) uniformity_score = 1.0 / (1.0 + cv) return MetricResult( name="density_variance", value=float(uniformity_score), interpretation="higher_better", description="Spatial density uniformity across grid", details={ "grid_size": grid_size, "variance": variance, "mean_density": mean_density, "cv": float(cv) } ) def compute_packing_efficiency( centers: np.ndarray, radii: np.ndarray ) -> MetricResult: """ Ratio of total circle area to unit square area. Theoretical upper bound for 26 circles is unknown, but this gives relative measure. """ total_area = float(np.sum(np.pi * radii**2)) square_area = 1.0 efficiency = total_area / square_area # Known best result is ~2.635 sum of radii # Approximate area for that would be pi * (2.635/26)^2 * 26 ≈ 0.839 # (assuming equal radii for rough estimate) estimated_best_efficiency = 0.84 # Rough estimate relative_efficiency = efficiency / estimated_best_efficiency return MetricResult( name="packing_efficiency", value=float(efficiency), interpretation="higher_better", description="Area utilization efficiency", details={ "total_area": total_area, "square_area": square_area, "efficiency": efficiency, "relative_to_estimated_best": float(relative_efficiency) } ) def compute_radius_distribution( centers: np.ndarray, radii: np.ndarray ) -> MetricResult: """ Analyze radius size distribution. Optimal packings often have specific radius patterns. """ radius_mean = float(np.mean(radii)) radius_std = float(np.std(radii)) radius_min = float(np.min(radii)) radius_max = float(np.max(radii)) radius_range = radius_max - radius_min # Count size categories small_radii = np.sum(radii < radius_mean - 0.5 * radius_std) medium_radii = np.sum(np.abs(radii - radius_mean) <= 0.5 * radius_std) large_radii = np.sum(radii > radius_mean + 0.5 * radius_std) # Diversity score (higher is often better for packings) # Use entropy-like measure sizes = [small_radii, medium_radii, large_radii] proportions = np.array(sizes) / len(radii) proportions = proportions[proportions > 0] # Remove zeros entropy = -np.sum(proportions * np.log(proportions + 1e-9)) diversity = entropy / np.log(3) # Normalize to [0, 1] return MetricResult( name="radius_distribution", value=float(diversity), interpretation="neutral", description="Radius size diversity", details={ "mean": radius_mean, "std": radius_std, "min": radius_min, "max": radius_max, "range": radius_range, "small_count": int(small_radii), "medium_count": int(medium_radii), "large_count": int(large_radii), "diversity_score": float(diversity) } ) def compute_gap_analysis( centers: np.ndarray, radii: np.ndarray ) -> MetricResult: """ Programmatic detection of unused space (gaps). Uses a sampling approach on a fine grid. """ sample_size = 50 # Grid resolution total_samples = sample_size * sample_size covered_samples = 0 # Sample points in unit square for i in range(sample_size): for j in range(sample_size): px = (i + 0.5) / sample_size py = (j + 0.5) / sample_size # Check if this point is inside any circle for center, radius in zip(centers, radii): dist = np.sqrt((px - center[0])**2 + (py - center[1])**2) if dist < radius: covered_samples += 1 break coverage = covered_samples / total_samples gap_ratio = 1.0 - coverage return MetricResult( name="gap_analysis", value=float(coverage), interpretation="higher_better", description="Area coverage (1 - gap ratio)", details={ "covered_samples": covered_samples, "total_samples": total_samples, "coverage": float(coverage), "gap_ratio": float(gap_ratio) } ) def compute_geometric_quality( centers: np.ndarray, radii: np.ndarray ) -> MetricResult: """ Analyze geometric quality using Delaunay triangulation. Well-packed configurations tend to have good triangulation quality. """ try: from scipy.spatial import Delaunay tri = Delaunay(centers) # Analyze triangle quality (aspect ratio) triangle_qualities = [] for simplex in tri.simplices: pts = centers[simplex] # Compute edge lengths edges = [ np.linalg.norm(pts[1] - pts[0]), np.linalg.norm(pts[2] - pts[1]), np.linalg.norm(pts[0] - pts[2]) ] # Triangle quality = min_edge / max_edge (1.0 = equilateral) if max(edges) > 0: quality = min(edges) / max(edges) triangle_qualities.append(quality) if len(triangle_qualities) > 0: avg_quality = float(np.mean(triangle_qualities)) else: avg_quality = 0.0 return MetricResult( name="geometric_quality", value=avg_quality, interpretation="higher_better", description="Delaunay triangulation quality", details={ "num_triangles": len(tri.simplices), "avg_triangle_quality": avg_quality, "min_quality": float(np.min(triangle_qualities)) if triangle_qualities else 0.0, "max_quality": float(np.max(triangle_qualities)) if triangle_qualities else 0.0 } ) except Exception as e: return MetricResult( name="geometric_quality", value=0.0, interpretation="higher_better", description="Delaunay triangulation quality (failed)", details={"error": str(e)} ) # ============================================================================ # REGISTER ALL METRICS # ============================================================================ METRIC_REGISTRY.register( "spatial_uniformity", compute_spatial_uniformity, "Spatial distribution uniformity using Voronoi analysis", "higher_better" ) METRIC_REGISTRY.register( "edge_utilization", compute_edge_utilization, "Boundary and corner utilization", "higher_better" ) METRIC_REGISTRY.register( "density_variance", compute_density_variance, "Spatial density uniformity across grid", "higher_better" ) METRIC_REGISTRY.register( "packing_efficiency", compute_packing_efficiency, "Area utilization efficiency", "higher_better" ) METRIC_REGISTRY.register( "radius_distribution", compute_radius_distribution, "Radius size diversity", "neutral" ) METRIC_REGISTRY.register( "gap_analysis", compute_gap_analysis, "Area coverage analysis", "higher_better" ) METRIC_REGISTRY.register( "geometric_quality", compute_geometric_quality, "Delaunay triangulation quality", "higher_better" ) # ============================================================================ # AUXILIARY EVALUATOR # ============================================================================ class AuxiliaryEvaluator: """ Main class for computing auxiliary metrics. Designed to be extensible for future LLM-generated metrics. """ def __init__(self, config: Optional[AuxiliaryEvalConfig] = None): self.config = config or AuxiliaryEvalConfig() self.registry = METRIC_REGISTRY def evaluate( self, centers: np.ndarray, radii: np.ndarray, primary_score: float ) -> Dict[str, Any]: """ Compute all enabled auxiliary metrics. Args: centers: Circle centers (n, 2) radii: Circle radii (n,) primary_score: Ground truth primary score (sum of radii) Returns: Dictionary with auxiliary metrics and generated feedback """ if not self.config.enabled: return {} results = {} metric_results: List[MetricResult] = [] # Compute all enabled metrics for metric_name in self.config.enabled_metrics: metric_func = self.registry.get(metric_name) if metric_func is None: print(f"[AuxiliaryEvaluator] Warning: Metric '{metric_name}' not found in registry") continue try: metric_result = metric_func(centers, radii) metric_results.append(metric_result) # Add to results results[metric_name] = metric_result.value results[f"{metric_name}_details"] = metric_result.details except Exception as e: print(f"[AuxiliaryEvaluator] Error computing '{metric_name}': {e}") results[metric_name] = 0.0 results[f"{metric_name}_details"] = {"error": str(e)} # Generate text feedback if self.config.generate_text_feedback: feedback = self._generate_feedback( metric_results, primary_score, centers, radii ) results["auxiliary_text_feedback"] = feedback return results def _generate_feedback( self, metric_results: List[MetricResult], primary_score: float, centers: np.ndarray, radii: np.ndarray ) -> str: """Generate human-readable feedback based on metric results.""" lines = [] lines.append("=" * 60) lines.append("AUXILIARY EVALUATION FEEDBACK") lines.append("=" * 60) lines.append(f"Primary Score (sum of radii): {primary_score:.4f}") lines.append("") lines.append("Auxiliary Metrics:") lines.append("-" * 60) # Organize metrics by performance good_metrics = [] poor_metrics = [] neutral_metrics = [] for result in metric_results: if result.interpretation == "higher_better": if result.value >= 0.7: good_metrics.append(result) elif result.value < 0.5: poor_metrics.append(result) else: neutral_metrics.append(result) else: neutral_metrics.append(result) # Report good metrics if good_metrics: lines.append("\n✅ Strengths:") for result in good_metrics: lines.append(f" • {result.description}: {result.value:.3f}") # Report areas for improvement if poor_metrics: lines.append("\n⚠️ Areas for Improvement:") for result in poor_metrics: lines.append(f" • {result.description}: {result.value:.3f}") # Add specific suggestions lines.append(f" → {self._get_suggestion(result)}") # Report neutral metrics if neutral_metrics: lines.append("\n📊 Other Metrics:") for result in neutral_metrics: lines.append(f" • {result.description}: {result.value:.3f}") # Add specific recommendations lines.append("\n" + "-" * 60) lines.append("💡 Actionable Recommendations:") recommendations = self._generate_recommendations(metric_results, centers, radii) for i, rec in enumerate(recommendations, 1): lines.append(f" {i}. {rec}") lines.append("=" * 60) return "\n".join(lines) def _get_suggestion(self, result: MetricResult) -> str: """Get specific suggestion based on metric result.""" suggestions = { "spatial_uniformity": "Try redistributing circles to reduce clustering", "edge_utilization": "Consider placing larger circles near boundaries and corners", "density_variance": "Balance circle density across different regions", "gap_analysis": "Identify and fill empty regions with additional circles or larger radii", "geometric_quality": "Improve triangle quality in Delaunay triangulation" } return suggestions.get(result.name, "Consider optimizing this aspect") def _generate_recommendations( self, metric_results: List[MetricResult], centers: np.ndarray, radii: np.ndarray ) -> List[str]: """Generate specific actionable recommendations.""" recommendations = [] # Find poorest performing metrics prioritized = sorted( [r for r in metric_results if r.interpretation == "higher_better"], key=lambda r: r.value ) for result in prioritized[:3]: # Top 3 areas to improve if result.name == "spatial_uniformity" and result.value < 0.6: recommendations.append( f"Spatial uniformity is low ({result.value:.2f}). " "Check for clustered regions and redistribute circles." ) elif result.name == "edge_utilization" and result.value < 0.5: details = result.details corners = details.get("corners_touched", 0) if corners < 4: recommendations.append( f"Only {corners}/4 corners are utilized. " "Place larger circles at unused corners." ) elif result.name == "gap_analysis" and result.value < 0.7: gap_ratio = result.details.get("gap_ratio", 0) recommendations.append( f"Detected {gap_ratio*100:.1f}% unused space. " "Consider increasing radii in sparse regions." ) if not recommendations: recommendations.append("Overall packing quality is good! Continue optimizing primary score.") return recommendations # ============================================================================ # CONVENIENCE FUNCTIONS # ============================================================================ def evaluate_auxiliary( centers: np.ndarray, radii: np.ndarray, primary_score: float, config: Optional[AuxiliaryEvalConfig] = None ) -> Dict[str, Any]: """ Convenience function to run auxiliary evaluation. Args: centers: Circle centers (n, 2) radii: Circle radii (n,) primary_score: Ground truth primary score config: Optional configuration Returns: Dictionary with auxiliary metrics """ evaluator = AuxiliaryEvaluator(config) return evaluator.evaluate(centers, radii, primary_score) def save_auxiliary_analysis( results: Dict[str, Any], output_path: str ): """Save detailed auxiliary analysis to JSON file.""" output_path = Path(output_path) output_path.parent.mkdir(parents=True, exist_ok=True) # Convert numpy types to native Python types for JSON serialization def convert_types(obj): if isinstance(obj, np.ndarray): return obj.tolist() elif isinstance(obj, np.integer): return int(obj) elif isinstance(obj, np.floating): return float(obj) elif isinstance(obj, dict): return {k: convert_types(v) for k, v in obj.items()} elif isinstance(obj, list): return [convert_types(item) for item in obj] return obj results_serializable = convert_types(results) with open(output_path, 'w') as f: json.dump(results_serializable, f, indent=2) print(f"[AuxiliaryEval] Detailed analysis saved to: {output_path}") # ============================================================================ # FUTURE EXTENSION INTERFACE # ============================================================================ class LLMGeneratedMetric: """ Interface for LLM-generated metrics (future feature). This allows the evaluation agent to propose new metrics dynamically. """ def __init__( self, name: str, code: str, description: str, interpretation: str = "higher_better" ): self.name = name self.code = code self.description = description self.interpretation = interpretation self._compiled_func = None def compile(self) -> bool: """ Compile and validate the LLM-generated code. Returns True if successful, False otherwise. """ try: # Create a safe execution environment namespace = { 'np': np, 'MetricResult': MetricResult, } # Execute the code to define the function exec(self.code, namespace) # Extract the function (assume it's named after the metric) if self.name in namespace: self._compiled_func = namespace[self.name] return True else: print(f"[LLMGeneratedMetric] Function '{self.name}' not found in generated code") return False except Exception as e: print(f"[LLMGeneratedMetric] Failed to compile: {e}") return False def evaluate(self, centers: np.ndarray, radii: np.ndarray) -> Optional[MetricResult]: """Execute the compiled metric function.""" if self._compiled_func is None: return None try: result = self._compiled_func(centers, radii) if not isinstance(result, MetricResult): # Try to wrap result if isinstance(result, (int, float)): result = MetricResult( name=self.name, value=float(result), interpretation=self.interpretation, description=self.description ) return result except Exception as e: print(f"[LLMGeneratedMetric] Error executing '{self.name}': {e}") return None def register_to_global(self) -> bool: """Register this metric to the global registry.""" if self._compiled_func is None: if not self.compile(): return False METRIC_REGISTRY.register( self.name, lambda c, r: self.evaluate(c, r), self.description, self.interpretation ) return True # Example of how to use LLMGeneratedMetric (for future): """ # LLM generates this code: llm_metric_code = ''' def corner_circle_size_metric(centers, radii): # Find circles in corners corner_circles = [] for i, (center, radius) in enumerate(zip(centers, radii)): x, y = center if (x < 0.1 or x > 0.9) and (y < 0.1 or y > 0.9): corner_circles.append(radius) if len(corner_circles) == 0: score = 0.0 else: score = sum(corner_circles) / len(corner_circles) return MetricResult( name="corner_circle_size", value=score, interpretation="higher_better", description="Average size of circles in corners" ) ''' # Create and register the metric llm_metric = LLMGeneratedMetric( name="corner_circle_size_metric", code=llm_metric_code, description="LLM-generated: Corner circle size analysis", interpretation="higher_better" ) if llm_metric.register_to_global(): print("Successfully registered LLM-generated metric!") """