Add files using upload-large-folder tool

14c9c2b verified about 2 months ago

29.2 kB

	"""
	Auxiliary Evaluation System for Circle Packing
	===============================================

	This module provides ADDITIONAL metrics and analysis beyond the ground truth evaluation.
	It is designed to be extensible and allow for future LLM-generated evaluation code.

	IMPORTANT:
	- Does NOT replace or modify the ground truth evaluation (evaluate.py)
	- Provides supplementary metrics that help guide evolution
	- Designed for easy extension with new metrics (manual or LLM-generated)

	Architecture:
	1. MetricRegistry: Plugin system for registering metrics
	2. AuxiliaryEvaluator: Orchestrates metric computation
	3. Individual metric functions: Compute specific aspects
	4. FeedbackGenerator: Generate actionable text feedback
	"""

	import numpy as np
	from typing import Dict, Any, List, Tuple, Callable, Optional
	from dataclasses import dataclass, field
	from pathlib import Path
	import json


	@dataclass
	class MetricResult:
	    """Outcome of evaluating one auxiliary metric on a packing."""

	    # Identifier of the metric that produced this result.
	    name: str
	    # Numeric score reported by the metric.
	    value: float
	    # One of "higher_better", "lower_better" or "neutral".
	    interpretation: str
	    # Short human-readable label for the metric.
	    description: str
	    # Extra diagnostics; every instance gets its own fresh dict by default.
	    details: Dict[str, Any] = field(default_factory=dict)


	def _default_enabled_metrics() -> List[str]:
	    """Return a fresh list naming the metrics that are enabled by default."""
	    return [
	        "spatial_uniformity",
	        "edge_utilization",
	        "density_variance",
	        "packing_efficiency",
	        "radius_distribution",
	        "gap_analysis",
	        "geometric_quality",
	    ]


	@dataclass
	class AuxiliaryEvalConfig:
	    """Switches controlling the auxiliary evaluation."""

	    # Master switch; AuxiliaryEvaluator.evaluate returns {} when this is False.
	    enabled: bool = True
	    # Registry names of the metrics to compute, in evaluation order.
	    enabled_metrics: List[str] = field(default_factory=_default_enabled_metrics)
	    # Whether the text feedback entry is added to the evaluation results.
	    generate_text_feedback: bool = True
	    # NOTE(review): not read anywhere in this module - presumably consumed
	    # by the caller that decides whether to save the analysis; confirm.
	    save_detailed_analysis: bool = True


	class MetricRegistry:
	    """
	    Name-indexed store of auxiliary metric callables.

	    Metrics are added plugin-style through register(), which keeps the
	    callable and its descriptive metadata in two parallel dicts keyed by
	    the metric name. Registering an existing name replaces the old entry.
	    """

	    def __init__(self):
	        # name -> metric callable
	        self._metrics: Dict[str, Callable] = {}
	        # name -> {"description": ..., "interpretation": ...}
	        self._metric_metadata: Dict[str, Dict[str, str]] = {}

	    def register(
	        self,
	        name: str,
	        func: Callable,
	        description: str,
	        interpretation: str = "higher_better"
	    ):
	        """
	        Add (or replace) a metric under the given name.

	        Args:
	            name: Unique metric identifier
	            func: Function with signature (centers, radii) -> MetricResult
	            description: Human-readable description
	            interpretation: "higher_better", "lower_better", or "neutral"
	        """
	        metadata = {
	            "description": description,
	            "interpretation": interpretation
	        }
	        self._metrics[name] = func
	        self._metric_metadata[name] = metadata
	        print(f"[MetricRegistry] Registered metric: {name}")

	    def get(self, name: str) -> Optional[Callable]:
	        """Return the callable registered as name, or None if unknown."""
	        try:
	            return self._metrics[name]
	        except KeyError:
	            return None

	    def list_metrics(self) -> List[str]:
	        """Return the registered metric names in registration order."""
	        return list(self._metrics)

	    def get_metadata(self, name: str) -> Dict[str, str]:
	        """Return the metadata dict stored for name, or {} if unknown."""
	        try:
	            return self._metric_metadata[name]
	        except KeyError:
	            return {}


	# Global registry instance: the single module-level MetricRegistry that the
	# built-in metrics register into, that AuxiliaryEvaluator reads from, and
	# that LLMGeneratedMetric.register_to_global() adds new metrics to.
	METRIC_REGISTRY = MetricRegistry()


	# ============================================================================
	# MANUALLY DESIGNED AUXILIARY METRICS
	# ============================================================================
	# These are hand-crafted metrics that provide useful signals for evolution.
	# Future versions will allow LLM to generate additional metrics.
	# ============================================================================

	def compute_spatial_uniformity(
	    centers: np.ndarray,
	    radii: np.ndarray
	) -> MetricResult:
	    """
	    Score how evenly the circle centers are spread out.

	    For each center, the mean distance to its (up to) three nearest
	    non-coincident neighbours serves as a proxy for local cell size. The
	    score is 1 / (1 + CV), where CV is the coefficient of variation of
	    those sizes, so equal spacing gives 1.0 and uneven spacing lowers it.

	    No Voronoi diagram is constructed: the nearest-neighbour proxy is all
	    the score depends on, and building one would only make the metric fail
	    for inputs the triangulation backend rejects (too few or degenerate
	    points).

	    Args:
	        centers: Circle centers, one (x, y) row per circle.
	        radii: Circle radii; unused here, accepted so the function matches
	            the common (centers, radii) metric signature.

	    Returns:
	        MetricResult named "spatial_uniformity". The value is 0.0 when no
	        center has a non-coincident neighbour. If anything raises, the
	        value is 0.0 and details holds the error message.
	    """
	    try:
	        points = np.asarray(centers, dtype=float)

	        # All pairwise center distances in one shot: pairwise[i, j] is the
	        # distance between center i and center j.
	        offsets = points[:, None, :] - points[None, :, :]
	        pairwise = np.linalg.norm(offsets, axis=-1)

	        cell_sizes = []
	        for row in pairwise:
	            # Zero distances are the point itself (and exact duplicates).
	            neighbours = np.sort(row[row > 0])
	            if neighbours.size:
	                cell_sizes.append(float(np.mean(neighbours[:3])))

	        if cell_sizes:
	            size_mean = float(np.mean(cell_sizes))
	            size_std = float(np.std(cell_sizes))
	            # Lower coefficient of variation = more uniform; the epsilon
	            # guards against a zero mean.
	            cv = size_std / (size_mean + 1e-9)
	            uniformity = 1.0 / (1.0 + cv)
	        else:
	            size_mean = size_std = cv = 0.0
	            uniformity = 0.0

	        return MetricResult(
	            name="spatial_uniformity",
	            value=float(uniformity),
	            interpretation="higher_better",
	            # Label kept verbatim so existing feedback text is unchanged.
	            description="Spatial distribution uniformity (Voronoi analysis)",
	            details={
	                "cell_size_mean": size_mean,
	                "cell_size_std": size_std,
	                "coefficient_of_variation": float(cv)
	            }
	        )
	    except Exception as e:
	        # Best-effort metric: report the failure instead of propagating it.
	        return MetricResult(
	            name="spatial_uniformity",
	            value=0.0,
	            interpretation="higher_better",
	            description="Spatial distribution uniformity (failed to compute)",
	            details={"error": str(e)}
	        )


	def compute_edge_utilization(
	    centers: np.ndarray,
	    radii: np.ndarray
	) -> MetricResult:
	    """
	    Measure how well the packing uses the boundary of the unit square.

	    A circle "touches" an edge when its rim comes within 0.02 of that edge
	    (or crosses it). Circles touching exactly two edges are counted as
	    corner circles, circles touching exactly one edge as edge circles;
	    circles touching three or four edges are counted as neither.

	    The score is 0.6 * corner_score + 0.4 * edge_score, where corner_score
	    is the corner-circle count over 4 (capped at 1.0) and edge_score is the
	    fraction of all circles that are edge circles.

	    Args:
	        centers: Circle centers, one (x, y) row per circle.
	        radii: Circle radii, parallel to centers.

	    Returns:
	        MetricResult named "edge_utilization"; the value is 0.0 for an
	        empty packing.
	    """
	    boundary_threshold = 0.02  # Distance to be considered "touching"

	    touching_edges = 0
	    touching_corners = 0

	    for center, radius in zip(centers, radii):
	        x, y = center

	        # Number of square edges this circle reaches.
	        edge_count = sum([
	            (x - radius) < boundary_threshold,          # left
	            (x + radius) > (1.0 - boundary_threshold),  # right
	            (y - radius) < boundary_threshold,          # bottom
	            (y + radius) > (1.0 - boundary_threshold),  # top
	        ])

	        if edge_count == 2:
	            touching_corners += 1
	        elif edge_count == 1:
	            touching_edges += 1

	    num_circles = len(centers)

	    # Several small circles can sit near the same corner, so cap the count
	    # to keep the score within [0, 1].
	    corner_score = min(touching_corners, 4) / 4.0
	    # An empty packing uses no edges; avoid dividing by zero.
	    edge_score = touching_edges / float(num_circles) if num_circles else 0.0

	    # Combined score (corners are weighted more heavily)
	    utilization = 0.6 * corner_score + 0.4 * edge_score

	    return MetricResult(
	        name="edge_utilization",
	        value=float(utilization),
	        interpretation="higher_better",
	        description="Boundary and corner utilization",
	        details={
	            "corners_touched": touching_corners,
	            "edges_touched": touching_edges,
	            "corner_score": float(corner_score),
	            "edge_score": float(edge_score)
	        }
	    )


	def compute_density_variance(
	    centers: np.ndarray,
	    radii: np.ndarray
	) -> MetricResult:
	    """
	    Measure how evenly circle area is distributed over a 10 x 10 grid.

	    Each circle adds its full area (pi * r^2) to every grid cell whose
	    center lies strictly inside the circle - a simplified overlap test.
	    The score is 1 / (1 + CV) of the resulting grid, so a lower spread
	    between cells gives a higher score.

	    Args:
	        centers: Circle centers, one (x, y) row per circle.
	        radii: Circle radii, parallel to centers.

	    Returns:
	        MetricResult named "density_variance" whose value is the
	        uniformity score (higher is better); details holds the raw grid
	        variance, mean and coefficient of variation.
	    """
	    grid_size = 10
	    grid = np.zeros((grid_size, grid_size))

	    # Coordinates of the grid cell centers. With "ij" indexing,
	    # cell_x[i, j] depends on i only and cell_y[i, j] on j only, so
	    # grid[i, j] is the cell at x-index i and y-index j.
	    cell_coords = (np.arange(grid_size) + 0.5) / grid_size
	    cell_x, cell_y = np.meshgrid(cell_coords, cell_coords, indexing="ij")

	    for center, radius in zip(centers, radii):
	        x, y = center
	        # Distance from the circle center to every cell center
	        dist = np.sqrt((cell_x - x) ** 2 + (cell_y - y) ** 2)
	        # Add the circle's area to each cell it covers (simplified)
	        grid[dist < radius] += np.pi * radius**2

	    variance = float(np.var(grid))
	    mean_density = float(np.mean(grid))

	    # Transform to a score (higher is better) via the coefficient of
	    # variation; the epsilon guards against an all-zero grid.
	    cv = np.sqrt(variance) / (mean_density + 1e-9)
	    uniformity_score = 1.0 / (1.0 + cv)

	    return MetricResult(
	        name="density_variance",
	        value=float(uniformity_score),
	        interpretation="higher_better",
	        description="Spatial density uniformity across grid",
	        details={
	            "grid_size": grid_size,
	            "variance": variance,
	            "mean_density": mean_density,
	            "cv": float(cv)
	        }
	    )


	def compute_packing_efficiency(
	    centers: np.ndarray,
	    radii: np.ndarray
	) -> MetricResult:
	    """
	    Report the fraction of the unit square's area taken up by circle area.

	    The summed circle area is divided by the square's area (1.0). The
	    details also relate it to a rough reference value of 0.84.

	    Args:
	        centers: Circle centers; not used by this metric.
	        radii: Circle radii.

	    Returns:
	        MetricResult named "packing_efficiency" whose value is the area
	        ratio.
	    """
	    # Rough reference: the known best sum of radii is ~2.635; with 26 equal
	    # circles that would be pi * (2.635/26)^2 * 26 ~= 0.839 of the square.
	    estimated_best_efficiency = 0.84
	    square_area = 1.0

	    circle_areas = np.pi * radii**2
	    total_area = float(np.sum(circle_areas))
	    efficiency = total_area / square_area

	    details = {
	        "total_area": total_area,
	        "square_area": square_area,
	        "efficiency": efficiency,
	        "relative_to_estimated_best": float(efficiency / estimated_best_efficiency)
	    }
	    return MetricResult(
	        name="packing_efficiency",
	        value=float(efficiency),
	        interpretation="higher_better",
	        description="Area utilization efficiency",
	        details=details
	    )


	def compute_radius_distribution(
	    centers: np.ndarray,
	    radii: np.ndarray
	) -> MetricResult:
	    """
	    Summarise the spread of circle sizes as a diversity score.

	    Radii are split into three bins around the mean - small (below
	    mean - 0.5 * std), medium (within 0.5 * std of the mean) and large
	    (above mean + 0.5 * std). The score is the Shannon entropy of the bin
	    proportions divided by log(3), so it is 0.0 when every radius falls
	    in one bin and 1.0 when the three bins are equally populated.

	    Args:
	        centers: Circle centers; not used by this metric.
	        radii: Circle radii.

	    Returns:
	        MetricResult named "radius_distribution". For empty input the
	        value and every statistic in details are zero.
	    """
	    values = np.asarray(radii, dtype=float)

	    if values.size == 0:
	        # np.min / np.max raise on empty arrays; report a neutral result.
	        return MetricResult(
	            name="radius_distribution",
	            value=0.0,
	            interpretation="neutral",
	            description="Radius size diversity",
	            details={
	                "mean": 0.0,
	                "std": 0.0,
	                "min": 0.0,
	                "max": 0.0,
	                "range": 0.0,
	                "small_count": 0,
	                "medium_count": 0,
	                "large_count": 0,
	                "diversity_score": 0.0
	            }
	        )

	    radius_mean = float(np.mean(values))
	    radius_std = float(np.std(values))
	    radius_min = float(np.min(values))
	    radius_max = float(np.max(values))
	    radius_range = radius_max - radius_min

	    # Count size categories
	    half_std = 0.5 * radius_std
	    small_radii = np.sum(values < radius_mean - half_std)
	    medium_radii = np.sum(np.abs(values - radius_mean) <= half_std)
	    large_radii = np.sum(values > radius_mean + half_std)

	    # Entropy of the bin proportions. Empty bins are dropped first, so the
	    # log needs no epsilon (one would bias equal radii slightly below 0).
	    proportions = np.array([small_radii, medium_radii, large_radii]) / len(values)
	    proportions = proportions[proportions > 0]
	    entropy = -np.sum(proportions * np.log(proportions))
	    # max() also turns a -0.0 into a plain 0.0.
	    diversity = max(0.0, float(entropy / np.log(3)))  # Normalize to [0, 1]

	    return MetricResult(
	        name="radius_distribution",
	        value=float(diversity),
	        interpretation="neutral",
	        description="Radius size diversity",
	        details={
	            "mean": radius_mean,
	            "std": radius_std,
	            "min": radius_min,
	            "max": radius_max,
	            "range": radius_range,
	            "small_count": int(small_radii),
	            "medium_count": int(medium_radii),
	            "large_count": int(large_radii),
	            "diversity_score": float(diversity)
	        }
	    )


	def compute_gap_analysis(
	    centers: np.ndarray,
	    radii: np.ndarray
	) -> MetricResult:
	    """
	    Estimate how much of the unit square is covered by circles.

	    The square is sampled at the cell centers of a 50 x 50 grid; a sample
	    counts as covered when it lies strictly inside at least one circle.
	    Coverage is the covered fraction and the gap ratio is its complement.

	    Args:
	        centers: Circle centers, one (x, y) row per circle.
	        radii: Circle radii, parallel to centers.

	    Returns:
	        MetricResult named "gap_analysis" whose value is the coverage
	        (higher is better); details also holds the sample counts and the
	        gap ratio.
	    """
	    sample_size = 50  # Grid resolution
	    total_samples = sample_size * sample_size

	    # Sample points at the grid cell centers of the unit square.
	    coords = (np.arange(sample_size) + 0.5) / sample_size
	    px, py = np.meshgrid(coords, coords, indexing="ij")

	    # covered[i, j] becomes True once any circle contains sample (i, j).
	    covered = np.zeros((sample_size, sample_size), dtype=bool)
	    for center, radius in zip(centers, radii):
	        dist = np.sqrt((px - center[0]) ** 2 + (py - center[1]) ** 2)
	        covered |= dist < radius

	    covered_samples = int(np.count_nonzero(covered))
	    coverage = covered_samples / total_samples
	    gap_ratio = 1.0 - coverage

	    return MetricResult(
	        name="gap_analysis",
	        value=float(coverage),
	        interpretation="higher_better",
	        description="Area coverage (1 - gap ratio)",
	        details={
	            "covered_samples": covered_samples,
	            "total_samples": total_samples,
	            "coverage": float(coverage),
	            "gap_ratio": float(gap_ratio)
	        }
	    )


	def compute_geometric_quality(
	    centers: np.ndarray,
	    radii: np.ndarray
	) -> MetricResult:
	    """
	    Rate the packing by the shape of its Delaunay triangles.

	    The centers are triangulated and every triangle is scored as
	    shortest edge / longest edge (1.0 for an equilateral triangle).
	    The metric value is the mean of those ratios.

	    Args:
	        centers: Circle centers, one (x, y) row per circle.
	        radii: Circle radii; not used by this metric.

	    Returns:
	        MetricResult named "geometric_quality". If anything raises
	        (including the triangulation itself), the value is 0.0 and
	        details holds the error message.
	    """
	    try:
	        from scipy.spatial import Delaunay

	        triangulation = Delaunay(centers)

	        # One shortest/longest edge ratio per non-degenerate triangle.
	        ratios = []
	        for vertex_ids in triangulation.simplices:
	            corners = centers[vertex_ids]
	            p0, p1, p2 = corners[0], corners[1], corners[2]
	            side_lengths = [
	                np.linalg.norm(p1 - p0),
	                np.linalg.norm(p2 - p1),
	                np.linalg.norm(p0 - p2)
	            ]
	            longest = max(side_lengths)
	            # Skip triangles whose three vertices coincide.
	            if longest > 0:
	                ratios.append(min(side_lengths) / longest)

	        avg_quality = float(np.mean(ratios)) if ratios else 0.0
	        worst_ratio = float(np.min(ratios)) if ratios else 0.0
	        best_ratio = float(np.max(ratios)) if ratios else 0.0

	        return MetricResult(
	            name="geometric_quality",
	            value=avg_quality,
	            interpretation="higher_better",
	            description="Delaunay triangulation quality",
	            details={
	                "num_triangles": len(triangulation.simplices),
	                "avg_triangle_quality": avg_quality,
	                "min_quality": worst_ratio,
	                "max_quality": best_ratio
	            }
	        )
	    except Exception as e:
	        # Best-effort metric: report the failure instead of propagating it.
	        return MetricResult(
	            name="geometric_quality",
	            value=0.0,
	            interpretation="higher_better",
	            description="Delaunay triangulation quality (failed)",
	            details={"error": str(e)}
	        )


	# ============================================================================
	# REGISTER ALL METRICS
	# ============================================================================

	# Built-in metrics as (name, function, description, interpretation) rows;
	# they are registered into the global registry in exactly this order.
	for _metric_spec in (
	    (
	        "spatial_uniformity",
	        compute_spatial_uniformity,
	        "Spatial distribution uniformity using Voronoi analysis",
	        "higher_better",
	    ),
	    (
	        "edge_utilization",
	        compute_edge_utilization,
	        "Boundary and corner utilization",
	        "higher_better",
	    ),
	    (
	        "density_variance",
	        compute_density_variance,
	        "Spatial density uniformity across grid",
	        "higher_better",
	    ),
	    (
	        "packing_efficiency",
	        compute_packing_efficiency,
	        "Area utilization efficiency",
	        "higher_better",
	    ),
	    (
	        "radius_distribution",
	        compute_radius_distribution,
	        "Radius size diversity",
	        "neutral",
	    ),
	    (
	        "gap_analysis",
	        compute_gap_analysis,
	        "Area coverage analysis",
	        "higher_better",
	    ),
	    (
	        "geometric_quality",
	        compute_geometric_quality,
	        "Delaunay triangulation quality",
	        "higher_better",
	    ),
	):
	    METRIC_REGISTRY.register(*_metric_spec)
	# Do not leave the loop variable behind in the module namespace.
	del _metric_spec


	# ============================================================================
	# AUXILIARY EVALUATOR
	# ============================================================================

	class AuxiliaryEvaluator:
	    """
	    Main class for computing auxiliary metrics.

	    Looks up every metric named in the configuration in the global
	    registry, runs it on a packing, and optionally turns the collected
	    results into a plain-text feedback report.
	    """

	    def __init__(self, config: Optional[AuxiliaryEvalConfig] = None):
	        """Use the given configuration, or the defaults when it is omitted."""
	        self.config = config or AuxiliaryEvalConfig()
	        self.registry = METRIC_REGISTRY

	    def evaluate(
	        self,
	        centers: np.ndarray,
	        radii: np.ndarray,
	        primary_score: float
	    ) -> Dict[str, Any]:
	        """
	        Compute all enabled auxiliary metrics.

	        A metric that raises, or that returns something other than a
	        MetricResult, is reported as 0.0 with an "error" entry in its
	        details and is left out of the text feedback.

	        Args:
	            centers: Circle centers (n, 2)
	            radii: Circle radii (n,)
	            primary_score: Ground truth primary score (sum of radii)

	        Returns:
	            Dictionary mapping each metric name to its value and
	            "<name>_details" to its details, plus
	            "auxiliary_text_feedback" when text feedback is enabled.
	            Empty when auxiliary evaluation is disabled.
	        """
	        if not self.config.enabled:
	            return {}

	        results: Dict[str, Any] = {}
	        metric_results: List[MetricResult] = []

	        for metric_name in self.config.enabled_metrics:
	            metric_func = self.registry.get(metric_name)
	            if metric_func is None:
	                print(f"[AuxiliaryEvaluator] Warning: Metric '{metric_name}' not found in registry")
	                continue

	            try:
	                metric_result = metric_func(centers, radii)
	                # Registered callables (e.g. LLM-generated ones) may return
	                # None or another type; validate before the result is
	                # recorded so a bad value can never reach the feedback
	                # generator.
	                if not isinstance(metric_result, MetricResult):
	                    raise TypeError(
	                        f"expected MetricResult, got {type(metric_result).__name__}"
	                    )
	            except Exception as e:
	                print(f"[AuxiliaryEvaluator] Error computing '{metric_name}': {e}")
	                results[metric_name] = 0.0
	                results[f"{metric_name}_details"] = {"error": str(e)}
	                continue

	            metric_results.append(metric_result)
	            results[metric_name] = metric_result.value
	            results[f"{metric_name}_details"] = metric_result.details

	        if self.config.generate_text_feedback:
	            results["auxiliary_text_feedback"] = self._generate_feedback(
	                metric_results,
	                primary_score,
	                centers,
	                radii
	            )

	        return results

	    def _generate_feedback(
	        self,
	        metric_results: List[MetricResult],
	        primary_score: float,
	        centers: np.ndarray,
	        radii: np.ndarray
	    ) -> str:
	        """
	        Generate human-readable feedback based on metric results.

	        "higher_better" metrics with a value of at least 0.7 are listed as
	        strengths and those below 0.5 as areas for improvement; everything
	        else (including all other interpretations) is listed under other
	        metrics. A numbered list of recommendations closes the report.
	        """
	        lines = []
	        lines.append("=" * 60)
	        lines.append("AUXILIARY EVALUATION FEEDBACK")
	        lines.append("=" * 60)
	        lines.append(f"Primary Score (sum of radii): {primary_score:.4f}")
	        lines.append("")
	        lines.append("Auxiliary Metrics:")
	        lines.append("-" * 60)

	        # Organize metrics by performance
	        good_metrics = []
	        poor_metrics = []
	        neutral_metrics = []

	        for result in metric_results:
	            if result.interpretation == "higher_better":
	                if result.value >= 0.7:
	                    good_metrics.append(result)
	                elif result.value < 0.5:
	                    poor_metrics.append(result)
	                else:
	                    neutral_metrics.append(result)
	            else:
	                neutral_metrics.append(result)

	        # Report good metrics
	        if good_metrics:
	            lines.append("\n✅ Strengths:")
	            for result in good_metrics:
	                lines.append(f" • {result.description}: {result.value:.3f}")

	        # Report areas for improvement, each with a specific suggestion
	        if poor_metrics:
	            lines.append("\n⚠️ Areas for Improvement:")
	            for result in poor_metrics:
	                lines.append(f" • {result.description}: {result.value:.3f}")
	                lines.append(f" → {self._get_suggestion(result)}")

	        # Report neutral metrics
	        if neutral_metrics:
	            lines.append("\n📊 Other Metrics:")
	            for result in neutral_metrics:
	                lines.append(f" • {result.description}: {result.value:.3f}")

	        # Add specific recommendations
	        lines.append("\n" + "-" * 60)
	        lines.append("💡 Actionable Recommendations:")
	        recommendations = self._generate_recommendations(metric_results, centers, radii)
	        for i, rec in enumerate(recommendations, 1):
	            lines.append(f" {i}. {rec}")

	        lines.append("=" * 60)

	        return "\n".join(lines)

	    def _get_suggestion(self, result: MetricResult) -> str:
	        """Return the canned suggestion for a metric, keyed by its name."""
	        suggestions = {
	            "spatial_uniformity": "Try redistributing circles to reduce clustering",
	            "edge_utilization": "Consider placing larger circles near boundaries and corners",
	            "density_variance": "Balance circle density across different regions",
	            "gap_analysis": "Identify and fill empty regions with additional circles or larger radii",
	            "geometric_quality": "Improve triangle quality in Delaunay triangulation"
	        }
	        return suggestions.get(result.name, "Consider optimizing this aspect")

	    def _generate_recommendations(
	        self,
	        metric_results: List[MetricResult],
	        centers: np.ndarray,
	        radii: np.ndarray
	    ) -> List[str]:
	        """
	        Generate specific actionable recommendations.

	        Only the three lowest-valued "higher_better" metrics are examined,
	        and only spatial_uniformity, edge_utilization and gap_analysis have
	        dedicated advice. When none of them triggers, a single generic
	        message is returned instead.
	        """
	        recommendations = []

	        # Lowest-valued "higher_better" metrics first
	        prioritized = sorted(
	            [r for r in metric_results if r.interpretation == "higher_better"],
	            key=lambda r: r.value
	        )

	        for result in prioritized[:3]:  # Top 3 areas to improve
	            if result.name == "spatial_uniformity" and result.value < 0.6:
	                recommendations.append(
	                    f"Spatial uniformity is low ({result.value:.2f}). "
	                    "Check for clustered regions and redistribute circles."
	                )
	            elif result.name == "edge_utilization" and result.value < 0.5:
	                corners = result.details.get("corners_touched", 0)
	                if corners < 4:
	                    recommendations.append(
	                        f"Only {corners}/4 corners are utilized. "
	                        "Place larger circles at unused corners."
	                    )
	            elif result.name == "gap_analysis" and result.value < 0.7:
	                gap_ratio = result.details.get("gap_ratio", 0)
	                recommendations.append(
	                    f"Detected {gap_ratio*100:.1f}% unused space. "
	                    "Consider increasing radii in sparse regions."
	                )

	        if not recommendations:
	            recommendations.append("Overall packing quality is good! Continue optimizing primary score.")

	        return recommendations


	# ============================================================================
	# CONVENIENCE FUNCTIONS
	# ============================================================================

	def evaluate_auxiliary(
	    centers: np.ndarray,
	    radii: np.ndarray,
	    primary_score: float,
	    config: Optional[AuxiliaryEvalConfig] = None
	) -> Dict[str, Any]:
	    """
	    Run the auxiliary evaluation in a single call.

	    Builds a one-off AuxiliaryEvaluator from the given configuration and
	    returns whatever its evaluate() method produces.

	    Args:
	        centers: Circle centers (n, 2)
	        radii: Circle radii (n,)
	        primary_score: Ground truth primary score
	        config: Optional configuration; the evaluator falls back to its
	            defaults when this is None

	    Returns:
	        Dictionary with auxiliary metrics
	    """
	    return AuxiliaryEvaluator(config).evaluate(centers, radii, primary_score)


	def save_auxiliary_analysis(
	    results: Dict[str, Any],
	    output_path: str
	):
	    """
	    Save detailed auxiliary analysis to a JSON file.

	    Missing parent directories are created. NumPy arrays and NumPy
	    integer, floating and boolean scalars are converted to native Python
	    values first, recursing through dicts, lists and tuples (tuples are
	    written as JSON arrays).

	    Args:
	        results: Analysis results to serialize.
	        output_path: Destination file path; an existing file is
	            overwritten.
	    """
	    destination = Path(output_path)
	    destination.parent.mkdir(parents=True, exist_ok=True)

	    def convert_types(obj):
	        """Recursively replace NumPy values with JSON-serializable ones."""
	        if isinstance(obj, np.ndarray):
	            return obj.tolist()
	        if isinstance(obj, np.bool_):
	            return bool(obj)
	        if isinstance(obj, np.integer):
	            return int(obj)
	        if isinstance(obj, np.floating):
	            return float(obj)
	        if isinstance(obj, dict):
	            return {k: convert_types(v) for k, v in obj.items()}
	        # json writes tuples as arrays itself, but would not convert the
	        # NumPy scalars inside them, so recurse into tuples as well.
	        if isinstance(obj, (list, tuple)):
	            return [convert_types(item) for item in obj]
	        return obj

	    results_serializable = convert_types(results)

	    with open(destination, 'w', encoding="utf-8") as f:
	        json.dump(results_serializable, f, indent=2)

	    print(f"[AuxiliaryEval] Detailed analysis saved to: {destination}")


	# ============================================================================
	# FUTURE EXTENSION INTERFACE
	# ============================================================================

	class LLMGeneratedMetric:
	    """
	    Interface for LLM-generated metrics (future feature).

	    Wraps a piece of source code that is expected to define a function
	    named after the metric, taking (centers, radii). The code is executed
	    to obtain that function, which can then be evaluated directly or
	    registered in the global metric registry.
	    """

	    def __init__(
	        self,
	        name: str,
	        code: str,
	        description: str,
	        interpretation: str = "higher_better"
	    ):
	        """
	        Store the metric definition; nothing is executed yet.

	        Args:
	            name: Metric name; also the name of the function the code must
	                define.
	            code: Python source defining that function.
	            description: Human-readable description.
	            interpretation: "higher_better", "lower_better", or "neutral".
	        """
	        self.name = name
	        self.code = code
	        self.description = description
	        self.interpretation = interpretation
	        self._compiled_func = None

	    def compile(self) -> bool:
	        """
	        Execute the generated code and pick up the metric function.

	        Returns True if the code ran and defined a callable named after
	        the metric, False otherwise (the reason is printed).
	        """
	        try:
	            # SECURITY: this is NOT a sandbox. exec() adds the full
	            # builtins to the namespace, so the generated code can import
	            # modules, touch the filesystem, etc. Only run code from a
	            # trusted source, or isolate it (e.g. in a separate process).
	            namespace = {
	                'np': np,
	                'MetricResult': MetricResult,
	            }

	            # Execute the code to define the function
	            exec(self.code, namespace)

	            # Extract the function (assume it's named after the metric)
	            if self.name not in namespace:
	                print(f"[LLMGeneratedMetric] Function '{self.name}' not found in generated code")
	                return False

	            candidate = namespace[self.name]
	            if not callable(candidate):
	                print(f"[LLMGeneratedMetric] '{self.name}' in generated code is not callable")
	                return False

	            self._compiled_func = candidate
	            return True
	        except Exception as e:
	            print(f"[LLMGeneratedMetric] Failed to compile: {e}")
	            return False

	    def evaluate(self, centers: np.ndarray, radii: np.ndarray) -> Optional[MetricResult]:
	        """
	        Execute the compiled metric function.

	        A plain number (including a NumPy integer or floating scalar) is
	        wrapped in a MetricResult using this metric's name, description
	        and interpretation.

	        Returns:
	            The MetricResult, or None when the metric has not been
	            compiled, raises, or returns an unsupported type.
	        """
	        if self._compiled_func is None:
	            return None

	        try:
	            result = self._compiled_func(centers, radii)
	            if isinstance(result, MetricResult):
	                return result

	            # Try to wrap a bare numeric result
	            if isinstance(result, (int, float, np.integer, np.floating)):
	                return MetricResult(
	                    name=self.name,
	                    value=float(result),
	                    interpretation=self.interpretation,
	                    description=self.description
	                )

	            # Anything else would break callers expecting a MetricResult.
	            print(
	                f"[LLMGeneratedMetric] '{self.name}' returned unsupported type "
	                f"{type(result).__name__}"
	            )
	            return None
	        except Exception as e:
	            print(f"[LLMGeneratedMetric] Error executing '{self.name}': {e}")
	            return None

	    def register_to_global(self) -> bool:
	        """
	        Register this metric to the global registry.

	        Compiles the code first if that has not happened yet. Returns
	        False when compilation fails, True once the metric is registered.
	        """
	        if self._compiled_func is None:
	            if not self.compile():
	                return False

	        # Register evaluate() rather than the raw function so failures and
	        # bare numeric results are handled the same way as direct calls.
	        METRIC_REGISTRY.register(
	            self.name,
	            lambda c, r: self.evaluate(c, r),
	            self.description,
	            self.interpretation
	        )
	        return True


	# Example of how to use LLMGeneratedMetric (for future):
	"""
	# LLM generates this code:
	llm_metric_code = '''
	def corner_circle_size_metric(centers, radii):
	    # Find circles in corners
	    corner_circles = []
	    for i, (center, radius) in enumerate(zip(centers, radii)):
	        x, y = center
	        if (x < 0.1 or x > 0.9) and (y < 0.1 or y > 0.9):
	            corner_circles.append(radius)

	    if len(corner_circles) == 0:
	        score = 0.0
	    else:
	        score = sum(corner_circles) / len(corner_circles)

	    return MetricResult(
	        name="corner_circle_size",
	        value=score,
	        interpretation="higher_better",
	        description="Average size of circles in corners"
	    )
	'''

	# Create and register the metric
	llm_metric = LLMGeneratedMetric(
	    name="corner_circle_size_metric",
	    code=llm_metric_code,
	    description="LLM-generated: Corner circle size analysis",
	    interpretation="higher_better"
	)

	if llm_metric.register_to_global():
	    print("Successfully registered LLM-generated metric!")
	"""