"""
Auxiliary Evaluation System for Circle Packing
===============================================

This module provides ADDITIONAL metrics and analysis beyond the ground truth evaluation.
It is designed to be extensible and allow for future LLM-generated evaluation code.

IMPORTANT:
- Does NOT replace or modify the ground truth evaluation (evaluate.py)
- Provides supplementary metrics that help guide evolution
- Designed for easy extension with new metrics (manual or LLM-generated)

Architecture:
1. MetricRegistry: Plugin system for registering metrics
2. AuxiliaryEvaluator: Orchestrates metric computation
3. Individual metric functions: Compute specific aspects
4. Feedback generation: AuxiliaryEvaluator._generate_feedback builds actionable text feedback
"""

import numpy as np
from typing import Dict, Any, List, Tuple, Callable, Optional
from dataclasses import dataclass, field
from pathlib import Path
import json


@dataclass
class MetricResult:
    """Result of a single auxiliary metric computation.

    Instances are returned by the metric functions in this module and are
    consumed by ``AuxiliaryEvaluator``, which copies ``value`` and ``details``
    into its result dictionary and uses ``name``, ``interpretation`` and
    ``description`` when building text feedback.
    """
    # Metric identifier; AuxiliaryEvaluator looks up suggestions by this name.
    name: str
    # Numeric score. For "higher_better" metrics the feedback code treats
    # >= 0.7 as a strength and < 0.5 as an area for improvement.
    value: float
    interpretation: str  # "higher_better" or "lower_better" or "neutral"
    # Short human-readable label printed next to the value in feedback text.
    description: str
    # Free-form supporting numbers; default_factory gives each instance its
    # own dict instead of sharing one mutable default.
    details: Dict[str, Any] = field(default_factory=dict)


@dataclass
class AuxiliaryEvalConfig:
    """Configuration for auxiliary evaluation.

    Read by ``AuxiliaryEvaluator.evaluate``: ``enabled`` short-circuits the
    whole evaluation, ``enabled_metrics`` selects which registry entries are
    run, and ``generate_text_feedback`` controls whether a text report is
    added to the results.
    """
    # When False, AuxiliaryEvaluator.evaluate returns an empty dict.
    enabled: bool = True
    # Registry names to compute, in this order. Names missing from the
    # registry are skipped with a warning by the evaluator. default_factory
    # gives every config instance its own list.
    enabled_metrics: List[str] = field(default_factory=lambda: [
        "spatial_uniformity",
        "edge_utilization", 
        "density_variance",
        "packing_efficiency",
        "radius_distribution",
        "gap_analysis",
        "geometric_quality"
    ])
    # When True, the result dict gains an "auxiliary_text_feedback" entry.
    generate_text_feedback: bool = True
    # NOTE(review): not read anywhere in this module; presumably callers use
    # it to decide whether to call save_auxiliary_analysis — confirm.
    save_detailed_analysis: bool = True


class MetricRegistry:
    """
    Name-keyed catalogue of auxiliary metric callables.

    Each entry pairs a callable with a small metadata record (description and
    interpretation). Registering an existing name replaces the earlier entry.
    """

    def __init__(self):
        # Two parallel dicts keyed by metric name: the callable itself and
        # its descriptive metadata.
        self._metrics: Dict[str, Callable] = {}
        self._metric_metadata: Dict[str, Dict[str, str]] = {}

    def register(
        self,
        name: str,
        func: Callable,
        description: str,
        interpretation: str = "higher_better"
    ):
        """
        Add (or replace) a metric under ``name`` and announce it on stdout.

        Args:
            name: Unique metric identifier
            func: Callable with signature (centers, radii) -> MetricResult
            description: Human-readable description
            interpretation: "higher_better", "lower_better", or "neutral"
        """
        metadata = dict(description=description, interpretation=interpretation)
        self._metric_metadata[name] = metadata
        self._metrics[name] = func
        print(f"[MetricRegistry] Registered metric: {name}")

    def get(self, name: str) -> Optional[Callable]:
        """Return the callable registered as ``name``, or None if unknown."""
        try:
            return self._metrics[name]
        except KeyError:
            return None

    def list_metrics(self) -> List[str]:
        """Return the registered metric names in registration order."""
        return [*self._metrics]

    def get_metadata(self, name: str) -> Dict[str, str]:
        """Return the metadata dict for ``name``, or an empty dict if unknown."""
        try:
            return self._metric_metadata[name]
        except KeyError:
            return {}


# Global registry instance. Created at import time; the built-in metrics
# below are registered into it, and AuxiliaryEvaluator and
# LLMGeneratedMetric.register_to_global both use this same object.
METRIC_REGISTRY = MetricRegistry()


# ============================================================================
# MANUALLY DESIGNED AUXILIARY METRICS
# ============================================================================
# These are hand-crafted metrics that provide useful signals for evolution.
# Future versions will allow an LLM to generate additional metrics.
# ============================================================================

def compute_spatial_uniformity(
    centers: np.ndarray,
    radii: np.ndarray
) -> MetricResult:
    """
    Measure spatial uniformity from nearest-neighbour spacing.

    For every centre, the mean distance to its (up to) three nearest distinct
    neighbours is used as a proxy for local cell size. The score is
    ``1 / (1 + cv)`` where ``cv`` is the coefficient of variation of those
    sizes, so 1.0 means perfectly even spacing. Higher is better.

    The previous version also built a scipy Voronoi diagram whose result was
    never read. That call has been removed because it could only add failure
    modes (Qhull errors on degenerate or tiny inputs, scipy not installed)
    that zeroed the metric. The description string is kept unchanged so
    existing feedback text and consumers are not affected.

    Args:
        centers: Circle centers, indexed as (n, 2).
        radii: Circle radii; unused, accepted to match the metric signature.

    Returns:
        MetricResult named "spatial_uniformity". On unexpected errors (for
        example malformed input) the value is 0.0 and ``details`` carries
        the error message.
    """
    try:
        points = np.asarray(centers, dtype=float)

        cell_sizes = []
        for point in points:
            distances = np.linalg.norm(points - point, axis=1)
            # Drops the point itself (and any exactly coincident centres).
            distances = distances[distances > 0]
            if distances.size:
                k_nearest = min(3, distances.size)
                cell_sizes.append(np.mean(np.sort(distances)[:k_nearest]))

        if cell_sizes:
            size_mean = float(np.mean(cell_sizes))
            size_std = float(np.std(cell_sizes))
            # Lower coefficient of variation = more uniform spacing.
            cv = size_std / (size_mean + 1e-9)
            uniformity = 1.0 / (1.0 + cv)
        else:
            size_mean = size_std = cv = uniformity = 0.0

        return MetricResult(
            name="spatial_uniformity",
            value=float(uniformity),
            interpretation="higher_better",
            description="Spatial distribution uniformity (Voronoi analysis)",
            details={
                "cell_size_mean": size_mean,
                "cell_size_std": size_std,
                "coefficient_of_variation": float(cv)
            }
        )
    except Exception as e:
        # Best-effort metric: report the failure instead of aborting the run.
        return MetricResult(
            name="spatial_uniformity",
            value=0.0,
            interpretation="higher_better",
            description="Spatial distribution uniformity (failed to compute)",
            details={"error": str(e)}
        )


def compute_edge_utilization(
    centers: np.ndarray,
    radii: np.ndarray
) -> MetricResult:
    """
    Measure how well the packing utilizes the unit-square boundary.

    A circle "touches" an edge when its extent comes within 0.02 of that
    edge. Circles touching exactly two edges are counted as corner circles,
    circles touching exactly one edge as edge circles; circles touching three
    or four edges fall into neither bucket. The score is
    ``0.6 * corners / 4 + 0.4 * edge_circles / n``.

    Args:
        centers: Circle centers, iterated as (x, y) pairs.
        radii: Circle radii, paired with ``centers`` by position.

    Returns:
        MetricResult named "edge_utilization". With no circles the score and
        both sub-scores are 0.0 (previously this raised ZeroDivisionError).
    """
    boundary_threshold = 0.02  # Distance to be considered "touching"

    touching_edges = 0
    touching_corners = 0

    for center, radius in zip(centers, radii):
        x, y = center

        touches = (
            (x - radius) < boundary_threshold,          # left
            (x + radius) > (1.0 - boundary_threshold),  # right
            (y - radius) < boundary_threshold,          # bottom
            (y + radius) > (1.0 - boundary_threshold),  # top
        )
        edge_count = sum(touches)

        if edge_count == 2:
            touching_corners += 1
        elif edge_count == 1:
            touching_edges += 1

    n_circles = len(centers)
    corner_score = touching_corners / 4.0  # 4 corners max
    # Guard the empty packing: there is no fraction to compute.
    edge_score = touching_edges / float(n_circles) if n_circles else 0.0

    # Combined score (corners are weighted more heavily).
    utilization = 0.6 * corner_score + 0.4 * edge_score

    return MetricResult(
        name="edge_utilization",
        value=float(utilization),
        interpretation="higher_better",
        description="Boundary and corner utilization",
        details={
            "corners_touched": touching_corners,
            "edges_touched": touching_edges,
            "corner_score": float(corner_score),
            "edge_score": float(edge_score)
        }
    )


def compute_density_variance(
    centers: np.ndarray,
    radii: np.ndarray
) -> MetricResult:
    """
    Measure how evenly circle area is spread over a 10x10 grid.

    Every grid cell whose centre lies strictly inside a circle receives that
    circle's full area (a deliberate simplification). The reported value is
    ``1 / (1 + cv)`` where ``cv`` is the coefficient of variation of the cell
    totals, so despite the metric's name a higher value means more uniform
    density.

    The cell-centre coordinates are computed once and each circle is applied
    with a boolean mask instead of scanning all 100 cells in Python; the
    arithmetic per cell is unchanged.

    Args:
        centers: Circle centers, iterated as (x, y) pairs.
        radii: Circle radii, paired with ``centers`` by position.

    Returns:
        MetricResult named "density_variance" with the raw variance, mean
        density and cv in ``details``.
    """
    grid_size = 10
    grid = np.zeros((grid_size, grid_size))

    # Cell-centre coordinates; axis 0 of the grid is the x index and axis 1
    # the y index.
    cell_coords = (np.arange(grid_size) + 0.5) / grid_size
    cell_x = cell_coords[:, np.newaxis]
    cell_y = cell_coords[np.newaxis, :]

    for center, radius in zip(centers, radii):
        x, y = center
        dist = np.sqrt((cell_x - x)**2 + (cell_y - y)**2)
        # Credit the whole circle area to every cell centre it covers.
        grid[dist < radius] += np.pi * radius**2

    variance = float(np.var(grid))
    mean_density = float(np.mean(grid))

    # Inverse coefficient of variation; the epsilon guards an all-zero grid.
    cv = np.sqrt(variance) / (mean_density + 1e-9)
    uniformity_score = 1.0 / (1.0 + cv)

    return MetricResult(
        name="density_variance",
        value=float(uniformity_score),
        interpretation="higher_better",
        description="Spatial density uniformity across grid",
        details={
            "grid_size": grid_size,
            "variance": variance,
            "mean_density": mean_density,
            "cv": float(cv)
        }
    )


def compute_packing_efficiency(
    centers: np.ndarray,
    radii: np.ndarray
) -> MetricResult:
    """
    Report the summed circle area as a fraction of the unit square.

    ``centers`` is accepted for signature compatibility but not used. The
    details also give the fraction relative to a rough reference value of
    0.84.
    """
    covered_area = float(np.sum(np.pi * radii**2))
    unit_square_area = 1.0
    fill_fraction = covered_area / unit_square_area

    # Reference value: 26 equal circles whose radii sum to 2.635 would cover
    # pi * (2.635 / 26)^2 * 26 ≈ 0.839 of the square. The 2.635 figure is the
    # original author's note on the best known sum of radii.
    reference_fill = 0.84
    fill_vs_reference = fill_fraction / reference_fill

    breakdown = {
        "total_area": covered_area,
        "square_area": unit_square_area,
        "efficiency": fill_fraction,
        "relative_to_estimated_best": float(fill_vs_reference)
    }
    return MetricResult(
        name="packing_efficiency",
        value=float(fill_fraction),
        interpretation="higher_better",
        description="Area utilization efficiency",
        details=breakdown
    )


def compute_radius_distribution(
    centers: np.ndarray,
    radii: np.ndarray
) -> MetricResult:
    """
    Summarize the radius size distribution and score its diversity.

    Radii are bucketed as small / medium / large relative to
    ``mean ± 0.5 * std``; the score is the entropy of the bucket proportions
    divided by ``log(3)``, so it lies in [0, 1].

    Changes from the earlier version:
    - The ``+ 1e-9`` inside the logarithm is gone. Zero proportions are
      already filtered out, and the epsilon made the score slightly negative
      when every radius landed in one bucket.
    - An empty ``radii`` returns an all-zero result instead of raising.

    Args:
        centers: Unused; accepted to match the metric signature.
        radii: Circle radii.

    Returns:
        MetricResult named "radius_distribution" (interpretation "neutral").
    """
    radii_arr = np.asarray(radii, dtype=float)

    if radii_arr.size == 0:
        return MetricResult(
            name="radius_distribution",
            value=0.0,
            interpretation="neutral",
            description="Radius size diversity",
            details={
                "mean": 0.0,
                "std": 0.0,
                "min": 0.0,
                "max": 0.0,
                "range": 0.0,
                "small_count": 0,
                "medium_count": 0,
                "large_count": 0,
                "diversity_score": 0.0
            }
        )

    radius_mean = float(np.mean(radii_arr))
    radius_std = float(np.std(radii_arr))
    radius_min = float(np.min(radii_arr))
    radius_max = float(np.max(radii_arr))
    radius_range = radius_max - radius_min

    # Count size categories
    half_std = 0.5 * radius_std
    small_radii = np.sum(radii_arr < radius_mean - half_std)
    medium_radii = np.sum(np.abs(radii_arr - radius_mean) <= half_std)
    large_radii = np.sum(radii_arr > radius_mean + half_std)

    # Normalized entropy of the three bucket proportions.
    proportions = np.array([small_radii, medium_radii, large_radii]) / radii_arr.size
    proportions = proportions[proportions > 0]  # log(0) is undefined
    entropy = -np.sum(proportions * np.log(proportions))
    # max() also turns a -0.0 from a single occupied bucket into 0.0.
    diversity = max(0.0, float(entropy / np.log(3)))

    return MetricResult(
        name="radius_distribution",
        value=diversity,
        interpretation="neutral",
        description="Radius size diversity",
        details={
            "mean": radius_mean,
            "std": radius_std,
            "min": radius_min,
            "max": radius_max,
            "range": radius_range,
            "small_count": int(small_radii),
            "medium_count": int(medium_radii),
            "large_count": int(large_radii),
            "diversity_score": diversity
        }
    )


def compute_gap_analysis(
    centers: np.ndarray,
    radii: np.ndarray
) -> MetricResult:
    """
    Estimate how much of the unit square is covered by circles.

    The square is sampled at the centres of a 50x50 grid; a sample counts as
    covered when it lies strictly inside at least one circle. The reported
    value is the covered fraction (``gap_ratio`` in the details is its
    complement).

    The coverage test is evaluated for the whole sample grid at once per
    circle rather than point by point in nested Python loops; the test itself
    (``sqrt(dx^2 + dy^2) < radius``) is unchanged.

    Args:
        centers: Circle centers, indexed as ``center[0]``, ``center[1]``.
        radii: Circle radii, paired with ``centers`` by position.

    Returns:
        MetricResult named "gap_analysis".
    """
    sample_size = 50  # Grid resolution
    total_samples = sample_size * sample_size

    # Sample coordinates, broadcast to a (sample_size, sample_size) grid.
    axis_coords = (np.arange(sample_size) + 0.5) / sample_size
    px = axis_coords[:, np.newaxis]
    py = axis_coords[np.newaxis, :]

    covered = np.zeros((sample_size, sample_size), dtype=bool)
    for center, radius in zip(centers, radii):
        dist = np.sqrt((px - center[0])**2 + (py - center[1])**2)
        # A sample is covered once any circle contains it.
        covered |= dist < radius

    covered_samples = int(covered.sum())
    coverage = covered_samples / total_samples
    gap_ratio = 1.0 - coverage

    return MetricResult(
        name="gap_analysis",
        value=float(coverage),
        interpretation="higher_better",
        description="Area coverage (1 - gap ratio)",
        details={
            "covered_samples": covered_samples,
            "total_samples": total_samples,
            "coverage": float(coverage),
            "gap_ratio": float(gap_ratio)
        }
    )


def compute_geometric_quality(
    centers: np.ndarray,
    radii: np.ndarray
) -> MetricResult:
    """
    Score the Delaunay triangulation of the circle centers.

    Each triangle is rated by its shortest edge divided by its longest edge
    (1.0 for an equilateral triangle); the metric value is the mean rating.
    ``radii`` is accepted for signature compatibility but not used. Any
    failure, including a missing scipy, yields a 0.0 result whose details
    hold the error message.
    """
    try:
        from scipy.spatial import Delaunay

        triangulation = Delaunay(centers)

        ratios = []
        for vertex_ids in triangulation.simplices:
            a, b, c = centers[vertex_ids]
            side_lengths = (
                np.linalg.norm(b - a),
                np.linalg.norm(c - b),
                np.linalg.norm(a - c),
            )
            longest = max(side_lengths)
            # A triangle with all sides zero has no defined ratio; skip it.
            if longest > 0:
                ratios.append(min(side_lengths) / longest)

        mean_ratio = float(np.mean(ratios)) if ratios else 0.0
        worst_ratio = float(np.min(ratios)) if ratios else 0.0
        best_ratio = float(np.max(ratios)) if ratios else 0.0

        return MetricResult(
            name="geometric_quality",
            value=mean_ratio,
            interpretation="higher_better",
            description="Delaunay triangulation quality",
            details={
                "num_triangles": len(triangulation.simplices),
                "avg_triangle_quality": mean_ratio,
                "min_quality": worst_ratio,
                "max_quality": best_ratio
            }
        )
    except Exception as exc:
        return MetricResult(
            name="geometric_quality",
            value=0.0,
            interpretation="higher_better",
            description="Delaunay triangulation quality (failed)",
            details={"error": str(exc)}
        )


# ============================================================================
# REGISTER ALL METRICS
# ============================================================================

# Built-in metrics, registered at import time in the same order as the
# default AuxiliaryEvalConfig.enabled_metrics list.
METRIC_REGISTRY.register(
    name="spatial_uniformity",
    func=compute_spatial_uniformity,
    description="Spatial distribution uniformity using Voronoi analysis",
    interpretation="higher_better",
)

METRIC_REGISTRY.register(
    name="edge_utilization",
    func=compute_edge_utilization,
    description="Boundary and corner utilization",
    interpretation="higher_better",
)

METRIC_REGISTRY.register(
    name="density_variance",
    func=compute_density_variance,
    description="Spatial density uniformity across grid",
    interpretation="higher_better",
)

METRIC_REGISTRY.register(
    name="packing_efficiency",
    func=compute_packing_efficiency,
    description="Area utilization efficiency",
    interpretation="higher_better",
)

METRIC_REGISTRY.register(
    name="radius_distribution",
    func=compute_radius_distribution,
    description="Radius size diversity",
    interpretation="neutral",
)

METRIC_REGISTRY.register(
    name="gap_analysis",
    func=compute_gap_analysis,
    description="Area coverage analysis",
    interpretation="higher_better",
)

METRIC_REGISTRY.register(
    name="geometric_quality",
    func=compute_geometric_quality,
    description="Delaunay triangulation quality",
    interpretation="higher_better",
)


# ============================================================================
# AUXILIARY EVALUATOR
# ============================================================================

class AuxiliaryEvaluator:
    """
    Runs the enabled auxiliary metrics and assembles their results.

    Metric callables are looked up by name in the global METRIC_REGISTRY.
    Each one is invoked as ``func(centers, radii)`` and must return a
    MetricResult; failures are reported per metric and never abort the
    evaluation.
    """

    def __init__(self, config: Optional[AuxiliaryEvalConfig] = None):
        """Use ``config`` if given, otherwise a default AuxiliaryEvalConfig."""
        self.config = config or AuxiliaryEvalConfig()
        self.registry = METRIC_REGISTRY

    def evaluate(
        self,
        centers: np.ndarray,
        radii: np.ndarray,
        primary_score: float
    ) -> Dict[str, Any]:
        """
        Compute all enabled auxiliary metrics.

        Args:
            centers: Circle centers (n, 2)
            radii: Circle radii (n,)
            primary_score: Ground truth primary score (sum of radii)

        Returns:
            Dictionary with ``<metric>`` and ``<metric>_details`` entries for
            every enabled metric found in the registry, plus
            "auxiliary_text_feedback" when text feedback is enabled. Empty
            when the evaluator is disabled. A metric that raises or returns
            something other than a MetricResult is recorded as 0.0 with an
            "error" detail and is left out of the text feedback.
        """
        if not self.config.enabled:
            return {}

        results: Dict[str, Any] = {}
        metric_results: List[MetricResult] = []

        for metric_name in self.config.enabled_metrics:
            metric_func = self.registry.get(metric_name)
            if metric_func is None:
                print(f"[AuxiliaryEvaluator] Warning: Metric '{metric_name}' not found in registry")
                continue

            try:
                metric_result = metric_func(centers, radii)
                # Validate before queuing for feedback: a metric may hand back
                # None (LLMGeneratedMetric.evaluate does so on failure), and
                # queuing that would crash feedback generation later.
                if not isinstance(metric_result, MetricResult):
                    raise TypeError(
                        f"expected MetricResult, got {type(metric_result).__name__}"
                    )
            except Exception as e:
                print(f"[AuxiliaryEvaluator] Error computing '{metric_name}': {e}")
                results[metric_name] = 0.0
                results[f"{metric_name}_details"] = {"error": str(e)}
                continue

            metric_results.append(metric_result)
            results[metric_name] = metric_result.value
            results[f"{metric_name}_details"] = metric_result.details

        if self.config.generate_text_feedback:
            results["auxiliary_text_feedback"] = self._generate_feedback(
                metric_results,
                primary_score,
                centers,
                radii
            )

        return results

    def _generate_feedback(
        self,
        metric_results: List[MetricResult],
        primary_score: float,
        centers: np.ndarray,
        radii: np.ndarray
    ) -> str:
        """
        Build the human-readable feedback report.

        "higher_better" metrics are split into strengths (value >= 0.7),
        areas for improvement (value < 0.5) and the rest; metrics with any
        other interpretation are always listed under "Other Metrics".
        """
        lines = [
            "=" * 60,
            "AUXILIARY EVALUATION FEEDBACK",
            "=" * 60,
            f"Primary Score (sum of radii): {primary_score:.4f}",
            "",
            "Auxiliary Metrics:",
            "-" * 60,
        ]

        good_metrics = []
        poor_metrics = []
        neutral_metrics = []

        for result in metric_results:
            if result.interpretation != "higher_better":
                neutral_metrics.append(result)
            elif result.value >= 0.7:
                good_metrics.append(result)
            elif result.value < 0.5:
                poor_metrics.append(result)
            else:
                neutral_metrics.append(result)

        if good_metrics:
            lines.append("\n✅ Strengths:")
            for result in good_metrics:
                lines.append(f"  • {result.description}: {result.value:.3f}")

        if poor_metrics:
            lines.append("\n⚠️  Areas for Improvement:")
            for result in poor_metrics:
                lines.append(f"  • {result.description}: {result.value:.3f}")
                lines.append(f"    → {self._get_suggestion(result)}")

        if neutral_metrics:
            lines.append("\n📊 Other Metrics:")
            for result in neutral_metrics:
                lines.append(f"  • {result.description}: {result.value:.3f}")

        lines.append("\n" + "-" * 60)
        lines.append("💡 Actionable Recommendations:")
        recommendations = self._generate_recommendations(metric_results, centers, radii)
        for i, rec in enumerate(recommendations, 1):
            lines.append(f"  {i}. {rec}")

        lines.append("=" * 60)

        return "\n".join(lines)

    def _get_suggestion(self, result: MetricResult) -> str:
        """Return the canned suggestion for ``result.name`` (generic if unknown)."""
        suggestions = {
            "spatial_uniformity": "Try redistributing circles to reduce clustering",
            "edge_utilization": "Consider placing larger circles near boundaries and corners",
            "density_variance": "Balance circle density across different regions",
            "gap_analysis": "Identify and fill empty regions with additional circles or larger radii",
            "geometric_quality": "Improve triangle quality in Delaunay triangulation"
        }
        return suggestions.get(result.name, "Consider optimizing this aspect")

    def _recommendation_for(self, result: MetricResult) -> Optional[str]:
        """Return one recommendation for ``result``, or None if it needs none."""
        if result.name == "spatial_uniformity" and result.value < 0.6:
            return (
                f"Spatial uniformity is low ({result.value:.2f}). "
                "Check for clustered regions and redistribute circles."
            )
        if result.name == "edge_utilization" and result.value < 0.5:
            corners = result.details.get("corners_touched", 0)
            if corners < 4:
                return (
                    f"Only {corners}/4 corners are utilized. "
                    "Place larger circles at unused corners."
                )
        if result.name == "gap_analysis" and result.value < 0.7:
            gap_ratio = result.details.get("gap_ratio", 0)
            return (
                f"Detected {gap_ratio*100:.1f}% unused space. "
                "Consider increasing radii in sparse regions."
            )
        # Same "poor" threshold as the feedback report, so a metric listed
        # under "Areas for Improvement" always yields a recommendation.
        if result.value < 0.5:
            return (
                f"{result.description} is low ({result.value:.2f}). "
                f"{self._get_suggestion(result)}."
            )
        return None

    def _generate_recommendations(
        self,
        metric_results: List[MetricResult],
        centers: np.ndarray,
        radii: np.ndarray
    ) -> List[str]:
        """
        Generate up to three actionable recommendations, worst metric first.

        Only "higher_better" metrics are considered. Candidates are walked in
        ascending value order until three recommendations are collected, so a
        metric without a recommendation no longer uses up one of the three
        slots. Previously only the three lowest metrics were inspected, which
        could report good overall quality while poor metrics were listed
        elsewhere in the same report. ``centers`` and ``radii`` are currently
        unused.
        """
        max_recommendations = 3

        prioritized = sorted(
            [r for r in metric_results if r.interpretation == "higher_better"],
            key=lambda r: r.value
        )

        recommendations: List[str] = []
        for result in prioritized:
            if len(recommendations) >= max_recommendations:
                break
            recommendation = self._recommendation_for(result)
            if recommendation is not None:
                recommendations.append(recommendation)

        if not recommendations:
            recommendations.append("Overall packing quality is good! Continue optimizing primary score.")

        return recommendations


# ============================================================================
# CONVENIENCE FUNCTIONS
# ============================================================================

def evaluate_auxiliary(
    centers: np.ndarray,
    radii: np.ndarray,
    primary_score: float,
    config: Optional[AuxiliaryEvalConfig] = None
) -> Dict[str, Any]:
    """
    One-shot helper: build an AuxiliaryEvaluator and run it once.

    Args:
        centers: Circle centers (n, 2)
        radii: Circle radii (n,)
        primary_score: Ground truth primary score
        config: Optional configuration; the evaluator falls back to its
            defaults when this is None

    Returns:
        The dictionary produced by AuxiliaryEvaluator.evaluate
    """
    return AuxiliaryEvaluator(config).evaluate(
        centers=centers,
        radii=radii,
        primary_score=primary_score,
    )


def save_auxiliary_analysis(
    results: Dict[str, Any],
    output_path: str
):
    """
    Save detailed auxiliary analysis to a JSON file.

    Parent directories are created as needed. NumPy arrays and NumPy scalars
    of any kind (including ``np.bool_``) are converted to native Python
    values, recursing through dicts, lists and tuples. Previously
    ``np.bool_`` values and NumPy scalars nested inside tuples were left
    untouched and made ``json.dump`` raise ``TypeError``.

    Args:
        results: Result dictionary, e.g. from AuxiliaryEvaluator.evaluate.
        output_path: Destination file path.
    """
    output_path = Path(output_path)
    output_path.parent.mkdir(parents=True, exist_ok=True)

    def convert_types(obj):
        """Recursively replace NumPy containers/scalars with native values."""
        if isinstance(obj, np.ndarray):
            # tolist() yields native scalars for numeric dtypes; recurse in
            # case of object arrays holding further NumPy values.
            return convert_types(obj.tolist())
        if isinstance(obj, np.generic):
            return obj.item()
        if isinstance(obj, dict):
            return {k: convert_types(v) for k, v in obj.items()}
        if isinstance(obj, (list, tuple)):
            return [convert_types(item) for item in obj]
        return obj

    results_serializable = convert_types(results)

    # json.dump escapes non-ASCII by default, so the encoding only pins the
    # file's byte format regardless of platform locale.
    with open(output_path, 'w', encoding='utf-8') as f:
        json.dump(results_serializable, f, indent=2)

    print(f"[AuxiliaryEval] Detailed analysis saved to: {output_path}")


# ============================================================================
# FUTURE EXTENSION INTERFACE
# ============================================================================

class LLMGeneratedMetric:
    """
    Interface for LLM-generated metrics (future feature).

    Wraps a string of Python source that is expected to define a function
    named ``name`` with signature ``(centers, radii)``. The source is
    executed with ``exec``; see the security note in ``compile``.
    """

    def __init__(
        self,
        name: str,
        code: str,
        description: str,
        interpretation: str = "higher_better"
    ):
        """
        Args:
            name: Metric name; also the function name looked up in ``code``.
            code: Python source defining that function.
            description: Human-readable description.
            interpretation: "higher_better", "lower_better", or "neutral".
        """
        self.name = name
        self.code = code
        self.description = description
        self.interpretation = interpretation
        self._compiled_func = None

    def compile(self) -> bool:
        """
        Execute the generated code and capture the metric function.

        Returns True if the code ran and defined a callable named
        ``self.name``, False otherwise (a message is printed in that case).
        """
        try:
            # SECURITY NOTE(review): this is NOT a sandbox. exec() adds the
            # full builtins to the namespace, so the generated code can
            # import modules, touch the filesystem, and so on. Only run code
            # from a trusted source or inside an isolated process.
            namespace = {
                'np': np,
                'MetricResult': MetricResult,
            }
            exec(self.code, namespace)
        except Exception as e:
            print(f"[LLMGeneratedMetric] Failed to compile: {e}")
            return False

        if self.name not in namespace:
            print(f"[LLMGeneratedMetric] Function '{self.name}' not found in generated code")
            return False

        candidate = namespace[self.name]
        if not callable(candidate):
            # A plain variable with the right name would otherwise be stored
            # and only fail later when called.
            print(f"[LLMGeneratedMetric] '{self.name}' in generated code is not callable")
            return False

        self._compiled_func = candidate
        return True

    def evaluate(self, centers: np.ndarray, radii: np.ndarray) -> Optional[MetricResult]:
        """
        Execute the compiled metric function.

        Returns:
            The function's MetricResult; a MetricResult wrapping a bare
            Python or NumPy real number; or None when nothing is compiled,
            the function raises, or it returns an unsupported type.
        """
        if self._compiled_func is None:
            return None

        try:
            result = self._compiled_func(centers, radii)
        except Exception as e:
            print(f"[LLMGeneratedMetric] Error executing '{self.name}': {e}")
            return None

        if isinstance(result, MetricResult):
            return result

        # Bare numbers are wrapped. np.integer / np.floating are listed
        # because e.g. np.int64 and np.float32 are not int / float subclasses.
        if isinstance(result, (int, float, np.integer, np.floating)):
            return MetricResult(
                name=self.name,
                value=float(result),
                interpretation=self.interpretation,
                description=self.description
            )

        print(
            f"[LLMGeneratedMetric] '{self.name}' returned unsupported type "
            f"{type(result).__name__}"
        )
        return None

    def _evaluate_or_fallback(self, centers: np.ndarray, radii: np.ndarray) -> MetricResult:
        """Run ``evaluate`` but always hand the registry caller a MetricResult."""
        result = self.evaluate(centers, radii)
        if result is not None:
            return result
        # Same failure shape the built-in metrics use: value 0.0 plus an
        # "error" entry in details.
        return MetricResult(
            name=self.name,
            value=0.0,
            interpretation=self.interpretation,
            description=f"{self.description} (failed to compute)",
            details={"error": "LLM-generated metric did not produce a usable result"}
        )

    def register_to_global(self) -> bool:
        """
        Register this metric in the global registry, compiling it if needed.

        Returns False (and registers nothing) when compilation fails. The
        registered callable never returns None: failures are reported as a
        0.0-valued MetricResult with an "error" detail.
        """
        if self._compiled_func is None:
            if not self.compile():
                return False

        METRIC_REGISTRY.register(
            self.name,
            self._evaluate_or_fallback,
            self.description,
            self.interpretation
        )
        return True


# Example of how to use LLMGeneratedMetric (for future):
"""
# LLM generates this code:
llm_metric_code = '''
def corner_circle_size_metric(centers, radii):
    # Find circles in corners
    corner_circles = []
    for i, (center, radius) in enumerate(zip(centers, radii)):
        x, y = center
        if (x < 0.1 or x > 0.9) and (y < 0.1 or y > 0.9):
            corner_circles.append(radius)
    
    if len(corner_circles) == 0:
        score = 0.0
    else:
        score = sum(corner_circles) / len(corner_circles)
    
    return MetricResult(
        name="corner_circle_size",
        value=score,
        interpretation="higher_better",
        description="Average size of circles in corners"
    )
'''

# Create and register the metric
llm_metric = LLMGeneratedMetric(
    name="corner_circle_size_metric",
    code=llm_metric_code,
    description="LLM-generated: Corner circle size analysis",
    interpretation="higher_better"
)

if llm_metric.register_to_global():
    print("Successfully registered LLM-generated metric!")
"""