"""Primary evaluator for the AlphaEvolve AC2 task."""

import argparse
import sys
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple

import numpy as np

# Add project root to import path for direct execution.
sys.path.insert(0, str(Path(__file__).resolve().parents[2]))

from shinka.core import run_shinka_eval


def evaluate_sequence(sequence: list[float]) -> float:
    """Score a sequence by a norm ratio of its autoconvolution. Higher is better.

    The sequence is sanitised (negative entries become 0.0, entries above
    1000.0 become 1000.0) and convolved with itself. The convolution values
    are then read as the interior nodes of a piecewise-linear function on
    [-0.5, 0.5] that is zero at both endpoints, and the score is::

        ||f||_2^2 / (||f||_1 * ||f||_inf)

    where the squared L2 norm is integrated exactly segment by segment, the
    L1 norm is ``sum(|conv|) / (len(conv) + 1)`` and the sup norm is
    ``max(|conv|)``.

    Args:
        sequence: Non-empty ``list`` of finite ``int``/``float`` values.
            ``bool`` elements are rejected.

    Returns:
        The score as a Python ``float``.

    Raises:
        ValueError: If ``sequence`` is not a list, is empty, contains an
            element that is not a finite real number (including ints too
            large to convert to ``float``), sums to less than 0.01 after
            negative entries are zeroed, or yields degenerate norms.
    """
    if not isinstance(sequence, list):
        raise ValueError("Invalid sequence type")
    if not sequence:
        raise ValueError("Empty sequence")

    cleaned = []
    for x in sequence:
        # bool is a subclass of int, so it has to be rejected explicitly.
        if isinstance(x, bool) or not isinstance(x, (int, float)):
            raise ValueError("Invalid sequence element type")
        # Convert before testing finiteness: NumPy ufuncs such as np.isnan
        # raise TypeError on Python ints that do not fit a 64-bit integer,
        # even though such values are finite and simply get clamped below.
        try:
            value = float(x)
        except OverflowError as exc:
            raise ValueError("Invalid sequence element value") from exc
        if not np.isfinite(value):
            raise ValueError("Invalid sequence element value")
        cleaned.append(value)

    cleaned = [max(0.0, v) for v in cleaned]
    # The sum check runs after zeroing negatives but before the upper clamp.
    if np.sum(cleaned) < 0.01:
        raise ValueError("Sum of sequence is too close to zero")
    cleaned = [min(1000.0, v) for v in cleaned]

    convolution_2 = np.convolve(cleaned, cleaned)
    num_points = len(convolution_2)
    # num_points interior nodes plus the two zero-valued endpoints.
    x_points = np.linspace(-0.5, 0.5, num_points + 2)
    x_intervals = np.diff(x_points)
    y_points = np.concatenate(([0.0], convolution_2, [0.0]))

    # Exact integral of the squared linear interpolant on each segment:
    #   int_0^h (y1 + (y2 - y1) * t / h)^2 dt = h / 3 * (y1^2 + y1*y2 + y2^2)
    # Accumulated sequentially (not with a vectorised sum) so the floating
    # point result stays bit-for-bit reproducible.
    l2_norm_squared = 0.0
    for y1, y2, h in zip(y_points[:-1], y_points[1:], x_intervals):
        l2_norm_squared += (h / 3.0) * (y1**2 + y1 * y2 + y2**2)

    norm_1 = np.sum(np.abs(convolution_2)) / (num_points + 1)
    norm_inf = np.max(np.abs(convolution_2))
    if norm_1 <= 0.0 or norm_inf <= 0.0:
        raise ValueError("Degenerate convolution norms")

    return float(l2_norm_squared / (norm_1 * norm_inf))


def validate_run_output(run_output: Any) -> Tuple[bool, Optional[str]]:
    """Check that a ``run()`` result is a scorable, non-empty list.

    Args:
        run_output: Whatever the evaluated program's ``run()`` returned.

    Returns:
        ``(True, None)`` when the output is a non-empty list whose objective
        value is finite; otherwise ``(False, reason)`` where ``reason`` is a
        fixed message or the text of the exception raised while scoring.
    """
    try:
        if not isinstance(run_output, list):
            failure = "run() must return list[float]"
        elif not run_output:
            failure = "run() returned empty list"
        elif np.isfinite(evaluate_sequence(run_output)):
            failure = None
        else:
            failure = "Objective is inf/nan"
    except Exception as exc:
        # Any scoring error is reported as a validation failure rather than
        # propagated to the caller.
        failure = str(exc)
    return failure is None, failure


def aggregate_alphaevolve_ac2_metrics(results: List[list[float]]) -> Dict[str, Any]:
    """Aggregate per-run sequences into one metrics payload.

    Every sequence is scored with ``evaluate_sequence``; the highest score
    becomes ``combined_score``. Both the empty and the non-empty payload
    expose the same keys so consumers never need to special-case the
    "no successful runs" situation.

    Args:
        results: One sequence per run.

    Returns:
        A dict with the keys ``combined_score``, ``public`` (``best_value``,
        ``best_length``, ``num_runs``), ``private`` (``all_values``,
        ``all_lengths``) and ``text_feedback``.

    Raises:
        ValueError: Propagated from ``evaluate_sequence`` when a sequence
            cannot be scored.
    """
    if not results:
        return {
            "combined_score": 0.0,
            "public": {"best_value": None, "best_length": None, "num_runs": 0},
            "private": {"all_values": [], "all_lengths": []},
            "text_feedback": "No successful runs.",
        }

    values: List[float] = [float(evaluate_sequence(seq)) for seq in results]
    lengths: List[int] = [len(seq) for seq in results]

    # Strict ">" keeps the earliest run on ties and never selects a NaN score.
    best_sequence: Optional[list[float]] = None
    best_value = -float(np.inf)
    for seq, val in zip(results, values):
        if val > best_value:
            best_value = val
            best_sequence = seq

    return {
        # Maximization task: combined score is the best value directly.
        "combined_score": best_value,
        "public": {
            "best_value": best_value,
            "best_length": len(best_sequence) if best_sequence is not None else None,
            "num_runs": len(results),
        },
        "private": {
            "all_values": values,
            "all_lengths": lengths,
        },
        "text_feedback": (
            "Higher evaluate_sequence value is better. "
            "combined_score = best_value."
        ),
    }


def main(program_path: str, results_dir: str, num_experiment_runs: int = 1):
    """Evaluate one program through ``run_shinka_eval`` and print a summary.

    Args:
        program_path: Path of the program to evaluate; its ``run`` function
            is named as the experiment entry point.
        results_dir: Directory handed to ``run_shinka_eval`` for its results.
        num_experiment_runs: Number of runs requested from the harness.
    """
    for banner in (
        f"Evaluating program: {program_path}",
        f"Saving results to: {results_dir}",
        f"Number of runs: {num_experiment_runs}",
    ):
        print(banner)

    metrics, succeeded, failure = run_shinka_eval(
        program_path=program_path,
        results_dir=results_dir,
        experiment_fn_name="run",
        num_runs=num_experiment_runs,
        validate_fn=validate_run_output,
        aggregate_metrics_fn=aggregate_alphaevolve_ac2_metrics,
    )

    outcome = (
        "Evaluation completed successfully."
        if succeeded
        else f"Evaluation failed: {failure}"
    )
    print(outcome)
    print(f"combined_score={metrics.get('combined_score')}")


if __name__ == "__main__":
    # Command-line entry point: every flag is optional and falls back to the
    # debug defaults listed below.
    cli = argparse.ArgumentParser(description="Evaluate AlphaEvolve AC2 task")
    cli_options = (
        ("--program_path", str, "tasks/alphaevolve_ac2/initial.py"),
        ("--results_dir", str, "tasks/alphaevolve_ac2/results/debug_eval"),
        ("--num_experiment_runs", int, 1),
    )
    for flag, value_type, fallback in cli_options:
        cli.add_argument(flag, type=value_type, default=fallback)

    options = cli.parse_args()
    main(
        program_path=options.program_path,
        results_dir=options.results_dir,
        num_experiment_runs=options.num_experiment_runs,
    )