# ===--------------------------------------------------------------------------------------===# # # This file implements the evaluator for the sums and differences of finite sets problem. # # ===--------------------------------------------------------------------------------------===# # # Some of the code in this file is adapted from: # # google-deepmind/alphaevolve_results: # Licensed under the Apache License v2.0. # # ===--------------------------------------------------------------------------------------===# import sys import os from importlib import __import__ import time import numpy as np BENCHMARK = 1.158417281556896 def verify_c6_solution(u_set: np.ndarray, c6_achieved: float): """Verifies the C6 lower bound solution.""" if not isinstance(u_set, np.ndarray) or u_set.ndim != 1: raise ValueError("Solution U must be a 1D numpy array of integers.") # Verify constraints if 0 not in u_set: raise ValueError("Set U must contain 0.") if np.any(u_set < 0): raise ValueError("Set U must contain non-negative integers.") # Re-calculate the C6 bound using NumPy u_plus_u = np.unique(u_set[:, None] + u_set[None, :]) u_minus_u = np.unique(u_set[:, None] - u_set[None, :]) size_U_plus_U = len(u_plus_u) size_U_minus_U = len(u_minus_u) max_U = np.max(u_set) ratio = size_U_minus_U / size_U_plus_U log_ratio = np.log(ratio) log_denom = np.log(2 * max_U + 1) computed_c6 = 1 + log_ratio / log_denom # Check for consistency if not np.isclose(computed_c6, c6_achieved): raise ValueError(f"C6 mismatch: reported {c6_achieved:.6f}, computed {computed_c6:.6f}") print(f"C6 lower bound achieved: {c6_achieved:.6f}") print(f"Known best bound (AlphaEvolve): {BENCHMARK}") if c6_achieved > BENCHMARK: print("Successfully found a new, better lower bound!") else: print("Result is not better than the known lower bounds.") def evaluate(program_path: str): try: abs_program_path = os.path.abspath(program_path) program_dir = os.path.dirname(abs_program_path) module_name = os.path.splitext(os.path.basename(program_path))[0] try: sys.path.insert(0, program_dir) program = __import__(module_name) start_time = time.time() u_set, c6_bound = program.run() end_time = time.time() eval_time = end_time - start_time finally: if program_dir in sys.path: sys.path.remove(program_dir) verify_c6_solution(u_set, c6_bound) return { "c6_bound": float(c6_bound), "combined_score": float(c6_bound) / BENCHMARK, "set_size": len(u_set), "max_val": int(np.max(u_set)), "eval_time": float(eval_time), } except Exception as e: return {"combined_score": 0.0, "error": str(e)} if __name__ == "__main__": # Backwards-compat: bridges old evaluate() -> dict to the container JSON # protocol. wrapper.py is copied from skydiscover/evaluation/wrapper.py. from wrapper import run run(evaluate)