JustinTX's picture
Add files using upload-large-folder tool
b0e88cf verified
# ===--------------------------------------------------------------------------------------===#
#
# This file implements the evaluator for the sums and differences of finite sets problem.
#
# ===--------------------------------------------------------------------------------------===#
#
# Some of the code in this file is adapted from:
#
# google-deepmind/alphaevolve_results:
# Licensed under the Apache License v2.0.
#
# ===--------------------------------------------------------------------------------------===#
import sys
import os
from importlib import __import__
import time
import numpy as np
BENCHMARK = 1.158417281556896
def verify_c6_solution(u_set: np.ndarray, c6_achieved: float):
"""Verifies the C6 lower bound solution."""
if not isinstance(u_set, np.ndarray) or u_set.ndim != 1:
raise ValueError("Solution U must be a 1D numpy array of integers.")
# Verify constraints
if 0 not in u_set:
raise ValueError("Set U must contain 0.")
if np.any(u_set < 0):
raise ValueError("Set U must contain non-negative integers.")
# Re-calculate the C6 bound using NumPy
u_plus_u = np.unique(u_set[:, None] + u_set[None, :])
u_minus_u = np.unique(u_set[:, None] - u_set[None, :])
size_U_plus_U = len(u_plus_u)
size_U_minus_U = len(u_minus_u)
max_U = np.max(u_set)
ratio = size_U_minus_U / size_U_plus_U
log_ratio = np.log(ratio)
log_denom = np.log(2 * max_U + 1)
computed_c6 = 1 + log_ratio / log_denom
# Check for consistency
if not np.isclose(computed_c6, c6_achieved):
raise ValueError(f"C6 mismatch: reported {c6_achieved:.6f}, computed {computed_c6:.6f}")
print(f"C6 lower bound achieved: {c6_achieved:.6f}")
print(f"Known best bound (AlphaEvolve): {BENCHMARK}")
if c6_achieved > BENCHMARK:
print("Successfully found a new, better lower bound!")
else:
print("Result is not better than the known lower bounds.")
def evaluate(program_path: str):
try:
abs_program_path = os.path.abspath(program_path)
program_dir = os.path.dirname(abs_program_path)
module_name = os.path.splitext(os.path.basename(program_path))[0]
try:
sys.path.insert(0, program_dir)
program = __import__(module_name)
start_time = time.time()
u_set, c6_bound = program.run()
end_time = time.time()
eval_time = end_time - start_time
finally:
if program_dir in sys.path:
sys.path.remove(program_dir)
verify_c6_solution(u_set, c6_bound)
return {
"c6_bound": float(c6_bound),
"combined_score": float(c6_bound) / BENCHMARK,
"set_size": len(u_set),
"max_val": int(np.max(u_set)),
"eval_time": float(eval_time),
}
except Exception as e:
return {"combined_score": 0.0, "error": str(e)}
if __name__ == "__main__":
# Backwards-compat: bridges old evaluate() -> dict to the container JSON
# protocol. wrapper.py is copied from skydiscover/evaluation/wrapper.py.
from wrapper import run
run(evaluate)