from __future__ import annotations import sys from pathlib import Path ROOT = Path(__file__).resolve().parents[1] if str(ROOT) not in sys.path: sys.path.insert(0, str(ROOT)) from verifier.verifier import verify test_cases = [ {"input": "5\n2 3 4 5 6\n", "output": "12", "is_visible": False}, {"input": "4\n1 3 5 7\n", "output": "0", "is_visible": False}, {"input": "6\n-2 -3 -4 -5 -6 -7\n", "output": "-12", "is_visible": False}, {"input": "3\n0 10 11\n", "output": "10", "is_visible": False}, {"input": "5\n8 8 8 8 8\n", "output": "40", "is_visible": False}, ] empirical_test_cases = [ {"input": "10\n", "output": "45", "is_visible": False}, {"input": "200\n", "output": "19900", "is_visible": False}, {"input": "1000\n", "output": "499500", "is_visible": False}, ] correct_code = """ n = int(input()) nums = list(map(int, input().split())) print(sum(x for x in nums if x % 2 == 0)) """ wrong_code = """ n = int(input()) nums = list(map(int, input().split())) print(sum(nums)) """ less_optimized_code = """ n = int(input()) nums = list(map(int, input().split())) evens = [x for x in nums if x % 2 == 0] print(sum(evens)) """ invalid_output_code = """ n = int(input()) input() print() """ timeout_code = """ while True: pass """ runtime_error_code = """ n = int(input()) nums = list(map(int, input().split())) print(nums[n]) """ safety_violation_code = """ import os print(os.listdir(".")) """ for name, code in [ ("correct", correct_code), ("wrong", wrong_code), ("less_optimized", less_optimized_code), ("invalid_output", invalid_output_code), ("timeout", timeout_code), ("runtime_error", runtime_error_code), ("safety_violation", safety_violation_code), ]: reward, info = verify(code, test_cases) print("\nCASE:", name) print("Reward:", reward) print("Pass rate:", info["pass_rate"]) print("Passed:", info["passed"], "/", info["total"]) print("Timeouts:", info["timeout_count"]) print("Runtime errors:", info["runtime_error_count"]) print("Invalid output:", info["invalid_output_count"]) print("Wrong answers:", info["wrong_answer_count"]) print("Status:", info["execution_status"]) print("Efficiency:", info.get("efficiency_score")) reward_optimal, info_optimal = verify(correct_code, test_cases) reward_less_optimal, info_less_optimal = verify(less_optimized_code, test_cases) reward_safety, info_safety = verify(safety_violation_code, test_cases) assert info_optimal["efficiency_score"] > info_less_optimal["efficiency_score"] assert info_less_optimal["complexity_signals"]["list_comprehensions"] > 0 assert info_optimal["verifier_components"]["hidden_correctness"] == 1.0 assert info_optimal["verifier_components"]["anti_cheat_compliance"] == 1.0 assert reward_safety == 0.0 assert info_safety["execution_status"] == "safety_violation" linear_code = "n = int(input()); print(sum(range(n)))" quadratic_code = "n = int(input()); print(sum(i*j for i in range(n) for j in range(n)))" _, linear_result = verify(linear_code, empirical_test_cases) _, quadratic_result = verify(quadratic_code, empirical_test_cases) assert linear_result["efficiency_score"] >= quadratic_result["efficiency_score"], ( "Empirical complexity: O(n) should score >= O(n^2)" ) small_alloc = "n = int(input()); print(n)" large_alloc = "n = int(input()); x = [0] * (n * 10000); print(len(x))" _, small_result = verify(small_alloc, empirical_test_cases) _, large_result = verify(large_alloc, empirical_test_cases) assert small_result["efficiency_score"] >= large_result["efficiency_score"], ( "Empirical complexity: small allocation should score >= large allocation" )