| | import contextlib |
| | import multiprocessing |
| | import os |
| | import subprocess |
| | import tempfile |
| |
|
| |
|
def check_correctness(candidate, reference, cpp_type, task_id, completion_id):
    """
    Evaluate a C++ completion against the reference cases for its type.

    The candidate is always compiled and run together with
    ``reference["tests"]``. For the ``"sfinae"`` and ``"concepts"`` types it
    is additionally compiled together with ``reference["invalids"]``, which
    the compiler is expected to reject. Each case goes through
    ``process_case`` and leaves a single status string behind.

    :param candidate: C++ source text of the completion under evaluation.
    :param reference: mapping providing ``"tests"`` and, for ``"sfinae"`` and
        ``"concepts"``, ``"invalids"``; both are source text appended to the
        candidate.
    :param cpp_type: ``"base"`` or ``"sfinae"`` (built with ``-std=c++17``) or
        ``"concepts"`` (built with ``-std=c++20``).
    :param task_id: task identifier, echoed back in the result.
    :param completion_id: an optional completion ID so we can match
        the results later even if execution finishes asynchronously.
    :return: dict holding ``task_id``, ``completion_id`` and, prefixed with
        ``cpp_type``, the keys ``_run_passed``, ``_run_compiled`` and
        ``_run_result``; ``"sfinae"`` and ``"concepts"`` also get
        ``_constrain_passed`` and ``_constrain_result``.
    :raises ValueError: if ``cpp_type`` is not one of the supported values.
    """
    # cpp_type -> (C++ standard, whether the "invalids" case is checked too).
    modes = {
        "base": ("c++17", False),
        "sfinae": ("c++17", True),
        "concepts": ("c++20", True),
    }
    mode = modes.get(cpp_type) if isinstance(cpp_type, str) else None
    if mode is None:
        # Reject unknown types before any helper process is started.
        raise ValueError(f"Unknown cpp_type: {cpp_type}")
    cppstd, checks_constraints = mode

    result = dict(
        task_id=task_id,
        completion_id=completion_id,
    )

    # The context manager shuts the manager's server process down on exit;
    # the statuses are copied out of the proxies while it is still alive.
    with multiprocessing.Manager() as manager:
        run_status = _collect_status(
            manager, unsafe_execute_cpp, candidate, reference["tests"], cppstd
        )
        constrain_status = None
        if checks_constraints:
            constrain_status = _collect_status(
                manager,
                invalid_compile_cpp,
                candidate,
                reference["invalids"],
                cppstd,
            )

    result |= {
        f"{cpp_type}_run_passed": run_status == "passed",
        # Only these two statuses count as "compiled"; any other status
        # (including "timed out") is reported as not compiled.
        f"{cpp_type}_run_compiled": (
            run_status == "passed"
            or run_status.startswith("failed: runtime error:")
        ),
        f"{cpp_type}_run_result": run_status,
    }
    if checks_constraints:
        result |= {
            f"{cpp_type}_constrain_passed": constrain_status == "passed",
            f"{cpp_type}_constrain_result": constrain_status,
        }

    return result


def _collect_status(manager, target, candidate, reference, cppstd):
    """Run one case through ``process_case`` and return the recorded status."""
    outcome = manager.list()
    process_case(target, candidate, reference, outcome, cppstd)
    return outcome[0]
| |
|
| |
|
def process_case(target, candidate, reference, result, cppstd, timeout=60):
    """
    Run ``target`` in a separate process and make sure ``result`` gets a status.

    :param target: callable invoked in the child process as
        ``target(candidate, reference, result, timeout, cppstd)``.
    :param candidate: passed through to ``target`` unchanged.
    :param reference: passed through to ``target`` unchanged.
    :param result: list-like object the child is expected to append a status
        to; ``"timed out"`` is appended here if it is still empty afterwards.
    :param cppstd: passed through to ``target`` unchanged.
    :param timeout: time budget in seconds handed to ``target``; the child
        process itself is given five extra seconds before it is killed.
    """
    worker = multiprocessing.Process(
        target=target,
        args=(candidate, reference, result, timeout, cppstd),
    )

    worker.start()
    worker.join(timeout=timeout + 5)
    if worker.is_alive():
        worker.kill()
        # Reap the killed child so it does not linger as a zombie process.
        worker.join()

    # Covers both a killed worker and one that exited without reporting.
    if not result:
        result.append("timed out")
| |
|
| |
|
def unsafe_execute_cpp(candidate, reference, result, timeout, cppstd):
    """
    Compile ``candidate + reference`` and run the resulting binary.

    The work happens inside a fresh temporary directory. One status string is
    appended to ``result``:

    * ``"passed"`` when the binary exits with code 0,
    * ``"failed: compilation error: ..."`` when the compiler exits non-zero,
    * ``"failed: runtime error: ..."`` when the binary exits non-zero,
    * ``"timed out"`` when the compiler or the binary exceeds ``timeout``.

    :param candidate: C++ source text under test.
    :param reference: C++ source text appended after the candidate.
    :param result: list-like object receiving the status string.
    :param timeout: per-subprocess time limit in seconds.
    :param cppstd: value for the compiler's ``-std=`` flag.
    """
    with create_tempdir():
        code = "#include <bits/stdc++.h>\n" + candidate + reference
        # Close the file before compiling so the source is fully flushed.
        with open("test.cpp", "w", encoding="utf-8") as source_file:
            source_file.write(code)

        # The compiler executable is taken from the environment.
        cpp_compiler = os.getenv("GENERICIFY_CLANG")
        try:
            compilation_result = subprocess.run(
                [cpp_compiler, f"-std={cppstd}", "test.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        if compilation_result.returncode != 0:
            err = _captured_text(compilation_result)
            result.append(f"failed: compilation error: {err}")
            return

        try:
            exec_result = subprocess.run(
                ["./a.out"], timeout=timeout, capture_output=True
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        if exec_result.returncode == 0:
            result.append("passed")
        else:
            err = _captured_text(exec_result)
            result.append(f"failed: runtime error: {err}")


def _captured_text(completed):
    """Return stderr (or stdout when stderr is empty) decoded leniently."""
    raw = completed.stderr if completed.stderr else completed.stdout
    # Undecodable bytes are replaced rather than raising, so a status is
    # always recorded instead of the worker dying without one.
    return raw.decode(errors="replace")
| |
|
| |
|
def invalid_compile_cpp(candidate, reference, result, timeout, cppstd):
    """
    Check that ``candidate + reference`` is rejected by the compiler.

    The work happens inside a fresh temporary directory. One status string is
    appended to ``result``:

    * ``"passed"`` when the compiler exits with code 1 and its output contains
      ``"note: candidate template ignored"``,
    * ``"failed: improperly constrained: ..."`` when it exits with code 1
      without that note,
    * ``"failed: compilation succeeded: ..."`` for any other exit code,
    * ``"timed out"`` when the compiler exceeds ``timeout``.

    :param candidate: C++ source text under test.
    :param reference: C++ source text appended after the candidate.
    :param result: list-like object receiving the status string.
    :param timeout: time limit in seconds for the compiler run.
    :param cppstd: value for the compiler's ``-std=`` flag.
    """
    with create_tempdir():
        code = "#include <bits/stdc++.h>\n" + candidate + reference
        # Close the file before compiling so the source is fully flushed.
        with open("invalid.cpp", "w", encoding="utf-8") as source_file:
            source_file.write(code)

        # The compiler executable is taken from the environment.
        cpp_compiler = os.getenv("GENERICIFY_CLANG")
        try:
            compilation_result = subprocess.run(
                [cpp_compiler, f"-std={cppstd}", "invalid.cpp"],
                timeout=timeout,
                capture_output=True,
            )
        except subprocess.TimeoutExpired:
            result.append("timed out")
            return

        raw = compilation_result.stderr or compilation_result.stdout
        # Lenient decoding: odd bytes in diagnostics must not crash the worker.
        err = raw.decode(errors="replace")

        if compilation_result.returncode != 1:
            result.append(f"failed: compilation succeeded: {err}")
        elif "note: candidate template ignored" in err:
            result.append("passed")
        else:
            # The f-prefix matters: the diagnostics belong in the message.
            result.append(f"failed: improperly constrained: {err}")
| |
|
| |
|
@contextlib.contextmanager
def create_tempdir():
    """
    Yield the path of a fresh temporary directory made the working directory.

    On exit the previous working directory is restored first, then the
    temporary directory is removed.
    """
    with tempfile.TemporaryDirectory() as scratch, chdir(scratch):
        yield scratch
| |
|
| |
|
@contextlib.contextmanager
def chdir(root):
    """
    Temporarily switch the working directory to ``root``.

    Passing ``"."`` leaves the working directory alone. Otherwise the previous
    working directory is restored on exit, even when the body raises.
    """
    if root != ".":
        previous = os.getcwd()
        os.chdir(root)
        try:
            yield
        finally:
            # Runs on normal exit and on any exception raised by the body.
            os.chdir(previous)
    else:
        yield
| |
|