Spaces:
Running
Running
File size: 4,723 Bytes
from __future__ import annotations
import sys
from pathlib import Path
# ROOT is the directory one level above this file's own folder.  It is pushed
# to the front of sys.path (once) before the project imports that follow --
# presumably so those packages resolve when the script is launched directly.
ROOT = Path(__file__).resolve().parents[1]
_root_entry = str(ROOT)
if _root_entry not in sys.path:
    sys.path.insert(0, _root_entry)
from env.adapt_env import AdaptEnvironment, MAX_STEPS_PER_EPISODE
from env.generator import GeneratorAgent
from models import AdaptAction
def assert_hidden_tests_are_not_exposed(payload: dict) -> None:
    """Assert that a dumped observation carries no test-case markers.

    The payload is flattened with ``str()`` and scanned for substrings, so
    markers are found at any nesting depth and inside string values too.

    Args:
        payload: Dictionary to inspect (the callers pass ``model_dump()``
            output).

    Raises:
        AssertionError: If ``test_cases``, ``visible_tests`` or an
            ``is_visible`` flag set to true appears anywhere in the payload.
    """
    text = str(payload)
    assert "test_cases" not in text
    assert "visible_tests" not in text
    # str() of a dict renders keys with single quotes, so the double-quoted
    # spelling alone never matches a real dict entry.  Check the repr form
    # first, then the double-quoted forms that could appear inside an
    # embedded (JSON-like) string value.
    visible_flag_spellings = (
        "'is_visible': True",
        '"is_visible": True',
        '"is_visible": true',
    )
    for spelling in visible_flag_spellings:
        assert spelling not in text
def main() -> None:
    """Drive one ``AdaptEnvironment`` through the smoke-test scenarios.

    The scenarios run in a fixed order on a single environment instance:
    a correct solution, a two-step repair, a less optimized solution, a
    syntax/runtime/timeout sequence, and an unsafe submission.  Any unmet
    expectation raises ``AssertionError``; on success a final confirmation
    line is printed.
    """
    env = AdaptEnvironment(generator=GeneratorAgent())

    def submit(program: str):
        """Step the environment with ``program``, print and return the result."""
        outcome = env.step(AdaptAction(code=program))
        print(outcome)
        return outcome

    # Every full program submitted below starts by reading ``n`` and the list.
    read_numbers = (
        "n=int(input())\n"
        "nums=list(map(int,input().split()))\n"
    )

    # --- Scenario 1: fresh episode and a correct first submission ---------
    first_obs = env.reset(problem_id="sum_even_numbers", difficulty="easy")
    assert first_obs.problem
    assert "Examples:" in first_obs.problem
    assert first_obs.input_format
    assert first_obs.constraints
    assert first_obs.problem_type == "sum_even_numbers"
    assert first_obs.execution_status == "ready"
    assert first_obs.max_steps == MAX_STEPS_PER_EPISODE
    assert_hidden_tests_are_not_exposed(first_obs.model_dump())

    solved = submit(
        read_numbers
        + "print(sum(x for x in nums if x % 2 == 0))"
    )
    assert solved.reward == 1.0, solved.model_dump()
    assert solved.pass_rate == 1.0
    assert solved.execution_status == "completed"
    assert solved.done is True
    assert solved.reward_components["efficiency_score"] >= 0.95
    assert solved.reward_components["hidden_correctness"] == 1.0

    # --- Scenario 2: wrong answer first, then a repaired submission -------
    env.reset(problem_id="running_total", difficulty="easy")
    first_try = submit(read_numbers + "print(sum(nums))")
    assert first_try.done is False
    assert first_try.execution_status in {
        "wrong_answer",
        "runtime_error",
        "invalid_output_format",
    }
    assert "Previous attempt status: ready" in first_try.feedback

    second_try = submit(
        read_numbers
        + (
            "running=0\n"
            "out=[]\n"
            "for x in nums:\n"
            " running += x\n"
            " out.append(str(running))\n"
            "print(' '.join(out))"
        )
    )
    assert second_try.done is True
    assert second_try.pass_rate == 1.0
    assert second_try.reward == 0.85
    assert "Previous attempt status:" in second_try.feedback

    # --- Scenario 3: passing but less optimized solution ------------------
    env.reset(problem_id="sum_even_numbers", difficulty="easy")
    unoptimized = submit(
        read_numbers
        + (
            "evens=[x for x in nums if x % 2 == 0]\n"
            "print(sum(evens))"
        )
    )
    assert unoptimized.pass_rate == 1.0
    assert unoptimized.done is False
    assert unoptimized.reward < 1.0
    assert "can still be optimized further" in unoptimized.feedback
    assert unoptimized.reward_components["format_compliance"] == 1.0

    # --- Scenario 4: syntax error, runtime error, timeout (one episode) ---
    env.reset(problem_id="sum_even_numbers", difficulty="easy")
    bad_syntax = submit("def broken(:\n pass")
    assert bad_syntax.reward == 0.0
    assert bad_syntax.done is False
    assert bad_syntax.execution_status == "syntax_error"

    # ``nums[n]`` indexes one past the values the program has just read.
    crashed = submit(read_numbers + "print(nums[n])")
    assert crashed.execution_status == "runtime_error"

    timed_out = submit("while True:\n pass")
    assert timed_out.timeout_count > 0
    assert timed_out.execution_status == "timeout"
    assert timed_out.done is True

    # --- Scenario 5: submission that trips the safety check ---------------
    env.reset(problem_id="sum_even_numbers", difficulty="easy")
    blocked = submit("import os\nprint(os.listdir('.'))")
    assert blocked.reward == 0.0
    assert blocked.execution_status == "safety_violation"
    assert blocked.done is False
    assert blocked.reward_components["anti_cheat_compliance"] == 0.0

    assert env.state.history["attempts"]
    assert_hidden_tests_are_not_exposed(timed_out.model_dump())
    print("ADAPT OpenEnv smoke tests passed")
# Run the smoke tests only when this file is executed as a script, not when
# it is imported.
if __name__ == "__main__":
    main()
|