Spaces:
Sleeping
Sleeping
| """ Agents for Task3 : Rule Checking for a function """ | |
| import json | |
| import random as _random | |
| from typing import Any, Dict, List | |
| from tasks.task3 import Task3Environment | |
| from env.schemas import Action, ActionType | |
| from data.data_loader import load_contracts, get_function_by_name | |
# ─────────────────────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────────────────────
def _parse_fn_list(result_text: str) -> List[str]:
    """Parse ``'Functions in X: f1, f2, f3'`` into ``['f1', 'f2', 'f3']``.

    Everything after the first ``": "`` is treated as a comma-separated list
    of names.  Names are stripped of surrounding whitespace and empty entries
    are dropped, so irregular spacing around commas (``"f1,f2 ,  f3"``) is
    tolerated.

    Args:
        result_text: Text returned by a function-listing action.

    Returns:
        The parsed names in their original order, or an empty list when the
        text is empty or contains no ``": "`` separator.
    """
    if not result_text:
        return []
    _, separator, listing = result_text.partition(": ")
    if not separator:
        return []
    # Split on the bare comma and strip each piece: splitting on ", " would
    # glue together names that are separated by a comma without a space.
    stripped = (piece.strip() for piece in listing.split(","))
    return [name for name in stripped if name]
# ─────────────────────────────────────────────────────────────────────────────
# Task 3 agents
# ─────────────────────────────────────────────────────────────────────────────
def oracle_t3(env: Task3Environment, seed: int, verbose: bool = False) -> Dict[str, Any]:
    """Oracle agent: submits the exact target function → intended score = 1.0.

    Resets ``env`` with ``seed``, reads the target function name straight
    from ``env.state()``, issues a property-specification lookup and a
    function listing, and then submits the target function.

    Args:
        env: Task 3 environment to drive.
        seed: Seed passed to ``env.reset``.
        verbose: When true, print the contract, the target function and the
            first 60 characters of ``extra["property_english"]``.

    Returns:
        Dict with ``seed``, ``contract``, ``target_function``,
        ``grader_score`` (1.0 / 0.3 / 0.0 derived from the submit reward) and
        ``cumulative_reward`` taken from the final observation.
    """
    first_obs = env.reset(seed=seed).observation
    target = env.state().target_function
    contract_name = first_obs.contract_name

    if verbose:
        summary = first_obs.extra.get("property_english", "")[:60]
        print(f" {contract_name}.{target}() \"{summary}\"")

    # Two browse actions precede the submission; their results are unused.
    for browse_type in (ActionType.GET_PROPERTY_SPECIFICATION,
                        ActionType.LIST_FUNCTIONS):
        env.step(Action(action_type=browse_type))

    submission = Action(
        action_type=ActionType.SUBMIT_FUNCTION,
        params={"function_name": target},
    )
    outcome = env.step(submission)

    # Map the raw submit reward onto the three grader score levels.
    reward = outcome.reward.value
    if reward >= 4.9:
        grade = 1.0
    elif reward >= 1.0:
        grade = 0.3
    else:
        grade = 0.0

    return {
        "seed": seed,
        "contract": contract_name,
        "target_function": target,
        "grader_score": grade,
        "cumulative_reward": outcome.observation.cumulative_reward,
    }
def subfunction_t3(env: Task3Environment, seed: int) -> Dict[str, Any]:
    """Submit the first partial-credit subfunction if one exists, else 'constructor'.

    After resetting ``env``, the loaded contracts are scanned for the one
    whose ``contract_name`` matches the observation.  The target function's
    ``task3.partial_credit_functions`` list (if present) supplies the name to
    submit; otherwise ``"constructor"`` is submitted.

    Args:
        env: Task 3 environment to drive.
        seed: Seed passed to ``env.reset``.

    Returns:
        Dict with ``seed``, ``grader_score`` (1.0 / 0.3 / 0.0 derived from the
        submit reward), ``submitted`` (the name that was sent) and
        ``cumulative_reward`` taken from the final observation.
    """
    start_obs = env.reset(seed=seed).observation

    candidates = []
    for entry in load_contracts():
        if entry["contract_name"] != start_obs.contract_name:
            continue
        target_fn = get_function_by_name(entry, env.state().target_function)
        if target_fn:
            candidates = target_fn.get("task3", {}).get("partial_credit_functions", [])
        # Only the first contract with a matching name is consulted.
        break

    if candidates:
        submit_name = candidates[0]
    else:
        submit_name = "constructor"

    outcome = env.step(
        Action(
            action_type=ActionType.SUBMIT_FUNCTION,
            params={"function_name": submit_name},
        )
    )

    # Map the raw submit reward onto the three grader score levels.
    reward = outcome.reward.value
    if reward >= 4.9:
        grade = 1.0
    elif reward >= 1.0:
        grade = 0.3
    else:
        grade = 0.0

    return {
        "seed": seed,
        "grader_score": grade,
        "submitted": submit_name,
        "cumulative_reward": outcome.observation.cumulative_reward,
    }
def random_t3(env: Task3Environment, seed: int) -> Dict[str, Any]:
    """Random agent: list functions, pick one uniformly at random, submit it.

    With N functions per contract and 1 target, expected score ≈ 1/N
    (≈ 0.20–0.25 per the original author's estimate).  A private RNG seeded
    with ``seed ^ 0xCAFE1`` keeps runs reproducible.

    Args:
        env: Task 3 environment to drive.
        seed: Seed passed to ``env.reset`` and used to derive the RNG seed.

    Returns:
        Dict with ``seed``, ``grader_score`` (1.0 / 0.3 / 0.0 derived from the
        submit reward), ``submitted`` (the chosen name) and
        ``cumulative_reward`` taken from the final observation.
    """
    picker = _random.Random(seed ^ 0xCAFE1)
    env.reset(seed=seed)

    # Step 1: fetch the function list so the pick is a real candidate;
    # fall back to 'constructor' when nothing could be parsed.
    listing = env.step(Action(action_type=ActionType.LIST_FUNCTIONS))
    listed_text = listing.observation.last_action_result or ""
    candidates = _parse_fn_list(listed_text) or ["constructor"]

    # Step 2: always perform exactly one browse action, chosen at random
    # between the property specification and the call graph.
    browse_type, browse_params = picker.choice([
        (ActionType.GET_PROPERTY_SPECIFICATION, {}),
        (ActionType.GET_CALL_GRAPH, {}),
    ])
    env.step(Action(action_type=browse_type, params=browse_params))

    # Step 3: submit a uniformly random function from the candidate list.
    chosen = picker.choice(candidates)
    outcome = env.step(
        Action(
            action_type=ActionType.SUBMIT_FUNCTION,
            params={"function_name": chosen},
        )
    )

    # Map the raw submit reward onto the three grader score levels.
    reward = outcome.reward.value
    if reward >= 4.9:
        grade = 1.0
    elif reward >= 1.0:
        grade = 0.3
    else:
        grade = 0.0

    return {
        "seed": seed,
        "grader_score": grade,
        "submitted": chosen,
        "cumulative_reward": outcome.observation.cumulative_reward,
    }
def floor_t3(env: Task3Environment, seed: int) -> Dict[str, Any]:
    """Floor agent: always submits 'constructor' → reported score = 0.0.

    The ``grader_score`` in the result is the constant 0.0; it is not derived
    from the reward returned by the environment.

    Args:
        env: Task 3 environment to drive.
        seed: Seed passed to ``env.reset``.

    Returns:
        Dict with ``seed``, ``grader_score`` (always 0.0) and
        ``cumulative_reward`` taken from the observation after the submit.
    """
    env.reset(seed=seed)
    submission = Action(
        action_type=ActionType.SUBMIT_FUNCTION,
        params={"function_name": "constructor"},
    )
    outcome = env.step(submission)
    return {
        "seed": seed,
        "grader_score": 0.0,
        "cumulative_reward": outcome.observation.cumulative_reward,
    }