Spaces:
Sleeping
Sleeping
Final cleanup for FORGE-v4: Colab entrypoint, OpenEnv API, 10x optimization, and Judge Narrative generation
3978c05 | """Task generation utilities for FORGE-v4.""" | |
| import random | |
| from typing import Any | |
| from config import ARRAY_VALUE_RANGE, MAX_ARRAY_SIZE, MIN_ARRAY_SIZE, NUM_HIDDEN_TESTS | |
def generate_task() -> dict[str, Any]:
    """
    Build one sorting task for the agents.

    A random-length array (between MIN_ARRAY_SIZE and MAX_ARRAY_SIZE elements,
    values drawn from ARRAY_VALUE_RANGE) serves as the visible example; the
    hidden tests come from `_generate_hidden_tests(NUM_HIDDEN_TESTS)`.

    Returns a dict with:
    - prompt: natural-language task description, including the visible example
    - public_example: dict with the visible "input" and its "expected_output"
    - hidden_tests: list of test dicts kept secret from agents
    """
    length = random.randint(MIN_ARRAY_SIZE, MAX_ARRAY_SIZE)
    sample = [random.randint(*ARRAY_VALUE_RANGE) for _ in range(length)]
    sample_sorted = sorted(sample)

    # Hidden tests are drawn after the public example so the order in which
    # random numbers are consumed stays the same.
    secret_tests = _generate_hidden_tests(NUM_HIDDEN_TESTS)

    instructions = (
        "Write a Python function named `solution(arr)` that takes a list of integers "
        "and returns a new list sorted in ascending order. "
        "Do not use `arr.sort()` in-place — return a new sorted list.\n\n"
        f"Example:\n Input: {sample}\n Output: {sample_sorted}"
    )

    return {
        "prompt": instructions,
        "public_example": {"input": sample, "expected_output": sample_sorted},
        "hidden_tests": secret_tests,
    }
def _generate_hidden_tests(n: int) -> list[dict[str, Any]]:
    """
    Generate up to n hidden sorting tests, ordered from easy to hard.

    Six test recipes exist, so at most six tests are returned. When n is
    smaller than six only the first n recipes are used, which means the
    highest-weight tests are the first to be dropped. A non-positive n
    yields an empty list.

    Arrays are built only for the recipes actually used, so a small n never
    builds the MAX_ARRAY_SIZE-long array.

    Each returned test is a dict with:
    - input: the unsorted list of integers
    - expected_output: the same values in ascending order
    - weight: the difficulty weight attached to the test
    """
    if n <= 0:
        # Slicing with a negative n would return "all but the last |n|" tests
        # rather than none, so handle this case explicitly.
        return []

    def small_random() -> list[int]:
        # The size is drawn before the elements.
        return [random.randint(1, 10) for _ in range(random.randint(3, 5))]

    def non_positive() -> list[int]:
        return [random.randint(-100, 0) for _ in range(8)]

    def duplicate_heavy() -> list[int]:
        return [random.choice([1, 2, 3]) for _ in range(15)]

    def large_random() -> list[int]:
        return [random.randint(*ARRAY_VALUE_RANGE) for _ in range(MAX_ARRAY_SIZE)]

    def boundary_and_zeros() -> list[int]:
        # Boundary and zeros specifically for weak_coder_v3.
        return [0, ARRAY_VALUE_RANGE[0], ARRAY_VALUE_RANGE[1], 0, 5, -5]

    # (array builder, weight) pairs in the order the tests are emitted.
    recipes = [
        # 1. Random small arrays (Easy)
        (small_random, 1.0),
        # 2. Negatives and Duplicates (Medium)
        (non_positive, 2.5),
        (non_positive, 2.5),
        (duplicate_heavy, 2.5),
        # 3. Large arrays, Zeros, and Boundary values (Hard)
        (large_random, 4.0),
        (boundary_and_zeros, 5.5),
    ]

    tests: list[dict[str, Any]] = []
    for build, weight in recipes[:n]:
        arr = build()
        tests.append({"input": arr, "expected_output": sorted(arr), "weight": weight})
    return tests
def generate_breaker_task(original_task: dict[str, Any]) -> dict[str, Any]:
    """
    Produce the adversarial prompt and candidate test cases for the Breaker agent.

    Five candidate arrays are generated, each aimed at a typical weakness of
    a naive sort: all-identical values, all-negative values, a full-size
    array, a duplicate-heavy array, and a mixed-sign array with duplicates.

    `original_task` is accepted for interface compatibility; the body does
    not read it.

    Returns a dict with:
    - prompt: instructions addressed to the Breaker agent
    - adversarial_tests: list of dicts with "input", "expected_output" and
      "weight" (2.0 for arrays of at least MAX_ARRAY_SIZE elements, else 1.5)
    """
    # Candidates are built one after another in a fixed order so the sequence
    # of random draws stays the same.
    identical = [0] * random.randint(3, 8)

    negatives_len = random.randint(3, 8)
    negatives = [random.randint(-100, -1) for _ in range(negatives_len)]

    full_size = [random.randint(*ARRAY_VALUE_RANGE) for _ in range(MAX_ARRAY_SIZE)]

    duplicates_len = random.randint(4, 10)
    duplicates = [random.choice([1, 2, 3]) for _ in range(duplicates_len)]

    mixed_len = random.randint(4, 12)
    mixed = [random.randint(-5, 5) for _ in range(mixed_len)]

    adversarial_tests = []
    for candidate in (identical, negatives, full_size, duplicates, mixed):
        weight = 1.5 if len(candidate) < MAX_ARRAY_SIZE else 2.0
        adversarial_tests.append(
            {
                "input": candidate,
                "expected_output": sorted(candidate),
                "weight": weight,
            }
        )

    breaker_prompt = (
        "You are the Breaker agent. Generate adversarial integer arrays that are "
        "likely to expose flaws in a naive sorting implementation. "
        "Focus on edge cases: duplicates, negatives, large inputs, already-sorted, "
        "reverse-sorted, and single-element arrays."
    )

    return {"prompt": breaker_prompt, "adversarial_tests": adversarial_tests}