Team404_FORGE / tasks.py
sanjay7676's picture
Final cleanup for FORGE-v4: Colab entrypoint, OpenEnv API, 10x optimization, and Judge Narrative generation
3978c05
"""Task generation utilities for FORGE-v4."""
import random
from typing import Any
from config import ARRAY_VALUE_RANGE, MAX_ARRAY_SIZE, MIN_ARRAY_SIZE, NUM_HIDDEN_TESTS
def generate_task() -> dict[str, Any]:
"""
Generate a single sorting task.
Returns a dict with:
- prompt: natural-language task description
- public_example: one visible (input, expected_output) pair
- hidden_tests: list of (input, expected_output) pairs kept secret from agents
"""
size = random.randint(MIN_ARRAY_SIZE, MAX_ARRAY_SIZE)
arr = [random.randint(*ARRAY_VALUE_RANGE) for _ in range(size)]
public_example = {
"input": arr,
"expected_output": sorted(arr),
}
hidden_tests = _generate_hidden_tests(NUM_HIDDEN_TESTS)
task = {
"prompt": (
"Write a Python function named `solution(arr)` that takes a list of integers "
"and returns a new list sorted in ascending order. "
"Do not use `arr.sort()` in-place — return a new sorted list.\n\n"
f"Example:\n Input: {arr}\n Output: {sorted(arr)}"
),
"public_example": public_example,
"hidden_tests": hidden_tests,
}
return task
def _generate_hidden_tests(n: int) -> list[dict[str, Any]]:
"""Generate exactly n hidden tests with difficulty weighting."""
tests: list[dict[str, Any]] = []
# 1. Random small arrays (Easy)
for _ in range(1):
size = random.randint(3, 5)
arr = [random.randint(1, 10) for _ in range(size)]
tests.append({"input": arr, "expected_output": sorted(arr), "weight": 1.0})
# 2. Negatives and Duplicates (Medium)
for _ in range(2):
arr = [random.randint(-100, 0) for _ in range(8)]
tests.append({"input": arr, "expected_output": sorted(arr), "weight": 2.5})
for _ in range(1):
arr = [random.choice([1, 2, 3]) for _ in range(15)]
tests.append({"input": arr, "expected_output": sorted(arr), "weight": 2.5})
# 3. Large arrays, Zeros, and Boundary values (Hard)
arr = [random.randint(*ARRAY_VALUE_RANGE) for _ in range(MAX_ARRAY_SIZE)]
tests.append({"input": arr, "expected_output": sorted(arr), "weight": 4.0})
# Boundary and Zeros specifically for weak_coder_v3
arr = [0, ARRAY_VALUE_RANGE[0], ARRAY_VALUE_RANGE[1], 0, 5, -5]
tests.append({"input": arr, "expected_output": sorted(arr), "weight": 5.5})
return tests[:n]
def generate_breaker_task(original_task: dict[str, Any]) -> dict[str, Any]:
"""
Given an existing task, produce adversarial test cases for the Breaker agent.
The Breaker is asked to produce arrays that are likely to break a naive solution.
Returns a dict with the adversarial prompt and a set of candidate adversarial arrays.
"""
adversarial_candidates = [
# All identical elements
[0] * random.randint(3, 8),
# All negative values
[random.randint(-100, -1) for _ in range(random.randint(3, 8))],
# Large array
[random.randint(*ARRAY_VALUE_RANGE) for _ in range(MAX_ARRAY_SIZE)],
# Duplicate-heavy array
[random.choice([1, 2, 3]) for _ in range(random.randint(4, 10))],
# Mixed positive/negative with duplicates
[random.randint(-5, 5) for _ in range(random.randint(4, 12))],
]
adversarial_tests = [
{"input": arr, "expected_output": sorted(arr), "weight": 2.0 if len(arr) >= MAX_ARRAY_SIZE else 1.5}
for arr in adversarial_candidates
]
breaker_task = {
"prompt": (
"You are the Breaker agent. Generate adversarial integer arrays that are "
"likely to expose flaws in a naive sorting implementation. "
"Focus on edge cases: duplicates, negatives, large inputs, already-sorted, "
"reverse-sorted, and single-element arrays."
),
"adversarial_tests": adversarial_tests,
}
return breaker_task