kishan51's picture
Add files using upload-large-folder tool
4f99f73 verified
Raw
History Blame Contribute Delete
1.84 kB
import random
from datasets import Dataset
from src.rewards import safe_eval_expression
def random_expression(numbers, rng, operators=("+", "-", "*")):
    """Combine *numbers* into one random, fully parenthesized expression.

    Two randomly chosen entries are repeatedly removed from the working list
    and replaced by ``"(<left> <op> <right>)"`` until one string remains, so
    every input number appears exactly once in the result.

    Args:
        numbers: Iterable of values; each is converted with ``str()``.
        rng: Random source providing ``sample`` and ``choice`` (for example a
            ``random.Random`` instance). All randomness is drawn from it, so
            a seeded generator yields a reproducible expression.
        operators: Sequence of operator strings to draw from. Defaults to
            ``("+", "-", "*")``, the set this function has always used.

    Returns:
        The expression as a string. A single input number is returned as its
        plain string form, without parentheses.

    Raises:
        IndexError: If ``numbers`` is empty, or if ``operators`` is empty
            while at least two numbers have to be combined.
    """
    # Normalize once so any iterable of operators works with ``rng.choice``
    # and the sequence is not rebuilt on every loop iteration.
    ops = tuple(operators)
    parts = [str(number) for number in numbers]
    while len(parts) > 1:
        # Sorting descending guarantees ``high > low``: popping the higher
        # index first leaves the lower index pointing at the same element.
        high, low = sorted(rng.sample(range(len(parts)), 2), reverse=True)
        left = parts.pop(high)
        right = parts.pop(low)
        operator = rng.choice(ops)
        parts.append(f"({left} {operator} {right})")
    return parts[0]
def build_countdown_dataset(size, seed, num_numbers=4, disable_thinking=False, *, max_attempts=None):
    """Build a dataset of unique Countdown-style arithmetic puzzles.

    Each puzzle draws ``num_numbers`` integers from 1 to 10, combines them
    into a random expression with ``random_expression``, and uses the value
    of that expression as the target. Candidates are discarded when the
    target is not an integer, when its absolute value exceeds 500, or when
    the same ``(numbers, target)`` pair was already emitted.

    Args:
        size: Number of rows to generate.
        seed: Seed for the private ``random.Random`` instance; the same
            arguments produce the same dataset.
        num_numbers: How many numbers each puzzle contains.
        disable_thinking: When true, ``" /no_think"`` is appended to the
            system message.
        max_attempts: Upper bound on generated candidates, accepted or
            rejected. ``None`` selects ``max(10_000, 1_000 * size)``. The
            bound exists because the number of distinct puzzles is finite
            (only 10 exist for ``num_numbers=1``), and without it a ``size``
            above that count would loop forever.

    Returns:
        A ``Dataset`` built with ``Dataset.from_list`` whose rows have the
        keys ``"prompt"`` (a system and a user chat message), ``"numbers"``,
        ``"target"`` and ``"solution"``.

    Raises:
        RuntimeError: If ``max_attempts`` candidates were generated before
            ``size`` unique puzzles were collected.
    """
    if max_attempts is None:
        max_attempts = max(10_000, 1_000 * size)

    rng = random.Random(seed)
    # The system message is identical for every row, so build it once.
    system_content = (
        "Solve the arithmetic task. Follow the requested answer format exactly."
        + (" /no_think" if disable_thinking else "")
    )

    rows, seen = [], set()
    attempts = 0
    while len(rows) < size:
        if attempts >= max_attempts:
            raise RuntimeError(
                f"Generated only {len(rows)} of {size} unique puzzles after "
                f"{attempts} attempts; reduce size, increase num_numbers, "
                "or raise max_attempts."
            )
        attempts += 1

        numbers = [rng.randint(1, 10) for _ in range(num_numbers)]
        solution = random_expression(numbers, rng)
        # NOTE(review): the second value returned by safe_eval_expression is
        # discarded here; confirm it is not an error indicator.
        target, _ = safe_eval_expression(solution)
        if not target.is_integer() or abs(target) > 500:
            continue

        # The key keeps the numbers in drawn order, so a reordering of the
        # same numbers counts as a different puzzle.
        key = (tuple(numbers), int(target))
        if key in seen:
            continue
        seen.add(key)

        prompt = (
            f"Use each number exactly once to make {int(target)}. Numbers: {numbers}. "
            "Allowed operations: +, -, *, /. Return only the computed arithmetic expression "
            "wrapped between the XML tags <answer> and </answer>. Do not output placeholder "
            "words such as EXPRESSION."
        )
        rows.append({
            "prompt": [
                {"role": "system", "content": system_content},
                {"role": "user", "content": prompt},
            ],
            "numbers": numbers,
            "target": int(target),
            "solution": solution,
        })
    return Dataset.from_list(rows)