Spaces:
Runtime error
Runtime error
| #!/usr/bin/env python3 | |
| """ | |
| Augment reasoning data with additional examples. | |
| """ | |
| import json | |
| import random | |
| import sys | |
| from pathlib import Path | |
def generate_examples(num_samples: int = 100) -> list:
    """Generate synthetic arithmetic reasoning examples.

    Each example picks a random operator (add, subtract or multiply) and two
    random operands: ``a`` in 1..100 and ``b`` in 1..20.

    Args:
        num_samples: Number of examples to generate. Zero or a negative
            value yields an empty list.

    Returns:
        A list of dicts with the keys ``id``, ``problem``, ``steps`` and
        ``final_answer``. IDs are numbered sequentially from
        ``reasoning_100``.
    """
    # Each entry is (display symbol, implementation, spoken name).
    operators = [
        ("+", lambda a, b: a + b, "add"),
        ("-", lambda a, b: a - b, "subtract"),
        ("×", lambda a, b: a * b, "multiply"),
    ]
    examples = []
    for i in range(num_samples):
        # Unpack all three fields; unpacking the 3-tuple into two names
        # raised ValueError on the first iteration. The symbol is not used
        # in the generated text.
        _symbol, op_func, op_name = random.choice(operators)
        a = random.randint(1, 100)
        b = random.randint(1, 20)
        result = str(op_func(a, b))
        examples.append(
            {
                "id": f"reasoning_{i+100:03d}",
                "problem": f"What is {a} {op_name} {b}?",
                "steps": [
                    {"step": 1, "description": f"Apply {op_name}", "result": result},
                    {"step": 2, "description": "Verify", "result": "✓"},
                    {"step": 3, "description": "Final answer", "result": result},
                ],
                "final_answer": result,
            }
        )
    return examples
def augment_data(input_file: str, output_file: str, num_new: int = 100) -> None:
    """Augment an existing JSONL dataset with newly generated examples.

    Reads every record from ``input_file`` (if it exists), generates
    ``num_new`` additional examples, and writes the existing records followed
    by the new ones to ``output_file``, one JSON object per line.

    Args:
        input_file: Path to the existing JSONL file. A missing file is
            treated as an empty dataset.
        output_file: Path of the JSONL file to write. Parent directories are
            created as needed. May be the same path as ``input_file``.
        num_new: Number of new examples to generate.

    Raises:
        json.JSONDecodeError: If a non-blank input line is not valid JSON.
    """
    input_path = Path(input_file)

    # The input is read completely before the output is opened, so using the
    # same path for both is safe.
    existing = []
    if input_path.exists():
        # Explicit UTF-8: records hold non-ASCII text (e.g. "✓") and the
        # platform default encoding is not guaranteed to handle it.
        with open(input_path, encoding="utf-8") as f:
            for line in f:
                line = line.strip()
                # Skip blank lines (e.g. a trailing newline at end of file),
                # which json.loads would reject.
                if line:
                    existing.append(json.loads(line))

    new_examples = generate_examples(num_new)
    combined = existing + new_examples

    output_path = Path(output_file)
    output_path.parent.mkdir(parents=True, exist_ok=True)
    # ensure_ascii=False emits raw Unicode, so the file must be UTF-8.
    with open(output_path, "w", encoding="utf-8") as f:
        for item in combined:
            f.write(json.dumps(item, ensure_ascii=False) + "\n")

    print(f"Saved {len(combined)} examples to {output_file}")
if __name__ == "__main__":
    # Usage: script.py [input_file] [output_file] [num_new]
    # Any omitted positional argument falls back to its default below.
    argc = len(sys.argv)

    input_file = "data/brain/reasoning_steps.jsonl"
    if argc > 1:
        input_file = sys.argv[1]

    output_file = "data/brain/reasoning_steps.jsonl"
    if argc > 2:
        output_file = sys.argv[2]

    num_new = 100
    if argc > 3:
        num_new = int(sys.argv[3])

    augment_data(input_file, output_file, num_new)