import json

dataset = 'sudoku_600'
split = 'test'


def simplify_split(dataset_dir, split_name):
    """Strip a JSONL split down to prompt/image records, keeping a full backup.

    Reads ``{dataset_dir}/{split_name}.jsonl``, writes every original record
    to ``{dataset_dir}/{split_name}_info.jsonl``, then overwrites the split
    file with records that contain only the ``'prompt'`` and ``'image'`` keys.

    Args:
        dataset_dir: Directory that holds the split file.
        split_name: Split name, i.e. the file stem (``'test'`` -> ``test.jsonl``).

    Raises:
        FileNotFoundError: The split file does not exist.
        FileExistsError: The ``_info`` backup already exists. A second run
            would otherwise replace the full backup with already-simplified
            records, so the function refuses and leaves both files untouched.
        KeyError: A record lacks ``'prompt'`` or ``'image'``. Raised before
            anything is written.
        json.JSONDecodeError: A non-blank line is not valid JSON.
    """
    split_path = f'{dataset_dir}/{split_name}.jsonl'
    info_path = f'{dataset_dir}/{split_name}_info.jsonl'

    # Read original data. Blank lines (e.g. a trailing empty line) are skipped
    # instead of being handed to json.loads, which would reject them.
    with open(split_path, 'r', encoding='utf-8') as f:
        data = [json.loads(line) for line in f if line.strip()]

    # Transform to simplified format. Done before any file is opened for
    # writing so a malformed record cannot leave the dataset half-converted.
    new_data = [{'prompt': d['prompt'], 'image': d['image']} for d in data]

    # Save original data to {split}_info.jsonl FIRST. Mode 'x' creates the
    # file exclusively and fails if it already exists, so an existing backup
    # is never clobbered and the split file is only rewritten once the
    # originals are safely on disk.
    with open(info_path, 'x', encoding='utf-8') as f:
        f.writelines(json.dumps(item) + '\n' for item in data)

    # Save simplified data to {split}.jsonl
    with open(split_path, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(item) + '\n' for item in new_data)


if __name__ == '__main__':
    simplify_split(dataset, split)