import json

dataset = 'sudoku_600'
split = 'test'


def _read_jsonl(path):
    """Return the list of JSON records stored one per line in *path*."""
    with open(path, 'r', encoding='utf-8') as f:
        # Skip whitespace-only lines (e.g. a trailing blank line) instead of
        # letting json.loads fail on them.
        return [json.loads(line) for line in f if line.strip()]


def _write_jsonl(path, records, mode='w'):
    """Write *records* to *path*, one JSON document per line."""
    with open(path, mode, encoding='utf-8') as f:
        f.writelines(json.dumps(item) + '\n' for item in records)


def simplify_split(dataset, split):
    """Reduce ``{dataset}/{split}.jsonl`` to its 'prompt' and 'image' fields.

    The full original records are preserved in
    ``{dataset}/{split}_info.jsonl``; the split file itself is then
    rewritten so each record only keeps the 'prompt' and 'image' keys.

    Args:
        dataset: Directory containing the split file.
        split: Split name, i.e. the file stem of ``{split}.jsonl``.

    Returns:
        A ``(data, new_data)`` tuple: the original records and the
        simplified records, in file order.

    Raises:
        FileNotFoundError: If ``{dataset}/{split}.jsonl`` does not exist.
        KeyError: If a record lacks 'prompt' or 'image'. Nothing has been
            written when this is raised.
        FileExistsError: If ``{dataset}/{split}_info.jsonl`` already exists.
            The split file is left untouched in that case, so re-running
            the script can never replace the saved originals with
            already-simplified records.
    """
    split_path = f'{dataset}/{split}.jsonl'
    info_path = f'{dataset}/{split}_info.jsonl'

    # Read original data
    data = _read_jsonl(split_path)

    # Transform to simplified format (done before any write so a malformed
    # record aborts the run without touching the files)
    new_data = [{'prompt': d['prompt'], 'image': d['image']} for d in data]

    # Save original data to {split}_info.jsonl FIRST, using exclusive
    # creation ('x'): the backup must exist before the source is
    # overwritten, and an existing backup must never be clobbered.
    _write_jsonl(info_path, data, mode='x')

    # Save simplified data to {split}.jsonl
    _write_jsonl(split_path, new_data)

    return data, new_data


if __name__ == '__main__':
    data, new_data = simplify_split(dataset, split)