File size: 581 Bytes
import json
# Split a JSONL dataset file in two: {split}.jsonl is rewritten to hold only
# the 'prompt' and 'image' fields of every record, while the complete original
# records are preserved in {split}_info.jsonl.
dataset = 'sudoku_600'
split = 'test'

# Both files live side by side inside the dataset directory.
source_path = f'{dataset}/{split}.jsonl'
info_path = f'{dataset}/{split}_info.jsonl'

# Read original data: one JSON object per line. Blank lines (for example a
# trailing empty line at the end of the file) are skipped, because
# json.loads raises on an empty string.
with open(source_path, 'r', encoding='utf-8') as f:
    data = [json.loads(line) for line in f if line.strip()]

# Transform to simplified format. A record missing 'prompt' or 'image' raises
# KeyError here, i.e. before any file has been modified.
new_data = [{'prompt': d['prompt'], 'image': d['image']} for d in data]

# Save original data to {split}_info.jsonl FIRST, so the full records are
# safely on disk before the source file is truncated below.
# NOTE(review): running this script a second time would overwrite the info
# file with the already-simplified records -- confirm it is only run once per
# dataset, or restore {split}.jsonl from the info file beforehand.
with open(info_path, 'w', encoding='utf-8') as f:
    f.writelines(json.dumps(item) + '\n' for item in data)

# Save simplified data to {split}.jsonl (overwrites the file read above).
with open(source_path, 'w', encoding='utf-8') as f:
    f.writelines(json.dumps(item) + '\n' for item in new_data)