File size: 581 Bytes
7cdb0ca
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
import json

dataset = 'sudoku_600'
split = 'test'


def simplify_split(dataset_dir, split_name):
    """Reduce a JSONL split to prompt/image pairs, backing up the full records.

    Reads ``{dataset_dir}/{split_name}.jsonl``, writes the complete original
    records to ``{dataset_dir}/{split_name}_info.jsonl`` and then rewrites
    ``{dataset_dir}/{split_name}.jsonl`` so that each record keeps only its
    ``'prompt'`` and ``'image'`` keys.

    Args:
        dataset_dir: Directory that contains the split file.
        split_name: Base name of the split file, without the ``.jsonl`` suffix.

    Returns:
        The number of records processed.

    Raises:
        FileNotFoundError: If the split file does not exist.
        FileExistsError: If the ``_info`` backup already exists. This means the
            split was most likely simplified already; running again would
            replace the backup with the simplified records and lose the
            original data, so nothing is modified.
        KeyError: If a record lacks ``'prompt'`` or ``'image'``. Raised before
            any file is written.
    """
    source_path = f'{dataset_dir}/{split_name}.jsonl'
    info_path = f'{dataset_dir}/{split_name}_info.jsonl'

    # Read original data; blank lines (e.g. a trailing newline) are skipped.
    with open(source_path, 'r', encoding='utf-8') as f:
        data = [json.loads(line) for line in f if line.strip()]

    # Build the simplified records up front so a malformed record aborts the
    # run before anything on disk has been touched.
    new_data = [{'prompt': d['prompt'], 'image': d['image']} for d in data]

    # Save the original data FIRST. Mode 'x' refuses to overwrite an existing
    # backup, which makes an accidental second run fail safely.
    with open(info_path, 'x', encoding='utf-8') as f:
        f.writelines(json.dumps(item) + '\n' for item in data)

    # Only once the backup is on disk, overwrite the split with the
    # simplified records.
    with open(source_path, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(item) + '\n' for item in new_data)

    return len(data)


if __name__ == '__main__':
    simplify_split(dataset, split)