Visual-Reasoning/sudoku/simplify_dataset.py
Jayce-Ping's picture
Add files using upload-large-folder tool
7cdb0ca verified
raw
history blame contribute delete
581 Bytes
import json
import os

# Dataset directory and split that are processed when this file is run as a
# script.
dataset = 'sudoku_600'
split = 'test'

# Fields copied into the simplified split file. Every other field of a record
# is kept only in the ``{split}_info.jsonl`` copy.
KEPT_FIELDS = ('prompt', 'image')


def _read_jsonl(path):
    """Return the records of the JSON Lines file at *path* as a list.

    Blank lines are skipped, so a trailing empty line does not abort the run
    with a ``json.JSONDecodeError``.
    """
    with open(path, 'r', encoding='utf-8') as f:
        return [json.loads(line) for line in f if line.strip()]


def _write_jsonl(path, records):
    """Write *records* to *path*, one JSON document per line."""
    with open(path, 'w', encoding='utf-8') as f:
        f.writelines(json.dumps(item) + '\n' for item in records)


def simplify_split(dataset_dir, split_name):
    """Reduce a split file to its prompt/image fields, keeping a full copy.

    Reads ``{dataset_dir}/{split_name}.jsonl``, saves the unmodified records
    to ``{dataset_dir}/{split_name}_info.jsonl`` and then rewrites the split
    file so each record holds only the fields in ``KEPT_FIELDS``.

    The function is safe to run more than once: when the split file is
    already simplified and an info file exists, the info file is left
    untouched instead of being overwritten with the stripped records.

    Args:
        dataset_dir: Directory that contains the split file.
        split_name: Split name, i.e. the file stem of the ``.jsonl`` file.

    Raises:
        FileNotFoundError: If the split file does not exist.
        KeyError: If a record lacks one of the ``KEPT_FIELDS``.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    split_path = f'{dataset_dir}/{split_name}.jsonl'
    info_path = f'{dataset_dir}/{split_name}_info.jsonl'

    # Read original data
    data = _read_jsonl(split_path)
    # Transform to simplified format (done before any write, so a malformed
    # record aborts the run without touching the files on disk).
    new_data = [{key: d[key] for key in KEPT_FIELDS} for d in data]

    # Equal lists mean no record carries anything beyond KEPT_FIELDS, i.e. the
    # split file was simplified by an earlier run.
    already_simplified = data == new_data
    if not (already_simplified and os.path.exists(info_path)):
        # Save original data to {split}_info.jsonl *first*: if the run dies
        # while the split file is being rewritten, the full records survive.
        _write_jsonl(info_path, data)

    # Save simplified data to {split}.jsonl
    _write_jsonl(split_path, new_data)


if __name__ == '__main__':
    simplify_split(dataset, split)