File size: 393 Bytes
fdf190d
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
from tqdm import tqdm
import json
# Chat-template header that prefixes each raw model answer in the input data.
ASSISTANT_HEADER = "<|start_header_id|>assistant<|end_header_id|>"
INPUT_PATH = "/home/aiscuser/fhw/data/all_instruct_with_answers.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/all_instruct_with_answers_cleaned.json"


def clean_answer(answer):
    """Return *answer* without the leading assistant header and edge newlines.

    The header is removed only when it appears as an exact prefix.
    ``str.strip(ASSISTANT_HEADER)`` must NOT be used here: ``strip`` treats its
    argument as a set of characters and would eat any of ``<|>_adehinrst``
    from both ends of the real answer text (e.g. "answer" -> "answ").

    Args:
        answer: Raw answer string, optionally starting with ASSISTANT_HEADER.

    Returns:
        The answer with the header prefix (if present) removed and leading and
        trailing newline characters stripped.
    """
    if answer.startswith(ASSISTANT_HEADER):
        answer = answer[len(ASSISTANT_HEADER):]
    return answer.strip("\n")


def clean_file(src_path, dst_path):
    """Clean the ``bestanswer`` field of every record in a JSON-lines file.

    Each non-blank line of *src_path* is parsed as one JSON object, its
    ``"bestanswer"`` value is passed through :func:`clean_answer`, and the
    object is written to *dst_path* as one JSON line.

    Args:
        src_path: Path of the JSON-lines file to read.
        dst_path: Path of the JSON-lines file to create or overwrite.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record has no ``"bestanswer"`` key.
    """
    # Context managers guarantee both files are flushed and closed, even if a
    # malformed record raises part-way through.
    with open(src_path, "r", encoding="utf-8") as src, \
            open(dst_path, "w", encoding="utf-8") as dst:
        # Stream line by line instead of loading the whole file into memory.
        for line in src:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            record = json.loads(line)
            record["bestanswer"] = clean_answer(record["bestanswer"])
            dst.write(json.dumps(record) + "\n")


if __name__ == "__main__":
    clean_file(INPUT_PATH, OUTPUT_PATH)