"""Strip the assistant chat-template header from the ``bestanswer`` field.

Reads a JSON-lines file (one JSON object per line), removes the assistant
header marker and surrounding newlines from each record's ``"bestanswer"``
value, and writes the cleaned records to a second JSON-lines file.
"""
import json

try:
    from tqdm import tqdm
except ImportError:  # tqdm is not needed for the cleaning itself
    def tqdm(iterable, *args, **kwargs):
        """Fallback that returns *iterable* unchanged when tqdm is absent."""
        return iterable

INPUT_PATH = "/home/aiscuser/fhw/data/all_instruct_with_answers.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/all_instruct_with_answers_cleaned.json"

# Marker that may wrap the answer text and must be removed as a whole.
ASSISTANT_HEADER = "<|start_header_id|>assistant<|end_header_id|>"


def clean_answer(text: str) -> str:
    """Return *text* without the assistant header and edge newlines.

    The header is removed only when it appears as a complete substring at
    the very start and/or very end of *text*.  ``str.strip(header)`` must
    not be used for this: it interprets its argument as a set of
    characters and would also delete answer text that merely consists of
    those characters (e.g. a trailing "rest" or a leading "said").
    """
    if text.startswith(ASSISTANT_HEADER):
        text = text[len(ASSISTANT_HEADER):]
    if text.endswith(ASSISTANT_HEADER):
        text = text[:-len(ASSISTANT_HEADER)]
    return text.strip("\n")


def clean_file(src_path: str, dst_path: str) -> None:
    """Copy the JSON-lines file *src_path* to *dst_path* with cleaned answers.

    Each non-blank input line is parsed as a JSON object, its
    ``"bestanswer"`` value is passed through :func:`clean_answer`, and the
    object is written back as one JSON document per line.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record has no ``"bestanswer"`` key.
    """
    # Context managers guarantee both files are flushed and closed, even if
    # a record fails to parse half-way through.
    with open(src_path, "r", encoding="utf-8") as src, \
            open(dst_path, "w", encoding="utf-8") as dst:
        # Iterate the file object directly so the input is streamed instead
        # of being loaded into memory in one piece.
        for line in src:
            if not line.strip():
                # A blank line (e.g. a trailing newline) is not a record.
                continue
            record = json.loads(line)
            record["bestanswer"] = clean_answer(record["bestanswer"])
            dst.write(json.dumps(record) + "\n")


def main() -> None:
    """Clean the default input file into the default output file."""
    clean_file(INPUT_PATH, OUTPUT_PATH)


if __name__ == "__main__":
    main()