import json

from tqdm import tqdm
# Chat-template marker that precedes the assistant's reply in "bestanswer".
ASSISTANT_HEADER = "<|start_header_id|>assistant<|end_header_id|>"

INPUT_PATH = "/home/aiscuser/fhw/data/all_instruct_with_answers.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/all_instruct_with_answers_cleaned.json"


def clean_answer(answer):
    """Return *answer* without the leading assistant header and edge newlines.

    The header is removed only when it appears as an exact prefix.
    ``str.strip(ASSISTANT_HEADER)`` must not be used for this: ``strip``
    treats its argument as a *set of characters* and would also eat ordinary
    letters such as ``s``, ``t``, ``a`` or ``e`` from both ends of the answer.

    Args:
        answer: Raw answer text, optionally prefixed with ASSISTANT_HEADER.

    Returns:
        The answer with the header prefix (if present) removed and any
        leading/trailing ``"\\n"`` characters stripped.
    """
    if answer.startswith(ASSISTANT_HEADER):
        answer = answer[len(ASSISTANT_HEADER):]
    return answer.strip("\n")


def main(input_path=INPUT_PATH, output_path=OUTPUT_PATH):
    """Clean the "bestanswer" field of every record in a JSON-lines file.

    Each non-blank line of *input_path* is parsed as one JSON object, its
    "bestanswer" value is passed through ``clean_answer``, and the object is
    written back as one JSON line to *output_path*.

    Args:
        input_path: Path of the JSON-lines file to read.
        output_path: Path of the JSON-lines file to create or overwrite.

    Raises:
        KeyError: If a record has no "bestanswer" key.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        OSError: If either file cannot be opened.
    """
    # Context managers guarantee both files are flushed and closed, even if a
    # malformed record raises part-way through.
    with open(input_path, "r", encoding="utf-8") as src, \
            open(output_path, "w", encoding="utf-8") as dst:
        # Iterate the handle directly so the whole file is never held in memory.
        for line in src:
            if not line.strip():
                # Blank lines (e.g. a trailing newline at EOF) carry no record.
                continue
            record = json.loads(line)
            record["bestanswer"] = clean_answer(record["bestanswer"])
            dst.write(json.dumps(record) + "\n")


if __name__ == "__main__":
    main()