# codescripts / cleanans.py
# f541119578's picture
# Upload folder using huggingface_hub
# fdf190d verified
from tqdm import tqdm
import json
# Literal chat-template marker that may prefix a stored answer.
ASSISTANT_HEADER = "<|start_header_id|>assistant<|end_header_id|>"
INPUT_PATH = "/home/aiscuser/fhw/data/all_instruct_with_answers.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/all_instruct_with_answers_cleaned.json"


def clean_answer(answer):
    """Return *answer* without the leading assistant header and outer newlines.

    The header is removed only as a whole literal prefix. ``str.strip(chars)``
    must not be used for this: it treats ``chars`` as a set of characters and
    would also eat ordinary letters (``a``, ``d``, ``e``, ``h``, ``i``, ``n``,
    ``r``, ``s``, ``t`` ...) from both ends of the real answer text.
    """
    answer = answer.strip("\n")
    # A loop (rather than a single check) also drops a repeated header.
    while answer.startswith(ASSISTANT_HEADER):
        answer = answer[len(ASSISTANT_HEADER):].lstrip("\n")
    return answer


def clean_file(src_path=INPUT_PATH, dst_path=OUTPUT_PATH):
    """Copy a JSON-lines file, cleaning the ``"bestanswer"`` field of each record.

    Each non-blank line of *src_path* is parsed as one JSON object, its
    ``"bestanswer"`` value is passed through :func:`clean_answer`, and the
    object is written back as one JSON line to *dst_path* (which is truncated
    first).

    Returns:
        The number of records written.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
        KeyError: if a record has no ``"bestanswer"`` key.
    """
    written = 0
    # Context managers guarantee both files are flushed and closed, even if a
    # malformed line raises part-way through.
    with open(src_path, "r", encoding="utf-8") as src, \
            open(dst_path, "w", encoding="utf-8") as dst:
        # Iterate lazily instead of readlines() so large files are streamed.
        for line in src:
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            record = json.loads(line)
            record["bestanswer"] = clean_answer(record["bestanswer"])
            dst.write(json.dumps(record) + "\n")
            written += 1
    return written


if __name__ == "__main__":
    clean_file()