| import re | |
| import json | |
| from tqdm import tqdm | |
| import os | |
| import random | |
# Select the highest-scored samples from a scored JSON-lines file and print
# how many records fall into each integer score bucket.
#
# Each input line is parsed as one JSON object; only its "score" and
# "instruction" fields are read here. Records whose score is in [9, 10] are
# written to the output file in shuffled order, and every record whose score
# is in [1, 10] is counted in `scores` under the key str(int(score)).
INPUT_PATH = "/home/aiscuser/fhw/data/qwq_python_final.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/qwq_python_selected.json"
# Marker removed from the instruction text of every selected record.
HEADER_TOKEN = "<|start_header_id|>assistant"

# Read-only access is all that is needed; the context manager guarantees the
# handle is closed even if a later step raises.
with open(INPUT_PATH, "r", encoding="utf-8") as f:
    lines = f.readlines()
random.shuffle(lines)

scores = {str(bucket): 0 for bucket in range(1, 11)}
with open(OUTPUT_PATH, "w", encoding="utf-8") as fw:
    for line in tqdm(lines):
        # A blank line (e.g. a trailing one) carries no record; skip it
        # instead of letting json.loads raise.
        if not line.strip():
            continue
        d = json.loads(line)
        score = d["score"]
        if 9 <= score <= 10:
            if HEADER_TOKEN in d["instruction"]:
                # Re-serialize so the cleaned instruction actually reaches the
                # output; writing the raw line would discard the replacement.
                d["instruction"] = d["instruction"].replace(HEADER_TOKEN, "")
                fw.write(json.dumps(d, ensure_ascii=False) + "\n")
            else:
                # Untouched records are copied through as-is. The newline is
                # normalized because the input's last line may lack one and,
                # after shuffling, would otherwise fuse with the next record.
                fw.write(line.rstrip("\n") + "\n")
        if 1 <= score <= 10:
            # int() truncates fractional scores into their integer bucket.
            scores[str(int(score))] += 1
print(scores)