import json
import os
import re
from typing import Optional

from tqdm import tqdm

# Default locations of the scored input (one JSON object per line) and of the
# filtered output written by this script.
INPUT_PATH = "/home/aiscuser/fhw/data/llama_instruct_final.json"
OUTPUT_PATH = "/home/aiscuser/fhw/data/llama_instruct_selected.json"

# Explicit score formats, tried in this order; the first format that matches
# anywhere in the judgement wins, and its LAST occurrence supplies the score.
_SCORE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\[\[(\d*\.\d+|\d+)/10\]\]",             # [[7/10]]
        r"\[\[(\d*\.\d+|\d+)\]\]",                # [[7]]
        r"\*\*Score: \[(\d*\.\d+|\d+)/10\]\*\*",  # **Score: [7/10]**
        r"\*\*Score: \[(\d*\.\d+|\d+)\]\*\*",     # **Score: [7]**
        r"\*\*Score: (\d*\.\d+|\d+)/10\*\*",      # **Score: 7/10**
        r"\*\*Score: (\d*\.\d+|\d+)\*\*",         # **Score: 7**
        r"\*\*Score:\*\* (\d*\.\d+|\d+)/10",      # **Score:** 7/10
        r"\*\*Score:\*\* (\d*\.\d+|\d+)",         # **Score:** 7
    )
)

# A bare integer or decimal (".5", "7", "8.25"), used by the loose fallbacks.
_ANY_NUMBER = re.compile(r"\d*\.\d+|\d+")


def extract_score(judgement: str) -> Optional[float]:
    """Pull a numeric score out of a free-text quality judgement.

    Lookup order:

    1. The explicit formats in ``_SCORE_PATTERNS``, in priority order; the
       last occurrence of the first format that matches is used.
    2. The first number that follows the last occurrence of the word
       ``Score``.
    3. The first number anywhere in the text.

    Args:
        judgement: The judge's textual verdict.

    Returns:
        The score as a float, or ``None`` when the text contains no number.
    """
    for pattern in _SCORE_PATTERNS:
        found = pattern.findall(judgement)
        if found:
            return float(found[-1])

    # The previous implementation used the regex ``Score(.*?)`` here; a lazy
    # group with nothing after it always captures "", so this fallback (and
    # the one below, which then scanned that empty string) never produced a
    # score.  Slice the text after the last "Score" explicitly instead.
    _, marker, tail = judgement.rpartition("Score")
    if marker:
        after_marker = _ANY_NUMBER.search(tail)
        if after_marker:
            # First number, so that "Score: 7/10" yields 7 rather than 10.
            return float(after_marker.group())

    anywhere = _ANY_NUMBER.search(judgement)
    return float(anywhere.group()) if anywhere else None


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Copy every record with a recoverable score to the output file.

    Each non-blank input line is parsed as a JSON object, its
    ``"quality_judgement"`` text is passed to ``extract_score``, and the
    record is written back out as one JSON line with an added ``"score"``
    field.  Records for which no score can be extracted are skipped.

    Args:
        input_path: JSON-lines file to read.
        output_path: JSON-lines file to (over)write.

    Raises:
        KeyError: If a record has no ``"quality_judgement"`` key.
        json.JSONDecodeError: If a non-blank line is not valid JSON.
    """
    # ``with`` guarantees both files are flushed and closed, even on error.
    with open(input_path, encoding="utf-8") as src, \
            open(output_path, "w", encoding="utf-8") as dst:
        # Read everything up front so tqdm knows the total for its progress bar.
        lines = src.readlines()
        for line in tqdm(lines):
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing newline at EOF).
                continue
            record = json.loads(line)
            score = extract_score(record["quality_judgement"])
            if score is None:
                continue
            record["score"] = score
            dst.write(json.dumps(record) + "\n")


if __name__ == "__main__":
    main()