"""Score extraction helper plus a driver that writes a "best answer" file.

``extract_score`` parses a numeric score out of a judge model's free-text
verdict.  The driver at the bottom reads judge/answer files for one judge
model and several candidate models and writes one JSON line per instruction
to ``<judgename>_with_best_answer.json``.
"""
import argparse
import json
import re

# Strict verdict formats, tried in priority order.  The first pattern that
# matches anywhere in the text wins, and its last occurrence is used.
_SCORE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\[\[(\d*\.\d+|\d+)/10\]\]",
        r"\[\[(\d*\.\d+|\d+)\]\]",
        r"\*\*Score: \[(\d*\.\d+|\d+)/10\]\*\*",
        r"\*\*Score: \[(\d*\.\d+|\d+)\]\*\*",
        r"\*\*Score: (\d*\.\d+|\d+)/10\*\*",
        r"\*\*Score: (\d*\.\d+|\d+)\*\*",
        r"\*\*Score:\*\* (\d*\.\d+|\d+)/10",
        r"\*\*Score:\*\* (\d*\.\d+|\d+)",
    )
)

# A bare integer or decimal number, used by the loose fallbacks.
_NUMBER_RE = re.compile(r"\d*\.\d+|\d+")


def extract_score(judgement):
    """Return the integer score found in a judge's verdict, or -1 if none.

    The strict formats are tried first, in this order: ``[[7/10]]``,
    ``[[7]]``, ``**Score: [7/10]**``, ``**Score: [7]**``, ``**Score: 7/10**``,
    ``**Score: 7**``, ``**Score:** 7/10`` and ``**Score:** 7``.  If a format
    occurs several times, its last occurrence is used.

    If no strict format matches, two loose fallbacks apply:

    * the text contains the (case-sensitive) word ``Score``: the first number
      after the last ``Score`` is used, so ``"Score: 7/10"`` yields 7;
    * otherwise the last number anywhere in the text is used.

    Args:
        judgement: The judge's verdict text.

    Returns:
        The score truncated towards zero to an ``int`` (``"7.5"`` gives 7),
        or -1 when no number can be found.
    """
    for pattern in _SCORE_PATTERNS:
        found = pattern.findall(judgement)
        if found:
            return int(float(found[-1]))

    # The previous fallback used the regex "Score(.*?)"; a lazy group at the
    # end of a pattern always captures "", so every verdict containing
    # "Score" ended up as -1.  Split on the last label explicitly instead.
    _, label, tail = judgement.rpartition("Score")
    if label:
        first_number = _NUMBER_RE.search(tail)
        return int(float(first_number.group())) if first_number else -1

    numbers = _NUMBER_RE.findall(judgement)
    return int(float(numbers[-1])) if numbers else -1


if __name__ == "__main__":
    # Third-party imports live under the guard so that importing this module
    # for ``extract_score`` needs neither them nor any data files.
    # AutoTokenizer, LLM and SamplingParams are not referenced by the code
    # visible here; they are kept from the original import list.
    from transformers import AutoTokenizer
    from vllm import LLM, SamplingParams
    from tqdm import tqdm

    parser = argparse.ArgumentParser()
    parser.add_argument('--judgename', type=str, help='模型路径')
    parser.add_argument('--modelnames', nargs='+')
    args = parser.parse_args()

    # Only the first line of the filter file is parsed; it is indexed by the
    # judge name below to obtain the values iterated over.
    with open(f"/home/aiscuser/fhw/data/{args.judgename}_filtered_by_answer.json", "r") as f:
        ddd = json.loads(f.readline())

    # The judge model's own answers, one JSON document per line.
    with open(f"/home/aiscuser/fhw/data/{args.judgename}_answerby_{args.judgename}.json", "r") as fr:
        linesr = fr.readlines()

    # One list of raw lines per candidate model, in --modelnames order.
    all_lines = []
    for modelname in args.modelnames:
        with open(f"/home/aiscuser/fhw/data/{args.judgename}_judge_{modelname}.json", "r") as f:
            all_lines.append(f.readlines())

    # presumably per-model read cursors into all_lines -- TODO confirm; the
    # code that advances them is not present in the visible text.
    a, b, c, d = 0, 0, 0, 0

    with open(f"/home/aiscuser/fhw/data/{args.judgename}_with_best_answer.json", "w") as fw:
        for i in tqdm(ddd[args.judgename]):
            reference = json.loads(linesr[i])
            da = json.loads(all_lines[0][a])
            # NOTE(review): part of the loop body appears to have been lost
            # from this file between the statement above and the condition
            # below.  `a6`, `db`, `dc`, `dd`, `maxindex`, `instruction` and
            # `scorelist` are not defined anywhere in the visible text, so the
            # loop raises NameError as it stands.  Restore the missing code
            # from the original script before running.
            if a6:
                bestname = args.modelnames[maxindex]
                bestanswer = [da, db, dc, dd][maxindex]["response"]
            else:
                # Fall back to the judge model's own answer.
                bestname = args.judgename
                bestanswer = reference["response"]
            # assumed to be one output line per loop iteration -- the original
            # indentation was lost, TODO confirm.
            fw.write(json.dumps({"instruction": instruction, "scorelist": scorelist, "bestname": bestname, "bestanswer": bestanswer, "modelnames": args.modelnames, "judgename": args.judgename})+"\n")