File size: 4,167 Bytes
fdf190d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
import argparse
import json
from tqdm import tqdm
import re
# Judge-output formats, tried in priority order; each captures the numeric
# score. The first format that occurs anywhere in the text wins.
_SCORE_PATTERNS = tuple(
    re.compile(pattern)
    for pattern in (
        r"\[\[(\d*\.\d+|\d+)/10\]\]",
        r"\[\[(\d*\.\d+|\d+)\]\]",
        r"\*\*Score: \[(\d*\.\d+|\d+)/10\]\*\*",
        r"\*\*Score: \[(\d*\.\d+|\d+)\]\*\*",
        r"\*\*Score: (\d*\.\d+|\d+)/10\*\*",
        r"\*\*Score: (\d*\.\d+|\d+)\*\*",
        r"\*\*Score:\*\* (\d*\.\d+|\d+)/10",
        r"\*\*Score:\*\* (\d*\.\d+|\d+)",
    )
)
# A bare decimal or integer literal, used by the last-resort fallback.
_NUMBER_RE = re.compile(r"\d*\.\d+|\d+")


def extract_score(judgement):
    """Extract the numeric score from a judge's free-text verdict.

    The known verdict formats (``[[8/10]]``, ``[[8]]``, ``**Score: [8/10]**``,
    ``**Score: 8**``, ``**Score:** 8/10`` and variants) are tried in a fixed
    priority order. For the first format that occurs, the LAST occurrence in
    the text is used. If none occurs, the first number following the last
    literal ``Score`` in the text is used as a last resort.

    Args:
        judgement: The judge's output text (``str``).

    Returns:
        The score truncated to an ``int`` (``"7.9"`` becomes ``7``), or ``-1``
        when no score could be found.
    """
    for pattern in _SCORE_PATTERNS:
        matches = pattern.findall(judgement)
        if matches:
            return int(float(matches[-1]))

    # Last resort. The previous implementation used r"Score(.*?)" here, whose
    # lazy group always captured the empty string, so this branch could never
    # yield a number. Take the FIRST number after the last "Score" so that a
    # verdict such as "Score: 8/10" yields 8 rather than the denominator 10.
    _, marker, tail = judgement.rpartition("Score")
    if marker:
        number = _NUMBER_RE.search(tail)
        if number is not None:
            return int(float(number.group()))
    return -1
# Directory holding every input file and receiving the output file.
DATA_DIR = "/home/aiscuser/fhw/data"
# A candidate answer replaces the judge's own answer only when its score is
# strictly greater than this value.
SCORE_THRESHOLD = 6


def _read_lines(path):
    """Return every line of the text file at *path*, closing it afterwards."""
    with open(path, "r") as handle:
        return handle.readlines()


def main():
    """Pick, per instruction, the best answer among the judged candidates.

    Command line:
        --judgename   name of the judge model; used to build every file name.
        --modelnames  one or more candidate model names.

    Files read from ``DATA_DIR`` (all JSON / JSON-lines):
        ``{judge}_filtered_by_answer.json``  first line is a JSON object whose
            ``judge`` key holds the list of instruction indices to process.
        ``{judge}_answerby_{judge}.json``    one record per line with
            ``instruction`` and ``response``; addressed by line number.
        ``{judge}_judge_{model}.json``       one record per line with
            ``index``, ``battle`` (judge verdict text) and ``response``.

    File written: ``{judge}_with_best_answer.json``, one JSON object per
    processed index with the instruction, the per-model score list, and the
    chosen answer together with the name of the model that produced it.

    Each judgement file is consumed with a forward-only cursor: a record is
    used only if the record at the cursor carries the current index.
    NOTE(review): this presumes the judgement files list records in the same
    order as the index list -- confirm against the producer of these files.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--judgename', type=str, required=True, help='模型路径')
    parser.add_argument('--modelnames', nargs='+', required=True)
    args = parser.parse_args()

    judge = args.judgename
    model_names = args.modelnames

    selected = json.loads(
        _read_lines(f"{DATA_DIR}/{judge}_filtered_by_answer.json")[0]
    )
    reference_lines = _read_lines(f"{DATA_DIR}/{judge}_answerby_{judge}.json")
    judgement_lines = [
        _read_lines(f"{DATA_DIR}/{judge}_judge_{name}.json")
        for name in model_names
    ]
    # One read position per candidate model's judgement file.
    cursors = [0] * len(model_names)

    with open(f"{DATA_DIR}/{judge}_with_best_answer.json", "w") as out:
        for i in tqdm(selected[judge]):
            reference = json.loads(reference_lines[i])

            scorelist = []
            records = []
            for pos, lines in enumerate(judgement_lines):
                score, record = -1, None
                # An exhausted file simply has no judgement for this index.
                if cursors[pos] < len(lines):
                    candidate = json.loads(lines[cursors[pos]])
                    if candidate["index"] == i:
                        record = candidate
                        score = extract_score(candidate["battle"])
                        cursors[pos] += 1
                scorelist.append(score)
                records.append(record)

            maxscore = max(scorelist)
            # list.index returns the first maximum, so ties go to the model
            # listed earliest on the command line.
            maxindex = scorelist.index(maxscore)

            if maxscore > SCORE_THRESHOLD:
                # A score above the threshold can only come from a matched
                # record, so records[maxindex] is not None here.
                bestname = model_names[maxindex]
                bestanswer = records[maxindex]["response"]
            else:
                bestname = judge
                bestanswer = reference["response"]

            out.write(json.dumps({"instruction": reference["instruction"], "scorelist": scorelist, "bestname": bestname, "bestanswer": bestanswer, "modelnames": model_names, "judgename": judge})+"\n")


if __name__ == "__main__":
    main()