| from transformers import AutoTokenizer |
| import argparse |
| import json |
| from tqdm import tqdm |
| import os |
|
|
parser = argparse.ArgumentParser()
parser.add_argument('--judgemodel', type=str, help='评判模型')
args = parser.parse_args()

# Gather, per answering model, the raw JSONL lines of the file
# "<judgemodel>_answerby_<model>.json" living under data_dir.
data_dir = "/home/aiscuser/fhw/data"
all_lines = []   # one list of raw JSONL lines per answering model
modelnames = []  # answering-model name parsed from each matching filename

# sorted() makes the model ordering deterministic across runs;
# os.listdir order is filesystem-dependent.
for name in sorted(os.listdir(data_dir)):
    if f"{args.judgemodel}_answerby_" not in name:
        continue
    print(name)
    # 'r' instead of 'r+': the file is only read here. The context manager
    # closes the handle (the original leaked it via open(..., 'r+') with
    # no close()).
    with open(f"{data_dir}/{name}", 'r') as f:
        lines = f.readlines()
    # "<judgemodel>_answerby_<model>.json" -> "<model>"
    modelname = name.replace(".json", "").split("_")[-1]
    print(modelname)
    modelnames.append(modelname)
    all_lines.append(lines)
|
|
# Keep, per answering model, the indices t of responses whose crude
# whitespace-token count lies within [MIN_WORDS, MAX_WORDS].
MIN_WORDS = 3
MAX_WORDS = 1800

# One index list per model. Generalized from the original hard-coded
# good0..good4 lists, which raised IndexError unless exactly five
# "<judgemodel>_answerby_*" files were found.
goods = [[] for _ in modelnames]

# zip(*all_lines) walks all files in lockstep: line t of every file is
# assumed to hold the answer to question t (same contract as the original
# five-way zip). total= gives tqdm a proper progress bar length.
total = min(map(len, all_lines)) if all_lines else 0
for t, lines in enumerate(tqdm(zip(*all_lines), total=total)):
    for good, line in zip(goods, lines):
        d = json.loads(line)
        # Crude word count: split on single spaces, as in the original.
        n_words = len(d["response"].split(" "))
        if MIN_WORDS <= n_words <= MAX_WORDS:
            good.append(t)

# Write {model_name: [kept indices]} for this judge model.
# 'w' instead of 'w+': the file is only written; the context manager
# closes it (the original leaked the handle).
out_path = f"/home/aiscuser/fhw/data/{args.judgemodel}_filtered_by_answer.json"
with open(out_path, "w") as fw:
    fw.write(json.dumps(dict(zip(modelnames, goods))) + "\n")
|
|
|
|
|
|
|
|