from transformers import AutoTokenizer
import argparse
import json
from tqdm import tqdm
import os

# Directory holding the per-model answer files and receiving the output.
DATA_DIR = "/home/aiscuser/fhw/data"

# A response counts as usable when its whitespace-split word count lies
# in [MIN_WORDS, MAX_WORDS] (inclusive) — same bounds as before (3 / 1800).
MIN_WORDS = 3
MAX_WORDS = 1800


def main():
    """For one judge model, scan every `<judge>_answerby_<model>.json` file
    and record, per answering model, the record indices whose "response"
    field has an in-range word count. Writes a single JSON object mapping
    model name -> list of good indices.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--judgemodel', type=str, help='评判模型')
    args = parser.parse_args()

    all_lines = []   # one list of raw JSONL lines per matching file
    modelnames = []  # answering-model name parsed from each file name
    for name in os.listdir(DATA_DIR):
        # Only consider answer files produced for this judge model.
        if f"{args.judgemodel}_answerby_" not in name:
            continue
        print(name)
        # 'r' is sufficient; the original opened 'r+' but never wrote.
        with open(os.path.join(DATA_DIR, name), 'r') as f:
            all_lines.append(f.readlines())
        # File-name convention: <judge>_answerby_<model>.json
        modelname = name.replace(".json", "").split("_")[-1]
        print(modelname)
        modelnames.append(modelname)

    # good[i] collects record indices whose i-th model's response is in range.
    # Generalized from the original's hard-coded five files: works for any
    # number of matching files (the original raised IndexError below five).
    good = [[] for _ in modelnames]
    # zip truncates to the shortest file, matching the original behaviour.
    for t, records in enumerate(tqdm(zip(*all_lines))):
        for i, line in enumerate(records):
            d = json.loads(line)
            n_words = len(d["response"].split(" "))
            if MIN_WORDS <= n_words <= MAX_WORDS:
                good[i].append(t)

    out_path = os.path.join(DATA_DIR, f"{args.judgemodel}_filtered_by_answer.json")
    with open(out_path, "w") as fw:
        fw.write(json.dumps(dict(zip(modelnames, good))) + "\n")


if __name__ == "__main__":
    main()