"""Filter judged answers by response length.

Scans the data directory for every file whose name contains
``<judgemodel>_answerby_``, reads each one as JSON Lines, and records, per
answering model, the 0-based line indices whose ``"response"`` field has
between MIN_WORDS and MAX_WORDS space-separated tokens (inclusive).

The result is written as a single JSON object (model name -> list of
indices) to ``<judgemodel>_filtered_by_answer.json`` in the same directory.
"""

import argparse
import json
import os
import sys

from tqdm import tqdm
from transformers import AutoTokenizer

DATA_DIR = "/home/aiscuser/fhw/data"
# Inclusive bounds on the number of space-separated tokens in a response.
MIN_WORDS = 3
MAX_WORDS = 1800


def _has_acceptable_length(line):
    """Return True if the JSON record in *line* has a response of valid length.

    The length is ``len(response.split(" "))``, so consecutive spaces
    contribute empty tokens to the count.

    Raises:
        json.JSONDecodeError: if *line* is not valid JSON.
        KeyError: if the record has no "response" key.
    """
    word_count = len(json.loads(line)["response"].split(" "))
    return MIN_WORDS <= word_count <= MAX_WORDS


parser = argparse.ArgumentParser()
parser.add_argument('--judgemodel', type=str,help='评判模型')
args = parser.parse_args()

marker = f"{args.judgemodel}_answerby_"
# Sorted so the set of files (and the output key order) does not depend on
# the arbitrary order returned by os.listdir.
names = sorted(name for name in os.listdir(DATA_DIR) if marker in name)
if not names:
    sys.exit(f"no files containing '{marker}' found in {DATA_DIR}")

all_lines = []
modelnames = []
for name in names:
    print(name)
    # Read-only access is enough; the context manager closes the handle.
    with open(os.path.join(DATA_DIR, name), encoding="utf-8") as f:
        all_lines.append(f.readlines())
    # The model name is the last "_"-separated piece of the file stem.
    # NOTE(review): a model name that itself contains "_" is truncated here,
    # and two files yielding the same name collapse into one output key.
    modelname = name.replace(".json", "").split("_")[-1]
    print(modelname)
    modelnames.append(modelname)

line_counts = {len(lines) for lines in all_lines}
if len(line_counts) > 1:
    # zip() below stops at the shortest file; make that visible.
    print(
        f"warning: answer files differ in length {sorted(line_counts)}; "
        f"only the first {min(line_counts)} lines of each are compared",
        file=sys.stderr,
    )

# good[i] collects the accepted line indices for modelnames[i].
good = [[] for _ in names]
for t, row in enumerate(tqdm(zip(*all_lines), total=min(line_counts))):
    for kept, line in zip(good, row):
        if _has_acceptable_length(line):
            kept.append(t)

out_path = os.path.join(DATA_DIR, f"{args.judgemodel}_filtered_by_answer.json")
with open(out_path, "w", encoding="utf-8") as fw:
    fw.write(json.dumps(dict(zip(modelnames, good))) + "\n")