from transformers import AutoTokenizer
from vllm import LLM, SamplingParams
import argparse
import json
from tqdm import tqdm

parser = argparse.ArgumentParser()
parser.add_argument('--model', type=str, help='model directory name under model_weights')
parser.add_argument('--judge', type=str, help='prefix of the input data file')
parser.add_argument('--split', type=str, help='split index in the input file name')
args = parser.parse_args()

# Initialize the tokenizer.
tokenizer = AutoTokenizer.from_pretrained(
    f"/home/aiscuser/fhw/model_weights/{args.model}", trust_remote_code=True
)

# Load the model with vLLM. Quantized (GPTQ or AWQ) checkpoints also work here.
llm = LLM(
    f"/home/aiscuser/fhw/model_weights/{args.model}",
    dtype='float16',
    tensor_parallel_size=8,
    trust_remote_code=True,
    enforce_eager=True,
    max_model_len=8192,
)

sampling_params = SamplingParams(temperature=1.0, top_p=0.95, max_tokens=8192)

# Prepare the prompts: one chat-formatted prompt per JSONL record.
with open(f"/home/aiscuser/fhw/data/{args.judge}_split_{args.split}.json", 'r') as f:
    lines = f.readlines()

prompts = []
for line in tqdm(lines):
    d = json.loads(line)
    messages = [{"role": "user", "content": d["instruction"]}]
    # add_generation_prompt=True appends the assistant turn header so the
    # model produces an answer instead of continuing the user message.
    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    prompts.append(text)

# Generate all completions in one batch; vLLM returns them in input order.
outputs = llm.generate(prompts=prompts, sampling_params=sampling_params)

# Attach each completion to its source record and write the results as JSONL.
with open(f"/home/aiscuser/fhw/data/{args.judge}_split_{args.split}_answerby_{args.model}.json", 'w') as fw:
    for line, output in zip(lines, outputs):
        d = json.loads(line)
        d["response"] = output.outputs[0].text
        # ensure_ascii=False keeps non-ASCII text readable in the output file.
        fw.write(json.dumps(d, ensure_ascii=False) + "\n")
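
# Example invocation (script name and argument values are hypothetical,
# shown only to illustrate how the input/output paths are assembled):
#
#   python batch_generate.py --model Qwen2-7B-Instruct --judge gpt4 --split 0
#
# reads   /home/aiscuser/fhw/data/gpt4_split_0.json
# writes  /home/aiscuser/fhw/data/gpt4_split_0_answerby_Qwen2-7B-Instruct.json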