"""Study how the insertion position of a phrase affects the model's output.

The script inserts a phrase taken from ``words_trigger.json`` at a chosen word
position of a fixed GSM8K question, queries the model, and stores one JSON
record per insertion position.
"""
from openai import OpenAI
from get_response_api import *  # noqa: F401,F403  (provides get_streaming_response)

# NOTE: the standard-library imports deliberately come AFTER the star import,
# so that a same-named export of ``get_response_api`` can never shadow them.
import datetime
import json
import os
import random
from typing import Optional, Tuple

DASHSCOPE_BASE_URL = "https://dashscope.aliyuncs.com/compatible-mode/v1"

# The question is split exactly where ``certain_message`` injects its phrase;
# head + tail is the unmodified question.
_QUESTION_HEAD = (
    "The Doubtfire sisters are driving home with 7 kittens adopted from the "
    "local animal shelter when their mother calls to inform them that their "
    "two house cats have just had kittens. She says that Patchy, the first "
    "cat, has had thrice the "
)
_QUESTION_TAIL = (
    "number of adopted kittens, while Trixie, the other cat, has had 12. "
    "How many kittens does the Doubtfire family now have?"
)
_BASE_QUESTION = _QUESTION_HEAD + _QUESTION_TAIL


def _make_dashscope_client() -> OpenAI:
    """Build an OpenAI-compatible client for the DashScope endpoint.

    The API key is read from the ``DASHSCOPE_API_KEY`` environment variable;
    secrets must never be committed to the source file.

    Raises:
        RuntimeError: if ``DASHSCOPE_API_KEY`` is unset or empty.
    """
    api_key = os.environ.get("DASHSCOPE_API_KEY")
    if not api_key:
        raise RuntimeError(
            "DASHSCOPE_API_KEY is not set; export it before calling the "
            "DashScope API."
        )
    return OpenAI(api_key=api_key, base_url=DASHSCOPE_BASE_URL)


def _collect_stream(stream) -> Tuple[str, str]:
    """Drain a streamed chat completion into ``(reasoning, answer)`` text.

    For each chunk, a non-empty ``delta.reasoning_content`` is appended to the
    reasoning; otherwise a non-empty ``delta.content`` is appended to the
    answer. Chunks without choices are skipped.
    """
    reasoning_parts = []
    answer_parts = []
    for chunk in stream:
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        reasoning_piece = getattr(delta, "reasoning_content", None)
        answer_piece = getattr(delta, "content", None)
        # Truthiness (not ``is not None``): an empty-string reasoning delta
        # must not swallow the answer content carried by the same chunk.
        if reasoning_piece:
            reasoning_parts.append(reasoning_piece)
        elif answer_piece:
            answer_parts.append(answer_piece)
    return "".join(reasoning_parts), "".join(answer_parts)


def _valid_phrases(phrase_dict: dict) -> list:
    """Return the values of *phrase_dict* whose keys do not start with ``__``."""
    return [v for k, v in phrase_dict.items() if not k.startswith("__")]


# Multi-turn chatbot class
class QwqChatBot:
    """Multi-turn chat session against a streamed DashScope model."""

    def __init__(self, model="qwq-32b"):
        self.client = _make_dashscope_client()
        self.model = model
        self.messages = []

    def ask(self, user_input: str) -> dict:
        """Send a user turn and return the model's reply.

        Returns:
            A dict with keys ``"reasoning"`` (thinking text), ``"answer"``
            (final answer) and ``"full_output"`` (reasoning followed by the
            answer).

        If the request fails, the user turn is removed from the history again
        and the exception is re-raised, so a retry does not send it twice.
        """
        self.messages.append({"role": "user", "content": user_input})
        try:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=self.messages,
                stream=True,
            )
            reasoning_content, answer_content = _collect_stream(completion)
        except Exception:
            self.messages.pop()  # roll back the turn that was never answered
            raise

        # Only the final answer (not the reasoning) goes into the history.
        self.messages.append({"role": "assistant", "content": answer_content})
        return {
            "reasoning": reasoning_content,
            "answer": answer_content,
            "full_output": reasoning_content + "" + answer_content,
        }

    def reset(self):
        """Clear the conversation history."""
        self.messages = []


# QwQ 32b API call
def get_reasoning_and_answer(prompt: str, model: str = "qwq-32b") -> str:
    """Run a single-turn streamed request and return reasoning plus answer.

    Returns the reasoning text followed directly by the answer text; when the
    model produced no answer content, only the reasoning is returned.
    """
    dashscope_client = _make_dashscope_client()
    stream = dashscope_client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )
    reasoning, answer = _collect_stream(stream)
    return reasoning + "" + answer


openai_api_key = "EMPTY"
openai_api_base = "http://127.0.0.1:8422/v1"
client = OpenAI(
    api_key=openai_api_key,
    base_url=openai_api_base,
)
host, port = "localhost", 8422


# Local vLLM call
def get_response_template(message):
    """Query the local ``Qwen/QwQ-32B`` endpoint; print and return its reply."""
    chat_response = client.chat.completions.create(
        model="Qwen/QwQ-32B",
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": message},
        ],
    )
    reply = chat_response.choices[0].message.content
    print("Chat response:", reply)
    return reply


root_dir = "/home/tianqiu/learn/benchmark/result/"
result_dir = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
result_dir += "_GSM8K_54_qwen3_wo_reasoning"
result_dir = os.path.join(root_dir, result_dir)
os.makedirs(result_dir, exist_ok=True)

with open("learn/benchmark/words_trigger.json", "r") as _phrase_file:
    phrase_dict = json.load(_phrase_file)


def insert_random_phrase(
    message: str, phrase_dict: dict, flag=True
) -> Tuple[str, Optional[str]]:
    """Insert a randomly chosen phrase at a random word boundary of *message*.

    Args:
        message: text to modify; it is split on whitespace.
        phrase_dict: phrase table; entries whose key starts with ``__`` are
            treated as comments and ignored.
        flag: when false, no insertion is performed.

    Returns:
        ``(new_message, phrase)``. When nothing is inserted (``flag`` is false
        or there is no usable phrase) the result is ``(message, None)``, so
        callers can always unpack two values.
    """
    if not flag:
        return message, None

    candidates = _valid_phrases(phrase_dict)
    if not candidates:
        return message, None

    phrase = random.choice(candidates)
    tokens = message.split()
    if not tokens:
        return phrase + " " + message, phrase

    # randint is inclusive on both ends, so the phrase may also be appended.
    tokens.insert(random.randint(0, len(tokens)), phrase)
    return " ".join(tokens), phrase


def certain_message(phrase_dict: dict):
    """Return the fixed question with a random phrase before "number of".

    Returns:
        ``(message, phrase)``; ``(unmodified question, None)`` when
        *phrase_dict* has no usable phrase.
    """
    candidates = _valid_phrases(phrase_dict)
    if not candidates:
        return _BASE_QUESTION, None

    phrase = random.choice(candidates)
    message = _QUESTION_HEAD + phrase + " " + _QUESTION_TAIL
    return message, phrase


def insert_phrase_randomly_location(phrase_dict: dict, location=0):
    """Insert a random phrase at word index *location* of the fixed question.

    Only the phrase is random; the position is exactly *location*, following
    ``list.insert`` semantics (an index past the end appends).

    Returns:
        ``(new_message, phrase)``; ``(unmodified question, None)`` when
        *phrase_dict* has no usable phrase.
    """
    candidates = _valid_phrases(phrase_dict)
    if not candidates:
        return _BASE_QUESTION, None

    phrase = random.choice(candidates)
    words = _BASE_QUESTION.split()
    words.insert(location, phrase)
    return " ".join(words), phrase


if __name__ == "__main__":
    from datasets import load_dataset
    from tqdm import tqdm

    ds = load_dataset("openai/gsm8k", "main")["test"]

    # NOTE: token-count statistics are currently disabled, so no tokenizer is
    # loaded here.
    j = 54  # index of the GSM8K test sample under study
    sample = ds[j]

    for i in tqdm(range(40, 60)):
        print("Message:", sample["question"])
        nonesense_Q, nonsense_phrase = insert_phrase_randomly_location(
            phrase_dict, i
        )
        none_sense_response = get_streaming_response(
            nonesense_Q, enable_thinking=False
        )
        print("===" * 10)

        json_data = {
            "question": sample["question"],
            "nonsense_Q": nonesense_Q,
            "answer": sample["answer"],
            "original_response": " ",
            "nonsense_phrase": nonsense_phrase,
            "none_sense_response": none_sense_response,
        }
        file_name = os.path.join(result_dir, f"54_{i}.json")
        with open(file_name, "w") as f:
            json.dump(json_data, f, indent=4)