learn/benchmark/location_study.py

Upload folder using huggingface_hub

a80f6e6 verified 10 months ago

8.48 kB

	# Study how the position at which a phrase is inserted affects the model's output
	from openai import OpenAI
	import os
	from get_response_api import *

	# Multi-turn chatbot class
	class QwqChatBot:
	    """Multi-turn chat session against an OpenAI-compatible chat endpoint.

	    The running conversation is kept in ``self.messages`` and the whole
	    history is sent on every ``ask`` call.
	    """

	    def __init__(self, model="qwq-32b"):
	        """Create the API client and start with an empty history.

	        Args:
	            model: Model name passed to every chat completion request.

	        Raises:
	            RuntimeError: If the ``DASHSCOPE_API_KEY`` environment variable
	                is missing or empty.
	        """
	        # Credentials come from the environment so that no secret is
	        # committed together with the source file.
	        api_key = os.environ.get("DASHSCOPE_API_KEY")
	        if not api_key:
	            raise RuntimeError(
	                "DASHSCOPE_API_KEY environment variable is not set"
	            )
	        self.client = OpenAI(
	            api_key=api_key,
	            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
	        )
	        self.model = model
	        self.messages = []

	    def ask(self, user_input: str) -> dict:
	        """Send one user turn and collect the streamed reply.

	        Args:
	            user_input: Text of the user message appended to the history.

	        Returns:
	            A dict with three keys: ``"reasoning"`` (concatenated
	            ``reasoning_content`` deltas), ``"answer"`` (concatenated
	            ``content`` deltas) and ``"full_output"`` (reasoning, the
	            literal ``"</think>"`` marker, then the answer).

	        Raises:
	            Exception: Whatever the client raises is re-raised after the
	                pending user turn has been removed from the history.
	        """
	        self.messages.append({"role": "user", "content": user_input})

	        reasoning_parts = []
	        answer_parts = []

	        try:
	            completion = self.client.chat.completions.create(
	                model=self.model,
	                messages=self.messages,
	                stream=True,
	            )

	            for chunk in completion:
	                # Chunks without choices carry no delta to accumulate.
	                if not chunk.choices:
	                    continue

	                delta = chunk.choices[0].delta

	                # A non-empty reasoning delta takes precedence; content is
	                # only read when the chunk carries no reasoning text.
	                reasoning_piece = getattr(delta, "reasoning_content", None)
	                if reasoning_piece:
	                    reasoning_parts.append(reasoning_piece)
	                    continue

	                answer_piece = getattr(delta, "content", None)
	                if answer_piece:
	                    answer_parts.append(answer_piece)
	        except Exception:
	            # Drop the user turn that never received a reply so the history
	            # stays a well-formed sequence of user/assistant pairs.
	            self.messages.pop()
	            raise

	        reasoning_content = "".join(reasoning_parts)
	        answer_content = "".join(answer_parts)

	        # Only the final answer (not the reasoning) goes into the history.
	        self.messages.append({"role": "assistant", "content": answer_content})

	        return {
	            "reasoning": reasoning_content,
	            "answer": answer_content,
	            "full_output": reasoning_content + "</think>" + answer_content,
	        }

	    def reset(self):
	        """Clear the conversation history."""
	        self.messages = []

	# QwQ 32B API call
	def get_reasoning_and_answer(prompt: str, model: str = "qwq-32b") -> str:
	    """Run a single-turn streamed request and return reasoning plus answer.

	    Args:
	        prompt: Text sent as the only user message.
	        model: Model name passed to the chat completion request.

	    Returns:
	        The concatenated ``reasoning_content`` deltas, followed by the
	        literal ``"</think>"`` marker and the concatenated ``content``
	        deltas. When the stream yields no content at all, only the
	        reasoning text is returned (without the marker).

	    Raises:
	        RuntimeError: If the ``DASHSCOPE_API_KEY`` environment variable is
	            missing or empty.
	    """
	    # Credentials come from the environment so that no secret is committed
	    # together with the source file.
	    api_key = os.environ.get("DASHSCOPE_API_KEY")
	    if not api_key:
	        raise RuntimeError("DASHSCOPE_API_KEY environment variable is not set")

	    client = OpenAI(
	        api_key=api_key,
	        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
	    )
	    stream = client.chat.completions.create(
	        model=model,
	        messages=[{"role": "user", "content": prompt}],
	        stream=True,
	    )

	    reasoning_parts = []
	    answer_parts = []

	    for chunk in stream:
	        # Chunks without choices carry no delta to accumulate.
	        if not chunk.choices:
	            continue

	        delta = chunk.choices[0].delta

	        # Truthiness (not "is not None") is used on purpose: an empty-string
	        # reasoning delta must not hide content carried by the same chunk.
	        reasoning_piece = getattr(delta, "reasoning_content", None)
	        if reasoning_piece:
	            reasoning_parts.append(reasoning_piece)
	            continue

	        answer_piece = getattr(delta, "content", None)
	        if answer_piece:
	            answer_parts.append(answer_piece)

	    reasoning = "".join(reasoning_parts)
	    if answer_parts:
	        return reasoning + "</think>" + "".join(answer_parts)
	    # The model produced no content: return the reasoning text alone.
	    return reasoning


	# Module-level client for the endpoint served on 127.0.0.1:8422.
	openai_api_key = "EMPTY"
	openai_api_base = "http://127.0.0.1:8422/v1"
	client = OpenAI(api_key=openai_api_key, base_url=openai_api_base)
	# Host and port kept as separate module-level names.
	host = "localhost"
	port = 8422
	# Call the local vLLM server
	def get_response_template(message):
	    """Ask the module-level ``client`` a single question and return the reply.

	    The request uses the model name "Qwen/QwQ-32B", a fixed system prompt
	    and ``message`` as the only user turn. The reply text is printed
	    before it is returned.

	    Args:
	        message: Text sent as the user message.

	    Returns:
	        The ``content`` of the first choice's message.
	    """
	    conversation = [
	        {"role": "system", "content": "You are a helpful assistant."},
	        {"role": "user", "content": message},
	    ]
	    chat_response = client.chat.completions.create(
	        model="Qwen/QwQ-32B",
	        messages=conversation,
	    )
	    reply_text = chat_response.choices[0].message.content
	    print("Chat response:", reply_text)
	    return reply_text


	# jbb_dataset = load_dataset("JailbreakBench/JBB-Behaviors", "behaviors")
	# harmful_split = jbb_dataset["harmful"]

	import os
	import json
	import datetime
	import random

	# Every run writes into a fresh, timestamped directory below root_dir.
	root_dir = "/home/tianqiu/learn/benchmark/result/"
	result_dir = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
	result_dir += "_GSM8K_54_qwen3_wo_reasoning"
	result_dir = os.path.join(root_dir, result_dir)
	os.makedirs(result_dir, exist_ok=True)

	# Phrase table used by the insertion helpers. The file is opened in a
	# context manager so the handle is closed right after parsing, and decoded
	# as UTF-8 explicitly instead of relying on the locale default.
	with open("learn/benchmark/words_trigger.json", "r", encoding="utf-8") as _phrase_file:
	    phrase_dict = json.load(_phrase_file)
	def insert_random_phrase(message: str, phrase_dict: dict, flag=True) -> "tuple[str, str | None]":
	    """Insert one randomly chosen phrase at a random word boundary.

	    Args:
	        message: Text to modify. It is split on whitespace and re-joined
	            with single spaces after the insertion.
	        phrase_dict: Mapping whose values are candidate phrases. Entries
	            whose key starts with ``"__"`` are treated as comments and
	            skipped.
	        flag: When false, no insertion is performed.

	    Returns:
	        A ``(text, phrase)`` tuple on every path. ``phrase`` is the inserted
	        phrase, or ``None`` when nothing was inserted (``flag`` is false or
	        there is no usable phrase), in which case ``text`` is ``message``
	        unchanged.
	    """
	    if not flag:
	        return message, None

	    # Filter out the comment entries.
	    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
	    if not valid_phrases:
	        return message, None

	    # Pick one phrase at random.
	    phrase = random.choice(valid_phrases)

	    # Split the message into words; a message with no words just gets the
	    # phrase prepended.
	    tokens = message.split()
	    if not tokens:
	        return phrase + " " + message, phrase

	    # randint is inclusive on both ends, so the phrase can also land after
	    # the last word.
	    insert_pos = random.randint(0, len(tokens))
	    tokens.insert(insert_pos, phrase)

	    # Re-assemble the sentence.
	    return " ".join(tokens), phrase

	def certain_message(phrase_dict: dict):
	    """Build the fixed kitten question with a random phrase at a fixed spot.

	    The phrase is placed between "thrice the" and "number of adopted
	    kittens".

	    Args:
	        phrase_dict: Mapping whose values are candidate phrases. Entries
	            whose key starts with ``"__"`` are skipped.

	    Returns:
	        A ``(message, phrase)`` tuple. When there is no usable phrase the
	        unmodified question is returned together with ``None``.
	    """
	    question_head = "The Doubtfire sisters are driving home with 7 kittens adopted from the local animal shelter when their mother calls to inform them that their two house cats have just had kittens. She says that Patchy, the first cat, has had thrice the"
	    question_tail = "number of adopted kittens, while Trixie, the other cat, has had 12. How many kittens does the Doubtfire family now have?"

	    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
	    if not valid_phrases:
	        # Nothing to insert: fall back to the plain question instead of
	        # referencing a message that has not been built yet.
	        return question_head + " " + question_tail, None

	    # Pick one phrase at random.
	    phrase = random.choice(valid_phrases)
	    message = question_head + " " + phrase + " " + question_tail
	    return message, phrase
	import random

	def insert_phrase_randomly_location(phrase_dict: dict, location=0):
	    """Insert a random phrase into the fixed kitten question at ``location``.

	    Despite the name, only the phrase is random; the position is exactly
	    the word index given by ``location``.

	    Args:
	        phrase_dict: Mapping whose values are candidate phrases. Entries
	            whose key starts with ``"__"`` are skipped.
	        location: Word index passed to ``list.insert``; an index past the
	            end appends the phrase after the last word.

	    Returns:
	        A ``(new_message, phrase)`` tuple. When there is no usable phrase
	        the unmodified question is returned together with ``None``.
	    """
	    base_message = "The Doubtfire sisters are driving home with 7 kittens adopted from the local animal shelter when their mother calls to inform them that their two house cats have just had kittens. She says that Patchy, the first cat, has had thrice the number of adopted kittens, while Trixie, the other cat, has had 12. How many kittens does the Doubtfire family now have?"

	    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
	    if not valid_phrases:
	        # Nothing to insert: return the base question in the same tuple
	        # shape callers unpack, rather than an undefined name.
	        return base_message, None

	    # Pick one phrase at random.
	    phrase = random.choice(valid_phrases)

	    # Split into words and insert the phrase at the requested index.
	    words = base_message.split()
	    words.insert(location, phrase)

	    # Re-assemble the string.
	    return " ".join(words), phrase


	if __name__ == "__main__":
	    from datasets import load_dataset
	    from transformers import AutoTokenizer, AutoModelForCausalLM
	    from tqdm import tqdm

	    ds = load_dataset("openai/gsm8k", "main")["test"]
	    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")

	    # Counters for the token statistics. The statistics path (tokenizing the
	    # original and perturbed responses and writing statistic.txt) is
	    # currently disabled, so the tokenizer and these counters are not used
	    # by the active code below.
	    all_origin_token = 0
	    all_nonsense_token = 0

	    # Index of the dataset record stored with every result.
	    j = 54

	    # One request per insertion position: the loop index is the word index
	    # at which the phrase is inserted.
	    for i in tqdm(range(40, 60)):
	        record = ds[j]
	        message = record["question"]
	        print("Message:", message)

	        # The perturbed question is built from the helper's own base text,
	        # with a random phrase inserted at word index i.
	        nonesense_Q, nonsense_phrase = insert_phrase_randomly_location(phrase_dict, i)
	        none_sense_response = get_streaming_response(nonesense_Q, enable_thinking=False)

	        print("===" * 10)

	        # The unperturbed question is not queried in this run, so
	        # "original_response" is stored as a single-space placeholder.
	        json_data = {
	            "question": record["question"],
	            "nonsense_Q": nonesense_Q,
	            "answer": record["answer"],
	            "original_response": " ",
	            "nonsense_phrase": nonsense_phrase,
	            "none_sense_response": none_sense_response,
	        }

	        # One JSON file per insertion position.
	        file_name = os.path.join(result_dir, f"54_{i}.json")
	        with open(file_name, "w") as f:
	            f.write(json.dumps(json_data, indent=4))