learn / benchmark /location_study.py
unfair11212's picture
Upload folder using huggingface_hub
a80f6e6 verified
# Study of how the position at which a phrase is inserted affects the model's output
from openai import OpenAI
import os
from get_response_api import *
# Multi-turn chatbot class
class QwqChatBot:
    """Multi-turn chat session over an OpenAI-compatible streaming endpoint.

    The whole conversation history (``self.messages``) is sent with every
    request. After each turn only the final answer -- not the reasoning
    trace -- is appended to the history as the assistant message.
    """

    def __init__(self, model="qwq-32b"):
        """Create the API client and start with an empty history.

        Args:
            model: Model name passed to the chat-completions endpoint.
        """
        self.client = OpenAI(
            # NOTE(review): a credential is committed in source. The
            # DASHSCOPE_API_KEY environment variable now takes precedence;
            # the literal is kept only as a fallback so existing runs keep
            # working and should be rotated and removed.
            api_key=os.environ.get(
                "DASHSCOPE_API_KEY", "sk-1a5efcc4ce48413480e1001fe84be787"
            ),
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
        )
        self.model = model
        self.messages = []

    def ask(self, user_input: str) -> dict:
        """Send one user turn and collect the streamed reply.

        Args:
            user_input: Text of the user message for this turn.

        Returns:
            A dict with three keys: ``"reasoning"`` (concatenated
            ``reasoning_content`` deltas), ``"answer"`` (concatenated
            ``content`` deltas) and ``"full_output"``
            (``reasoning + "</think>" + answer``; the separator is present
            even when the answer is empty).

        Raises:
            Exception: Whatever the client raises while creating or consuming
                the stream. The user message appended for this turn is
                removed from the history before re-raising, so a failed turn
                does not leave two consecutive user messages behind.
        """
        self.messages.append({"role": "user", "content": user_input})
        reasoning_parts = []
        answer_parts = []
        try:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=self.messages,
                stream=True,
            )
            for chunk in completion:
                # Some stream chunks carry no choices; skip them.
                if not chunk.choices:
                    continue
                delta = chunk.choices[0].delta
                reasoning_piece = getattr(delta, "reasoning_content", None)
                if reasoning_piece:
                    reasoning_parts.append(reasoning_piece)
                    continue
                answer_piece = getattr(delta, "content", None)
                if answer_piece:
                    answer_parts.append(answer_piece)
        except Exception:
            # Roll back the user turn appended above, then propagate.
            self.messages.pop()
            raise

        reasoning_content = "".join(reasoning_parts)
        answer_content = "".join(answer_parts)
        # Only the final answer is stored in the conversation history.
        self.messages.append({"role": "assistant", "content": answer_content})
        return {
            "reasoning": reasoning_content,
            "answer": answer_content,
            "full_output": reasoning_content + "</think>" + answer_content,
        }

    def reset(self):
        """Clear the conversation history."""
        self.messages = []
# QwQ-32B API call (single turn)
def get_reasoning_and_answer(prompt: str, model: str = "qwq-32b") -> str:
    """Send a single-turn prompt and return the streamed reasoning and answer.

    Args:
        prompt: User message sent as the only message of the conversation.
        model: Model name passed to the chat-completions endpoint.

    Returns:
        ``reasoning + "</think>" + answer`` when the stream produced any
        answer content, otherwise just the reasoning text.
    """
    client = OpenAI(
        # NOTE(review): a credential is committed in source. The
        # DASHSCOPE_API_KEY environment variable now takes precedence; the
        # literal is kept only as a fallback and should be rotated/removed.
        api_key=os.environ.get(
            "DASHSCOPE_API_KEY", "sk-1a5efcc4ce48413480e1001fe84be787"
        ),
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )
    stream = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )

    reasoning_parts = []
    answer_parts = []
    for chunk in stream:
        # Some stream chunks carry no choices; skip them.
        if not chunk.choices:
            continue
        delta = chunk.choices[0].delta
        # Truthiness (not ``is not None``) so that a chunk whose
        # reasoning_content is an empty string does not swallow the answer
        # content carried by the same chunk.
        reasoning_piece = getattr(delta, "reasoning_content", None)
        if reasoning_piece:
            reasoning_parts.append(reasoning_piece)
            continue
        answer_piece = getattr(delta, "content", None)
        if answer_piece:
            answer_parts.append(answer_piece)

    reasoning = "".join(reasoning_parts)
    if answer_parts:
        return reasoning + "</think>" + "".join(answer_parts)
    # The model produced no content: return the reasoning alone.
    return reasoning
# Settings for the OpenAI-compatible endpoint on the loopback interface.
# "EMPTY" is sent as a placeholder API key.
openai_api_key = "EMPTY"
openai_api_base = "http://127.0.0.1:8422/v1"
client = OpenAI(api_key=openai_api_key, base_url=openai_api_base)
host = "localhost"
port = 8422
# Local vLLM call
def get_response_template(message):
    """Query the module-level ``client`` once and return the reply text.

    The request uses the "Qwen/QwQ-32B" model with a fixed system prompt
    followed by *message* as the user turn. The reply is printed before it is
    returned.

    Args:
        message: Text sent as the user message.

    Returns:
        The ``content`` of the first choice of the chat completion.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": message},
    ]
    chat_response = client.chat.completions.create(
        model="Qwen/QwQ-32B",
        messages=conversation,
    )
    reply_text = chat_response.choices[0].message.content
    print("Chat response:", reply_text)
    return reply_text
# jbb_dataset = load_dataset("JailbreakBench/JBB-Behaviors", "behaviors")
# harmful_split = jbb_dataset["harmful"]
# Root directory under which every run gets its own result folder.
root_dir = "/home/tianqiu/learn/benchmark/result/"
import os
import json
import datetime

# Each run writes into "<root_dir>/<timestamp>_GSM8K_54_qwen3_wo_reasoning".
result_dir = datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
result_dir += "_GSM8K_54_qwen3_wo_reasoning"
result_dir = os.path.join(root_dir, result_dir)
os.makedirs(result_dir, exist_ok=True)
import random

# Load the phrase table. The context manager closes the file handle that a
# bare json.load(open(...)) would leave open, and the explicit encoding keeps
# the read independent of the platform's default locale.
with open("learn/benchmark/words_trigger.json", "r", encoding="utf-8") as phrase_file:
    phrase_dict = json.load(phrase_file)
def insert_random_phrase(message: str, phrase_dict: dict, flag=True) -> tuple:
    """Insert one randomly chosen phrase at a random word position of *message*.

    Args:
        message: Text to perturb. It is split on whitespace and re-joined with
            single spaces, so runs of whitespace are collapsed whenever a
            phrase is inserted.
        phrase_dict: Mapping whose values are candidate phrases. Entries whose
            key starts with ``"__"`` are treated as comment entries and
            ignored.
        flag: When false, nothing is inserted.

    Returns:
        A ``(text, phrase)`` tuple. ``phrase`` is the inserted phrase, or
        ``""`` when nothing was inserted (``flag`` is false or there is no
        usable phrase), in which case ``text`` is *message* unchanged.
        Previously those two paths returned a bare string, which broke
        callers that unpack two values.
    """
    if not flag:
        return message, ""

    # Drop the comment entries.
    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
    if not valid_phrases:
        return message, ""

    phrase = random.choice(valid_phrases)

    tokens = message.split()
    if not tokens:
        # No words to insert between: just prefix the phrase.
        return phrase + " " + message, phrase

    # randint is inclusive on both ends, so the phrase may also be appended
    # after the last word.
    insert_pos = random.randint(0, len(tokens))
    tokens.insert(insert_pos, phrase)
    return " ".join(tokens), phrase
def certain_message(phrase_dict: dict):
    """Build the fixed kittens question with a random phrase at a fixed spot.

    The phrase is placed between "thrice the" and "number of adopted kittens".

    Args:
        phrase_dict: Mapping whose values are candidate phrases. Entries whose
            key starts with ``"__"`` are ignored.

    Returns:
        A ``(message, phrase)`` tuple. When there is no usable phrase the
        unmodified question is returned with ``""`` as the phrase; the
        original code hit an unassigned ``message`` on that path.
    """
    question_head = "The Doubtfire sisters are driving home with 7 kittens adopted from the local animal shelter when their mother calls to inform them that their two house cats have just had kittens. She says that Patchy, the first cat, has had thrice the"
    question_tail = "number of adopted kittens, while Trixie, the other cat, has had 12. How many kittens does the Doubtfire family now have?"

    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
    if not valid_phrases:
        return question_head + " " + question_tail, ""

    # Pick one phrase at random.
    phrase = random.choice(valid_phrases)
    message = question_head + " " + phrase + " " + question_tail
    return message, phrase
import random
def insert_phrase_randomly_location(phrase_dict: dict, location=0):
    """Insert a random phrase at word index *location* of the fixed question.

    Despite the name, only the phrase is random; the position is given by the
    caller.

    Args:
        phrase_dict: Mapping whose values are candidate phrases. Entries whose
            key starts with ``"__"`` are ignored.
        location: Word index passed to ``list.insert``. An index past the end
            appends the phrase; a negative index counts from the end.

    Returns:
        A ``(new_message, phrase)`` tuple. When there is no usable phrase the
        unmodified question is returned with ``""`` as the phrase; the
        original code returned an undefined name on that path.
    """
    base_message = "The Doubtfire sisters are driving home with 7 kittens adopted from the local animal shelter when their mother calls to inform them that their two house cats have just had kittens. She says that Patchy, the first cat, has had thrice the number of adopted kittens, while Trixie, the other cat, has had 12. How many kittens does the Doubtfire family now have?"

    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
    if not valid_phrases:
        return base_message, ""

    # Pick one phrase at random.
    phrase = random.choice(valid_phrases)

    # Split into words, insert at the requested index, and re-join.
    words = base_message.split()
    words.insert(location, phrase)
    new_message = " ".join(words)
    return new_message, phrase
if __name__ == "__main__":
    # Script entry point: for GSM8K test sample 54, insert a random phrase at
    # word indices 40..59 of the question and save one JSON result per index.
    from datasets import load_dataset

    ds = load_dataset("openai/gsm8k", "main")["test"]

    from transformers import AutoTokenizer, AutoModelForCausalLM

    # Only needed by the token-count statistics, which are currently disabled.
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")

    from tqdm import tqdm

    # Token-count accumulators; never updated while the statistics are disabled.
    all_origin_token = 0
    all_nonsense_token = 0

    for i in tqdm(range(40, 60)):
        j = 54
        sample = ds[j]
        # The dataset question is only printed and recorded. The perturbed
        # prompt comes from the text hard-coded inside
        # insert_phrase_randomly_location.
        message = sample["question"]
        print("Message:", message)

        # i is the word index at which the phrase is inserted.
        nonesense_Q, nonsense_phrase = insert_phrase_randomly_location(phrase_dict, i)

        # get_streaming_response is not defined in this file; presumably it
        # comes from the star import of get_response_api -- TODO confirm.
        none_sense_response = get_streaming_response(nonesense_Q, enable_thinking=False)

        print("===" * 10)

        # The unperturbed baseline is not queried, so "original_response" is a
        # single-space placeholder.
        record = {
            "question": message,
            "nonsense_Q": nonesense_Q,
            "answer": sample["answer"],
            "original_response": " ",
            "nonsense_phrase": nonsense_phrase,
            "none_sense_response": none_sense_response,
        }

        out_path = os.path.join(result_dir, f"54_{i}.json")
        with open(out_path, "w") as out_file:
            json.dump(record, out_file, indent=4)

    # Disabled: per-response token counting with the tokenizer and a
    # statistic.txt summary (totals, averages, and nonsense/original ratio).