# File size: 8,478 Bytes
#
# a80f6e6

# 专门研究插入位置对模型输出的影响
from openai import OpenAI
import os
from get_response_api import *

# Multi-turn chatbot class
class QwqChatBot:
    """Multi-turn chat session against a DashScope-hosted reasoning model.

    The conversation history lives in ``self.messages`` and is re-sent on
    every call to :meth:`ask`. Only the final answer of each turn (not the
    reasoning trace) is written back into the history.
    """

    def __init__(self, model="qwq-32b", api_key=None,
                 base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"):
        """Create the API client and start with an empty history.

        Args:
            model: Model name sent with every request.
            api_key: Explicit API key. When omitted, the ``DASHSCOPE_API_KEY``
                environment variable is used, then the legacy built-in key.
            base_url: Base URL of the OpenAI-compatible endpoint.
        """
        # SECURITY: the literal below is a credential committed to source. It
        # is kept only as a last-resort fallback so existing runs keep
        # working; it should be revoked and removed in favour of the
        # environment variable.
        resolved_key = (
            api_key
            or os.environ.get("DASHSCOPE_API_KEY")
            or "sk-1a5efcc4ce48413480e1001fe84be787"
        )
        self.client = OpenAI(api_key=resolved_key, base_url=base_url)
        self.model = model
        self.messages = []

    def ask(self, user_input: str) -> dict:
        """Send one user turn and collect the streamed reply.

        Args:
            user_input: Text of the user message appended to the history.

        Returns:
            A dict with keys ``"reasoning"`` (concatenated reasoning trace),
            ``"answer"`` (concatenated final answer) and ``"full_output"``
            (``reasoning + "</think>" + answer``).
        """
        self.messages.append({"role": "user", "content": user_input})

        completion = self.client.chat.completions.create(
            model=self.model,
            messages=self.messages,
            stream=True,
        )

        reasoning_parts = []
        answer_parts = []
        for chunk in completion:
            # Some streamed chunks carry no choices at all; nothing to read.
            if not chunk.choices:
                continue

            delta = chunk.choices[0].delta
            reasoning_piece = getattr(delta, "reasoning_content", None)
            answer_piece = getattr(delta, "content", None)

            # The two fields are checked independently so that answer text is
            # never discarded just because the same chunk also has reasoning.
            if reasoning_piece:
                reasoning_parts.append(reasoning_piece)
            if answer_piece:
                answer_parts.append(answer_piece)

        reasoning_content = "".join(reasoning_parts)
        answer_content = "".join(answer_parts)

        # Record only the final answer in the conversation history.
        self.messages.append({"role": "assistant", "content": answer_content})

        return {
            "reasoning": reasoning_content,
            "answer": answer_content,
            "full_output": reasoning_content + "</think>" + answer_content,
        }

    def reset(self):
        """Clear the conversation history."""
        self.messages = []

# QwQ 32b API call (single turn, streamed)
def get_reasoning_and_answer(prompt: str, model: str = "qwq-32b") -> str:
    """Send a single-turn prompt and return the reasoning trace plus the answer.

    Args:
        prompt: User message sent as the only message of the conversation.
        model: Model name passed to the chat-completions endpoint.

    Returns:
        ``reasoning + "</think>" + answer`` when the stream produced any answer
        text; otherwise just the reasoning text.
    """
    # SECURITY: the literal below is a credential committed to source. It is
    # kept only as a fallback so existing runs keep working; prefer setting
    # DASHSCOPE_API_KEY, and revoke/remove the embedded key.
    client = OpenAI(
        api_key=(
            os.environ.get("DASHSCOPE_API_KEY")
            or "sk-1a5efcc4ce48413480e1001fe84be787"
        ),
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1",
    )
    stream = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        stream=True,
    )

    reasoning_parts = []
    answer_parts = []
    for chunk in stream:
        # Some streamed chunks carry no choices at all; nothing to read.
        if not chunk.choices:
            continue

        delta = chunk.choices[0].delta
        reasoning_piece = getattr(delta, "reasoning_content", None)
        answer_piece = getattr(delta, "content", None)

        # Truthiness (not ``is not None``) and independent checks: an empty
        # ``reasoning_content`` string must not shadow the ``content`` that
        # arrives in the same chunk.
        if reasoning_piece:
            reasoning_parts.append(reasoning_piece)
        if answer_piece:
            answer_parts.append(answer_piece)

    reasoning = "".join(reasoning_parts)
    if answer_parts:
        return reasoning + "</think>" + "".join(answer_parts)
    # The model produced no answer content: return the reasoning only.
    return reasoning
    
    
# Connection settings for the locally served model (OpenAI-compatible API).
host = "localhost"
port = 8422
openai_api_base = "http://127.0.0.1:8422/v1"
# Placeholder key; presumably the local server does not validate it.
openai_api_key = "EMPTY"
client = OpenAI(base_url=openai_api_base, api_key=openai_api_key)
# Local vLLM call
def get_response_template(message):
    """Ask the locally served model one question and return its reply text.

    The request goes through the module-level ``client`` with a fixed system
    prompt; the reply is also printed to stdout.

    Args:
        message: Text used as the user message.

    Returns:
        The ``content`` of the first choice's message.
    """
    conversation = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": message},
    ]
    completion = client.chat.completions.create(
        model="Qwen/QwQ-32B",
        messages=conversation,
    )
    print("Chat response:", completion.choices[0].message.content)
    return completion.choices[0].message.content
    

# jbb_dataset = load_dataset("JailbreakBench/JBB-Behaviors", "behaviors")
# harmful_split = jbb_dataset["harmful"]

# Each run writes its results into a fresh, timestamped directory under root_dir.
root_dir = "/home/tianqiu/learn/benchmark/result/"
import os
import json
import datetime
result_dir = os.path.join(
    root_dir,
    datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    + "_GSM8K_54_qwen3_wo_reasoning",
)
# Created at import time; an already existing directory is not an error.
os.makedirs(result_dir, exist_ok=True)

import random

# Phrase table used by the insertion helpers below. The path is relative to
# the current working directory. A context manager is used so the file handle
# is closed as soon as the JSON has been parsed.
with open("learn/benchmark/words_trigger.json", "r") as _phrase_file:
    phrase_dict = json.load(_phrase_file)
def insert_random_phrase(message: str, phrase_dict: dict, flag=True) -> tuple:
    """Insert one randomly chosen phrase at a random word boundary of ``message``.

    Args:
        message: Text to perturb. It is split on whitespace and re-joined with
            single spaces, so runs of whitespace are collapsed in the result.
        phrase_dict: Mapping whose values are the candidate phrases. Entries
            whose key starts with ``"__"`` are treated as comments and skipped.
        flag: When false, the message is returned untouched.

    Returns:
        A ``(new_message, phrase)`` tuple. When nothing is inserted (``flag``
        is false, or there is no usable phrase) the tuple is
        ``(message, None)``, so callers can always unpack two values.
    """
    if not flag:
        return message, None

    # Drop the comment entries.
    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
    if not valid_phrases:
        return message, None

    phrase = random.choice(valid_phrases)

    tokens = message.split()
    if not tokens:
        # Empty / whitespace-only message: the phrase simply goes in front.
        return phrase + " " + message, phrase

    # randint is inclusive on both ends, so the phrase may also land after
    # the last word.
    insert_pos = random.randint(0, len(tokens))
    tokens.insert(insert_pos, phrase)

    return " ".join(tokens), phrase

def certain_message(phrase_dict: dict):
    """Build the fixed kitten question with a random phrase at a fixed spot.

    The phrase is placed between "thrice the" and "number of adopted kittens".

    Args:
        phrase_dict: Mapping whose values are the candidate phrases. Entries
            whose key starts with ``"__"`` are treated as comments and skipped.

    Returns:
        A ``(message, phrase)`` tuple. When there is no usable phrase the
        unmodified question and ``None`` are returned (previously this path
        raised ``UnboundLocalError``).
    """
    question_head = "The Doubtfire sisters are driving home with 7 kittens adopted from the local animal shelter when their mother calls to inform them that their two house cats have just had kittens. She says that Patchy, the first cat, has had thrice the "
    question_tail = " number of adopted kittens, while Trixie, the other cat, has had 12. How many kittens does the Doubtfire family now have?"

    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
    if not valid_phrases:
        # Nothing to insert: join the halves without the doubled space.
        return question_head + question_tail.lstrip(), None

    # Pick one phrase at random.
    phrase = random.choice(valid_phrases)
    message = question_head + phrase + question_tail
    return message, phrase
import random

def insert_phrase_randomly_location(phrase_dict: dict, location=0):
    """Insert a random phrase into the fixed kitten question at a given word index.

    Args:
        phrase_dict: Mapping whose values are the candidate phrases. Entries
            whose key starts with ``"__"`` are treated as comments and skipped.
        location: Word index at which the phrase is inserted. ``list.insert``
            semantics apply: an index past the end appends, and a negative
            index counts from the end.

    Returns:
        A ``(new_message, phrase)`` tuple. When there is no usable phrase the
        unmodified question and ``None`` are returned (previously this path
        referenced an undefined name).
    """
    base_message = "The Doubtfire sisters are driving home with 7 kittens adopted from the local animal shelter when their mother calls to inform them that their two house cats have just had kittens. She says that Patchy, the first cat, has had thrice the number of adopted kittens, while Trixie, the other cat, has had 12. How many kittens does the Doubtfire family now have?"

    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
    if not valid_phrases:
        return base_message, None

    # Only the phrase is random; the position comes from the caller.
    phrase = random.choice(valid_phrases)

    words = base_message.split()
    words.insert(location, phrase)

    return " ".join(words), phrase


if __name__ == "__main__":
    # Position sweep: insert a random phrase at word positions 40..59 of one
    # fixed GSM8K question and store the model's response for each position.
    from datasets import load_dataset
    ds = load_dataset("openai/gsm8k", "main")["test"]
    from transformers import AutoTokenizer, AutoModelForCausalLM
    # Only needed by the (currently disabled) token-count statistics below.
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
    from tqdm import tqdm
    # ========================================
    # statistic
    all_origin_token = 0
    all_nonsense_token = 0
    # ========================================

    # The sample under study: looked up once and reused for the dataset
    # access, the JSON record and the output file name.
    j = 54
    sample = ds[j]
    message = sample["question"]
    # NOTE(review): insert_phrase_randomly_location() works on its own
    # hard-coded copy of the question, not on `message`; presumably both are
    # the same text — confirm before changing `j`.

    for i in tqdm(range(40, 60)):
        # message = attack_sample[i]
        # message += prompt
        print("Message:", message)
        # original_response =get_response_template(message)
        # nonesense_Q,nonsense_phrase = insert_random_phrase(message, phrase_dict)
        # `i` is the word index at which the phrase is inserted.
        nonesense_Q, nonsense_phrase = insert_phrase_randomly_location(phrase_dict, i)
        # none_sense_response = get_response_template(nonesense_Q)
        # get_streaming_response is not defined in this file; presumably it
        # comes from the `get_response_api` star import.
        none_sense_response = get_streaming_response(nonesense_Q, enable_thinking=False)
        # print("Response:", response)

        # orginal_token_num= len(tokenizer(original_response, return_tensors="pt")["input_ids"][0])
        # none_sense_token_num= len(tokenizer(none_sense_response, return_tensors="pt")["input_ids"][0])
        # all_origin_token += orginal_token_num
        # all_nonsense_token += none_sense_token_num
        print("===" * 10)
        json_data = {
            "question": sample["question"],
            "nonsense_Q": nonesense_Q,
            "answer": sample["answer"],
            "original_response": " ",
            "nonsense_phrase": nonsense_phrase,
            "none_sense_response": none_sense_response,
            # "orginal_token_num": orginal_token_num,
            # "none_sense_token_num": none_sense_token_num,
        }
        # One result file per insertion position: "<sample index>_<position>.json".
        file_name = os.path.join(result_dir, f"{j}_{i}.json")
        with open(file_name, "w") as f:
            json.dump(json_data, f, indent=4)

    # with open(os.path.join(result_dir, "statistic.txt"), "w") as f:
    #     f.write(f"all_origin_token: {all_origin_token}\n")
    #     f.write(f"all_nonsense_token: {all_nonsense_token}\n")
    #     f.write(f"average_origin_token: {all_origin_token/100}\n")
    #     f.write(f"average_nonsense_token: {all_nonsense_token/100}\n")
    #     f.write(f"average_ratio: {all_nonsense_token/all_origin_token}\n")