# File size: 8,478 Bytes
# a80f6e6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
# 专门研究插入位置对模型输出的影响
from openai import OpenAI
import os
from get_response_api import *

# Multi-turn chatbot class.
class QwqChatBot:
    """Stateful multi-turn chat client for a QwQ model behind an OpenAI-compatible API.

    The running conversation is stored in ``self.messages`` as a list of
    ``{"role": ..., "content": ...}`` dicts and is sent in full on every call
    to :meth:`ask`.
    """

    def __init__(self, model="qwq-32b"):
        """Create the API client and start with an empty conversation.

        Args:
            model: Model name passed to the chat-completions endpoint.
        """
        self.client = OpenAI(
            # Prefer a key supplied through the environment.
            # NOTE(security): the literal fallback is a credential committed to
            # source control; it is kept only so existing setups keep working
            # and should be rotated and removed.
            api_key=os.environ.get(
                "DASHSCOPE_API_KEY", "sk-1a5efcc4ce48413480e1001fe84be787"
            ),
            base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
        )
        self.model = model
        self.messages = []

    def ask(self, user_input: str) -> dict:
        """Send one user turn and return the streamed reply.

        Args:
            user_input: Text of the user's message.

        Returns:
            A dict with three keys: ``"reasoning"`` (concatenated
            ``reasoning_content`` pieces), ``"answer"`` (concatenated
            ``content`` pieces) and ``"full_output"`` (reasoning, the literal
            ``"</think>"`` separator, then the answer).

        Raises:
            Whatever the client raises while creating or consuming the stream.
            In that case the user turn is removed from the history again.
        """
        self.messages.append({"role": "user", "content": user_input})

        reasoning_parts = []
        answer_parts = []

        try:
            completion = self.client.chat.completions.create(
                model=self.model,
                messages=self.messages,
                stream=True
            )

            for chunk in completion:
                # Some chunks carry no choices at all; skip them.
                if not chunk.choices:
                    continue

                delta = chunk.choices[0].delta
                reasoning_piece = getattr(delta, "reasoning_content", None)
                answer_piece = getattr(delta, "content", None)

                # A delta contributes to exactly one of the two buffers,
                # reasoning taking precedence.
                if reasoning_piece:
                    reasoning_parts.append(reasoning_piece)
                elif answer_piece:
                    answer_parts.append(answer_piece)
        except BaseException:
            # Roll back the user turn so a failed or interrupted request does
            # not leave a question without an answer in the history.
            self.messages.pop()
            raise

        reasoning_content = "".join(reasoning_parts)
        answer_content = "".join(answer_parts)

        # Only the final answer (not the reasoning) is added to the history.
        self.messages.append({"role": "assistant", "content": answer_content})

        return {
            "reasoning": reasoning_content,
            "answer": answer_content,
            "full_output": reasoning_content + "</think>" + answer_content
        }

    def reset(self):
        """Clear the conversation history."""
        self.messages = []

# QwQ 32b API call (single turn, no conversation history).
def get_reasoning_and_answer(prompt: str, model: str = "qwq-32b") -> str:
    """Send a single prompt and return the streamed reasoning and answer as one string.

    Args:
        prompt: Text sent as the only user message.
        model: Model name passed to the chat-completions endpoint.

    Returns:
        ``reasoning + "</think>" + answer`` when the stream produced any
        answer text, otherwise just the reasoning text.
    """
    client = OpenAI(
        # Prefer a key supplied through the environment.
        # NOTE(security): the literal fallback is a credential committed to
        # source control; it is kept only so existing setups keep working and
        # should be rotated and removed.
        api_key=os.environ.get(
            "DASHSCOPE_API_KEY", "sk-1a5efcc4ce48413480e1001fe84be787"
        ),
        base_url="https://dashscope.aliyuncs.com/compatible-mode/v1"
    )
    stream = client.chat.completions.create(
        model=model,
        messages=[{"role": "user", "content": prompt}],
        stream=True
    )

    reasoning_parts = []
    answer_parts = []

    for chunk in stream:
        # Some chunks carry no choices at all; skip them.
        if not chunk.choices:
            continue

        delta = chunk.choices[0].delta

        # Truthiness (rather than "is not None") so that an empty-string
        # reasoning field cannot swallow answer text carried by the same delta.
        reasoning_piece = getattr(delta, "reasoning_content", None)
        if reasoning_piece:
            reasoning_parts.append(reasoning_piece)
            continue

        answer_piece = getattr(delta, "content", None)
        if answer_piece:
            answer_parts.append(answer_piece)

    reasoning = "".join(reasoning_parts)
    if not answer_parts:
        # The model produced no content; return the reasoning only.
        return reasoning
    return reasoning + "</think>" + "".join(answer_parts)
    
    
# Connection settings for the OpenAI-compatible server on the local machine
# (port 8422); the API key is the placeholder string "EMPTY".
host = "localhost"
port = 8422
openai_api_key = "EMPTY"
openai_api_base = "http://127.0.0.1:8422/v1"
client = OpenAI(api_key=openai_api_key, base_url=openai_api_base)
# Local vLLM call.
def get_response_template(message, model="Qwen/QwQ-32B",
                          system_prompt="You are a helpful assistant."):
    """Send one user message through the module-level ``client`` and return the reply text.

    Args:
        message: Text sent as the user message.
        model: Model name passed to the chat-completions endpoint.
        system_prompt: Content of the system message that precedes the user
            message.

    Returns:
        The ``content`` of the first choice's message. The same text is also
        printed to stdout, prefixed with ``"Chat response:"``.
    """
    chat_response = client.chat.completions.create(
        model=model,
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": message},
        ]
    )
    reply = chat_response.choices[0].message.content
    print("Chat response:", reply)
    return reply
    

# jbb_dataset = load_dataset("JailbreakBench/JBB-Behaviors", "behaviors")
# harmful_split = jbb_dataset["harmful"]

# Every run writes its results into a fresh directory under root_dir, named
# after the current local time plus a fixed experiment suffix.
import os
import json
import datetime

root_dir = "/home/tianqiu/learn/benchmark/result/"
result_dir = os.path.join(
    root_dir,
    datetime.datetime.now().strftime("%Y-%m-%d_%H-%M-%S")
    + "_GSM8K_54_qwen3_wo_reasoning",
)
os.makedirs(result_dir, exist_ok=True)

import random

# Load the phrase table from JSON. The context manager closes the file handle
# deterministically, and the encoding is pinned so the result does not depend
# on the platform's locale.
with open("learn/benchmark/words_trigger.json", "r", encoding="utf-8") as _phrase_file:
    phrase_dict = json.load(_phrase_file)
def insert_random_phrase(message: str, phrase_dict: dict, flag=True) -> tuple:
    """Insert one randomly chosen phrase at a random word boundary of ``message``.

    Args:
        message: Text to modify. It is split on whitespace and re-joined with
            single spaces, so runs of whitespace are normalised when a phrase
            is inserted.
        phrase_dict: Mapping whose values are candidate phrases. Entries whose
            key starts with ``"__"`` are treated as comments and ignored.
        flag: When false, nothing is inserted.

    Returns:
        A ``(new_message, phrase)`` tuple. When nothing is inserted (``flag``
        is false or there is no usable phrase) the tuple is ``(message, "")``,
        so callers can always unpack two values.
    """
    if not flag:
        return message, ""

    # Filter out comment entries (keys starting with "__").
    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
    if not valid_phrases:
        return message, ""

    # Pick one phrase at random.
    phrase = random.choice(valid_phrases)

    # Split the message into whitespace-separated words.
    tokens = message.split()
    if not tokens:
        # Empty or whitespace-only message: just prepend the phrase.
        return phrase + " " + message, phrase

    # Any boundary is allowed, including before the first and after the last word.
    insert_pos = random.randint(0, len(tokens))
    tokens.insert(insert_pos, phrase)

    # Re-join into a sentence.
    return " ".join(tokens), phrase

def certain_message(phrase_dict: dict):
    """Build the fixed kitten word problem with a random phrase at a fixed spot.

    The phrase is placed between "thrice the" and "number of adopted kittens".

    Args:
        phrase_dict: Mapping whose values are candidate phrases. Entries whose
            key starts with ``"__"`` are ignored.

    Returns:
        A ``(message, phrase)`` tuple. When there is no usable phrase the
        unmodified question is returned together with ``""``.
    """
    prefix = "The Doubtfire sisters are driving home with 7 kittens adopted from the local animal shelter when their mother calls to inform them that their two house cats have just had kittens. She says that Patchy, the first cat, has had thrice the"
    suffix = "number of adopted kittens, while Trixie, the other cat, has had 12. How many kittens does the Doubtfire family now have?"

    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
    if not valid_phrases:
        # Nothing to insert: fall back to the plain question instead of
        # referencing a message that has not been built yet.
        return prefix + " " + suffix, ""

    # Pick one phrase at random.
    phrase = random.choice(valid_phrases)
    message = prefix + " " + phrase + " " + suffix
    return message, phrase
import random

def insert_phrase_randomly_location(phrase_dict: dict, location=0, base_message=None):
    """Insert a randomly chosen phrase at a given word index of a question.

    Args:
        phrase_dict: Mapping whose values are candidate phrases. Entries whose
            key starts with ``"__"`` are ignored.
        location: Word index at which the phrase is inserted. It is passed
            straight to ``list.insert``, so an index past the end appends and
            a negative index counts from the end.
        base_message: Question to modify. Defaults to the built-in kitten
            word problem.

    Returns:
        A ``(new_message, phrase)`` tuple. When there is no usable phrase the
        unmodified question is returned together with ``""``.
    """
    if base_message is None:
        base_message = "The Doubtfire sisters are driving home with 7 kittens adopted from the local animal shelter when their mother calls to inform them that their two house cats have just had kittens. She says that Patchy, the first cat, has had thrice the number of adopted kittens, while Trixie, the other cat, has had 12. How many kittens does the Doubtfire family now have?"

    valid_phrases = [v for k, v in phrase_dict.items() if not k.startswith("__")]
    if not valid_phrases:
        # Nothing to insert: return the question itself rather than an
        # undefined name.
        return base_message, ""

    # Pick one phrase at random.
    phrase = random.choice(valid_phrases)

    # Split into words, insert at the requested index, and re-join.
    words = base_message.split()
    words.insert(location, phrase)
    new_message = ' '.join(words)

    return new_message, phrase


if __name__ == "__main__":
    # Experiment driver: for one fixed GSM8K test question, insert a random
    # phrase at word positions 40..59, query the model for each variant, and
    # write one JSON result file per position into result_dir.
    from datasets import load_dataset
    from transformers import AutoTokenizer, AutoModelForCausalLM
    from tqdm import tqdm

    ds = load_dataset("openai/gsm8k", "main")["test"]
    # Only the (currently disabled) token statistics below use the tokenizer.
    tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-7B-Instruct")
    # ========================================
    # statistic
    all_origin_token = 0
    all_nonsense_token = 0
    # ========================================

    # Index of the dataset sample under study. The question and answer do not
    # change between iterations, so they are looked up once.
    j = 54
    sample = ds[j]
    message = sample["question"]

    for i in tqdm(range(40, 60)):
        # message = attack_sample[i]
        # message += prompt
        print("Message:", message)
        # original_response =get_response_template(message)
        # nonesense_Q,nonsense_phrase = insert_random_phrase(message, phrase_dict)
        # i is the word index at which the phrase is inserted.
        nonesense_Q, nonsense_phrase = insert_phrase_randomly_location(phrase_dict, i)
        # none_sense_response = get_response_template(nonesense_Q)
        # get_streaming_response is not defined in this file; presumably it
        # comes from the get_response_api star import -- TODO confirm.
        none_sense_response = get_streaming_response(nonesense_Q, enable_thinking=False)
        # print("Response:", response)

        # orginal_token_num= len(tokenizer(original_response, return_tensors="pt")["input_ids"][0])
        # none_sense_token_num= len(tokenizer(none_sense_response, return_tensors="pt")["input_ids"][0])
        # all_origin_token += orginal_token_num
        # all_nonsense_token += none_sense_token_num
        print("===" * 10)
        json_data = {
            "question": sample["question"],
            "nonsense_Q": nonesense_Q,
            "answer": sample["answer"],
            "original_response": " ",
            "nonsense_phrase": nonsense_phrase,
            "none_sense_response": none_sense_response,
            # "orginal_token_num": orginal_token_num,
            # "none_sense_token_num": none_sense_token_num,
        }
        # One file per insertion position: "<sample index>_<position>.json".
        file_name = os.path.join(result_dir, f"{j}_{i}.json")
        with open(file_name, "w") as f:
            json.dump(json_data, f, indent=4)

    # with open(os.path.join(result_dir, "statistic.txt"), "w") as f:
    #     f.write(f"all_origin_token: {all_origin_token}\n")
    #     f.write(f"all_nonsense_token: {all_nonsense_token}\n")
    #     f.write(f"average_origin_token: {all_origin_token/100}\n")
    #     f.write(f"average_nonsense_token: {all_nonsense_token/100}\n")
    #     f.write(f"average_ratio: {all_nonsense_token/all_origin_token}\n")