zz1358m committed on
Commit
a70e97e
·
verified ·
1 Parent(s): cb3648c

Update stage2-dpo-label-guide-r2.py

Browse files
Files changed (1) hide show
  1. stage2-dpo-label-guide-r2.py +295 -295
stage2-dpo-label-guide-r2.py CHANGED
@@ -1,295 +1,295 @@
1
- import re
2
- import json
3
- from random import random
4
-
5
- from vllm import LLM, SamplingParams
6
-
7
- Instruction = '''Task: Validate the following claim using the provided context.
8
- Your goal is to determine whether the claim can be supported by the context. Choose between "support" or "refute".
9
-
10
- Instructions:
11
- 1. Analyze the claim step by step, verifying each crucial component in the claim as they appear.
12
- 2. Structure your reasoning on crucial components in the claim in detailed steps, from 1 to a maximum of 10. Make sure each step is the smallest possible logical unit necessary for validation.
13
- 3. Ensure that your reasoning correlates consistently with your conclusion. Use "##" to format each step clearly, e.g., "## Reasoning Step 1".
14
- 4. Finally, conclude with either "support" or "refute" enclosed in a pair of curly braces, noting the overall judgment regarding the claim.
15
- '''
16
- if __name__ == "__main__":
17
- model_path = f"************************************/reasoner-guide-r1"
18
- vllm_model = LLM(model=model_path, gpu_memory_utilization=0.90, max_model_len=4000, max_num_seqs=64)
19
- sampling_params = SamplingParams(temperature=0.75, top_p=0.95, max_tokens=4000)
20
- file_path1 = 'trainingset/Feverous_train.json'
21
- file_path2 = 'trainingset/Hover_train.json'
22
-
23
- data1 = []
24
- data2 = []
25
- # Open the file and read line by line
26
- with open(file_path1, 'r', encoding='utf-8') as file:
27
- raw_data1 = json.load(file)
28
- for item in raw_data1:
29
- data1.append(item)
30
- with open(file_path2, 'r', encoding='utf-8') as file:
31
- raw_data2 = json.load(file)
32
- for item in raw_data2:
33
- data2.append(item)
34
- data = data1 + data2
35
- prompt_list_run1 = []
36
- prompt_list_run2 = []
37
- for now in range(len(data)):
38
- # Now `data` contains all the JSON objects from the file
39
- prompt_judge1 = f'''<|begin_of_text|><|start_header_id|>user<|end_header_id|>
40
-
41
- Task: Validate the following claim using the provided context.
42
- Your goal is to determine whether the claim can be supported by the context. Choose between "support" or "refute".
43
-
44
- Instructions:
45
- 1. Analyze the claim step by step, verifying each crucial component in the claim as they appear.
46
- 2. Structure your reasoning on crucial components in the claim in detailed steps, from 1 to a maximum of 10. Make sure each step is the smallest possible logical unit necessary for validation.
47
- 3. Ensure that your reasoning correlates consistently with your conclusion. Use "##" to format each step clearly, e.g., "## Reasoning Step 1".
48
- 4. Finally, conclude with either "support" or "refute" enclosed in a pair of curly braces, noting the overall judgment regarding the claim.
49
-
50
- Context: {data[now]['evidence']}
51
-
52
- Claim: {data[now]['claim']}
53
-
54
-
55
- The ground truth is
56
-
57
- ---
58
-
59
- Answer: support. You mush generate results that match the ground truth.
60
-
61
- <|eot_id|><|start_header_id|>assistant<|end_header_id|>
62
-
63
- '''
64
- prompt_judge2 = f'''<|begin_of_text|><|start_header_id|>user<|end_header_id|>
65
-
66
- Task: Validate the following claim using the provided context.
67
- Your goal is to determine whether the claim can be supported by the context. Choose between "support" or "refute".
68
-
69
- Instructions:
70
- 1. Analyze the claim step by step, verifying each crucial component in the claim as they appear.
71
- 2. Structure your reasoning on crucial components in the claim in detailed steps, from 1 to a maximum of 10. Make sure each step is the smallest possible logical unit necessary for validation.
72
- 3. Ensure that your reasoning correlates consistently with your conclusion. Use "##" to format each step clearly, e.g., "## Reasoning Step 1".
73
- 4. Finally, conclude with either "support" or "refute" enclosed in a pair of curly braces, noting the overall judgment regarding the claim.
74
-
75
- Context: {data[now]['evidence']}
76
-
77
- Claim: {data[now]['claim']}
78
-
79
-
80
- The ground truth is
81
-
82
- ---
83
-
84
- Answer: refute. You mush generate results that match the ground truth.
85
-
86
- <|eot_id|><|start_header_id|>assistant<|end_header_id|>
87
-
88
- '''
89
- prompt_list_run2.append(prompt_judge2)
90
- prompt_list_run1.append(prompt_judge1)
91
- outputs1 = vllm_model.generate(prompt_list_run1, sampling_params)
92
- outputs2 = vllm_model.generate(prompt_list_run2, sampling_params)
93
- training_dataset = []
94
- for i in range(len(outputs1)):
95
- label = data[i]['label']
96
- if label == 'Refutes' or label == 'refutes' or label == 'CONTRADICT':
97
- label_unified = "refute"
98
- elif label == 'UNKNOWN' or label == 'Neutral':
99
- label_unified = "refute"
100
- elif label == 'SUPPORT' or label == 'Supports' or label == 'supports':
101
- label_unified = "support"
102
- user_prompt = f'''Context: {data[i]['evidence']}
103
-
104
- Claim: {data[i]['claim']}
105
- '''
106
- generated_text1 = outputs1[i].outputs[0].text
107
- generated_text2 = outputs2[i].outputs[0].text
108
- match1 = re.findall(r'\{([^{}]*)\}', generated_text1)
109
- match2 = re.findall(r'\{([^{}]*)\}', generated_text2)
110
-
111
- if len(generated_text2) > 2000 or len(generated_text1) > 2000 or len(user_prompt)>3000:
112
- continue
113
- if match1 == [] and match2 == []:
114
- continue
115
-
116
- if match1 == [] and match2 != []:
117
- predict2 = re.findall(r'\{([^{}]*)\}', generated_text2)[-1]
118
- if predict2.strip() == label_unified.strip():
119
- save_dict = {"instruction": Instruction, "input": user_prompt, "chosen": generated_text2,
120
- "rejected": generated_text1}
121
- training_dataset.append(save_dict)
122
- continue
123
- else:
124
- continue
125
-
126
- if match1 != [] and match2 == []:
127
- predict1 = re.findall(r'\{([^{}]*)\}', generated_text1)[-1]
128
- if predict1.strip() == label_unified.strip():
129
- save_dict = {"instruction": Instruction, "input": user_prompt, "chosen": generated_text1,
130
- "rejected": generated_text2}
131
- training_dataset.append(save_dict)
132
- continue
133
- else:
134
- continue
135
-
136
- predict1 = re.findall(r'\{([^{}]*)\}', generated_text1)[-1]
137
- predict2 = re.findall(r'\{([^{}]*)\}', generated_text2)[-1]
138
- if predict1.strip() == predict2.strip():
139
- continue
140
- if predict1.strip() == label_unified.strip():
141
- save_dict = {"instruction": Instruction, "input": user_prompt, "chosen": generated_text1,
142
- "rejected": generated_text2}
143
- training_dataset.append(save_dict)
144
- elif predict2.strip() == label_unified.strip():
145
- save_dict = {"instruction": Instruction, "input": user_prompt, "chosen": generated_text2,
146
- "rejected": generated_text1}
147
- training_dataset.append(save_dict)
148
-
149
- file_path = 'trainingset/Healthver_train.json'
150
-
151
- data = []
152
- # Open the file and read line by line
153
- with open(file_path, 'r', encoding='utf-8') as file:
154
- raw_data = json.load(file)
155
- for item in raw_data:
156
- data.append(item)
157
- prompt_list_run1 = []
158
- prompt_list_run2 = []
159
- prompt_list_run3 = []
160
- for now in range(len(data)):
161
- prompt_judge_support = f'''<|begin_of_text|><|start_header_id|>user<|end_header_id|>
162
-
163
- Task: Validate the following claim using the provided context.
164
- Your goal is to determine whether the claim can be supported with the context. Choose between "support", "refute", or "not enough information".
165
-
166
- Instructions:
167
- 1. Analyze the claim step by step, verifying each crucial component in the claim as they appear.
168
- 2. Structure your reasoning on crucial components in the claim in detailed steps, from 1 to a maximum of 10. Make sure each step is the smallest possible logical unit necessary for validation.
169
- 3. Ensure that your reasoning correlates consistently with your conclusion. Use "##" to format each step clearly, e.g., "## Reasoning Step 1".
170
- 4. Finally, conclude with "support", "refute", or "not enough information" enclosed in a pair of curly braces, noting the overall judgment regarding the claim.
171
-
172
- Context: {data[now]['evidence']}
173
-
174
- Claim: {data[now]['claim']}
175
-
176
-
177
- The ground truth is
178
-
179
- ---
180
-
181
- Answer: support. You mush generate results that match the ground truth.
182
-
183
- <|eot_id|><|start_header_id|>assistant<|end_header_id|>
184
-
185
- '''
186
- prompt_judge_refute = f'''<|begin_of_text|><|start_header_id|>user<|end_header_id|>
187
-
188
- Task: Validate the following claim using the provided context.
189
- Your goal is to determine whether the claim can be supported with the context. Choose between "support", "refute", or "not enough information".
190
-
191
- Instructions:
192
- 1. Analyze the claim step by step, verifying each crucial component in the claim as they appear.
193
- 2. Structure your reasoning on crucial components in the claim in detailed steps, from 1 to a maximum of 10. Make sure each step is the smallest possible logical unit necessary for validation.
194
- 3. Ensure that your reasoning correlates consistently with your conclusion. Use "##" to format each step clearly, e.g., "## Reasoning Step 1".
195
- 4. Finally, conclude with "support", "refute", or "not enough information" enclosed in a pair of curly braces, noting the overall judgment regarding the claim.
196
-
197
- Context: {data[now]['evidence']}
198
-
199
- Claim: {data[now]['claim']}
200
-
201
-
202
- The ground truth is
203
-
204
- ---
205
-
206
- Answer: refute. You mush generate results that match the ground truth.
207
-
208
- <|eot_id|><|start_header_id|>assistant<|end_header_id|>
209
-
210
- '''
211
- prompt_judge_nei = f'''<|begin_of_text|><|start_header_id|>user<|end_header_id|>
212
-
213
- Task: Validate the following claim using the provided context.
214
- Your goal is to determine whether the claim can be supported with the context. Choose between "support", "refute", or "not enough information".
215
-
216
- Instructions:
217
- 1. Analyze the claim step by step, verifying each crucial component in the claim as they appear.
218
- 2. Structure your reasoning on crucial components in the claim in detailed steps, from 1 to a maximum of 10. Make sure each step is the smallest possible logical unit necessary for validation.
219
- 3. Ensure that your reasoning correlates consistently with your conclusion. Use "##" to format each step clearly, e.g., "## Reasoning Step 1".
220
- 4. Finally, conclude with "support", "refute", or "not enough information" enclosed in a pair of curly braces, noting the overall judgment regarding the claim.
221
-
222
- Context: {data[now]['evidence']}
223
-
224
- Claim: {data[now]['claim']}
225
-
226
-
227
- The ground truth is
228
-
229
- ---
230
-
231
- Answer: not enough information. You mush generate results that match the ground truth.
232
-
233
- <|eot_id|><|start_header_id|>assistant<|end_header_id|>
234
-
235
- '''
236
- prompt_list_run3.append(prompt_judge_nei)
237
- prompt_list_run2.append(prompt_judge_refute)
238
- prompt_list_run1.append(prompt_judge_support)
239
- outputs1 = vllm_model.generate(prompt_list_run1, sampling_params)
240
- outputs2 = vllm_model.generate(prompt_list_run2, sampling_params)
241
- outputs3 = vllm_model.generate(prompt_list_run3, sampling_params)
242
- training_dataset = []
243
- for i in range(len(outputs1)):
244
- label = data[i]['label']
245
- if label == 'Refutes' or label == 'refutes' or label == 'CONTRADICT':
246
- label_unified = "refute"
247
- elif label == 'UNKNOWN' or label == 'Neutral':
248
- label_unified = "not enough information"
249
- elif label == 'SUPPORT' or label == 'Supports' or label == 'supports':
250
- label_unified = "support"
251
- user_prompt = f'''Context: {data[i]['evidence']}
252
-
253
- Claim: {data[i]['claim']}
254
- '''
255
- generated_text1 = outputs1[i].outputs[0].text
256
- generated_text2 = outputs2[i].outputs[0].text
257
- generated_text3 = outputs3[i].outputs[0].text
258
- match1 = re.findall(r'\{([^{}]*)\}', generated_text1)
259
- match2 = re.findall(r'\{([^{}]*)\}', generated_text2)
260
- match3 = re.findall(r'\{([^{}]*)\}', generated_text3)
261
-
262
- if len(generated_text2) > 2000 or len(generated_text1) > 2000 or len(generated_text3) > 2000 or len(
263
- user_prompt) > 3000:
264
- continue
265
- if match1 == [] or match2 == [] or match3 == []:
266
- continue
267
- predict1 = re.findall(r'\{([^{}]*)\}', generated_text1)[-1]
268
- predict2 = re.findall(r'\{([^{}]*)\}', generated_text2)[-1]
269
- predict3 = re.findall(r'\{([^{}]*)\}', generated_text3)[-1]
270
- if predict1.strip() == 'support' and predict2.strip() == 'refute' and predict3.strip() == 'not enough information':
271
- if label_unified == 'refute':
272
- save_dict1 = {"instruction": Instruction, "input": user_prompt, "chosen": generated_text2,
273
- "rejected": generated_text3}
274
- save_dict2 = {"instruction": Instruction, "input": user_prompt, "chosen": generated_text2,
275
- "rejected": generated_text1}
276
- training_dataset.append(save_dict1)
277
- training_dataset.append(save_dict2)
278
-
279
- elif label_unified == 'support':
280
- save_dict1 = {"instruction": Instruction, "input": user_prompt, "chosen": generated_text1,
281
- "rejected": generated_text2}
282
- save_dict2 = {"instruction": Instruction, "input": user_prompt, "chosen": generated_text1,
283
- "rejected": generated_text3}
284
- training_dataset.append(save_dict1)
285
- training_dataset.append(save_dict2)
286
-
287
- elif label_unified == 'not enough information':
288
- save_dict1 = {"instruction": Instruction, "input": user_prompt, "chosen": generated_text3,
289
- "rejected": generated_text2}
290
- save_dict2 = {"instruction": Instruction, "input": user_prompt, "chosen": generated_text3,
291
- "rejected": generated_text1}
292
- training_dataset.append(save_dict1)
293
- random.shuffle(training_dataset)
294
- with open('Training_claim_reason_guide_alpaca_merge_nei_r2.json', 'w', encoding='utf-8') as f:
295
- json.dump(training_dataset, f, ensure_ascii=False, indent=4)
 
"""Build chosen/rejected preference pairs for claim verification.

For every training example the model is prompted once per candidate verdict,
each time being told that this verdict is the ground truth ("label-guided"
generation).  The generation whose final ``{verdict}`` agrees with the real
label becomes ``chosen``; a generation that argues for a different verdict
becomes ``rejected``.  Pairs from the binary datasets (Feverous, HoVer) and
the three-way dataset (Healthver) are merged, shuffled and written to one
JSON file.
"""
import json
import random
import re

Instruction = '''Task: Validate the following claim using the provided context.
Your goal is to determine whether the claim can be supported by the context. Choose between "support" or "refute".

Instructions:
1. Analyze the claim step by step, verifying each crucial component in the claim as they appear.
2. Structure your reasoning on crucial components in the claim in detailed steps, from 1 to a maximum of 10. Make sure each step is the smallest possible logical unit necessary for validation.
3. Ensure that your reasoning correlates consistently with your conclusion. Use "##" to format each step clearly, e.g., "## Reasoning Step 1".
4. Finally, conclude with either "support" or "refute" enclosed in a pair of curly braces, noting the overall judgment regarding the claim.
'''

# Task text used only inside the three-way (Healthver) generation prompts.
INSTRUCTION_THREE_WAY = '''Task: Validate the following claim using the provided context.
Your goal is to determine whether the claim can be supported with the context. Choose between "support", "refute", or "not enough information".

Instructions:
1. Analyze the claim step by step, verifying each crucial component in the claim as they appear.
2. Structure your reasoning on crucial components in the claim in detailed steps, from 1 to a maximum of 10. Make sure each step is the smallest possible logical unit necessary for validation.
3. Ensure that your reasoning correlates consistently with your conclusion. Use "##" to format each step clearly, e.g., "## Reasoning Step 1".
4. Finally, conclude with "support", "refute", or "not enough information" enclosed in a pair of curly braces, noting the overall judgment regarding the claim.
'''

SUPPORT = "support"
REFUTE = "refute"
NOT_ENOUGH_INFO = "not enough information"

# Raw dataset spellings mapped onto the unified verdicts.
REFUTE_LABELS = frozenset({'Refutes', 'refutes', 'CONTRADICT'})
NEUTRAL_LABELS = frozenset({'UNKNOWN', 'Neutral'})
SUPPORT_LABELS = frozenset({'SUPPORT', 'Supports', 'supports'})

# Innermost ``{...}`` groups; the last one in a generation is its verdict.
VERDICT_PATTERN = re.compile(r'\{([^{}]*)\}')

# Character-length limits above which an example is dropped.
MAX_GENERATION_CHARS = 2000
MAX_INPUT_CHARS = 3000

# For each true verdict: which competing generations become "rejected",
# in the order the pairs are emitted.
REJECTED_ORDER = {
    REFUTE: (NOT_ENOUGH_INFO, SUPPORT),
    SUPPORT: (REFUTE, NOT_ENOUGH_INFO),
    NOT_ENOUGH_INFO: (REFUTE, SUPPORT),
}


def build_guided_prompt(task_text, evidence, claim, answer):
    """Return the full generation prompt that forces ``answer`` as ground truth.

    ``task_text`` is the task/instruction paragraph (ending in a newline);
    the result is wrapped in the chat-format special tokens the script has
    always used and ends right where the assistant turn begins.
    """
    return (
        "<|begin_of_text|><|start_header_id|>user<|end_header_id|>\n\n"
        f"{task_text}\n"
        f"Context: {evidence}\n\n"
        f"Claim: {claim}\n\n\n"
        "The ground truth is\n\n"
        "---\n\n"
        f"Answer: {answer}. You must generate results that match the ground truth.\n\n"
        "<|eot_id|><|start_header_id|>assistant<|end_header_id|>\n\n"
    )


def build_user_input(evidence, claim):
    """Return the ``input`` field stored with each preference pair."""
    return f"Context: {evidence}\n\nClaim: {claim}\n"


def unify_label(label, neutral_as):
    """Map a raw dataset label to a unified verdict.

    ``neutral_as`` is the verdict used for neutral/unknown labels
    ("refute" for the binary task, "not enough information" for three-way).
    Returns ``None`` for a label that is not recognised, so callers can skip
    the example instead of reusing a stale verdict or raising ``NameError``.
    """
    if label in REFUTE_LABELS:
        return REFUTE
    if label in NEUTRAL_LABELS:
        return neutral_as
    if label in SUPPORT_LABELS:
        return SUPPORT
    return None


def extract_verdict(text):
    """Return the stripped content of the last ``{...}`` in ``text``, or ``None``."""
    found = VERDICT_PATTERN.findall(text)
    return found[-1].strip() if found else None


def make_pair(user_input, chosen, rejected):
    """Assemble one preference record in the instruction/input/chosen/rejected layout."""
    # NOTE(review): the binary `Instruction` is stored even for three-way
    # (Healthver) pairs, exactly as the script did before -- confirm whether
    # INSTRUCTION_THREE_WAY was intended there.
    return {"instruction": Instruction, "input": user_input, "chosen": chosen,
            "rejected": rejected}


def build_binary_pairs(examples, support_texts, refute_texts):
    """Create preference pairs for the support/refute task.

    ``support_texts[i]`` / ``refute_texts[i]`` are the generations for
    ``examples[i]`` guided towards "support" / "refute".  A pair is emitted
    when the two generations reach different verdicts (a missing verdict
    counts as different) and one of them matches the true label.
    """
    pairs = []
    for example, text_support, text_refute in zip(examples, support_texts, refute_texts):
        truth = unify_label(example['label'], neutral_as=REFUTE)
        if truth is None:
            continue
        user_input = build_user_input(example['evidence'], example['claim'])
        if (len(text_support) > MAX_GENERATION_CHARS
                or len(text_refute) > MAX_GENERATION_CHARS
                or len(user_input) > MAX_INPUT_CHARS):
            continue

        verdict_support = extract_verdict(text_support)
        verdict_refute = extract_verdict(text_refute)
        # Covers "neither has a verdict" and "both reached the same verdict".
        if verdict_support == verdict_refute:
            continue
        if verdict_support == truth:
            pairs.append(make_pair(user_input, text_support, text_refute))
        elif verdict_refute == truth:
            pairs.append(make_pair(user_input, text_refute, text_support))
    return pairs


def build_three_way_pairs(examples, support_texts, refute_texts, nei_texts):
    """Create preference pairs for the support/refute/not-enough-information task.

    An example is used only when each guided generation actually ends with
    the verdict it was guided towards.  The generation matching the true
    label is ``chosen`` and is paired against each of the other two, giving
    two records per example.
    """
    pairs = []
    for example, text_support, text_refute, text_nei in zip(
            examples, support_texts, refute_texts, nei_texts):
        truth = unify_label(example['label'], neutral_as=NOT_ENOUGH_INFO)
        if truth is None:
            continue
        user_input = build_user_input(example['evidence'], example['claim'])
        by_verdict = {SUPPORT: text_support, REFUTE: text_refute, NOT_ENOUGH_INFO: text_nei}
        if (any(len(text) > MAX_GENERATION_CHARS for text in by_verdict.values())
                or len(user_input) > MAX_INPUT_CHARS):
            continue
        # Every generation must have followed its guidance.
        if any(extract_verdict(text) != verdict for verdict, text in by_verdict.items()):
            continue
        chosen = by_verdict[truth]
        # Both pairs are appended for every label, including
        # "not enough information" (the second one used to be dropped).
        for other in REJECTED_ORDER[truth]:
            pairs.append(make_pair(user_input, chosen, by_verdict[other]))
    return pairs


def load_examples(path):
    """Load a JSON array of examples from ``path``."""
    with open(path, 'r', encoding='utf-8') as file:
        return list(json.load(file))


def generate_texts(model, prompts, sampling_params):
    """Run ``model.generate`` and return the first completion text per prompt."""
    return [result.outputs[0].text for result in model.generate(prompts, sampling_params)]


def main():
    """Generate guided answers, build all preference pairs and write them to disk."""
    # Imported here so the pure helpers above stay importable without vLLM.
    from vllm import LLM, SamplingParams

    model_path = "************************************/reasoner-guide-r1"
    vllm_model = LLM(model=model_path, gpu_memory_utilization=0.90, max_model_len=4000, max_num_seqs=64)
    sampling_params = SamplingParams(temperature=0.75, top_p=0.95, max_tokens=4000)

    # One list for the whole run: re-creating it before the Healthver pass
    # used to throw away every Feverous/HoVer pair.
    training_dataset = []

    # --- Binary task: Feverous + HoVer ---------------------------------
    binary_examples = (load_examples('trainingset/Feverous_train.json')
                       + load_examples('trainingset/Hover_train.json'))
    support_prompts = [build_guided_prompt(Instruction, ex['evidence'], ex['claim'], SUPPORT)
                       for ex in binary_examples]
    refute_prompts = [build_guided_prompt(Instruction, ex['evidence'], ex['claim'], REFUTE)
                      for ex in binary_examples]
    support_texts = generate_texts(vllm_model, support_prompts, sampling_params)
    refute_texts = generate_texts(vllm_model, refute_prompts, sampling_params)
    training_dataset.extend(build_binary_pairs(binary_examples, support_texts, refute_texts))

    # --- Three-way task: Healthver --------------------------------------
    health_examples = load_examples('trainingset/Healthver_train.json')
    guided_texts = []
    for answer in (SUPPORT, REFUTE, NOT_ENOUGH_INFO):
        prompts = [build_guided_prompt(INSTRUCTION_THREE_WAY, ex['evidence'], ex['claim'], answer)
                   for ex in health_examples]
        guided_texts.append(generate_texts(vllm_model, prompts, sampling_params))
    training_dataset.extend(build_three_way_pairs(health_examples, *guided_texts))

    # `random` is the module here; `from random import random` bound the
    # function and made this call fail with AttributeError.
    random.shuffle(training_dataset)
    with open('Training_claim_reason_guide_alpaca_merge_nei_r2.json', 'w', encoding='utf-8') as f:
        json.dump(training_dataset, f, ensure_ascii=False, indent=4)


if __name__ == "__main__":
    main()