probejie committed on
Commit
237451b
·
verified ·
1 Parent(s): 84b5727

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +67 -0
  2. judge_retrieval_necessary_check/data_format.py +238 -0
  3. judge_retrieval_necessary_check/data_output/Ministral-8B-Instruct-2410-step8.json +3 -0
  4. judge_retrieval_necessary_check/data_output/bm25/Llama-3.1-70B-Instruct.json +3 -0
  5. judge_retrieval_necessary_check/data_output/bm25/Llama-3.1-8B-Instruct.json +3 -0
  6. judge_retrieval_necessary_check/data_output/bm25/Llama-3.2-1B-Instruct.json +3 -0
  7. judge_retrieval_necessary_check/data_output/bm25/Llama-3.2-3B-Instruct.json +3 -0
  8. judge_retrieval_necessary_check/data_output/bm25/Ministral-8B-Instruct-2410.json +3 -0
  9. judge_retrieval_necessary_check/data_output/bm25/Mistral-7B-Instruct-v0.3.json +3 -0
  10. judge_retrieval_necessary_check/data_output/bm25/Mixtral-8x7B-Instruct-v0.1.json +3 -0
  11. judge_retrieval_necessary_check/data_output/bm25/Phi-3-medium-4k-instruct.json +3 -0
  12. judge_retrieval_necessary_check/data_output/bm25/Phi-3-mini-4k-instruct.json +3 -0
  13. judge_retrieval_necessary_check/data_output/bm25/Phi-3-small-8k-instruct.json +3 -0
  14. judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-1.5B-Instruct.json +3 -0
  15. judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-14B-Instruct.json +3 -0
  16. judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-32B-Instruct.json +3 -0
  17. judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-3B-Instruct.json +3 -0
  18. judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-72B-Instruct.json +3 -0
  19. judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-7B-Instruct.json +3 -0
  20. judge_retrieval_necessary_check/data_output/bm25/gemma-2-27b-it.json +3 -0
  21. judge_retrieval_necessary_check/data_output/bm25/gemma-2-2b-it.json +3 -0
  22. judge_retrieval_necessary_check/data_output/bm25/gemma-2-9b-it.json +3 -0
  23. judge_retrieval_necessary_check/data_output/contrieve/Llama-3.1-70B-Instruct.json +0 -0
  24. judge_retrieval_necessary_check/data_output/contrieve/Llama-3.1-8B-Instruct.json +3 -0
  25. judge_retrieval_necessary_check/data_output/contrieve/Llama-3.2-1B-Instruct.json +3 -0
  26. judge_retrieval_necessary_check/data_output/contrieve/Llama-3.2-3B-Instruct.json +3 -0
  27. judge_retrieval_necessary_check/data_output/contrieve/Ministral-8B-Instruct-2410.json +3 -0
  28. judge_retrieval_necessary_check/data_output/contrieve/Mistral-7B-Instruct-v0.3.json +3 -0
  29. judge_retrieval_necessary_check/data_output/contrieve/Mixtral-8x7B-Instruct-v0.1.json +3 -0
  30. judge_retrieval_necessary_check/data_output/contrieve/Phi-3-medium-4k-instruct.json +3 -0
  31. judge_retrieval_necessary_check/data_output/contrieve/Phi-3-mini-4k-instruct.json +3 -0
  32. judge_retrieval_necessary_check/data_output/contrieve/Phi-3-small-8k-instruct.json +3 -0
  33. judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-1.5B-Instruct.json +3 -0
  34. judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-14B-Instruct.json +3 -0
  35. judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-32B-Instruct.json +3 -0
  36. judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-3B-Instruct.json +3 -0
  37. judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-72B-Instruct.json +3 -0
  38. judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-7B-Instruct.json +3 -0
  39. judge_retrieval_necessary_check/data_output/contrieve/gemma-2-27b-it.json +3 -0
  40. judge_retrieval_necessary_check/data_output/contrieve/gemma-2-2b-it.json +3 -0
  41. judge_retrieval_necessary_check/data_output/contrieve/gemma-2-9b-it.json +3 -0
  42. judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.1-70B-Instruct.json +0 -0
  43. judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.1-8B-Instruct.json +3 -0
  44. judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.2-1B-Instruct.json +3 -0
  45. judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.2-3B-Instruct.json +3 -0
  46. judge_retrieval_necessary_check/data_output/promptretrieval/Ministral-8B-Instruct-2410.json +3 -0
  47. judge_retrieval_necessary_check/data_output/promptretrieval/Mistral-7B-Instruct-v0.3.json +3 -0
  48. judge_retrieval_necessary_check/data_output/promptretrieval/Mixtral-8x7B-Instruct-v0.1.json +3 -0
  49. judge_retrieval_necessary_check/data_output/promptretrieval/Phi-3-medium-4k-instruct.json +3 -0
  50. judge_retrieval_necessary_check/data_output/promptretrieval/Phi-3-mini-4k-instruct.json +3 -0
.gitattributes CHANGED
@@ -136,3 +136,70 @@ nan_ex4_part4/gemma-2-9b-it_fre_dataset_with_promptriever.json filter=lfs diff=l
136
  nan_ex4_part4/gemma-2-9b-it_old_new_dataset_with_bm25.json filter=lfs diff=lfs merge=lfs -text
137
  nan_ex4_part4/gemma-2-9b-it_old_new_dataset_with_contriever.json filter=lfs diff=lfs merge=lfs -text
138
  nan_ex4_part4/gemma-2-9b-it_old_new_dataset_with_promptriever.json filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  nan_ex4_part4/gemma-2-9b-it_old_new_dataset_with_bm25.json filter=lfs diff=lfs merge=lfs -text
137
  nan_ex4_part4/gemma-2-9b-it_old_new_dataset_with_contriever.json filter=lfs diff=lfs merge=lfs -text
138
  nan_ex4_part4/gemma-2-9b-it_old_new_dataset_with_promptriever.json filter=lfs diff=lfs merge=lfs -text
139
+ judge_retrieval_necessary_check/data_output/Ministral-8B-Instruct-2410-step8.json filter=lfs diff=lfs merge=lfs -text
140
+ judge_retrieval_necessary_check/data_output/bm25/Llama-3.1-70B-Instruct.json filter=lfs diff=lfs merge=lfs -text
141
+ judge_retrieval_necessary_check/data_output/bm25/Llama-3.1-8B-Instruct.json filter=lfs diff=lfs merge=lfs -text
142
+ judge_retrieval_necessary_check/data_output/bm25/Llama-3.2-1B-Instruct.json filter=lfs diff=lfs merge=lfs -text
143
+ judge_retrieval_necessary_check/data_output/bm25/Llama-3.2-3B-Instruct.json filter=lfs diff=lfs merge=lfs -text
144
+ judge_retrieval_necessary_check/data_output/bm25/Ministral-8B-Instruct-2410.json filter=lfs diff=lfs merge=lfs -text
145
+ judge_retrieval_necessary_check/data_output/bm25/Mistral-7B-Instruct-v0.3.json filter=lfs diff=lfs merge=lfs -text
146
+ judge_retrieval_necessary_check/data_output/bm25/Mixtral-8x7B-Instruct-v0.1.json filter=lfs diff=lfs merge=lfs -text
147
+ judge_retrieval_necessary_check/data_output/bm25/Phi-3-medium-4k-instruct.json filter=lfs diff=lfs merge=lfs -text
148
+ judge_retrieval_necessary_check/data_output/bm25/Phi-3-mini-4k-instruct.json filter=lfs diff=lfs merge=lfs -text
149
+ judge_retrieval_necessary_check/data_output/bm25/Phi-3-small-8k-instruct.json filter=lfs diff=lfs merge=lfs -text
150
+ judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-1.5B-Instruct.json filter=lfs diff=lfs merge=lfs -text
151
+ judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-14B-Instruct.json filter=lfs diff=lfs merge=lfs -text
152
+ judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-32B-Instruct.json filter=lfs diff=lfs merge=lfs -text
153
+ judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-3B-Instruct.json filter=lfs diff=lfs merge=lfs -text
154
+ judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-72B-Instruct.json filter=lfs diff=lfs merge=lfs -text
155
+ judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-7B-Instruct.json filter=lfs diff=lfs merge=lfs -text
156
+ judge_retrieval_necessary_check/data_output/bm25/gemma-2-27b-it.json filter=lfs diff=lfs merge=lfs -text
157
+ judge_retrieval_necessary_check/data_output/bm25/gemma-2-2b-it.json filter=lfs diff=lfs merge=lfs -text
158
+ judge_retrieval_necessary_check/data_output/bm25/gemma-2-9b-it.json filter=lfs diff=lfs merge=lfs -text
159
+ judge_retrieval_necessary_check/data_output/contrieve/Llama-3.1-8B-Instruct.json filter=lfs diff=lfs merge=lfs -text
160
+ judge_retrieval_necessary_check/data_output/contrieve/Llama-3.2-1B-Instruct.json filter=lfs diff=lfs merge=lfs -text
161
+ judge_retrieval_necessary_check/data_output/contrieve/Llama-3.2-3B-Instruct.json filter=lfs diff=lfs merge=lfs -text
162
+ judge_retrieval_necessary_check/data_output/contrieve/Ministral-8B-Instruct-2410.json filter=lfs diff=lfs merge=lfs -text
163
+ judge_retrieval_necessary_check/data_output/contrieve/Mistral-7B-Instruct-v0.3.json filter=lfs diff=lfs merge=lfs -text
164
+ judge_retrieval_necessary_check/data_output/contrieve/Mixtral-8x7B-Instruct-v0.1.json filter=lfs diff=lfs merge=lfs -text
165
+ judge_retrieval_necessary_check/data_output/contrieve/Phi-3-medium-4k-instruct.json filter=lfs diff=lfs merge=lfs -text
166
+ judge_retrieval_necessary_check/data_output/contrieve/Phi-3-mini-4k-instruct.json filter=lfs diff=lfs merge=lfs -text
167
+ judge_retrieval_necessary_check/data_output/contrieve/Phi-3-small-8k-instruct.json filter=lfs diff=lfs merge=lfs -text
168
+ judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-1.5B-Instruct.json filter=lfs diff=lfs merge=lfs -text
169
+ judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-14B-Instruct.json filter=lfs diff=lfs merge=lfs -text
170
+ judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-32B-Instruct.json filter=lfs diff=lfs merge=lfs -text
171
+ judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-3B-Instruct.json filter=lfs diff=lfs merge=lfs -text
172
+ judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-72B-Instruct.json filter=lfs diff=lfs merge=lfs -text
173
+ judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-7B-Instruct.json filter=lfs diff=lfs merge=lfs -text
174
+ judge_retrieval_necessary_check/data_output/contrieve/gemma-2-27b-it.json filter=lfs diff=lfs merge=lfs -text
175
+ judge_retrieval_necessary_check/data_output/contrieve/gemma-2-2b-it.json filter=lfs diff=lfs merge=lfs -text
176
+ judge_retrieval_necessary_check/data_output/contrieve/gemma-2-9b-it.json filter=lfs diff=lfs merge=lfs -text
177
+ judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.1-8B-Instruct.json filter=lfs diff=lfs merge=lfs -text
178
+ judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.2-1B-Instruct.json filter=lfs diff=lfs merge=lfs -text
179
+ judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.2-3B-Instruct.json filter=lfs diff=lfs merge=lfs -text
180
+ judge_retrieval_necessary_check/data_output/promptretrieval/Ministral-8B-Instruct-2410.json filter=lfs diff=lfs merge=lfs -text
181
+ judge_retrieval_necessary_check/data_output/promptretrieval/Mistral-7B-Instruct-v0.3.json filter=lfs diff=lfs merge=lfs -text
182
+ judge_retrieval_necessary_check/data_output/promptretrieval/Mixtral-8x7B-Instruct-v0.1.json filter=lfs diff=lfs merge=lfs -text
183
+ judge_retrieval_necessary_check/data_output/promptretrieval/Phi-3-medium-4k-instruct.json filter=lfs diff=lfs merge=lfs -text
184
+ judge_retrieval_necessary_check/data_output/promptretrieval/Phi-3-mini-4k-instruct.json filter=lfs diff=lfs merge=lfs -text
185
+ judge_retrieval_necessary_check/data_output/promptretrieval/Phi-3-small-8k-instruct.json filter=lfs diff=lfs merge=lfs -text
186
+ judge_retrieval_necessary_check/data_output/promptretrieval/Qwen2.5-1.5B-Instruct.json filter=lfs diff=lfs merge=lfs -text
187
+ judge_retrieval_necessary_check/data_output/promptretrieval/Qwen2.5-14B-Instruct.json filter=lfs diff=lfs merge=lfs -text
188
+ judge_retrieval_necessary_check/data_output/promptretrieval/Qwen2.5-32B-Instruct.json filter=lfs diff=lfs merge=lfs -text
189
+ judge_retrieval_necessary_check/data_output/promptretrieval/Qwen2.5-3B-Instruct.json filter=lfs diff=lfs merge=lfs -text
190
+ judge_retrieval_necessary_check/data_output/promptretrieval/Qwen2.5-72B-Instruct.json filter=lfs diff=lfs merge=lfs -text
191
+ judge_retrieval_necessary_check/data_output/promptretrieval/Qwen2.5-7B-Instruct.json filter=lfs diff=lfs merge=lfs -text
192
+ judge_retrieval_necessary_check/data_output/promptretrieval/gemma-2-27b-it.json filter=lfs diff=lfs merge=lfs -text
193
+ judge_retrieval_necessary_check/data_output/promptretrieval/gemma-2-2b-it.json filter=lfs diff=lfs merge=lfs -text
194
+ judge_retrieval_necessary_check/data_output/promptretrieval/gemma-2-9b-it.json filter=lfs diff=lfs merge=lfs -text
195
+ judge_retrieval_necessary_check/eval_tool.sh.e47062864 filter=lfs diff=lfs merge=lfs -text
196
+ judge_retrieval_necessary_check/eval_tool.sh.e47089079 filter=lfs diff=lfs merge=lfs -text
197
+ judge_retrieval_necessary_check/eval_tool.sh.e47154461 filter=lfs diff=lfs merge=lfs -text
198
+ judge_retrieval_necessary_check/eval_tool.sh.o47154461 filter=lfs diff=lfs merge=lfs -text
199
+ judge_retrieval_necessary_check/eval_tool_bm25.sh.e47066837 filter=lfs diff=lfs merge=lfs -text
200
+ judge_retrieval_necessary_check/eval_tool_bm25.sh.e47125941 filter=lfs diff=lfs merge=lfs -text
201
+ judge_retrieval_necessary_check/eval_tool_prompt.sh.e47066836 filter=lfs diff=lfs merge=lfs -text
202
+ judge_retrieval_necessary_check/eval_tool_prompt.sh.e47089102 filter=lfs diff=lfs merge=lfs -text
203
+ judge_retrieval_necessary_check/eval_tool_prompt.sh.e47109199 filter=lfs diff=lfs merge=lfs -text
204
+ judge_retrieval_necessary_check/eval_tool_prompt.sh.e47156409 filter=lfs diff=lfs merge=lfs -text
205
+ judge_retrieval_necessary_check/eval_tool_prompt.sh.o47156409 filter=lfs diff=lfs merge=lfs -text
judge_retrieval_necessary_check/data_format.py ADDED
@@ -0,0 +1,238 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # gemma = """
2
+ # <start_of_turn>user
3
+ # Below is a question, please answer it directly and keep your answer as short as possible.
4
+ # Question: {query}<end_of_turn><start_of_turn>model
5
+ # """
6
+ # gemma ="""
7
+ # """
8
+ # llama = """
9
+ # <|begin_of_text|><|start_header_id|>user<|end_header_id|>
10
+ # Below is a question, please answer it directly and keep your answer as short as possible.
11
+ # Question: {query}<|eot_id|><|start_header_id|>assistant<|end_header_id|>
12
+ # """
13
+ llama = """
14
+ {query}"""
15
+ gemma = """
16
+ {query}"""
17
+ mistral = """
18
+ {query}"""
19
+ qwen = """
20
+ {query}"""
21
+ phi = """
22
+ {query}"""
23
+
24
+ generate_answer = '''
25
+ Below is a question, please answer it directly and keep your answer as short as possible.
26
+ Question: {query}
27
+ Answer:
28
+ '''
29
+
30
+ generate_answer_based_on_context = '''
31
+ Given some related documents: {re_content}.
32
+ This is a question: {question}
33
+ Please answer the question directly. Please keep your answer as short as possible.
34
+ Answer:
35
+ '''
36
+
37
+ generate_subquestion = '''
38
+ Given a main question and optional previous subquestion-answer pairs, you may need to generate subquestions to help answer this main question. Please ensure to only generate subquestions that are relevant to answering the main question. When there are no more subquestions needed, output "finish".
39
+
40
+ ## Input Format
41
+ Required:
42
+ - Main Question: [question]
43
+
44
+ Optional:
45
+ - Previous Subquestion: [subquestion]
46
+ - Previous Answer: [subanswer]
47
+
48
+ ## Output Format
49
+ One of:
50
+ - Next Subquestion: [new subquestion]
51
+ - "finish" (when no further subquestions are needed)
52
+
53
+ ## Generation Guidelines
54
+ 1. Subquestions should:
55
+ - Break down complex aspects of the main question
56
+ - Follow a logical progression
57
+ - Be specific and focused
58
+ - Build upon previous answers when available
59
+
60
+ 2. Output "finish" when:
61
+ - All relevant aspects have been covered
62
+ - Further breakdown would not add value
63
+ - The question has been fully addressed
64
+
65
+ ## There are some examples
66
+
67
+ Example 1:
68
+ Input:
69
+ - Main Question: "What is the location of the headquarters of the institution where Percival Lowell was educated?"
70
+ - Previous Subquestion: "Where did Percival Lowell receive his education?""
71
+ - Previous Answer: "Harvard University."
72
+
73
+ Output:
74
+ - Next Subquestion: "Where is the headquarters of the Harvard University?"
75
+
76
+ Example 2:
77
+ Input:
78
+ - Main Question: "What is the capital of France?"
79
+
80
+ Output:
81
+ - "finish"
82
+
83
+ Main Question: {question}
84
+ {previous_subquestion_answer_pairs}
85
+
86
+ Output:
87
+ '''
88
+
89
+ generate_summary_prompt = '''
90
+ Based on the main question and all subquestion-answer pairs, please provide a comprehensive final answer. Please keep your answer as short as possible.
91
+
92
+ Main Question: {main_question}
93
+
94
+ Previous Subquestions and Answers:
95
+ {history_str}
96
+
97
+ Final Answer:
98
+ '''
99
+ # def generate_summary_prompt(main_question: str, subquestion_answers: List[Dict]) -> str:
100
+ # """Generate the summary prompt."""
101
+ # history_str = "\n".join([
102
+ # f"Subquestion: {qa['question']}\nAnswer: {qa['answer']}"
103
+ # for qa in subquestion_answers
104
+ # ])
105
+
106
+ # return f""""""
107
+
108
+ # generate_confidence_prompt = '''Answer the following question based on your internal knowledge with one or few words.
109
+ # If you are sure the answer is accurate and correct, please say “certain” after the answer.
110
+ # If you are not confident with the answer, please say “uncertain”.'''
111
+ generate_confidence_prompt = '''
112
+ Answer the following question based on your internal knowledge with one or few words.
113
+ Add a confidence indicator after your answer:
114
+ - "certain" if you are completely confident in the accuracy
115
+ - "uncertain" if you have any doubts
116
+
117
+ ## Input Format
118
+ Input:
119
+ - Question: [question]
120
+
121
+ ## Output Format
122
+ Output:
123
+ - Answer: [brief answer]
124
+ - Confidence: [certain/uncertain]
125
+
126
+
127
+ Question: {question}
128
+ Output:
129
+ '''
130
+ ## Confidence Criteria
131
+ # - Use "certain" when:
132
+ # * The answer is based on widely accepted facts
133
+ # * The information is verifiable through authoritative sources
134
+ # * There is no reasonable doubt about the accuracy
135
+
136
+ # - Use "uncertain" when:
137
+ # * The answer involves estimates or approximations
138
+ # * Multiple valid interpretations are possible
139
+ # * The information might be outdated or context-dependent
140
+ # * There is any doubt about the completeness or accuracy
141
+
142
+ # judgement_prompt_part1 = '''
143
+ # In the following task, you are given a Question, a model Prediction for the Question, and a Ground-truth Answer to the Question. You should decide whether the model Prediction implies the Ground-truth Answer.
144
+ # Question {question}
145
+ # Prediction {model output}
146
+ # Ground-truth Answer {answer}
147
+ # Does the Prediction imply the Ground-truth Answer? Output Yes or No:
148
+ # '''
149
+
150
+ judgement_prompt_qa= '''
151
+ Given a question and a model-generated answer, determine if we can be highly confident that the model's answer is definitively correct for this question.
152
+
153
+ ## Input Format
154
+ Input:
155
+ - Question: [question]
156
+ - Model Answer: [model output]
157
+
158
+ ## Output Format
159
+ Output:
160
+ - Confidence: [Yes/No]
161
+ - Yes: The answer can be definitively confirmed as correct
162
+ - No: The answer cannot be definitively confirmed as correct
163
+
164
+ Note: Only output "Yes" if there is a very high degree of certainty that the answer is correct and complete.
165
+ Question: {question}
166
+ Model Answer: {model_output}
167
+
168
+ Output:
169
+ '''
170
+
171
+
172
+ # judgement_prompt = '''Input:
173
+ # <SYS> You are a helpful assistant. Your task is to parse user input into structured formats and accomplish the task according to the heuristic answer. </SYS>
174
+ # Heuristic answer: {Heuristic Answer} Question: {user question}
175
+ # Retrieval Necessity Judgment Output: Output:
176
+ # Known (True / False)'''
177
+ judgement_prompt_retrieval = '''
178
+ Determine whether external document retrieval is necessary to provide an accurate and complete answer to the given question. Please keep your answer as short as possible.
179
+
180
+ ## Input Format
181
+ Input:
182
+ - Question: [question]
183
+
184
+ ## Output Format
185
+ Output:
186
+ - Retrieval Needed: [Yes/No]
187
+
188
+ Question: {question}
189
+
190
+ Output:
191
+ '''
192
+ # first_hop_prompt = '''Given some related documents: {document}.
193
+ # This is a question: {question}
194
+ # Please answer the question directly. Please keep your answer as short as possible.
195
+ # Answer:'''
196
+
197
+ # multi_hop_prompt ='''Given a question: {question}
198
+ # The subsequent sub-questions: {sub_questions}
199
+ # You have two choices now.
200
+ # <choice A> answer the final sub-question directly.
201
+ # <choice B> retrieve some document to help you answer the question. Just output retrieval as a placeholder.
202
+ # If you choose <choice A> please output {{"choice A": {{"answer": "your_answer_here"}}}}
203
+ # If you choose <choice B> please output {{"choice B": retrieval}}
204
+ # The final output should be in the form of a JSON string, without any additional content. Please keep your answer as short as possible.
205
+ # Output: '''
206
+
207
+ # multi_hop_prompt_2_1 = '''Given a question: {question}
208
+ # And subquestion-answer pairs: {subquestion_answer_pairs}
209
+ # Please judge if the question has been finished. You have two choices now.
210
+ # <choice A> The answer can be found in the subquestion-answer pairs.
211
+ # <choice B> The answer cannot be found and a new sub-question needs to be generated.
212
+ # If you choose <choice A>, please output {{"choice A": {{"answer": "final_answer_here"}}}}
213
+ # If you choose <choice B>, please output {{"choice B": {{"subquestion": "new_subquestion_here"}}}}
214
+ # The final output should be in the form of a JSON string, without any additional content. Please keep your answer as short as possible.
215
+ # Output:'''
216
+
217
+ # multi_hop_prompt_2_2 = '''Given some related documents: {documents}
218
+ # This is a question: {question}
219
+ # The subsequent sub-questions: {sub_questions}
220
+ # please answer the final sub-question. You can use the information in the related documents.
221
+ # Please keep your answer as short as possible.
222
+ # Answer: '''
223
+ # mistral = """
224
+ # text = "<s>[INST] Below is a question, please answer it directly and keep your answer as short as possible.
225
+ # Question: {query} [/INST]"
226
+ # """
227
+ # qwen = """
228
+ # <|im_start|>user
229
+ # Below is a question, please answer it directly and keep your answer as short as possible.
230
+ # Question: {query}<|im_end|>
231
+ # <|im_start|>assistant
232
+ # """
233
+ # phi = """
234
+ # <|user|>
235
+ # Below is a question, please answer it directly and keep your answer as short as possible.
236
+ # Question: {query}<|end|>
237
+ # <|assistant|>
238
+ # """
judge_retrieval_necessary_check/data_output/Ministral-8B-Instruct-2410-step8.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5ba87131570e4b6f9f804b9b4f0215dfdea5414a61a2a7d1c0ab54472c192231
3
+ size 15872867
judge_retrieval_necessary_check/data_output/bm25/Llama-3.1-70B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faa96bdc092d155daae341a185c18006b292318a2054c85a24277fd26fbd9649
3
+ size 16109684
judge_retrieval_necessary_check/data_output/bm25/Llama-3.1-8B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37851237124b4d3f17d806150ce47751b780ebe031d08602d421daf38b8c128d
3
+ size 56173321
judge_retrieval_necessary_check/data_output/bm25/Llama-3.2-1B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:890a2f1b3255ad7924709c0974acc2587c3347a040bbf912063c65963d60bcaf
3
+ size 19077477
judge_retrieval_necessary_check/data_output/bm25/Llama-3.2-3B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66b2e8e559c1044019dbb43d66021ffa1919d4db0d7b1c185f8f06966daa484e
3
+ size 34915948
judge_retrieval_necessary_check/data_output/bm25/Ministral-8B-Instruct-2410.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83a5f988f5d7df9a5fab0c53564748d6b4ed092554330735846f7d4bc66541cc
3
+ size 16298786
judge_retrieval_necessary_check/data_output/bm25/Mistral-7B-Instruct-v0.3.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d893506f291cd177ec1c6d5d073ba1e980c290fbe079bab00428099c905034cc
3
+ size 34929343
judge_retrieval_necessary_check/data_output/bm25/Mixtral-8x7B-Instruct-v0.1.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6d47187722f6d04433cad9c183266253cf9c9268fa61f519387ba42a86ba3c7
3
+ size 60310919
judge_retrieval_necessary_check/data_output/bm25/Phi-3-medium-4k-instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58f1cb352e894e82e00b254804c95e8e57b7c11d17d2ea1761de0a2f01ac5cdd
3
+ size 31936999
judge_retrieval_necessary_check/data_output/bm25/Phi-3-mini-4k-instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bcddc8376edcba4c8cdb72387f8f1ffd2369e2d77ea7dd794ba12a65eba1c270
3
+ size 65937179
judge_retrieval_necessary_check/data_output/bm25/Phi-3-small-8k-instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:215c197f0c315f125222b8db9b20b53eb292fc8b430e094f52c40a7b3eaab3ca
3
+ size 47651096
judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-1.5B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb044f0b686bef40cbf91dac1e7fa5c1cfac5e3f20c192c48840344a4e2089ea
3
+ size 16577492
judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-14B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43ff58ee3064d5537ec9befc5b4afa7697e6d2cb3df3491bb17242f9585857d6
3
+ size 51891869
judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-32B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:176c0e886e30b07f7ec564e29f912deae6c12cfab4d9afdae1d28e0829c9c02e
3
+ size 43180533
judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-3B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:937653c646d2278d44a2f4f5afbf6899444a6902151b5a6a17eef3a7757a4a48
3
+ size 27450273
judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-72B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fecd97524cbf5b15ad172e5e8bd536ff4bb96916d632ae0757bd806edd19f89
3
+ size 40379482
judge_retrieval_necessary_check/data_output/bm25/Qwen2.5-7B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a76dbb65397f2085f808aaf53f060157df21e437272d6c5af28f6823f2771a41
3
+ size 28861668
judge_retrieval_necessary_check/data_output/bm25/gemma-2-27b-it.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:38f6972eae83edcf8d09563d9560d436d3849c63aaa563f77c24cd109ad88c97
3
+ size 19065442
judge_retrieval_necessary_check/data_output/bm25/gemma-2-2b-it.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3413218b7270e19b9cb1beb78886d52e3ba97eeb1e533de8ebe8c65332c90047
3
+ size 37987422
judge_retrieval_necessary_check/data_output/bm25/gemma-2-9b-it.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4ec4df4c50256a6f145cac2f82604b4af16f757e7dd55ae7f63a25662c393f0
3
+ size 22499404
judge_retrieval_necessary_check/data_output/contrieve/Llama-3.1-70B-Instruct.json ADDED
The diff for this file is too large to render. See raw diff
 
judge_retrieval_necessary_check/data_output/contrieve/Llama-3.1-8B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e447dd899c01fb7458f230556defb4fd957b2471788220034dcfd6ef50e48446
3
+ size 56623791
judge_retrieval_necessary_check/data_output/contrieve/Llama-3.2-1B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cdb46a6fe142bae371f89181af4f33cf1c34d7d5961f34f69ed6f0dccceabfe5
3
+ size 18559614
judge_retrieval_necessary_check/data_output/contrieve/Llama-3.2-3B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00c7ea96b8a4e4735473434ce7272961cc5753409c85c925914b31de40e05399
3
+ size 34596566
judge_retrieval_necessary_check/data_output/contrieve/Ministral-8B-Instruct-2410.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4be5088df87fcf4781b056990c1c731794cbfb8a7946ae2d511ea04d8c3acc7e
3
+ size 16292730
judge_retrieval_necessary_check/data_output/contrieve/Mistral-7B-Instruct-v0.3.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:00419f8ff785aeafcac842201c42d2766eb7fceec27b67ace4f89a2e54139a7e
3
+ size 35508743
judge_retrieval_necessary_check/data_output/contrieve/Mixtral-8x7B-Instruct-v0.1.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ecc08fdc612c6cb9b122c4341d5250ffe775c14600f411a4bf2a742bbe07eab
3
+ size 30167228
judge_retrieval_necessary_check/data_output/contrieve/Phi-3-medium-4k-instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c136fdf68bd2939fa42ed99413cddb6c42e696540249d0a2fbdbd191d3875c46
3
+ size 30904252
judge_retrieval_necessary_check/data_output/contrieve/Phi-3-mini-4k-instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94f29c312a4a71345cb3b6432e172fbcc9ff0f33bf1c87f5c652219784ad2a1d
3
+ size 63799640
judge_retrieval_necessary_check/data_output/contrieve/Phi-3-small-8k-instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:654a650baddca6b6f8df5c75e8f889cb4edba718653e32dbb115775b432c051d
3
+ size 44591779
judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-1.5B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2a296c70ce24953f241b4e4b078826fd80afe8391e5fa16c6473b4a8adf591c
3
+ size 16625470
judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-14B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c219fc7d58acbe99e157dbebd8b3b2af9914caeab0cedab377bd50a2ec0c84a
3
+ size 52568500
judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-32B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0d0fd330567196a3cc03e8e5347e41294feaf80af6bc362b7a1c21a82169040d
3
+ size 44098786
judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-3B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:475f321ede35c2ce72296ca84cc9eff8264eaacb446ca0f1ab09f60e148caa63
3
+ size 27383239
judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-72B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e358ff17b32f30aa979a55ae70350515ef136ca46d76d84c21676b4bd2ca6ad
3
+ size 16932492
judge_retrieval_necessary_check/data_output/contrieve/Qwen2.5-7B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:693b9a9a109f120479b5d10c92dc25234dc684fbcb699619b9c835c2c9061a73
3
+ size 28892321
judge_retrieval_necessary_check/data_output/contrieve/gemma-2-27b-it.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a22a1f797409bdc2f30d681bfa6f53ef8e969048eb0305b0d540ebd162790f2a
3
+ size 19013173
judge_retrieval_necessary_check/data_output/contrieve/gemma-2-2b-it.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:faa578d1c785aa124d82a3ab3d5b126ab07aa4f03fef28bf2f83fcf0475d546b
3
+ size 37638727
judge_retrieval_necessary_check/data_output/contrieve/gemma-2-9b-it.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f722e5bc7af29a942fec93a4ff0584b0f9c9f66cf1c9f493d79b5f40470c52fe
3
+ size 22357608
judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.1-70B-Instruct.json ADDED
The diff for this file is too large to render. See raw diff
 
judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.1-8B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9091ed57891b4f8c5144723847c98fd6c1ff713cb863411946873051a86e672a
3
+ size 56325981
judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.2-1B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61340f865683c7c0796fc18eec8c425eacec1fa0f3479475c22a643cc150aea4
3
+ size 18748078
judge_retrieval_necessary_check/data_output/promptretrieval/Llama-3.2-3B-Instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c275e767579d8cd0e3e1e59983ad287d6d76aca97afdcbe0271a21254e0bd97
3
+ size 34492532
judge_retrieval_necessary_check/data_output/promptretrieval/Ministral-8B-Instruct-2410.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9d4b76fe41c61b7da11fde1d418457eb84756be98583aa0346503059f37c08c
3
+ size 16266969
judge_retrieval_necessary_check/data_output/promptretrieval/Mistral-7B-Instruct-v0.3.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e8b4d1c4d29549c43ef03fa43b36e2a8bee32e1f988644a705371246b0cd219b
3
+ size 35241428
judge_retrieval_necessary_check/data_output/promptretrieval/Mixtral-8x7B-Instruct-v0.1.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:330ac9f60d8c18f7c7ac746e03f6e5b08ace49cdb18abc4c7db8b135881349f5
3
+ size 30124598
judge_retrieval_necessary_check/data_output/promptretrieval/Phi-3-medium-4k-instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e5f411899e4b289613a2c54205b72cf70bf2b00f8c0b42774e0231faed9b98d8
3
+ size 61168156
judge_retrieval_necessary_check/data_output/promptretrieval/Phi-3-mini-4k-instruct.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e92ee45b62ee3f22e85ca0540ed45fd6337b7255952a95baa25520300bcd721
3
+ size 64158411