abhi1294 committed on
Commit
0370135
·
1 Parent(s): 1e22ea2

Fix prompts and utils

Browse files
Files changed (3) hide show
  1. .env +1 -1
  2. prompts.py +119 -34
  3. utils.py +137 -23
.env CHANGED
@@ -1,2 +1,2 @@
1
  SPACE_ID = "abhi1294/Final_Assignment_Template"
2
- HF_DEBUG=1
 
1
  SPACE_ID = "abhi1294/Final_Assignment_Template"
2
+ HF_DEBUG=1
prompts.py CHANGED
@@ -1,55 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
 
4
- SYSTEM_PROMPT = """
5
- You are a benchmark-solving AI agent.
 
6
 
7
- Your task is to answer questions as accurately as possible.
8
 
9
- Rules:
10
- - Return ONLY the final answer.
11
- - Do NOT include explanations.
12
- - Do NOT include reasoning.
13
- - Do NOT include the words "FINAL ANSWER".
14
- - Do NOT include labels like "Answer:".
15
- - Output must be exactly the answer text.
16
-
17
- Formatting rules:
18
- - If the answer is a number, output only the number.
19
- - If the answer is a word or phrase, output only that word or phrase.
20
- - If the answer is a date, return the exact date string.
21
- - Do not add punctuation unless it is part of the answer.
22
-
23
- Your response must contain only the final answer string.
24
  """
25
 
26
 
27
  def build_solver_prompt(question: str, context: str = "") -> str:
28
- """
29
- Builds the final prompt sent to the model.
30
- Includes optional file context when a task provides additional data.
31
- """
32
-
33
  if context:
34
- prompt = f"""
35
- {SYSTEM_PROMPT}
36
 
37
- Context information:
38
  {context}
39
 
40
  Question:
41
  {question}
 
42
 
43
- Return only the final answer.
44
- """
45
- else:
46
- prompt = f"""
47
- {SYSTEM_PROMPT}
48
 
49
  Question:
50
  {question}
 
51
 
52
- Return only the final answer.
53
- """
54
 
55
- return prompt.strip()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # from __future__ import annotations
2
+
3
+
4
+ # SYSTEM_PROMPT = """
5
+ # You are a benchmark-solving AI agent.
6
+
7
+ # Your task is to answer questions as accurately as possible.
8
+
9
+ # Rules:
10
+ # - Return only the final answer.
11
+ # - If unsure, return your best short answer only.
12
+ # - Do not explain.
13
+ # - Do not include reasoning.
14
+ # - Do not include complete sentences unless the answer itself is a sentence.
15
+ # - For lists, preserve exact order only if supported by evidence.
16
+ # - Do not invent information not present in the question or provided context.
17
+
18
+ # Formatting rules:
19
+ # - If the answer is a number, output only the number.
20
+ # - If the answer is a word or phrase, output only that word or phrase.
21
+ # - If the answer is a date, return the exact date string.
22
+ # - Do not add punctuation unless it is part of the answer.
23
+
24
+ # Your response must contain only the final answer string.
25
+ # """
26
+
27
+
28
+ # def build_solver_prompt(question: str, context: str = "") -> str:
29
+ # """
30
+ # Builds the final prompt sent to the model.
31
+ # Includes optional file context when a task provides additional data.
32
+ # """
33
+
34
+ # if context:
35
+ # prompt = f"""
36
+ # {SYSTEM_PROMPT}
37
+
38
+ # Context information:
39
+ # {context}
40
+
41
+ # Question:
42
+ # {question}
43
+
44
+ # Return only the final answer.
45
+ # """
46
+ # else:
47
+ # prompt = f"""
48
+ # {SYSTEM_PROMPT}
49
+
50
+ # Question:
51
+ # {question}
52
+
53
+ # Return only the final answer.
54
+ # """
55
+
56
+ # return prompt.strip()
57
+
58
  from __future__ import annotations
59
 
60
 
61
+ GAIA_SYSTEM_PROMPT = """
62
+ You are a general AI assistant. I will ask you a question.
63
+ Report your thoughts, and finish your answer with the following template:
64
 
65
+ FINAL ANSWER: [YOUR FINAL ANSWER]
66
 
67
+ YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
68
+
69
+ If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
70
+ If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
71
+ If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
 
 
 
 
 
 
 
 
 
 
72
  """
73
 
74
 
75
  def build_solver_prompt(question: str, context: str = "") -> str:
 
 
 
 
 
76
  if context:
77
+ return f"""
78
+ {GAIA_SYSTEM_PROMPT}
79
 
80
+ Context:
81
  {context}
82
 
83
  Question:
84
  {question}
85
+ """.strip()
86
 
87
+ return f"""
88
+ {GAIA_SYSTEM_PROMPT}
 
 
 
89
 
90
  Question:
91
  {question}
92
+ """.strip()
93
 
 
 
94
 
95
+ def build_verifier_prompt(question: str, draft_answer: str, context: str = "") -> str:
96
+ if context:
97
+ return f"""
98
+ You are checking an answer for GAIA exact-match evaluation.
99
+
100
+ Rewrite the draft so that the final output strictly follows this template:
101
+
102
+ FINAL ANSWER: [YOUR FINAL ANSWER]
103
+
104
+ Rules:
105
+ - Keep the final answer as short as possible.
106
+ - Remove unsupported guesses, repetition, and unnecessary words.
107
+ - For strings, avoid articles and abbreviations unless explicitly required.
108
+ - For numbers, do not include commas or units unless required.
109
+ - For comma separated lists, keep only the list.
110
+
111
+ Context:
112
+ {context}
113
+
114
+ Question:
115
+ {question}
116
+
117
+ Draft:
118
+ {draft_answer}
119
+ """.strip()
120
+
121
+ return f"""
122
+ You are checking an answer for GAIA exact-match evaluation.
123
+
124
+ Rewrite the draft so that the final output strictly follows this template:
125
+
126
+ FINAL ANSWER: [YOUR FINAL ANSWER]
127
+
128
+ Rules:
129
+ - Keep the final answer as short as possible.
130
+ - Remove unsupported guesses, repetition, and unnecessary words.
131
+ - For strings, avoid articles and abbreviations unless explicitly required.
132
+ - For numbers, do not include commas or units unless required.
133
+ - For comma separated lists, keep only the list.
134
+
135
+ Question:
136
+ {question}
137
+
138
+ Draft:
139
+ {draft_answer}
140
+ """.strip()
utils.py CHANGED
@@ -1,3 +1,104 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from __future__ import annotations
2
 
3
  import re
@@ -5,12 +106,13 @@ import re
5
 
6
  def extract_final_answer(text: str) -> str:
7
  """
8
- Extract the most likely final answer from raw model output.
 
9
 
10
- In V1 we keep this conservative:
11
- - if the model returns a normal short answer, keep it
12
- - if it adds common prefixes like 'Answer:' or 'Final answer:', remove them
13
- - if it returns multiple lines, prefer the last non-empty line
14
  """
15
  if text is None:
16
  return ""
@@ -19,11 +121,22 @@ def extract_final_answer(text: str) -> str:
19
  if not text:
20
  return ""
21
 
22
- # Remove fenced code blocks if the model wraps the answer oddly
23
  text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
24
  text = re.sub(r"\s*```$", "", text)
25
 
26
- # Common exact-answer markers
 
 
 
 
 
 
 
 
 
 
 
27
  marker_patterns = [
28
  r"(?i)\bfinal answer\s*:\s*",
29
  r"(?i)\banswer\s*:\s*",
@@ -35,7 +148,6 @@ def extract_final_answer(text: str) -> str:
35
  for pattern in marker_patterns:
36
  cleaned = re.sub(pattern, "", cleaned).strip()
37
 
38
- # If multi-line, prefer the last meaningful line
39
  lines = [line.strip() for line in cleaned.splitlines() if line.strip()]
40
  if not lines:
41
  return ""
@@ -48,14 +160,15 @@ def extract_final_answer(text: str) -> str:
48
 
49
  def normalize_final_answer(text: str) -> str:
50
  """
51
- Normalize answer text for safer exact-match submission without being too aggressive.
52
 
53
  Rules:
54
- - trim outer whitespace
55
- - collapse internal repeated whitespace
56
- - remove wrapping quotes if they wrap the full answer
57
- - remove a single trailing period only for plain word/phrase answers
58
- but keep decimal numbers and date punctuation intact
 
59
  """
60
  if text is None:
61
  return ""
@@ -68,25 +181,26 @@ def normalize_final_answer(text: str) -> str:
68
  text = re.sub(r"\s+", " ", text).strip()
69
 
70
  # Remove matching surrounding quotes
71
- if len(text) >= 2:
72
- if (text[0] == text[-1]) and text[0] in {'"', "'"}:
73
- text = text[1:-1].strip()
74
 
75
- # Remove common leading labels again, just in case
76
  text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
77
 
78
  # Remove one trailing period for simple phrase answers only
79
- # Keep decimals like 3.14 intact
80
- if text.endswith("."):
81
- if not re.fullmatch(r"\d+\.\d+", text):
82
- text = text[:-1].strip()
 
 
83
 
84
  return text
85
 
86
 
87
  def is_placeholder_answer(text: str) -> bool:
88
  """
89
- Detect placeholder/fallback outputs so app.py can optionally flag them.
90
  """
91
  if text is None:
92
  return True
 
1
+ # from __future__ import annotations
2
+
3
+ # import re
4
+
5
+
6
+ # def extract_final_answer(text: str) -> str:
7
+ # """
8
+ # Extract the most likely final answer from raw model output.
9
+
10
+ # In V1 we keep this conservative:
11
+ # - if the model returns a normal short answer, keep it
12
+ # - if it adds common prefixes like 'Answer:' or 'Final answer:', remove them
13
+ # - if it returns multiple lines, prefer the last non-empty line
14
+ # """
15
+ # if text is None:
16
+ # return ""
17
+
18
+ # text = str(text).strip()
19
+ # if not text:
20
+ # return ""
21
+
22
+ # # Remove fenced code blocks if the model wraps the answer oddly
23
+ # text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
24
+ # text = re.sub(r"\s*```$", "", text)
25
+
26
+ # # Common exact-answer markers
27
+ # marker_patterns = [
28
+ # r"(?i)\bfinal answer\s*:\s*",
29
+ # r"(?i)\banswer\s*:\s*",
30
+ # r"(?i)\bthe answer is\s*:\s*",
31
+ # r"(?i)\bthe answer is\s+",
32
+ # ]
33
+
34
+ # cleaned = text
35
+ # for pattern in marker_patterns:
36
+ # cleaned = re.sub(pattern, "", cleaned).strip()
37
+
38
+ # # If multi-line, prefer the last meaningful line
39
+ # lines = [line.strip() for line in cleaned.splitlines() if line.strip()]
40
+ # if not lines:
41
+ # return ""
42
+
43
+ # if len(lines) == 1:
44
+ # return lines[0]
45
+
46
+ # return lines[-1]
47
+
48
+
49
+ # def normalize_final_answer(text: str) -> str:
50
+ # """
51
+ # Normalize answer text for safer exact-match submission without being too aggressive.
52
+
53
+ # Rules:
54
+ # - trim outer whitespace
55
+ # - collapse internal repeated whitespace
56
+ # - remove wrapping quotes if they wrap the full answer
57
+ # - remove a single trailing period only for plain word/phrase answers
58
+ # but keep decimal numbers and date punctuation intact
59
+ # """
60
+ # if text is None:
61
+ # return ""
62
+
63
+ # text = str(text).strip()
64
+ # if not text:
65
+ # return ""
66
+
67
+ # # Collapse repeated whitespace
68
+ # text = re.sub(r"\s+", " ", text).strip()
69
+
70
+ # # Remove matching surrounding quotes
71
+ # if len(text) >= 2:
72
+ # if (text[0] == text[-1]) and text[0] in {'"', "'"}:
73
+ # text = text[1:-1].strip()
74
+
75
+ # # Remove common leading labels again, just in case
76
+ # text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
77
+
78
+ # # Remove one trailing period for simple phrase answers only
79
+ # # Keep decimals like 3.14 intact
80
+ # if text.endswith("."):
81
+ # if not re.fullmatch(r"\d+\.\d+", text):
82
+ # text = text[:-1].strip()
83
+
84
+ # return text
85
+
86
+
87
+ # def is_placeholder_answer(text: str) -> bool:
88
+ # """
89
+ # Detect placeholder/fallback outputs so app.py can optionally flag them.
90
+ # """
91
+ # if text is None:
92
+ # return True
93
+
94
+ # normalized = normalize_final_answer(text).lower()
95
+ # return normalized in {
96
+ # "",
97
+ # "placeholder",
98
+ # "n/a",
99
+ # "unknown",
100
+ # }
101
+
102
  from __future__ import annotations
103
 
104
  import re
 
106
 
107
  def extract_final_answer(text: str) -> str:
108
  """
109
+ Extract the final answer, preferring GAIA-style:
110
+ FINAL ANSWER: ...
111
 
112
+ Fallback behavior:
113
+ - strip code fences
114
+ - remove common answer prefixes
115
+ - if multiple lines remain, prefer the last non-empty line
116
  """
117
  if text is None:
118
  return ""
 
121
  if not text:
122
  return ""
123
 
124
+ # Remove fenced code blocks if present
125
  text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
126
  text = re.sub(r"\s*```$", "", text)
127
 
128
+ # Prefer GAIA-style final answer extraction
129
+ gaia_match = re.search(
130
+ r"FINAL ANSWER:\s*(.*)",
131
+ text,
132
+ flags=re.IGNORECASE | re.DOTALL,
133
+ )
134
+ if gaia_match:
135
+ extracted = gaia_match.group(1).strip()
136
+ lines = [line.strip() for line in extracted.splitlines() if line.strip()]
137
+ return lines[0] if lines else extracted
138
+
139
+ # Fallback exact-answer markers
140
  marker_patterns = [
141
  r"(?i)\bfinal answer\s*:\s*",
142
  r"(?i)\banswer\s*:\s*",
 
148
  for pattern in marker_patterns:
149
  cleaned = re.sub(pattern, "", cleaned).strip()
150
 
 
151
  lines = [line.strip() for line in cleaned.splitlines() if line.strip()]
152
  if not lines:
153
  return ""
 
160
 
161
  def normalize_final_answer(text: str) -> str:
162
  """
163
+ Normalize answer text for exact-match-style submission.
164
 
165
  Rules:
166
+ - trim whitespace
167
+ - collapse repeated spaces
168
+ - remove wrapping quotes
169
+ - remove labels again if present
170
+ - remove one trailing period for plain phrase answers
171
+ - remove one leading article (a, an, the) from the answer, regardless of its length
172
  """
173
  if text is None:
174
  return ""
 
181
  text = re.sub(r"\s+", " ", text).strip()
182
 
183
  # Remove matching surrounding quotes
184
+ if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
185
+ text = text[1:-1].strip()
 
186
 
187
+ # Remove common labels again
188
  text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
189
 
190
  # Remove one trailing period for simple phrase answers only
191
+ if text.endswith(".") and not re.fullmatch(r"\d+\.\d+", text):
192
+ text = text[:-1].strip()
193
+
194
+ # Remove one leading article (a/an/the); this applies to answers of any length
195
+ # Helps align with GAIA string-format guidance
196
+ text = re.sub(r"(?i)^(a|an|the)\s+", "", text).strip()
197
 
198
  return text
199
 
200
 
201
  def is_placeholder_answer(text: str) -> bool:
202
  """
203
+ Detect placeholder or clearly non-useful outputs.
204
  """
205
  if text is None:
206
  return True