Final_Assignment_Template

Sleeping

App Files Files Community

abhi1294 committed on Mar 15

Commit

0370135

1 Parent(s): 1e22ea2

Fix prompts and utils

Browse files

Files changed (3) hide show

.env +1 -1
prompts.py +119 -34
utils.py +137 -23

.env CHANGED Viewed

	@@ -1,2 +1,2 @@
1	SPACE_ID = "abhi1294/Final_Assignment_Template"
2	- HF_DEBUG=1


1	SPACE_ID = "abhi1294/Final_Assignment_Template"
2	+ HF_DEBUG=1

prompts.py CHANGED Viewed

@@ -1,55 +1,140 @@
 from __future__ import annotations
-SYSTEM_PROMPT = """
-You are a benchmark-solving AI agent.
-Your task is to answer questions as accurately as possible.
-Rules:
-- Return ONLY the final answer.
-- Do NOT include explanations.
-- Do NOT include reasoning.
-- Do NOT include the words "FINAL ANSWER".
-- Do NOT include labels like "Answer:".
-- Output must be exactly the answer text.
-Formatting rules:
-- If the answer is a number, output only the number.
-- If the answer is a word or phrase, output only that word or phrase.
-- If the answer is a date, return the exact date string.
-- Do not add punctuation unless it is part of the answer.
-Your response must contain only the final answer string.
 """
 def build_solver_prompt(question: str, context: str = "") -> str:
-    """
-    Builds the final prompt sent to the model.
-    Includes optional file context when a task provides additional data.
-    """
     if context:
-        prompt = f"""
-{SYSTEM_PROMPT}
-Context information:
 {context}
 Question:
 {question}
-Return only the final answer.
-"""
-    else:
-        prompt = f"""
-{SYSTEM_PROMPT}
 Question:
 {question}
-Return only the final answer.
-"""
-    return prompt.strip()

+# from __future__ import annotations
+# SYSTEM_PROMPT = """
+# You are a benchmark-solving AI agent.
+# Your task is to answer questions as accurately as possible.
+# Rules:
+# - Return only the final answer.
+# - If unsure, return your best short answer only.
+# - Do not explain.
+# - Do not include reasoning.
+# - Do not include complete sentences unless the answer itself is a sentence.
+# - For lists, preserve exact order only if supported by evidence.
+# - Do not invent information not present in the question or provided context.
+# Formatting rules:
+# - If the answer is a number, output only the number.
+# - If the answer is a word or phrase, output only that word or phrase.
+# - If the answer is a date, return the exact date string.
+# - Do not add punctuation unless it is part of the answer.
+# Your response must contain only the final answer string.
+# """
+# def build_solver_prompt(question: str, context: str = "") -> str:
+#     """
+#     Builds the final prompt sent to the model.
+#     Includes optional file context when a task provides additional data.
+#     """
+#     if context:
+#         prompt = f"""
+# {SYSTEM_PROMPT}
+# Context information:
+# {context}
+# Question:
+# {question}
+# Return only the final answer.
+# """
+#     else:
+#         prompt = f"""
+# {SYSTEM_PROMPT}
+# Question:
+# {question}
+# Return only the final answer.
+# """
+#     return prompt.strip()
 from __future__ import annotations
+GAIA_SYSTEM_PROMPT = """
+You are a general AI assistant. I will ask you a question.
+Report your thoughts, and finish your answer with the following template:
+FINAL ANSWER: [YOUR FINAL ANSWER]
+YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+If you are asked for a comma separated list, apply the above rules depending on whether the element to be put in the list is a number or a string.
 """
 def build_solver_prompt(question: str, context: str = "") -> str:
     if context:
+        return f"""
+{GAIA_SYSTEM_PROMPT}
+Context:
 {context}
 Question:
 {question}
+""".strip()
+    return f"""
+{GAIA_SYSTEM_PROMPT}
 Question:
 {question}
+""".strip()
+def build_verifier_prompt(question: str, draft_answer: str, context: str = "") -> str:
+    if context:
+        return f"""
+You are checking an answer for GAIA exact-match evaluation.
+Rewrite the draft so that the final output strictly follows this template:
+FINAL ANSWER: [YOUR FINAL ANSWER]
+Rules:
+- Keep the final answer as short as possible.
+- Remove unsupported guesses, repetition, and unnecessary words.
+- For strings, avoid articles and abbreviations unless explicitly required.
+- For numbers, do not include commas or units unless required.
+- For comma separated lists, keep only the list.
+Context:
+{context}
+Question:
+{question}
+Draft:
+{draft_answer}
+""".strip()
+    return f"""
+You are checking an answer for GAIA exact-match evaluation.
+Rewrite the draft so that the final output strictly follows this template:
+FINAL ANSWER: [YOUR FINAL ANSWER]
+Rules:
+- Keep the final answer as short as possible.
+- Remove unsupported guesses, repetition, and unnecessary words.
+- For strings, avoid articles and abbreviations unless explicitly required.
+- For numbers, do not include commas or units unless required.
+- For comma separated lists, keep only the list.
+Question:
+{question}
+Draft:
+{draft_answer}
+""".strip()

utils.py CHANGED Viewed

@@ -1,3 +1,104 @@
 from __future__ import annotations
 import re
@@ -5,12 +106,13 @@ import re
 def extract_final_answer(text: str) -> str:
     """
-    Extract the most likely final answer from raw model output.
-    In V1 we keep this conservative:
-    - if the model returns a normal short answer, keep it
-    - if it adds common prefixes like 'Answer:' or 'Final answer:', remove them
-    - if it returns multiple lines, prefer the last non-empty line
     """
     if text is None:
         return ""
@@ -19,11 +121,22 @@ def extract_final_answer(text: str) -> str:
     if not text:
         return ""
-    # Remove fenced code blocks if the model wraps the answer oddly
     text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
     text = re.sub(r"\s*```$", "", text)
-    # Common exact-answer markers
     marker_patterns = [
         r"(?i)\bfinal answer\s*:\s*",
         r"(?i)\banswer\s*:\s*",
@@ -35,7 +148,6 @@ def extract_final_answer(text: str) -> str:
     for pattern in marker_patterns:
         cleaned = re.sub(pattern, "", cleaned).strip()
-    # If multi-line, prefer the last meaningful line
     lines = [line.strip() for line in cleaned.splitlines() if line.strip()]
     if not lines:
         return ""
@@ -48,14 +160,15 @@ def extract_final_answer(text: str) -> str:
 def normalize_final_answer(text: str) -> str:
     """
-    Normalize answer text for safer exact-match submission without being too aggressive.
     Rules:
-    - trim outer whitespace
-    - collapse internal repeated whitespace
-    - remove wrapping quotes if they wrap the full answer
-    - remove a single trailing period only for plain word/phrase answers
-      but keep decimal numbers and date punctuation intact
     """
     if text is None:
         return ""
@@ -68,25 +181,26 @@ def normalize_final_answer(text: str) -> str:
     text = re.sub(r"\s+", " ", text).strip()
     # Remove matching surrounding quotes
-    if len(text) >= 2:
-        if (text[0] == text[-1]) and text[0] in {'"', "'"}:
-            text = text[1:-1].strip()
-    # Remove common leading labels again, just in case
     text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
     # Remove one trailing period for simple phrase answers only
-    # Keep decimals like 3.14 intact
-    if text.endswith("."):
-        if not re.fullmatch(r"\d+\.\d+", text):
-            text = text[:-1].strip()
     return text
 def is_placeholder_answer(text: str) -> bool:
     """
-    Detect placeholder/fallback outputs so app.py can optionally flag them.
     """
     if text is None:
         return True

+# from __future__ import annotations
+# import re
+# def extract_final_answer(text: str) -> str:
+#     """
+#     Extract the most likely final answer from raw model output.
+#     In V1 we keep this conservative:
+#     - if the model returns a normal short answer, keep it
+#     - if it adds common prefixes like 'Answer:' or 'Final answer:', remove them
+#     - if it returns multiple lines, prefer the last non-empty line
+#     """
+#     if text is None:
+#         return ""
+#     text = str(text).strip()
+#     if not text:
+#         return ""
+#     # Remove fenced code blocks if the model wraps the answer oddly
+#     text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
+#     text = re.sub(r"\s*```$", "", text)
+#     # Common exact-answer markers
+#     marker_patterns = [
+#         r"(?i)\bfinal answer\s*:\s*",
+#         r"(?i)\banswer\s*:\s*",
+#         r"(?i)\bthe answer is\s*:\s*",
+#         r"(?i)\bthe answer is\s+",
+#     ]
+#     cleaned = text
+#     for pattern in marker_patterns:
+#         cleaned = re.sub(pattern, "", cleaned).strip()
+#     # If multi-line, prefer the last meaningful line
+#     lines = [line.strip() for line in cleaned.splitlines() if line.strip()]
+#     if not lines:
+#         return ""
+#     if len(lines) == 1:
+#         return lines[0]
+#     return lines[-1]
+# def normalize_final_answer(text: str) -> str:
+#     """
+#     Normalize answer text for safer exact-match submission without being too aggressive.
+#     Rules:
+#     - trim outer whitespace
+#     - collapse internal repeated whitespace
+#     - remove wrapping quotes if they wrap the full answer
+#     - remove a single trailing period only for plain word/phrase answers
+#       but keep decimal numbers and date punctuation intact
+#     """
+#     if text is None:
+#         return ""
+#     text = str(text).strip()
+#     if not text:
+#         return ""
+#     # Collapse repeated whitespace
+#     text = re.sub(r"\s+", " ", text).strip()
+#     # Remove matching surrounding quotes
+#     if len(text) >= 2:
+#         if (text[0] == text[-1]) and text[0] in {'"', "'"}:
+#             text = text[1:-1].strip()
+#     # Remove common leading labels again, just in case
+#     text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
+#     # Remove one trailing period for simple phrase answers only
+#     # Keep decimals like 3.14 intact
+#     if text.endswith("."):
+#         if not re.fullmatch(r"\d+\.\d+", text):
+#             text = text[:-1].strip()
+#     return text
+# def is_placeholder_answer(text: str) -> bool:
+#     """
+#     Detect placeholder/fallback outputs so app.py can optionally flag them.
+#     """
+#     if text is None:
+#         return True
+#     normalized = normalize_final_answer(text).lower()
+#     return normalized in {
+#         "",
+#         "placeholder",
+#         "n/a",
+#         "unknown",
+#     }
 from __future__ import annotations
 import re
 def extract_final_answer(text: str) -> str:
     """
+    Extract the final answer, preferring GAIA-style:
+    FINAL ANSWER: ...
+    Fallback behavior:
+    - strip code fences
+    - remove common answer prefixes
+    - if multiple lines remain, prefer the last non-empty line
     """
     if text is None:
         return ""
     if not text:
         return ""
+    # Remove fenced code blocks if present
     text = re.sub(r"^```[a-zA-Z0-9_-]*\s*", "", text)
     text = re.sub(r"\s*```$", "", text)
+    # Prefer GAIA-style final answer extraction
+    gaia_match = re.search(
+        r"FINAL ANSWER:\s*(.*)",
+        text,
+        flags=re.IGNORECASE | re.DOTALL,
+    )
+    if gaia_match:
+        extracted = gaia_match.group(1).strip()
+        lines = [line.strip() for line in extracted.splitlines() if line.strip()]
+        return lines[0] if lines else extracted
+    # Fallback exact-answer markers
     marker_patterns = [
         r"(?i)\bfinal answer\s*:\s*",
         r"(?i)\banswer\s*:\s*",
     for pattern in marker_patterns:
         cleaned = re.sub(pattern, "", cleaned).strip()
     lines = [line.strip() for line in cleaned.splitlines() if line.strip()]
     if not lines:
         return ""
 def normalize_final_answer(text: str) -> str:
     """
+    Normalize answer text for exact-match-style submission.
     Rules:
+    - trim whitespace
+    - collapse repeated spaces
+    - remove wrapping quotes
+    - remove labels again if present
+    - remove one trailing period for plain phrase answers
+    - remove one leading article (a/an/the), regardless of answer length
     """
     if text is None:
         return ""
     text = re.sub(r"\s+", " ", text).strip()
     # Remove matching surrounding quotes
+    if len(text) >= 2 and text[0] == text[-1] and text[0] in {'"', "'"}:
+        text = text[1:-1].strip()
+    # Remove common labels again
     text = re.sub(r"(?i)^(final answer|answer)\s*:\s*", "", text).strip()
     # Remove one trailing period for simple phrase answers only
+    if text.endswith(".") and not re.fullmatch(r"\d+\.\d+", text):
+        text = text[:-1].strip()
+    # Remove one leading article (a/an/the) from the answer, whatever its length
+    # Helps align with GAIA string-format guidance
+    text = re.sub(r"(?i)^(a|an|the)\s+", "", text).strip()
     return text
 def is_placeholder_answer(text: str) -> bool:
     """
+    Detect placeholder or clearly non-useful outputs.
     """
     if text is None:
         return True