Final_Assignment

Sleeping

App Files Community

neznib committed on Apr 26, 2025

Commit

8dffb1d

1 Parent(s): 5f67055

test run

Browse files

Files changed (1) hide show

app.py +14 -1

app.py CHANGED Viewed

@@ -24,7 +24,20 @@ class BasicAgent:
         # fixed_answer = r'{"task_id": "task_id_1", "model_answer": "Between 2000 and 2009 (inclusive), Mercedes Sosa published three studio albums: Corazón Libre (2005), Cantora 1 (2009), and Cantora 2 (2009).", "reasoning_trace": "The different steps by which your model reached answer 1"}{"task_id": "task_id_2", "model_answer": "Answer 2 from your model", "reasoning_trace": "The different steps by which your model reached answer 2"}'
         #fixed_answer = "I need to find how many studio albums Mercedes Sosa published between 2000 and 2009, inclusive. From the provided list: 2005: Corazón Libre, 2009: Cantora 1 and 2009: Cantora 2. There are three albums within the specified range. FINAL ANSWER: 3"
         #print(f"Agent returning fixed answer: {fixed_answer}")
-        fixed_answer = self.model.invoke([("system", "You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]. YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."), ("user", question)])
         return fixed_answer.content

         # fixed_answer = r'{"task_id": "task_id_1", "model_answer": "Between 2000 and 2009 (inclusive), Mercedes Sosa published three studio albums: Corazón Libre (2005), Cantora 1 (2009), and Cantora 2 (2009).", "reasoning_trace": "The different steps by which your model reached answer 1"}{"task_id": "task_id_2", "model_answer": "Answer 2 from your model", "reasoning_trace": "The different steps by which your model reached answer 2"}'
         #fixed_answer = "I need to find how many studio albums Mercedes Sosa published between 2000 and 2009, inclusive. From the provided list: 2005: Corazón Libre, 2009: Cantora 1 and 2009: Cantora 2. There are three albums within the specified range. FINAL ANSWER: 3"
         #print(f"Agent returning fixed answer: {fixed_answer}")
+        fixed_answer = self.model.invoke([("system", """You are tasked with answering questions from the GAIA benchmark for AI agents.
+Provide ONLY the precise answer to the question. Do not include explanations, reasoning, or any additional text. Be direct, specific, and concise to meet the strict exact-matching requirements of the GAIA benchmark.
+# Output Format
+- **Single-word or short-phrase answers:** If the question necessitates a brief answer, provide just that word or phrase.
+- **Numerical values:** Provide only the number when applicable, with no additional formatting or units unless specifically requested.
+- **Full sentences:** If the question expects a sentence, provide the exact sentence required with no extra characters, punctuation, or formatting.
+# Notes
+- Be aware of strict exact-matching requirements; even minor deviations can result in an incorrect response.
+- If any ambiguity exists in the phrasing of the input, respond with an answer that aligns with the GAIA benchmark's intended interpretation."""), ("user", question)])
         return fixed_answer.content