Spaces:

Isics
/

agents_gaia

Runtime error

App Files Files Community

Isics committed on Dec 11, 2025

Commit

6aef72f

1 Parent(s): 32844c7

fixed some shit

Browse files

Files changed (5) hide show

agents/manager.py +34 -14
agents/utils.py +3 -2
agents/web_browser.py +3 -2
app.py +46 -18
pyproject.toml +3 -1

agents/manager.py CHANGED Viewed

@@ -1,14 +1,11 @@
-import os
 import re
-from PIL import Image
-from smolagents import CodeAgent, OpenAIServerModel, Model
-from smolagents.utils import encode_image_base64, make_image_url
 from config import authorized_libraries
-def check_no_refusal(final_answer: str) -> str | None:
     refusal_phrases = [
         "cannot answer", "unable to answer", "i don't know", "no se puede responder",
         "lo siento", "no tengo acceso", "provide more information"
@@ -22,17 +19,40 @@ def check_no_refusal(final_answer: str) -> str | None:
     return None
-def check_file_existence(final_answer: str) -> str | None:
-    file_pattern = r"[\w,\s-]+\.(csv|xlsx|txt|pdf|png|jpg|json)"
-    match = re.search(file_pattern, str(final_answer))
-    if match:
-        filename = match.group().strip()
-        if not os.path.exists(filename):
-            return f"You mentioned the file '{filename}' in your final answer, but I cannot find it in the current directory. Please, make sure you have successfully executed the code to generate the file before responding."
-    return None
 def create_manager(model: Model, agents: list[CodeAgent], **kwargs) -> CodeAgent:
     return CodeAgent(
@@ -41,7 +61,7 @@ def create_manager(model: Model, agents: list[CodeAgent], **kwargs) -> CodeAgent
         add_base_tools=True,
         additional_authorized_imports=authorized_libraries,
         verbosity_level=2,
-        final_answer_checks=[],
         max_steps=25,
         **kwargs
     )

 import re
+from smolagents import CodeAgent, Model
 from config import authorized_libraries
+def check_no_refusal(final_answer: str, **kwargs) -> str | None:
     refusal_phrases = [
         "cannot answer", "unable to answer", "i don't know", "no se puede responder",
         "lo siento", "no tengo acceso", "provide more information"
     return None
+def validate_format_gaia(final_answer: str, **kwargs) -> str | None:
+    """
+    Valida que la respuesta cumpla estrictamente con el formato de GAIA.
+    """
+    ans = str(final_answer).strip()
+    # 1. REGLA DE CONCISIÓN (No frases largas)
+    # Si tiene más de 20 palabras, probablemente está explicando algo. GAIA quiere el dato crudo.
+    if len(ans.split()) > 20:
+        return f"Your answer is too long ({len(ans.split())} words). GAIA expects only a number, a short string, or a comma-separated list. Eliminate explanations."
+    # 2. REGLA DE UNIDADES (No $ ni %)
+    if "$" in ans or "%" in ans:
+        return "FORMAT VIOLATION: Do not use units such as '$' or '%'. Returns only the numeric value."
+    # 3. REGLA DE ARTÍCULOS (Para Strings)
+    # Comprobamos si empieza por "The ", "A " o "An " (case insensitive)
+    lower_ans = ans.lower()
+    if lower_ans.startswith(("the ", "a ", "an ")):
+        return "FORMAT VIOLATION: Do not use articles ('The', 'A', 'An') at the beginning of the response. Write only the noun or noun."
+    # 4. REGLA DE NÚMEROS (No usar comas como separador de miles)
+    # Buscamos el patrón específico de dígito + coma + 3 dígitos (ej: 1,000)
+    # Ojo: No bloqueamos listas (10, 20) porque hay un espacio después de la coma normalmente.
+    # Bloqueamos "1,200" pero permitimos "1.200" (decimal) o "1, 2" (lista)
+    if re.search(r'\d,\d{3}', ans):
+        return "FORMAT VIOLATION: Do not use commas to separate thousands (e.g. use '1200' instead of '1,200')."
+    # 5. REGLA DE DIGITOS EN TEXTO (Para Strings)
+    # La docu dice "write the digits in plain text unless specified otherwise".
+    # Esto es difícil de validar automáticamente sin saber la pregunta,
+    # pero podemos avisar si vemos mezcla rara. Por ahora lo dejamos pasar para no ser demasiado estrictos.
+    return None  # Todo correcto
 def create_manager(model: Model, agents: list[CodeAgent], **kwargs) -> CodeAgent:
     return CodeAgent(
         add_base_tools=True,
         additional_authorized_imports=authorized_libraries,
         verbosity_level=2,
+        final_answer_checks=[], # validate_format_gaia, check_no_refusal],
         max_steps=25,
         **kwargs
     )

agents/utils.py CHANGED Viewed

@@ -1,5 +1,5 @@
 import pandas as pd
-from smolagents import CodeAgent, Model, tool
 from config import authorized_libraries
@@ -20,7 +20,8 @@ def create_utils(model: Model) -> CodeAgent:
     return CodeAgent(
         model=model,
         tools=[
-            reverse_string
         ],
         add_base_tools=True,
         additional_authorized_imports=authorized_libraries,

 import pandas as pd
+from smolagents import CodeAgent, Model, tool, SpeechToTextTool
 from config import authorized_libraries
     return CodeAgent(
         model=model,
         tools=[
+            reverse_string,
+            SpeechToTextTool(),
         ],
         add_base_tools=True,
         additional_authorized_imports=authorized_libraries,

agents/web_browser.py CHANGED Viewed

@@ -1,4 +1,4 @@
-from smolagents import CodeAgent, VisitWebpageTool, DuckDuckGoSearchTool, Model
 from config import authorized_libraries
@@ -9,11 +9,12 @@ def create_web_agent(model: Model) -> CodeAgent:
         tools=[
             DuckDuckGoSearchTool(),
             VisitWebpageTool(),
         ],
         add_base_tools=True,
         additional_authorized_imports=authorized_libraries,
         name="web_agent",
-        description="Browses the web to find information, can also look for information using the search engine DuckDuckGo",
         verbosity_level=0,
         max_steps=8,
     )

+from smolagents import CodeAgent, VisitWebpageTool, DuckDuckGoSearchTool, WikipediaSearchTool, Model
 from config import authorized_libraries
         tools=[
             DuckDuckGoSearchTool(),
             VisitWebpageTool(),
+            WikipediaSearchTool(),
         ],
         add_base_tools=True,
         additional_authorized_imports=authorized_libraries,
         name="web_agent",
+        description="Browses the web to find information, can also look for information using the search engine DuckDuckGo, Wikipedia, etc",
         verbosity_level=0,
         max_steps=8,
     )

app.py CHANGED Viewed

@@ -1,9 +1,10 @@
 import tempfile
 import json
 from tqdm import tqdm
-from smolagents import OpenAIServerModel
 from agents.file_reader import create_file_reader
 from agents.manager import create_manager
@@ -12,12 +13,14 @@ from agents.utils import create_utils
 from agents.web_browser import create_web_agent
 from questions_api import QuestionsAPI
 from tools.vision_tools import analyze_image
-from config import IP_WINDOWS
-model = OpenAIServerModel(model_id="qwen2.5:14b",  # "deepseek-r1:14b",#
-                          api_base='http://localhost:11435/v1',
-                          api_key="ollama")
-#model_fast = OpenAIServerModel(model_id="qwen2.5:1.5b",
 #                               api_base=f"http://{IP_WINDOWS}:11434/v1",
 #                               api_key="ollama")
 # model_light = OpenAIServerModel(model_id="phi3",
@@ -25,13 +28,17 @@ model = OpenAIServerModel(model_id="qwen2.5:14b",  # "deepseek-r1:14b",#
 #                                api_key="ollama")
 manager_agent = create_manager(model,
-                               tools=[analyze_image],
                                agents=[create_file_reader(model),
                                        create_web_agent(model),
                                        create_utils(model),
                                        create_mathematician(model)])
-prompt = """ You are a strategic Orchestrator Agent. Your primary goal is to solve tasks efficiently by leveraging your available team of managed agents.
 I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
 Instructions:
  - Follow the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
@@ -42,11 +49,14 @@ Instructions:
  - CRITICAL: When giving the final answer, be extremely concise. If the user asks for a number, provide ONLY the number. If asked for a specific format, strictly follow it without chatting.
  - IMPORTANT: Before giving the final answer or using a tool, you MUST think step by step. Break down the problem.
  - If the deduction seems illogical, review it.
- - Before trying to solve a step on your own, take into account the agents .
- CRITICAL RULES FOR DELEGATION:
-1.  **Team First Approach:** Before writing any Python code, you MUST evaluate if one of your managed agents (e.g., 'becario_windows', 'vision_tool') is capable of handling the task.
-2.  **Code Execution:** Only write and execute Python code for complex reasoning, data integration, or tasks that no other agent can perform.
 CODING RULES:
 1.  **Print to Debug:** You cannot see the value of variables unless you print them. ALWAYS print the head of a dataframe or the result of a calculation to confirm it's correct.
@@ -61,21 +71,39 @@ ALWAYS check your `managed_agents` list before acting. If a tool or agent exists
  Extra info:
     {extra_info_123blabla}
  """
 with tempfile.TemporaryDirectory() as tmpdir:
     results = []
     questions_api = QuestionsAPI(tmpdir)
     for question in tqdm(questions_api.questions_generator(), total=len(questions_api.questions)):
         extra_info = {}
         if question["file_name"] != "":
             extra_info["file_name"] = f"{tmpdir}/{question['file_name']}"
         formatted_question = prompt.format(question_123blabla=question["question"],
                                            extra_info_123blabla=extra_info)
-        response = manager_agent.run(formatted_question, max_steps=30, return_full_result=True)
-        results.append({"task_id": question["task_id"],
-                        "submitted_answer": response.output})
-        with open('results.jsonl', 'a', encoding='utf-8') as f:
-            json.dump(results[-1], f, ensure_ascii=True, indent=4)
 print(questions_api.post_answers(results))

 import tempfile
 import json
+import yaml
 from tqdm import tqdm
+from smolagents import OpenAIServerModel, PythonInterpreterTool
 from agents.file_reader import create_file_reader
 from agents.manager import create_manager
 from agents.web_browser import create_web_agent
 from questions_api import QuestionsAPI
 from tools.vision_tools import analyze_image
+from config import IP_WINDOWS, authorized_libraries
+model = OpenAIServerModel(
+    model_id="qwen2.5:14b",
+    #model_id="deepseek-r1:14b",  #
+    api_base='http://localhost:11435/v1',
+    api_key="ollama")
+# model_fast = OpenAIServerModel(model_id="qwen2.5:1.5b",
 #                               api_base=f"http://{IP_WINDOWS}:11434/v1",
 #                               api_key="ollama")
 # model_light = OpenAIServerModel(model_id="phi3",
 #                                api_key="ollama")
 manager_agent = create_manager(model,
+                               tools=[analyze_image,
+                                      PythonInterpreterTool(authorized_imports=authorized_libraries)],
                                agents=[create_file_reader(model),
                                        create_web_agent(model),
                                        create_utils(model),
                                        create_mathematician(model)])
+prompt = """
+You are a high-level Orchestrator Agent.
+Your role is to PLAN and DELEGATE. You do NOT have direct access to external tools like web search or wikipedia.
 I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
 Instructions:
  - Follow the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
  - CRITICAL: When giving the final answer, be extremely concise. If the user asks for a number, provide ONLY the number. If asked for a specific format, strictly follow it without chatting.
  - IMPORTANT: Before giving the final answer or using a tool, you MUST think step by step. Break down the problem.
  - If the deduction seems illogical, review it.
+CRITICAL RULES:
+1. **Delegation is Mandatory:** If you need to search the web, find a discography, or look up facts, you MUST use your managed agent named 'web_agent'.
+2. **Forbidden Actions:** Do NOT attempt to call functions like 'wikipedia_search', 'google_search', or 'requests.get' directly. You will crash if you try.
+3. **Syntax:** To use the web agent, you must generate a tool call for it.
+   Example: `result = web_agent(task="Find the discography of Mercedes Sosa...")`
+4.  **Team First Approach:** Before writing any Python code, you MUST evaluate if one of your managed agents (e.g., 'becario_windows', 'vision_tool') is capable of handling the task.
+5.  **Code Execution:** Only write and execute Python code for complex reasoning, data integration, or tasks that no other agent can perform.
 CODING RULES:
 1.  **Print to Debug:** You cannot see the value of variables unless you print them. ALWAYS print the head of a dataframe or the result of a calculation to confirm it's correct.
  Extra info:
     {extra_info_123blabla}
  """
+answers_file = "answers.yaml"
def add_answer_to_yaml(file_path: str, task_id: str, answer: str):
    """Store ``answer`` under ``task_id`` in the YAML file at ``file_path``.

    The existing content is loaded, the entry is added or overwritten, and
    the whole mapping is written back.

    Args:
        file_path: Path of the YAML answers file.
        task_id: Key under which the answer is stored.
        answer: Value to store.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # safe_load returns None for an empty document; start from an
            # empty mapping so the item assignment below cannot fail.
            cur_yaml = yaml.safe_load(f) or {}
    except FileNotFoundError:
        # First answer ever recorded: there is nothing to merge with yet.
        cur_yaml = {}

    cur_yaml[task_id] = answer

    with open(file_path, 'w', encoding='utf-8') as yamlfile:
        yaml.safe_dump(cur_yaml, yamlfile)
def load_yaml(file_path: str):
    """Load the YAML document stored at ``file_path``.

    Args:
        file_path: Path of the YAML file to read.

    Returns:
        The parsed document, or an empty dict when the file does not exist
        or parses to an empty/falsy document, so callers can use the result
        as a mapping without a ``None`` check.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            # safe_load yields None for an empty file; normalise to {}.
            return yaml.safe_load(f) or {}
    except FileNotFoundError:
        # No answers have been saved yet.
        return {}
+current_answers = load_yaml(answers_file)
 with tempfile.TemporaryDirectory() as tmpdir:
     results = []
     questions_api = QuestionsAPI(tmpdir)
     for question in tqdm(questions_api.questions_generator(), total=len(questions_api.questions)):
+        if question["task_id"] in current_answers.keys():
+            continue
         extra_info = {}
         if question["file_name"] != "":
             extra_info["file_name"] = f"{tmpdir}/{question['file_name']}"
         formatted_question = prompt.format(question_123blabla=question["question"],
                                            extra_info_123blabla=extra_info)
+        response = manager_agent.run(formatted_question, max_steps=30, return_full_result=False)
+        response = str(response)
+        results.append({"task_id": question["task_id"], "submitted_answer": response})
+        add_answer_to_yaml(file_path=answers_file, task_id=question["task_id"], answer=response)
 print(questions_api.post_answers(results))

pyproject.toml CHANGED Viewed

@@ -12,6 +12,8 @@ dependencies = [
     "openpyxl>=3.1.5",
     "pillow>=12.0.0",
     "pypdf>=6.4.0",
-    "smolagents[openai]>=1.23.0",
     "sympy>=1.14.0",
 ]

     "openpyxl>=3.1.5",
     "pillow>=12.0.0",
     "pypdf>=6.4.0",
+    "smolagents[openai,transformers]>=1.23.0",
     "sympy>=1.14.0",
+    "transformers>=4.57.3",
+    "wikipedia-api>=0.8.1",
 ]