Spaces:

Isics
/

agents_gaia

Runtime error

App Files Files Community

Isics committed on Dec 10, 2025

Commit

32844c7

1 Parent(s): 8e8d062

initial commit

Browse files

Files changed (12) hide show

.gitignore +6 -0
agents/__init__.py +0 -0
agents/file_reader.py +154 -0
agents/manager.py +47 -0
agents/mathematician.py +87 -0
agents/utils.py +31 -0
agents/web_browser.py +19 -0
app.py +81 -0
pyproject.toml +17 -0
questions_api.py +44 -0
tools/__init__.py +0 -0
tools/vision_tools.py +46 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,6 @@

+.venv
+*.json
+*.jsonl
+config.py
+.idea/

agents/__init__.py ADDED Viewed

File without changes

agents/file_reader.py ADDED Viewed

	@@ -0,0 +1,154 @@

+import json
+import pandas as pd
+import pypdf
+import yaml
+from smolagents import CodeAgent, Model, tool
+from config import authorized_libraries
+@tool
+def read_yaml(path: str) -> str:
+    """
+    Reads a YAML file and returns the contents as a dictionary parsed as a string.
+    Args:
+        path (str): path to YAML file.
+    Returns:
+        str: contents of YAML file.
+    Example:
+        >>> result = read_yaml("path/to/file.yaml")
+    """
+    with open(path, 'r') as f:
+        return yaml.load(f, Loader=yaml.FullLoader)
+@tool
+def read_json(path: str) -> str:
+    """
+    Reads a JSON file and returns the contents as a dictionary parsed as a string.
+    Args:
+        path (str): path to JSON file.
+    Returns:
+        str: contents of JSON file.
+    Example:
+        >>> result = read_json("path/to/file.json")
+    """
+    with open(path, 'r') as f:
+        return json.load(f)
+@tool
+def read_txt(path: str) -> str:
+    """
+    Reads a txt file and returns the contents as a string.
+    Args:
+        path (str): path to a text file.
+    Returns:
+        str: contents of the text file.
+    Example:
+        >>> result = read_yaml("path/to/textfile.text")
+    """
+    with open(path, 'r') as f:
+        return f.read()
+@tool
+def read_csv(path: str) -> str:
+    """
+    Reads a CSV file and returns its content formatted as a markdown table.
+    Useful for understanding the structure and data of comma-separated files.
+    Args:
+        path (str): path to the CSV file (e.g., 'data.csv').
+    Returns:
+        str: The content of the CSV as a markdown string.
+    """
+    try:
+        df = pd.read_csv(path)
+        return df.to_markdown(index=False)
+    except Exception as e:
+        return f"Error reading CSV: {str(e)}"
+@tool
+def read_excel(path: str) -> str:
+    """
+    Reads the first sheet of an Excel file and returns its content as a markdown table.
+    Args:
+        path (str): path to the .xlsx file.
+    Returns:
+        str: The content of the first sheet as a markdown string.
+    """
+    try:
+        df = pd.read_excel(path, engine='openpyxl')
+        return df.to_markdown(index=False)
+    except Exception as e:
+        return f"Error reading Excel: {str(e)}"
+@tool
+def read_pdf(path: str) -> str:
+    """
+    Extracts text from a PDF file.
+    Args:
+        path (str): path to the PDF file.
+    Returns:
+        str: The raw text content extracted from the PDF pages.
+    """
+    try:
+        reader = pypdf.PdfReader(path)
+        text_content = []
+        for i, page in enumerate(reader.pages):
+            text = page.extract_text()
+            if text:
+                text_content.append(f"--- Page {i + 1} ---\n{text}")
+        return "\n".join(text_content)
+    except Exception as e:
+        return f"Error reading PDF: {str(e)}"
+@tool
+def inspect_csv(path: str) -> str:
+    """
+    Reads the first 5 rows and the columns of a CSV file.
+    Use this to understand the data structure before writing code to process the full file.
+    Args:
+        path (str): path to the CSV file.
+    """
+    try:
+        df = pd.read_csv(path)
+        info = f"Columns: {list(df.columns)}\n"
+        info += f"Total Rows: {len(df)}\n\n"
+        info += "First 5 rows:\n"
+        info += df.head(5).to_markdown(index=False)
+        return info
+    except Exception as e:
+        return f"Error inspecting CSV: {str(e)}"
+def create_file_reader(model: Model) -> CodeAgent:
+    return CodeAgent(
+        model=model,
+        tools=[
+            read_yaml, read_json, read_txt, read_csv, read_pdf, inspect_csv, read_excel
+        ],
+        add_base_tools=True,
+        additional_authorized_imports=authorized_libraries,
+        name="files_manager",
+        description="Reads a file and returns the contents as a string, multiple formats accepted.",
+        verbosity_level=0,
+        max_steps=8,
+    )

agents/manager.py ADDED Viewed

	@@ -0,0 +1,47 @@

+import os
+import re
+from PIL import Image
+from smolagents import CodeAgent, OpenAIServerModel, Model
+from smolagents.utils import encode_image_base64, make_image_url
+from config import authorized_libraries
+def check_no_refusal(final_answer: str) -> str | None:
+    refusal_phrases = [
+        "cannot answer", "unable to answer", "i don't know", "no se puede responder",
+        "lo siento", "no tengo acceso", "provide more information"
+    ]
+    answer_lower = str(final_answer).lower()
+    if any(phrase in answer_lower for phrase in refusal_phrases):
+        return "Your answer shows that you were unable to complete your task. Please, try using a different tool or rephrase your search strategy to find the answer. Don't give up yet, you can do it."
+    return None
+def check_file_existence(final_answer: str) -> str | None:
+    file_pattern = r"[\w,\s-]+\.(csv|xlsx|txt|pdf|png|jpg|json)"
+    match = re.search(file_pattern, str(final_answer))
+    if match:
+        filename = match.group().strip()
+        if not os.path.exists(filename):
+            return f"You mentioned the file '{filename}' in your final answer, but I cannot find it in the current directory. Please, make sure you have successfully executed the code to generate the file before responding."
+    return None
+def create_manager(model: Model, agents: list[CodeAgent], **kwargs) -> CodeAgent:
+    return CodeAgent(
+        model=model,
+        managed_agents=agents,
+        add_base_tools=True,
+        additional_authorized_imports=authorized_libraries,
+        verbosity_level=2,
+        final_answer_checks=[],
+        max_steps=25,
+        **kwargs
+    )

agents/mathematician.py ADDED Viewed

	@@ -0,0 +1,87 @@

+from typing import List
+from smolagents import CodeAgent, tool, Model
+from sympy import solve, parse_expr, Eq
+from sympy.parsing.sympy_parser import standard_transformations, implicit_multiplication_application
+from config import authorized_libraries
+@tool
+def calculator(expression: str) -> float:
+    """
+    Calculates result of the mathematical expression given in expression.
+    Args:
+        expression (str): The mathematical expression to calculate.
+    Returns:
+        float: The result of the mathematical expression.
+    Example:
+        >>> result = calculator("((1 + 2) * 3) ** (1 / 2)")
+    """
+    return float(eval(expression))
+@tool
+def solve_linear_system(equations: List[str]) -> str:
+    """
+    Solves a system of linear equations and returns the results for each variable.
+    Args:
+        equations: A list of strings where each string is a linear equation.
+                   Examples: ["2x + 4 = 10"] or ["x + y = 5", "x - y = 1"]
+    Returns:
+        A string describing the solution for each variable, or a message if no solution exists.
+    """
+    transformations = (standard_transformations + (implicit_multiplication_application,))
+    parsed_eqs = []
+    all_symbols = set()
+    try:
+        for eq_str in equations:
+            # Handle "=" split or assume "= 0"
+            if "=" in eq_str:
+                lhs_str, rhs_str = eq_str.split("=")
+            else:
+                lhs_str, rhs_str = eq_str, "0"
+            lhs = parse_expr(lhs_str, transformations=transformations)
+            rhs = parse_expr(rhs_str, transformations=transformations)
+            # Create Equation Object
+            eq_obj = Eq(lhs, rhs)
+            parsed_eqs.append(eq_obj)
+            # Track variables
+            all_symbols.update(lhs.free_symbols)
+            all_symbols.update(rhs.free_symbols)
+        # Solve
+        solution = solve(parsed_eqs, list(all_symbols))
+        # Formatting for the Agent (Agents prefer String outputs)
+        if not solution:
+            return "No solution found."
+        return str(solution)
+    except Exception as e:
+        return f"Error solving equations: {str(e)}"
+def create_mathematician(model: Model) -> CodeAgent:
+    return CodeAgent(
+        model=model,
+        tools=[
+            calculator,
+            solve_linear_system
+        ],
+        add_base_tools=True,
+        additional_authorized_imports=authorized_libraries,
+        name="mathematician_agent",
+        description="Calculates mathematical expressions and solves equations",
+        verbosity_level=0,
+        max_steps=8,
+    )

agents/utils.py ADDED Viewed

	@@ -0,0 +1,31 @@

+import pandas as pd
+from smolagents import CodeAgent, Model, tool
+from config import authorized_libraries
+@tool
+def reverse_string(string: str) -> str:
+    """
+    Given a string and return it reversed.
+    For example, given "siht dnatsrednu uoy fI" will return "If you understand this"
+    Args:
+        string (str): String to reverse
+    """
+    return string[::-1]
+def create_utils(model: Model) -> CodeAgent:
+    return CodeAgent(
+        model=model,
+        tools=[
+            reverse_string
+        ],
+        add_base_tools=True,
+        additional_authorized_imports=authorized_libraries,
+        name="utils_manager",
+        description="Have a bunch of useful utilities, like functions to reverse string.",
+        verbosity_level=0,
+        max_steps=8,
+    )

agents/web_browser.py ADDED Viewed

	@@ -0,0 +1,19 @@

+from smolagents import CodeAgent, VisitWebpageTool, DuckDuckGoSearchTool, Model
+from config import authorized_libraries
+def create_web_agent(model: Model) -> CodeAgent:
+    return CodeAgent(
+        model=model,
+        tools=[
+            DuckDuckGoSearchTool(),
+            VisitWebpageTool(),
+        ],
+        add_base_tools=True,
+        additional_authorized_imports=authorized_libraries,
+        name="web_agent",
+        description="Browses the web to find information, can also look for information using the search engine DuckDuckGo",
+        verbosity_level=0,
+        max_steps=8,
+    )

app.py ADDED Viewed

	@@ -0,0 +1,81 @@

+import tempfile
+import json
+from tqdm import tqdm
+from smolagents import OpenAIServerModel
+from agents.file_reader import create_file_reader
+from agents.manager import create_manager
+from agents.mathematician import create_mathematician
+from agents.utils import create_utils
+from agents.web_browser import create_web_agent
+from questions_api import QuestionsAPI
+from tools.vision_tools import analyze_image
+from config import IP_WINDOWS
+model = OpenAIServerModel(model_id="qwen2.5:14b",  # "deepseek-r1:14b",#
+                          api_base='http://localhost:11435/v1',
+                          api_key="ollama")
+#model_fast = OpenAIServerModel(model_id="qwen2.5:1.5b",
+#                               api_base=f"http://{IP_WINDOWS}:11434/v1",
+#                               api_key="ollama")
+# model_light = OpenAIServerModel(model_id="phi3",
+#                                api_base="http://localhost:11434/v1",
+#                                api_key="ollama")
+manager_agent = create_manager(model,
+                               tools=[analyze_image],
+                               agents=[create_file_reader(model),
+                                       create_web_agent(model),
+                                       create_utils(model),
+                                       create_mathematician(model)])
+prompt = """ You are a strategic Orchestrator Agent. Your primary goal is to solve tasks efficiently by leveraging your available team of managed agents.
+I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
+Instructions:
+ - Follow the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
+ - YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+ - If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+ - If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+ - If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+ - CRITICAL: When giving the final answer, be extremely concise. If the user asks for a number, provide ONLY the number. If asked for a specific format, strictly follow it without chatting.
+ - IMPORTANT: Before giving the final answer or using a tool, you MUST think step by step. Break down the problem.
+ - If the deduction seems illogical, review it.
+ - Before trying to solve a step on your own, take into account the agents .
+ CRITICAL RULES FOR DELEGATION:
+1.  **Team First Approach:** Before writing any Python code, you MUST evaluate if one of your managed agents (e.g., 'becario_windows', 'vision_tool') is capable of handling the task.
+2.  **Code Execution:** Only write and execute Python code for complex reasoning, data integration, or tasks that no other agent can perform.
+CODING RULES:
+1.  **Print to Debug:** You cannot see the value of variables unless you print them. ALWAYS print the head of a dataframe or the result of a calculation to confirm it's correct.
+2.  **File Paths:** Assume files are in the current directory. Do not invent subfolders.
+3.  **Persistence:** If you create a plot or a file, you MUST save it to disk (e.g., 'output.png') and tell me the filename. Do not try to use plt.show().
+4.  **Libraries:** Use 'pandas' for data and 'requests' for web. Do not use complex scrapers like Selenium.
+ALWAYS check your `managed_agents` list before acting. If a tool or agent exists for the job, use it. Do not reinvent the wheel.
+ Question:
+    {question_123blabla}
+ Extra info:
+    {extra_info_123blabla}
+ """
+with tempfile.TemporaryDirectory() as tmpdir:
+    results = []
+    questions_api = QuestionsAPI(tmpdir)
+    for question in tqdm(questions_api.questions_generator(), total=len(questions_api.questions)):
+        extra_info = {}
+        if question["file_name"] != "":
+            extra_info["file_name"] = f"{tmpdir}/{question['file_name']}"
+        formatted_question = prompt.format(question_123blabla=question["question"],
+                                           extra_info_123blabla=extra_info)
+        response = manager_agent.run(formatted_question, max_steps=30, return_full_result=True)
+        results.append({"task_id": question["task_id"],
+                        "submitted_answer": response.output})
+        with open('results.jsonl', 'a', encoding='utf-8') as f:
+            json.dump(results[-1], f, ensure_ascii=True, indent=4)
+print(questions_api.post_answers(results))

pyproject.toml ADDED Viewed

	@@ -0,0 +1,17 @@

+[project]
+name = "agentscourse"
+version = "0.1.0"
+description = "Add your description here"
+requires-python = ">=3.13"
+# Every package imported directly by the code is declared here; tabulate is required
+# by pandas' DataFrame.to_markdown, which the file-reading tools call.
+dependencies = [
+    "datasets>=4.4.1",
+    "ddgs>=9.9.2",
+    "markdownify>=1.2.2",
+    "matplotlib>=3.10.7",
+    "ollama>=0.6.1",
+    "openpyxl>=3.1.5",
+    "pandas",
+    "pillow>=12.0.0",
+    "pypdf>=6.4.0",
+    "pyyaml",
+    "requests",
+    "smolagents[openai]>=1.23.0",
+    "sympy>=1.14.0",
+    "tabulate",
+    "tqdm",
+]

questions_api.py ADDED Viewed

	@@ -0,0 +1,44 @@

+import shutil
+from typing import TypedDict, Generator
+import requests
+from config import USER
+Question = TypedDict("Question", {"task_id": str,
+                                  "question": str,
+                                  "level": str,
+                                  "file_name": str})
+class QuestionsAPI:
+    questions: list[Question] = []
+    images_dir = "/Users/isaacgonzalez/Documents/Datasets/GAIA/2023/validation/"
+    def __init__(self, files_dir: str):
+        self._files_dir = files_dir
+        self.url = "https://agents-course-unit4-scoring.hf.space"
+        self._download_questions()
+        self._download_files()
+    def _download_questions(self) -> None:
+        self.questions = requests.get(f"{self.url}/questions").json()
+    def _download_files(self):
+        for question in self.questions:
+            if question["file_name"] != '':
+                self._download_file(question["file_name"])
+    def _download_file(self, file_name: str) -> None:
+        shutil.copy(f"{self.images_dir}/{file_name}", f"{self._files_dir}/{file_name}")
+    def questions_generator(self) -> Generator[Question, None, None]:
+        for question in self.questions:
+            yield question
+    def post_answers(self, answers: list[dict]) -> dict:
+        return requests.post(f"{self.url}/submit",
+                             json={"username": USER,
+                                     "agent_code": "stringstri",
+                                     "answers": answers}).json()

tools/__init__.py ADDED Viewed

File without changes

tools/vision_tools.py ADDED Viewed

	@@ -0,0 +1,46 @@

+import base64
+import requests
+from config import IP_WINDOWS
+import ollama
+from smolagents import tool
+@tool
+def analyze_image(image_path: str, question: str) -> str:
+    """ Analyze an image using a local vision model and answer a question about it.
+    Use this tool when you need to extract information from a jpg/png file.
+    Args:
+        image_path: The local path to the image file (e.g. 'images/grafico.png').
+        question: The specific question about what to look for in the image (e.g. 'What value is the red bar?').
+    Returns:
+        str: The answer to the question, based on the image.
+    Example:
+        >>> result = analise_image("images/grafico.png", "What value is the red bar?")
+    """
+    # url = f"http://{IP_WINDOWS}:11434/api/generate"
+    try:
+        # Codificamos la imagen a base64 para enviarla por red
+        with open(image_path, "rb") as image_file:
+            img_str = base64.b64encode(image_file.read()).decode('utf-8')
+        # payload = {
+        #    "model": "llava", # Asegúrate de tener este modelo en Windows
+        #    "prompt": question,
+        #    "images": [img_str],
+        #    "stream": False
+        #}
+        # response = requests.post(url, json=payload)
+        response = ollama.chat(model='llava',
+                               messages=[{'role': 'user',
+                                          'content': question,
+                                          'images': image_file}])
+        return response['messages']['content']
+    except Exception as e:
+        return f"Error conectando con Windows: {str(e)}"