Isics committed on
Commit
32844c7
·
1 Parent(s): 8e8d062

initial commit

Browse files
.gitignore ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ .venv
2
+ *.json
3
+ *.jsonl
4
+ config.py
5
+ .idea/
6
+
agents/__init__.py ADDED
File without changes
agents/file_reader.py ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+
3
+ import pandas as pd
4
+ import pypdf
5
+ import yaml
6
+ from smolagents import CodeAgent, Model, tool
7
+
8
+ from config import authorized_libraries
9
+
10
+
11
@tool
def read_yaml(path: str) -> str:
    """
    Reads a YAML file and returns its parsed contents rendered as a string.

    Args:
        path (str): path to YAML file.

    Returns:
        str: string representation of the parsed YAML document (typically a dictionary).

    Example:
        >>> result = read_yaml("path/to/file.yaml")
    """
    with open(path, 'r', encoding='utf-8') as f:
        # NOTE(review): yaml.safe_load is the usual choice for files of unknown origin;
        # confirm whether anything here relies on FullLoader-only tags before switching.
        parsed = yaml.load(f, Loader=yaml.FullLoader)
    # The signature declares a string result, so convert the parsed object
    # instead of handing back a dict/list.
    return str(parsed)
26
+
27
+
28
@tool
def read_json(path: str) -> str:
    """
    Reads a JSON file, validates it by parsing, and returns the contents as a JSON string.

    Args:
        path (str): path to JSON file.

    Returns:
        str: contents of the JSON file, re-serialized with 2-space indentation.

    Example:
        >>> result = read_json("path/to/file.json")
    """
    with open(path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    # The signature declares a string result, so serialize the parsed object
    # back to text instead of returning a dict/list.
    return json.dumps(data, ensure_ascii=False, indent=2)
43
+
44
+
45
@tool
def read_txt(path: str) -> str:
    """
    Reads a plain-text file and returns the contents as a string.

    Args:
        path (str): path to a text file.

    Returns:
        str: contents of the text file.

    Example:
        >>> result = read_txt("path/to/textfile.txt")
    """
    # Decode explicitly as UTF-8 so the result does not depend on the platform default.
    with open(path, 'r', encoding='utf-8') as f:
        return f.read()
60
+
61
+
62
@tool
def read_csv(path: str) -> str:
    """
    Loads a CSV file with pandas and renders the whole table as markdown.
    Handy for getting a quick look at the structure and data of a comma-separated file.

    Args:
        path (str): path to the CSV file (e.g., 'data.csv').

    Returns:
        str: The table as a markdown string, or an "Error reading CSV: ..." message
            if loading or rendering fails.
    """
    try:
        # DataFrame.to_markdown relies on the optional `tabulate` package.
        rendered = pd.read_csv(path).to_markdown(index=False)
    except Exception as err:
        # Best effort: report the failure as text rather than raising.
        return f"Error reading CSV: {str(err)}"
    return rendered
79
+
80
+
81
@tool
def read_excel(path: str) -> str:
    """
    Loads the first sheet of an Excel workbook and renders it as a markdown table.

    Args:
        path (str): path to the .xlsx file.

    Returns:
        str: The first sheet as a markdown string, or an "Error reading Excel: ..."
            message if loading or rendering fails.
    """
    try:
        sheet = pd.read_excel(path, engine='openpyxl')
        rendered = sheet.to_markdown(index=False)
    except Exception as err:
        # Best effort: report the failure as text rather than raising.
        return f"Error reading Excel: {str(err)}"
    return rendered
97
+
98
+
99
@tool
def read_pdf(path: str) -> str:
    """
    Pulls the text out of every page of a PDF file.

    Args:
        path (str): path to the PDF file.

    Returns:
        str: The extracted text, one "--- Page N ---" section per page that yielded
            text, or an "Error reading PDF: ..." message on failure.
    """
    try:
        document = pypdf.PdfReader(path)
        # Pages whose extraction yields nothing are skipped; numbering is 1-based.
        sections = [
            f"--- Page {number} ---\n{extracted}"
            for number, page in enumerate(document.pages, start=1)
            if (extracted := page.extract_text())
        ]
        return "\n".join(sections)
    except Exception as err:
        return f"Error reading PDF: {str(err)}"
121
+
122
+
123
@tool
def inspect_csv(path: str) -> str:
    """
    Summarizes a CSV file: its column names, total row count and first 5 rows.
    Use this to understand the data structure before writing code to process the full file.

    Args:
        path (str): path to the CSV file.

    Returns:
        str: The summary text, or an "Error inspecting CSV: ..." message on failure.
    """
    try:
        frame = pd.read_csv(path)
        parts = [
            f"Columns: {list(frame.columns)}\n",
            f"Total Rows: {len(frame)}\n\n",
            "First 5 rows:\n",
            frame.head(5).to_markdown(index=False),
        ]
        return "".join(parts)
    except Exception as err:
        return f"Error inspecting CSV: {str(err)}"
141
+
142
def create_file_reader(model: Model) -> CodeAgent:
    """
    Build the "files_manager" agent, equipped with every file-reading tool of this module.

    Args:
        model (Model): model that drives the agent.

    Returns:
        CodeAgent: the configured agent (limited to 8 steps, verbosity level 0).
    """
    reader_tools = [
        read_yaml,
        read_json,
        read_txt,
        read_csv,
        read_pdf,
        inspect_csv,
        read_excel,
    ]
    agent = CodeAgent(
        model=model,
        tools=reader_tools,
        add_base_tools=True,
        additional_authorized_imports=authorized_libraries,
        name="files_manager",
        description="Reads a file and returns the contents as a string, multiple formats accepted.",
        verbosity_level=0,
        max_steps=8,
    )
    return agent
agents/manager.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ import re
3
+
4
+ from PIL import Image
5
+ from smolagents import CodeAgent, OpenAIServerModel, Model
6
+ from smolagents.utils import encode_image_base64, make_image_url
7
+
8
+ from config import authorized_libraries
9
+
10
+
11
+ def check_no_refusal(final_answer: str) -> str | None:
12
+ refusal_phrases = [
13
+ "cannot answer", "unable to answer", "i don't know", "no se puede responder",
14
+ "lo siento", "no tengo acceso", "provide more information"
15
+ ]
16
+
17
+ answer_lower = str(final_answer).lower()
18
+
19
+ if any(phrase in answer_lower for phrase in refusal_phrases):
20
+ return "Your answer shows that you were unable to complete your task. Please, try using a different tool or rephrase your search strategy to find the answer. Don't give up yet, you can do it."
21
+
22
+ return None
23
+
24
+
25
+ def check_file_existence(final_answer: str) -> str | None:
26
+ file_pattern = r"[\w,\s-]+\.(csv|xlsx|txt|pdf|png|jpg|json)"
27
+ match = re.search(file_pattern, str(final_answer))
28
+
29
+ if match:
30
+ filename = match.group().strip()
31
+ if not os.path.exists(filename):
32
+ return f"You mentioned the file '{filename}' in your final answer, but I cannot find it in the current directory. Please, make sure you have successfully executed the code to generate the file before responding."
33
+
34
+ return None
35
+
36
+
37
def create_manager(model: Model, agents: list[CodeAgent], **kwargs) -> CodeAgent:
    """
    Build the orchestrating agent that supervises the given managed agents.

    Args:
        model (Model): model that drives the manager.
        agents (list[CodeAgent]): agents handed to the manager as ``managed_agents``.
        **kwargs: extra keyword arguments forwarded unchanged to ``CodeAgent``.

    Returns:
        CodeAgent: the configured manager (25 steps, verbosity level 2, and an
            empty list of final-answer checks).
    """
    settings = {
        "model": model,
        "managed_agents": agents,
        "add_base_tools": True,
        "additional_authorized_imports": authorized_libraries,
        "verbosity_level": 2,
        "final_answer_checks": [],
        "max_steps": 25,
    }
    # Unpacking both mappings still rejects a duplicated keyword with a TypeError.
    return CodeAgent(**settings, **kwargs)
agents/mathematician.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from smolagents import CodeAgent, tool, Model
4
+ from sympy import solve, parse_expr, Eq
5
+ from sympy.parsing.sympy_parser import standard_transformations, implicit_multiplication_application
6
+
7
+ from config import authorized_libraries
8
+
9
+
10
@tool
def calculator(expression: str) -> float:
    """
    Calculates the result of the arithmetic expression given in expression.

    The expression is parsed and evaluated node by node rather than with ``eval``,
    so only numbers, the operators + - * / // % **, the constants pi, e, tau, inf
    and a fixed set of functions (abs, round, min, max, pow, int, float, sqrt, exp,
    log, log10, log2, sin, cos, tan, asin, acos, atan, atan2, floor, ceil,
    factorial, degrees, radians, hypot) are accepted. Functions and constants may
    also be written with a ``math.`` prefix.

    Args:
        expression (str): The mathematical expression to calculate.

    Returns:
        float: The result of the mathematical expression.

    Example:
        >>> result = calculator("((1 + 2) * 3) ** (1 / 2)")
    """
    # Imports are local so the tool is self-contained.
    import ast
    import math
    import operator

    binary_ops = {
        ast.Add: operator.add,
        ast.Sub: operator.sub,
        ast.Mult: operator.mul,
        ast.Div: operator.truediv,
        ast.FloorDiv: operator.floordiv,
        ast.Mod: operator.mod,
        ast.Pow: operator.pow,
    }
    unary_ops = {ast.UAdd: operator.pos, ast.USub: operator.neg}
    constants = {"pi": math.pi, "e": math.e, "tau": math.tau, "inf": math.inf}
    functions = {
        "abs": abs, "round": round, "min": min, "max": max,
        "pow": pow, "int": int, "float": float,
    }
    for fn_name in (
        "sqrt", "exp", "log", "log10", "log2", "sin", "cos", "tan",
        "asin", "acos", "atan", "atan2", "floor", "ceil", "factorial",
        "degrees", "radians", "hypot",
    ):
        functions[fn_name] = getattr(math, fn_name)

    def lookup(node, table):
        # Accept both the bare spelling (``sqrt``) and the ``math.sqrt`` spelling.
        if isinstance(node, ast.Name):
            key = node.id
        elif (isinstance(node, ast.Attribute)
              and isinstance(node.value, ast.Name)
              and node.value.id == "math"):
            key = node.attr
        else:
            key = None
        if key not in table:
            raise ValueError(f"Unsupported name in expression: {ast.dump(node)}")
        return table[key]

    def evaluate(node):
        if isinstance(node, ast.Constant) and isinstance(node.value, (int, float)):
            return node.value
        if isinstance(node, ast.BinOp) and type(node.op) in binary_ops:
            return binary_ops[type(node.op)](evaluate(node.left), evaluate(node.right))
        if isinstance(node, ast.UnaryOp) and type(node.op) in unary_ops:
            return unary_ops[type(node.op)](evaluate(node.operand))
        if isinstance(node, (ast.Name, ast.Attribute)):
            return lookup(node, constants)
        if isinstance(node, ast.Call) and not node.keywords:
            return lookup(node.func, functions)(*[evaluate(arg) for arg in node.args])
        # Anything else (imports, attribute access, subscripts, lambdas, ...) is rejected.
        raise ValueError(f"Unsupported expression element: {ast.dump(node)}")

    # eval() tolerated surrounding whitespace; ast.parse does not, so strip it first.
    tree = ast.parse(expression.strip(), mode="eval")
    return float(evaluate(tree.body))
24
+
25
+
26
@tool
def solve_linear_system(equations: List[str]) -> str:
    """
    Solves a system of linear equations and returns the results for each variable.

    Args:
        equations: A list of strings where each string is a linear equation.
                   Both "=" and "==" are accepted as the equality sign; a string
                   without an equality sign is treated as "<expression> = 0".
                   Examples: ["2x + 4 = 10"] or ["x + y = 5", "x - y = 1"]

    Returns:
        A string describing the solution for each variable, or a message if no solution
        exists, no equations were given, or an equation could not be parsed.
    """
    if not equations:
        return "No equations provided."

    transformations = (standard_transformations + (implicit_multiplication_application,))
    parsed_eqs = []
    all_symbols = set()

    try:
        for eq_str in equations:
            # Normalize "==" to "=" so both spellings split into exactly two sides.
            sides = eq_str.replace("==", "=").split("=")
            if len(sides) == 1:
                lhs_str, rhs_str = eq_str, "0"
            elif len(sides) == 2:
                lhs_str, rhs_str = sides
            else:
                return f"Error solving equations: '{eq_str}' contains more than one '='."

            lhs = parse_expr(lhs_str, transformations=transformations)
            rhs = parse_expr(rhs_str, transformations=transformations)

            parsed_eqs.append(Eq(lhs, rhs))

            # Collect every unknown that appears on either side.
            all_symbols.update(lhs.free_symbols)
            all_symbols.update(rhs.free_symbols)

        # Sort the unknowns by name so the solver always receives them in the
        # same order (set iteration order is not stable).
        solution = solve(parsed_eqs, sorted(all_symbols, key=str))

        # The agent consumes text, so the solution is returned as a string.
        if not solution:
            return "No solution found."

        return str(solution)

    except Exception as e:
        return f"Error solving equations: {str(e)}"
72
+
73
+
74
def create_mathematician(model: Model) -> CodeAgent:
    """
    Build the "mathematician_agent", equipped with the calculator and the
    linear-system solver defined in this module.

    Args:
        model (Model): model that drives the agent.

    Returns:
        CodeAgent: the configured agent (limited to 8 steps, verbosity level 0).
    """
    math_tools = [calculator, solve_linear_system]
    agent = CodeAgent(
        model=model,
        tools=math_tools,
        add_base_tools=True,
        additional_authorized_imports=authorized_libraries,
        name="mathematician_agent",
        description="Calculates mathematical expressions and solves equations",
        verbosity_level=0,
        max_steps=8,
    )
    return agent
agents/utils.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import pandas as pd
2
+ from smolagents import CodeAgent, Model, tool
3
+
4
+ from config import authorized_libraries
5
+
6
+
7
@tool
def reverse_string(string: str) -> str:
    """
    Returns the given string with its characters in reverse order.
    For example, "siht dnatsrednu uoy fI" becomes "If you understand this".

    Args:
        string (str): String to reverse
    """
    # A slice with step -1 walks the sequence from its last element to its first.
    backwards = slice(None, None, -1)
    return string[backwards]
17
+
18
+
19
def create_utils(model: Model) -> CodeAgent:
    """
    Build the "utils_manager" agent, whose only custom tool is ``reverse_string``.

    Args:
        model (Model): model that drives the agent.

    Returns:
        CodeAgent: the configured agent (limited to 8 steps, verbosity level 0).
    """
    utility_tools = [reverse_string]
    agent = CodeAgent(
        model=model,
        tools=utility_tools,
        add_base_tools=True,
        additional_authorized_imports=authorized_libraries,
        name="utils_manager",
        description="Have a bunch of useful utilities, like functions to reverse string.",
        verbosity_level=0,
        max_steps=8,
    )
    return agent
agents/web_browser.py ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import CodeAgent, VisitWebpageTool, DuckDuckGoSearchTool, Model
2
+
3
+ from config import authorized_libraries
4
+
5
+
6
def create_web_agent(model: Model) -> CodeAgent:
    """
    Build the "web_agent", equipped with a DuckDuckGo search tool and a
    webpage-visiting tool.

    Args:
        model (Model): model that drives the agent.

    Returns:
        CodeAgent: the configured agent (limited to 8 steps, verbosity level 0).
    """
    # Fresh tool instances are created on every call, search tool first.
    web_tools = [DuckDuckGoSearchTool(), VisitWebpageTool()]
    agent = CodeAgent(
        model=model,
        tools=web_tools,
        add_base_tools=True,
        additional_authorized_imports=authorized_libraries,
        name="web_agent",
        description="Browses the web to find information, can also look for information using the search engine DuckDuckGo",
        verbosity_level=0,
        max_steps=8,
    )
    return agent
app.py ADDED
@@ -0,0 +1,81 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import json
3
+
4
+ from tqdm import tqdm
5
+
6
+ from smolagents import OpenAIServerModel
7
+
8
+ from agents.file_reader import create_file_reader
9
+ from agents.manager import create_manager
10
+ from agents.mathematician import create_mathematician
11
+ from agents.utils import create_utils
12
+ from agents.web_browser import create_web_agent
13
+ from questions_api import QuestionsAPI
14
+ from tools.vision_tools import analyze_image
15
+ from config import IP_WINDOWS
16
+
17
# Model shared by the manager and every managed agent, reached through an
# OpenAI-compatible endpoint (presumably a local Ollama server — confirm).
model = OpenAIServerModel(
    model_id="qwen2.5:14b",  # alternative tried: "deepseek-r1:14b"
    api_base='http://localhost:11435/v1',
    api_key="ollama",
)
# Alternative model configurations, kept for reference:
# model_fast = OpenAIServerModel(model_id="qwen2.5:1.5b",
#                                api_base=f"http://{IP_WINDOWS}:11434/v1",
#                                api_key="ollama")
# model_light = OpenAIServerModel(model_id="phi3",
#                                 api_base="http://localhost:11434/v1",
#                                 api_key="ollama")

# The manager owns the image-analysis tool directly and delegates everything
# else to the four managed agents.
manager_agent = create_manager(
    model,
    tools=[analyze_image],
    agents=[
        create_file_reader(model),
        create_web_agent(model),
        create_utils(model),
        create_mathematician(model),
    ],
)
33
+
34
# Prompt template for the manager agent. It is rendered with str.format and has
# two placeholders: {question_123blabla} (the question text) and
# {extra_info_123blabla} (a dict with optional extras such as the attachment path).
# The delegation rule names the agents and the tool that are actually registered,
# so the model is not pointed at team members that do not exist.
prompt = """ You are a strategic Orchestrator Agent. Your primary goal is to solve tasks efficiently by leveraging your available team of managed agents.
I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
Instructions:
- Follow the following template: FINAL ANSWER: [YOUR FINAL ANSWER]
- YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
- If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
- If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
- If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
- CRITICAL: When giving the final answer, be extremely concise. If the user asks for a number, provide ONLY the number. If asked for a specific format, strictly follow it without chatting.
- IMPORTANT: Before giving the final answer or using a tool, you MUST think step by step. Break down the problem.
- If the deduction seems illogical, review it.
- Before trying to solve a step on your own, take into account the agents.

CRITICAL RULES FOR DELEGATION:
1. **Team First Approach:** Before writing any Python code, you MUST evaluate if one of your managed agents ('files_manager', 'web_agent', 'utils_manager', 'mathematician_agent') or the 'analyze_image' tool is capable of handling the task.
2. **Code Execution:** Only write and execute Python code for complex reasoning, data integration, or tasks that no other agent can perform.

CODING RULES:
1. **Print to Debug:** You cannot see the value of variables unless you print them. ALWAYS print the head of a dataframe or the result of a calculation to confirm it's correct.
2. **File Paths:** Assume files are in the current directory. Do not invent subfolders.
3. **Persistence:** If you create a plot or a file, you MUST save it to disk (e.g., 'output.png') and tell me the filename. Do not try to use plt.show().
4. **Libraries:** Use 'pandas' for data and 'requests' for web. Do not use complex scrapers like Selenium.

ALWAYS check your `managed_agents` list before acting. If a tool or agent exists for the job, use it. Do not reinvent the wheel.

Question:
{question_123blabla}
Extra info:
{extra_info_123blabla}
"""
64
+
65
# Run every question through the manager agent, log each answer locally as it
# is produced, and submit the whole batch at the end. Attachments live in a
# temporary directory that is removed when the block exits.
with tempfile.TemporaryDirectory() as tmpdir:
    results = []
    questions_api = QuestionsAPI(tmpdir)
    for question in tqdm(questions_api.questions_generator(), total=len(questions_api.questions)):
        extra_info = {}
        if question["file_name"] != "":
            # Tell the agent where the attachment for this question was copied.
            extra_info["file_name"] = f"{tmpdir}/{question['file_name']}"

        formatted_question = prompt.format(question_123blabla=question["question"],
                                           extra_info_123blabla=extra_info)
        response = manager_agent.run(formatted_question, max_steps=30, return_full_result=True)
        result = {"task_id": question["task_id"],
                  "submitted_answer": response.output}
        results.append(result)

        # JSON Lines needs exactly one compact JSON object per line; indented
        # dumps with no trailing newline would make the file unparseable.
        with open('results.jsonl', 'a', encoding='utf-8') as f:
            f.write(json.dumps(result, ensure_ascii=True) + "\n")
    print(questions_api.post_answers(results))
pyproject.toml ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [project]
2
+ name = "agentscourse"
3
+ version = "0.1.0"
4
+ description = "Add your description here"
5
+ requires-python = ">=3.13"
6
+ dependencies = [
7
+ "datasets>=4.4.1",
8
+ "ddgs>=9.9.2",
9
+ "markdownify>=1.2.2",
10
+ "matplotlib>=3.10.7",
11
+ "ollama>=0.6.1",
12
+ "openpyxl>=3.1.5",
13
+ "pillow>=12.0.0",
14
+ "pypdf>=6.4.0",
15
+ "smolagents[openai]>=1.23.0",
16
+ "sympy>=1.14.0",
17
+ ]
questions_api.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import shutil
2
+ from typing import TypedDict, Generator
3
+
4
+ import requests
5
+
6
+ from config import USER
7
+
8
+ Question = TypedDict("Question", {"task_id": str,
9
+ "question": str,
10
+ "level": str,
11
+ "file_name": str})
12
+
13
+
14
+ class QuestionsAPI:
15
+ questions: list[Question] = []
16
+ images_dir = "/Users/isaacgonzalez/Documents/Datasets/GAIA/2023/validation/"
17
+
18
+ def __init__(self, files_dir: str):
19
+ self._files_dir = files_dir
20
+
21
+ self.url = "https://agents-course-unit4-scoring.hf.space"
22
+ self._download_questions()
23
+ self._download_files()
24
+
25
+ def _download_questions(self) -> None:
26
+ self.questions = requests.get(f"{self.url}/questions").json()
27
+
28
+ def _download_files(self):
29
+ for question in self.questions:
30
+ if question["file_name"] != '':
31
+ self._download_file(question["file_name"])
32
+
33
+ def _download_file(self, file_name: str) -> None:
34
+ shutil.copy(f"{self.images_dir}/{file_name}", f"{self._files_dir}/{file_name}")
35
+
36
+ def questions_generator(self) -> Generator[Question, None, None]:
37
+ for question in self.questions:
38
+ yield question
39
+
40
+ def post_answers(self, answers: list[dict]) -> dict:
41
+ return requests.post(f"{self.url}/submit",
42
+ json={"username": USER,
43
+ "agent_code": "stringstri",
44
+ "answers": answers}).json()
tools/__init__.py ADDED
File without changes
tools/vision_tools.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+
3
+ import requests
4
+ from config import IP_WINDOWS
5
+ import ollama
6
+ from smolagents import tool
7
+
8
+
9
@tool
def analyze_image(image_path: str, question: str) -> str:
    """ Analyze an image using a local vision model and answer a question about it.
    Use this tool when you need to extract information from a jpg/png file.

    Args:
        image_path: The local path to the image file (e.g. 'images/grafico.png').
        question: The specific question about what to look for in the image (e.g. 'What value is the red bar?').

    Returns:
        str: The answer to the question, based on the image, or an error message
            if the image could not be read or the model call failed.

    Example:
        >>> result = analyze_image("images/grafico.png", "What value is the red bar?")
    """
    try:
        # Base64-encode the image bytes so they can be embedded in the chat request.
        with open(image_path, "rb") as image_file:
            img_str = base64.b64encode(image_file.read()).decode('utf-8')

        # The image goes in as a list of encoded images. The file handle itself is
        # already closed at this point and cannot be sent.
        response = ollama.chat(model='llava',
                               messages=[{'role': 'user',
                                          'content': question,
                                          'images': [img_str]}])
        # A chat reply carries a single 'message' entry holding the model's text.
        return response['message']['content']

    except Exception as e:
        # Covers unreadable files as well as failures talking to the model.
        return f"Error analyzing image '{image_path}': {str(e)}"