Final_Assignment_Template

Sleeping

aelin committed on May 6, 2025

Commit

f9bd7be

1 Parent(s): f791164

Refatora a inicialização do agente para usar agentes individuais em vez de funções, melhorando a organização e a clareza do código.

Refactors to use individual agents for tool handling

Replaces function-based tool registration with dedicated agents for each tool, enhancing code organization and clarity. Simplifies agent initialization and improves maintainability by leveraging agent-based workflows.

Relates to improved code structure and readability.

Files changed (2) hide show

_tools.py +83 -85
app.py +30 -5

_tools.py CHANGED Viewed

@@ -1,16 +1,13 @@
 import re
 from markdownify import markdownify
 import requests
 import io
 import pandas as pd
 from PIL import Image
 from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
-from llama_index.core.tools import FunctionTool
 from huggingface_hub import InferenceClient
 client = InferenceClient(
   provider="hf-inference",
@@ -20,70 +17,60 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 search_tool_spec = DuckDuckGoSearchToolSpec()
 # Searching tools
-def _search_tool(query: str) -> str:
     """Browse the web using DuckDuckGo."""
     print(f"🔍 Executando busca no DuckDuckGo para: {query}")
     return search_tool_spec.duckduckgo_full_search(query=query)
-def _fetch_file_bytes(task_id: str) -> str | None:
     """
     Fetch a file from the given task ID.
     """
     try:
         response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
         response.raise_for_status()
         print(f"File {task_id} fetched successfully.")
         return response.content
     except requests.exceptions.RequestException as e:
         print(f"Error fetching file {task_id}: {e}")
         return None
-# Parsing tools
-def _bytes_to_image(image_bytes: bytes) -> Image:
     """Convert bytes to image URL."""
     file = Image.open(io.BytesIO(image_bytes))
     file.save("temp_image.png")
     return file
-def _document_bytes_to_text(doc_bytes: bytes) -> str:
-    """Convert document bytes to text."""
     return doc_bytes.decode("utf-8")
-def _xlsx_to_text(file_bytes: bytes) -> str:
     """Convert XLSX file bytes to text using pandas."""
     io_bytes = io.BytesIO(file_bytes)
     df = pd.read_excel(io_bytes, engine='openpyxl')
     return df.to_string(index=False)
-# Extracting text tools
-def _extract_text_from_image(image_url: bytes) -> str:
     """Extract text from an image using Tesseract."""
     return client.image_to_text(image_url=image_url, task="image-to-text", model="Salesforce/blip-image-captioning-base").generated_text
-def _extract_text_from_csv(file_bytes: bytes) -> str:
     """Extract text from a CSV file."""
     io_bytes = io.BytesIO(file_bytes)
     df = pd.read_csv(io_bytes)
     return df.to_string(index=False)
-def _extract_text_from_code_file(bytes: bytes) -> str:
     """Extract text from a code file."""
     return bytes.decode("utf-8")
-def _extract_text_from_audio_file(file_bytes: bytes) -> str:
     """Extract text from an audio file."""
     return client.automatic_speech_recognition(file_bytes, model="openai/whisper-large-v2").text
-def _webpage_to_markdown(url: str) -> str:
     """
     Access a web page and return its content as markdown.
     Limits output to 10,000 characters to avoid excessive responses.
@@ -103,76 +90,87 @@ def _webpage_to_markdown(url: str) -> str:
 # Initialize tools
-search_tool = FunctionTool.from_defaults(
-  _search_tool,
-  name="DuckDuckGo Search",
-  description="Search the web using DuckDuckGo."
-)
-fetch_file_bytes_tool = FunctionTool.from_defaults(
-  _fetch_file_bytes,
-  name="Fetch File Bytes",
-  description="Fetch a file from the given task ID."
 )
-bytes_to_image_tool = FunctionTool.from_defaults(
-  _bytes_to_image,
-  name="Bytes to Image",
-  description="Convert bytes to image URL."
 )
-document_bytes_to_text_tool = FunctionTool.from_defaults(
-  _document_bytes_to_text,
-  name="Document Bytes to Text",
-  description="Convert bytes to document text, i.e., .txt, .pdf, etc."
 )
-xlsx_to_text_tool = FunctionTool.from_defaults(
-  _xlsx_to_text,
-  name="XLSX to Text",
-  description="Convert XLSX file bytes to text."
 )
-extract_text_from_image_tool = FunctionTool.from_defaults(
-  _extract_text_from_image,
-  name="Extract Text from Image",
-  description="Extract text from an image using Tesseract."
 )
-extract_text_from_csv_tool = FunctionTool.from_defaults(
-  _extract_text_from_csv,
-  name="Extract Text from CSV",
-  description="Extract text from a CSV file."
 )
-extract_text_from_code_file_tool = FunctionTool.from_defaults(
-  _extract_text_from_code_file,
-  name="Extract Text from Code File",
-  description="Extract text from a code file, i.e., .py, .js, .java, etc."
 )
-extract_text_from_audio_file_tool = FunctionTool.from_defaults(
-  _extract_text_from_audio_file,
-  name="Extract Text from Audio File",
-  description="Extract text from an audio file."
 )
-webpage_to_markdown_tool = FunctionTool.from_defaults(
-    _webpage_to_markdown,
-    name="Webpage to Markdown",
-    description="Access a web page by URL and return the content as markdown. Use to read web pages."
 )
-tools = [
-  search_tool,
-  fetch_file_bytes_tool,
-  bytes_to_image_tool,
-  document_bytes_to_text_tool,
-  extract_text_from_image_tool,
-  extract_text_from_csv_tool,
-  extract_text_from_code_file_tool,
-  extract_text_from_audio_file_tool,
-  xlsx_to_text_tool,
-  webpage_to_markdown_tool,
-]

 import re
 from markdownify import markdownify
 import requests
 import io
 import pandas as pd
 from PIL import Image
 from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
 from huggingface_hub import InferenceClient
+from llama_index.core.agent.workflow import ReActAgent
 client = InferenceClient(
   provider="hf-inference",
 search_tool_spec = DuckDuckGoSearchToolSpec()
 # Searching tools
+def search_tool(query: str) -> str:
     """Browse the web using DuckDuckGo."""
     print(f"🔍 Executando busca no DuckDuckGo para: {query}")
     return search_tool_spec.duckduckgo_full_search(query=query)
+def fetch_file_bytes(task_id: str) -> str | None:
     """
     Fetch a file from the given task ID.
     """
     try:
         response = requests.get(f"{DEFAULT_API_URL}/files/{task_id}", timeout=15)
         response.raise_for_status()
         print(f"File {task_id} fetched successfully.")
         return response.content
     except requests.exceptions.RequestException as e:
         print(f"Error fetching file {task_id}: {e}")
         return None
+def bytes_to_image(image_bytes: bytes) -> Image:
     """Convert bytes to image URL."""
     file = Image.open(io.BytesIO(image_bytes))
     file.save("temp_image.png")
     return file
+def document_bytes_to_text(doc_bytes: bytes) -> str:
+    """Convert document bytes to text."""
     return doc_bytes.decode("utf-8")
+def xlsx_to_text(file_bytes: bytes) -> str:
     """Convert XLSX file bytes to text using pandas."""
     io_bytes = io.BytesIO(file_bytes)
     df = pd.read_excel(io_bytes, engine='openpyxl')
     return df.to_string(index=False)
+def extract_text_from_image(image_url: bytes) -> str:
     """Extract text from an image using Tesseract."""
     return client.image_to_text(image_url=image_url, task="image-to-text", model="Salesforce/blip-image-captioning-base").generated_text
+def extract_text_from_csv(file_bytes: bytes) -> str:
     """Extract text from a CSV file."""
     io_bytes = io.BytesIO(file_bytes)
     df = pd.read_csv(io_bytes)
     return df.to_string(index=False)
+def extract_text_from_code_file(bytes: bytes) -> str:
     """Extract text from a code file."""
     return bytes.decode("utf-8")
+def extract_text_from_audio_file(file_bytes: bytes) -> str:
     """Extract text from an audio file."""
     return client.automatic_speech_recognition(file_bytes, model="openai/whisper-large-v2").text
+def webpage_to_markdown(url: str) -> str:
     """
     Access a web page and return its content as markdown.
     Limits output to 10,000 characters to avoid excessive responses.
 # Initialize tools
+# --- ReActAgent and AgentWorkflow tool declaration ---
+# Define agents for each tool (one agent per tool, with a clear description)
+search_agent = ReActAgent(
+    name="search_agent",
+    description="Searches the web using DuckDuckGo.",
+    system_prompt="A helpful assistant that can search the web using DuckDuckGo.",
+    tools=[search_tool],
+    llm=None,
 )
+fetch_file_agent = ReActAgent(
+    name="fetch_file_agent",
+    description="Fetches a file from a given task ID.",
+    system_prompt="A helpful assistant that can fetch files by task ID.",
+    tools=[fetch_file_bytes],
+    llm=None,
 )
+bytes_to_image_agent = ReActAgent(
+    name="bytes_to_image_agent",
+    description="Converts bytes to an image.",
+    system_prompt="A helpful assistant that can convert bytes to an image.",
+    tools=[bytes_to_image],
+    llm=None,
 )
+document_bytes_to_text_agent = ReActAgent(
+    name="document_bytes_to_text_agent",
+    description="Converts document bytes to text.",
+    system_prompt="A helpful assistant that can convert document bytes to text.",
+    tools=[document_bytes_to_text],
+    llm=None,
 )
+xlsx_to_text_agent = ReActAgent(
+    name="xlsx_to_text_agent",
+    description="Converts XLSX file bytes to text.",
+    system_prompt="A helpful assistant that can convert XLSX file bytes to text.",
+    tools=[xlsx_to_text],
+    llm=None,
 )
+extract_text_from_image_agent = ReActAgent(
+    name="extract_text_from_image_agent",
+    description="Extracts text from an image using Tesseract.",
+    system_prompt="A helpful assistant that can extract text from images.",
+    tools=[extract_text_from_image],
+    llm=None,
 )
+extract_text_from_csv_agent = ReActAgent(
+    name="extract_text_from_csv_agent",
+    description="Extracts text from a CSV file.",
+    system_prompt="A helpful assistant that can extract text from CSV files.",
+    tools=[extract_text_from_csv],
+    llm=None,
 )
+extract_text_from_code_file_agent = ReActAgent(
+    name="extract_text_from_code_file_agent",
+    description="Extracts text from a code file.",
+    system_prompt="A helpful assistant that can extract text from code files.",
+    tools=[extract_text_from_code_file],
+    llm=None,
 )
+extract_text_from_audio_file_agent = ReActAgent(
+    name="extract_text_from_audio_file_agent",
+    description="Extracts text from an audio file.",
+    system_prompt="A helpful assistant that can extract text from audio files.",
+    tools=[extract_text_from_audio_file],
+    llm=None,
 )
+webpage_to_markdown_agent = ReActAgent(
+    name="webpage_to_markdown_agent",
+    description="Accesses a web page by URL and returns the content as markdown.",
+    system_prompt="A helpful assistant that can access web pages and return markdown.",
+    tools=[webpage_to_markdown],
+    llm=None,
+)

app.py CHANGED Viewed

@@ -7,7 +7,19 @@ from _types import Questions, Question, UserScore
 from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.workflow import Context
-from _tools import tools
 import asyncio
 from utils import cache_answers, update_cache_answer, get_cached_answer, load_cache
@@ -22,13 +34,26 @@ class BasicAgent:
     def __init__(self):
         print("BasicAgent initialized.")
         llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
-        agent = AgentWorkflow.from_tools_or_functions(
-            tools_or_functions=tools,
             llm=llm,
             verbose=True,
-            system_prompt= """
                 You are a general AI assistant. I will ask you a question. Think carefully and give your answer straight away as asked in the question or
                 in the format below:
@@ -41,7 +66,7 @@ class BasicAgent:
                 Don't use any other format than the one above and limit your attempts to answer the question to 3 times.
             """,
         )
         context = Context(agent)
         self.agent = agent
         self.context = context

 from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
 from llama_index.core.agent.workflow import AgentWorkflow
 from llama_index.core.workflow import Context
+from _tools import (
+    search_agent,
+    fetch_file_agent,
+    bytes_to_image_agent,
+    document_bytes_to_text_agent,
+    xlsx_to_text_agent,
+    extract_text_from_image_agent,
+    extract_text_from_csv_agent,
+    extract_text_from_code_file_agent,
+    extract_text_from_audio_file_agent,
+    webpage_to_markdown_agent,
+)
+from llama_index.core.agent.workflow import AgentWorkflow
 import asyncio
 from utils import cache_answers, update_cache_answer, get_cached_answer, load_cache
     def __init__(self):
         print("BasicAgent initialized.")
         llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")
+        agent = AgentWorkflow(
+            agents=[
+                search_agent,
+                fetch_file_agent,
+                bytes_to_image_agent,
+                document_bytes_to_text_agent,
+                xlsx_to_text_agent,
+                extract_text_from_image_agent,
+                extract_text_from_csv_agent,
+                extract_text_from_code_file_agent,
+                extract_text_from_audio_file_agent,
+                webpage_to_markdown_agent,
+            ],
+            root_agent="search_agent",
             llm=llm,
             verbose=True,
+            system_prompt="""
                 You are a general AI assistant. I will ask you a question. Think carefully and give your answer straight away as asked in the question or
                 in the format below:
                 Don't use any other format than the one above and limit your attempts to answer the question to 3 times.
             """,
         )
         context = Context(agent)
         self.agent = agent
         self.context = context