Spaces:
Sleeping
Sleeping
switch from langgraph to smolagents
Browse files- app.py +1 -1
- geminiAgent.py +61 -10
- tools/WebSearchTool.py +64 -64
- tools/final_answer.py +14 -0
- tools/visit_webpage.py +45 -0
- tools/web_search.py +27 -0
app.py
CHANGED
|
@@ -12,7 +12,7 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
|
|
| 12 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
| 13 |
class BasicAgent:
|
| 14 |
def __init__(self):
|
| 15 |
-
|
| 16 |
def __call__(self, question: str) -> str:
|
| 17 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
| 18 |
fixed_answer = "This is a default answer."
|
|
|
|
| 12 |
# ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
|
| 13 |
class BasicAgent:
|
| 14 |
def __init__(self):
|
| 15 |
+
self.agent = create_agent()
|
| 16 |
def __call__(self, question: str) -> str:
|
| 17 |
print(f"Agent received question (first 50 chars): {question[:50]}...")
|
| 18 |
fixed_answer = "This is a default answer."
|
geminiAgent.py
CHANGED
|
@@ -1,22 +1,73 @@
|
|
| 1 |
-
from typing import TypedDict, Literal, List, Dict, Optional
|
| 2 |
import time
|
| 3 |
-
import requests
|
| 4 |
-
from bs4 import BeautifulSoup
|
| 5 |
import operator
|
| 6 |
-
from
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
from
|
| 12 |
-
from
|
|
|
|
|
|
|
|
|
|
| 13 |
import pytesseract
|
|
|
|
| 14 |
from PIL import Image
|
| 15 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 16 |
@tool
def ocr_tool(image: str) -> str:
    """
    A tool that performs OCR processing on an image.

    Args:
        image: path for the image
    """
    # Open the image in a context manager so the underlying file handle is
    # released as soon as the text has been extracted.
    with Image.open(image) as img:
        return pytesseract.image_to_string(img)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import time
|
|
|
|
|
|
|
| 2 |
import operator
|
| 3 |
+
from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel, load_tool, tool, WikipediaSearchTool, LiteLLMModel
|
| 4 |
+
import datetime
|
| 5 |
+
import requests
|
| 6 |
+
import pytz
|
| 7 |
+
import yaml
|
| 8 |
+
from tools.final_answer import FinalAnswerTool
|
| 9 |
+
from tools.web_search import DuckDuckGoSearchTool
|
| 10 |
+
from tools.visit_webpage import VisitWebpageTool
|
| 11 |
+
import utils
|
| 12 |
+
import pandas as pd
|
| 13 |
import pytesseract
|
| 14 |
+
import openpyxl
|
| 15 |
from PIL import Image
|
| 16 |
|
| 17 |
+
def create_agent():
    """
    Build the smolagents ``CodeAgent`` used to answer questions.

    The agent is backed by a ``LiteLLMModel`` whose API key is read from the
    ``GEMINI_KEY`` environment variable. It is given DuckDuckGo search,
    Wikipedia search, webpage visiting and task-attachment download tools,
    with the smolagents base tools enabled as well.

    Returns:
        The configured ``CodeAgent`` instance.
    """
    # Imported locally because this module calls ``os.getenv`` without ever
    # importing ``os`` at file level, which raised NameError on first use.
    import os

    # NOTE(review): "gemini_model_id" is passed as a literal string, not a
    # real model identifier -- confirm the intended LiteLLM model id.
    model = LiteLLMModel(
        model_id="gemini_model_id",
        api_key=os.getenv("GEMINI_KEY"),
    )

    # NOTE(review): DownloadTaskAttachmentTool is not among this file's
    # visible imports -- confirm where it is defined before deploying.
    tools = [
        DuckDuckGoSearchTool(),
        WikipediaSearchTool(),
        VisitWebpageTool(),
        DownloadTaskAttachmentTool(),
    ]

    return CodeAgent(
        model=model,
        tools=tools,
        add_base_tools=True,
        # 'exec' was dropped from this list: it is a builtin, not an
        # importable module, so authorizing it grants nothing and can trip
        # smolagents versions that validate authorized imports.
        # NOTE(review): 'subprocess' lets model-generated code spawn
        # arbitrary processes -- keep only if that is really intended.
        additional_authorized_imports=["pandas", "numpy", "csv", "subprocess"],
    )
|
| 24 |
@tool
def ocr_tool(image: str) -> str:
    """
    A tool that performs OCR processing on an image.

    Args:
        image: path for the image
    """
    # Open the image in a context manager so the underlying file handle is
    # released as soon as the text has been extracted (the previous version
    # never closed it and also rebound the ``image`` parameter).
    with Image.open(image) as img:
        return pytesseract.image_to_string(img)
|
| 34 |
+
|
| 35 |
+
@tool
def read_csv(csv: str) -> str:
    """
    Reads a CSV file and returns its contents as a human-readable string.

    Args:
        csv: path for the csv file
    """
    try:
        frame = pd.read_csv(csv)
    except pd.errors.EmptyDataError:
        # pandas raises (rather than returning an empty frame) when the file
        # has no columns at all; report it the same way as a header-only file.
        return "The CSV file is empty."
    except Exception as e:
        # Mirror read_excel: hand the failure back to the agent as text
        # instead of letting the exception escape the tool.
        return f"Error reading CSV file: {e}"

    if frame.empty:
        return "The CSV file is empty."

    # Render the table between two separator rules under a short header.
    separator = "--------------------------------"
    table = frame.to_string(index=False)
    return f"CSV Data:\n{separator}\n{table}\n{separator}\n"
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
|
| 59 |
+
@tool
def read_excel(excel_path: str) -> str:
    """
    Reads an Excel file and returns its contents as a human-readable string.

    Args:
        excel_path: The path to the Excel file.
    """
    # Any failure while loading or rendering the workbook is reported back
    # as text rather than raised.
    try:
        return pd.read_excel(excel_path).to_string()
    except Exception as err:
        return f"Error reading Excel file: {err}"
|
| 72 |
+
|
| 73 |
+
|
tools/WebSearchTool.py
CHANGED
|
@@ -1,75 +1,75 @@
|
|
| 1 |
-
import time
|
| 2 |
-
from typing import Optional
|
| 3 |
-
import requests
|
| 4 |
-
from bs4 import BeautifulSoup
|
| 5 |
-
from langchain.tools import tool
|
| 6 |
|
| 7 |
-
class WebSearchTool:
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
|
| 36 |
-
|
| 37 |
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
|
| 72 |
-
|
| 73 |
|
| 74 |
-
|
| 75 |
-
|
|
|
|
| 1 |
+
# import time
|
| 2 |
+
# from typing import Optional
|
| 3 |
+
# import requests
|
| 4 |
+
# from bs4 import BeautifulSoup
|
| 5 |
+
# from langchain.tools import tool
|
| 6 |
|
| 7 |
+
# class WebSearchTool:
|
| 8 |
+
# def __init__(self):
|
| 9 |
+
# self.last_request_time = 0
|
| 10 |
+
# self.min_request_interval = 2.0 # Minimum time between requests in seconds
|
| 11 |
+
# self.max_retries = 10
|
| 12 |
|
| 13 |
+
# def search(self, query: str, domain: Optional[str] = None) -> str:
|
| 14 |
+
# """Perform web search with rate limiting and retries."""
|
| 15 |
+
# for attempt in range(self.max_retries):
|
| 16 |
+
# # Implement rate limiting
|
| 17 |
+
# current_time = time.time()
|
| 18 |
+
# time_since_last = current_time - self.last_request_time
|
| 19 |
+
# if time_since_last < self.min_request_interval:
|
| 20 |
+
# time.sleep(self.min_request_interval - time_since_last)
|
| 21 |
|
| 22 |
+
# try:
|
| 23 |
+
# # Make the search request
|
| 24 |
+
# results = self._do_search(query, domain)
|
| 25 |
+
# self.last_request_time = time.time()
|
| 26 |
+
# return results
|
| 27 |
+
# except Exception as e:
|
| 28 |
+
# if "202 Ratelimit" in str(e):
|
| 29 |
+
# if attempt < self.max_retries - 1:
|
| 30 |
+
# # Exponential backoff
|
| 31 |
+
# wait_time = (2 ** attempt) * self.min_request_interval
|
| 32 |
+
# time.sleep(wait_time)
|
| 33 |
+
# continue
|
| 34 |
+
# return f"Search failed after {self.max_retries} attempts: {str(e)}"
|
| 35 |
|
| 36 |
+
# return "Search failed due to rate limiting"
|
| 37 |
|
| 38 |
+
# def _do_search(self, query: str, domain: Optional[str] = None) -> str:
|
| 39 |
+
# """Perform the actual search request."""
|
| 40 |
+
# try:
|
| 41 |
+
# # Construct search URL
|
| 42 |
+
# base_url = "https://html.duckduckgo.com/html"
|
| 43 |
+
# params = {"q": query}
|
| 44 |
+
# if domain:
|
| 45 |
+
# params["q"] += f" site:{domain}"
|
| 46 |
|
| 47 |
+
# # Make request with increased timeout
|
| 48 |
+
# response = requests.get(base_url, params=params, timeout=10)
|
| 49 |
+
# response.raise_for_status()
|
| 50 |
|
| 51 |
+
# if response.status_code == 202:
|
| 52 |
+
# raise Exception("202 Ratelimit")
|
| 53 |
|
| 54 |
+
# # Extract search results
|
| 55 |
+
# results = []
|
| 56 |
+
# soup = BeautifulSoup(response.text, 'html.parser')
|
| 57 |
+
# for result in soup.find_all('div', {'class': 'result'}):
|
| 58 |
+
# title = result.find('a', {'class': 'result__a'})
|
| 59 |
+
# snippet = result.find('a', {'class': 'result__snippet'})
|
| 60 |
+
# if title and snippet:
|
| 61 |
+
# results.append({
|
| 62 |
+
# 'title': title.get_text(),
|
| 63 |
+
# 'snippet': snippet.get_text(),
|
| 64 |
+
# 'url': title.get('href')
|
| 65 |
+
# })
|
| 66 |
|
| 67 |
+
# # Format results
|
| 68 |
+
# formatted_results = []
|
| 69 |
+
# for r in results[:10]: # Limit to top 10 results
|
| 70 |
+
# formatted_results.append(f"[{r['title']}]({r['url']})\n{r['snippet']}\n")
|
| 71 |
|
| 72 |
+
# return "## Search Results\n\n" + "\n".join(formatted_results)
|
| 73 |
|
| 74 |
+
# except requests.RequestException as e:
|
| 75 |
+
# raise Exception(f"Search request failed: {str(e)}")
|
tools/final_answer.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Optional
|
| 2 |
+
from smolagents.tools import Tool
|
| 3 |
+
|
| 4 |
+
class FinalAnswerTool(Tool):
    """Tool that hands the supplied answer back unchanged as the final result."""

    # Class-level attributes describing the tool: its name, a description,
    # the single ``answer`` input, and the output type.
    name = "final_answer"
    description = "Provides a final answer to the given problem."
    inputs = {
        "answer": {
            "type": "any",
            "description": "The final answer to the problem",
        },
    }
    output_type = "any"

    def __init__(self, *args, **kwargs):
        # Positional and keyword arguments are accepted but not used; the
        # only state set here is the initialization flag.
        self.is_initialized = False

    def forward(self, answer: Any) -> Any:
        """Return ``answer`` exactly as received."""
        return answer
|
tools/visit_webpage.py
ADDED
|
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Optional
|
| 2 |
+
from smolagents.tools import Tool
|
| 3 |
+
import requests
|
| 4 |
+
import markdownify
|
| 5 |
+
import smolagents
|
| 6 |
+
|
| 7 |
+
class VisitWebpageTool(Tool):
    """Tool that fetches a URL and returns the page content as markdown text."""

    name = "visit_webpage"
    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
    output_type = "string"

    def forward(self, url: str) -> str:
        """
        Fetch ``url`` and return its content converted to markdown.

        Args:
            url: The url of the webpage to visit.

        Returns:
            The page as markdown, truncated via ``truncate_content`` with a
            limit of 10000, or a human-readable error message if the request
            times out, fails, or something unexpected happens.

        Raises:
            ImportError: If ``markdownify``, ``requests`` or
                ``smolagents.utils`` cannot be imported.
        """
        # ``re`` was used below without ever being imported, so every
        # successful fetch ended in a NameError that the generic handler
        # turned into "An unexpected error occurred". Import it here.
        import re

        try:
            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException

            from smolagents.utils import truncate_content
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e
        try:
            # Send a GET request to the URL with a 20-second timeout
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = markdownify(response.text).strip()

            # Remove multiple line breaks
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            return truncate_content(markdown_content, 10000)

        except requests.exceptions.Timeout:
            return "The request timed out. Please try again later or check the URL."
        except RequestException as e:
            return f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            return f"An unexpected error occurred: {str(e)}"

    def __init__(self, *args, **kwargs):
        # Arguments are accepted but unused; only the initialization flag is set.
        self.is_initialized = False
|
tools/web_search.py
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
from typing import Any, Optional
|
| 2 |
+
from smolagents.tools import Tool
|
| 3 |
+
import duckduckgo_search
|
| 4 |
+
|
| 5 |
+
class DuckDuckGoSearchTool(Tool):
    """Tool that runs a DuckDuckGo text search and formats the hits as markdown."""

    name = "web_search"
    description = "Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."
    inputs = {
        "query": {
            "type": "string",
            "description": "The search query to perform.",
        },
    }
    output_type = "string"

    def __init__(self, max_results=10, **kwargs):
        """
        Create the search client.

        Args:
            max_results: Maximum number of results requested per query.
            **kwargs: Forwarded unchanged to the ``DDGS`` constructor.

        Raises:
            ImportError: If the ``duckduckgo_search`` package is missing.
        """
        super().__init__()
        self.max_results = max_results
        try:
            from duckduckgo_search import DDGS
        except ImportError as e:
            install_hint = "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
            raise ImportError(install_hint) from e
        self.ddgs = DDGS(**kwargs)

    def forward(self, query: str) -> str:
        """
        Search for ``query`` and return the results as a markdown list.

        Args:
            query: The search query to perform.

        Returns:
            A "## Search Results" heading followed by one
            ``[title](href)`` entry plus body text per result.

        Raises:
            Exception: If the search returns no results.
        """
        hits = self.ddgs.text(query, max_results=self.max_results)
        if len(hits) == 0:
            raise Exception("No results found! Try a less restrictive/shorter query.")

        # One markdown entry per hit: linked title on the first line, body below.
        entries = []
        for hit in hits:
            entries.append(f"[{hit['title']}]({hit['href']})\n{hit['body']}")
        return "## Search Results\n\n" + "\n\n".join(entries)
|