ninooo96 committed on
Commit
e820e26
·
1 Parent(s): 80ea88d

switch from langgraph to smolagents

Browse files
app.py CHANGED
@@ -12,7 +12,7 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
12
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
  def __init__(self):
15
- print("BasicAgent initialized.")
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
  fixed_answer = "This is a default answer."
 
12
  # ----- THIS IS WHERE YOU CAN BUILD WHAT YOU WANT ------
13
  class BasicAgent:
14
  def __init__(self):
15
+ self.agent = create_agent()
16
  def __call__(self, question: str) -> str:
17
  print(f"Agent received question (first 50 chars): {question[:50]}...")
18
  fixed_answer = "This is a default answer."
geminiAgent.py CHANGED
@@ -1,22 +1,73 @@
1
- from typing import TypedDict, Literal, List, Dict, Optional
2
  import time
3
- import requests
4
- from bs4 import BeautifulSoup
5
  import operator
6
- from langchain_core.prompts import ChatPromptTemplate
7
- from langchain_core.runnables import RunnablePassthrough
8
- from langchain_core.messages import HumanMessage, AIMessage
9
- from langchain.tools import tool
10
- from langchain_core.output_parsers import StrOutputParser
11
- from langchain_core.pydantic_v1 import BaseModel, Field
12
- from langgraph.graph import StateGraph, START, END
 
 
 
13
  import pytesseract
 
14
  from PIL import Image
15
 
 
 
 
 
 
 
 
16
@tool
def ocr_tool(image: str) -> str:
    """
    A tool that performs OCR processing on an image.

    Args:
        image: path for the image to read text from.
    """
    # Open the file and hand it straight to pytesseract for text extraction.
    return pytesseract.image_to_string(Image.open(image))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import time
 
 
2
  import operator
3
+ from smolagents import CodeAgent,DuckDuckGoSearchTool, HfApiModel, load_tool, tool, WikipediaSearchTool, LiteLLMModel
4
+ import datetime
5
+ import requests
6
+ import pytz
7
+ import yaml
8
+ from tools.final_answer import FinalAnswerTool
9
+ from tools.web_search import DuckDuckGoSearchTool
10
+ from tools.visit_webpage import VisitWebpageTool
11
+ import utils
12
+ import pandas as pd
13
  import pytesseract
14
+ import openpyxl
15
  from PIL import Image
16
 
17
def create_agent():
    """Build the smolagents CodeAgent used by BasicAgent.

    Returns:
        CodeAgent: an agent backed by a Gemini model (via LiteLLM) with
        web-search, Wikipedia, webpage-visit and attachment-download tools.
    """
    import os  # bug fix: `os` is used below but was never imported in this module

    return CodeAgent(
        # TODO(review): "gemini_model_id" looks like a placeholder — replace
        # with a real LiteLLM model id (e.g. "gemini/gemini-1.5-flash").
        model=LiteLLMModel(model_id="gemini_model_id", api_key=os.getenv("GEMINI_KEY")),
        # NOTE(review): DownloadTaskAttachmentTool is not imported anywhere in
        # this module — confirm where it is defined before running.
        tools=[DuckDuckGoSearchTool(), WikipediaSearchTool(), VisitWebpageTool(), DownloadTaskAttachmentTool()],
        add_base_tools=True,
        # SECURITY: authorizing 'subprocess' lets agent-generated code run
        # shell commands; 'exec' is a builtin, not an importable module, so
        # listing it here has no effect. Review both entries.
        additional_authorized_imports=['pandas', 'numpy', 'csv', 'subprocess', 'exec'],
    )
24
@tool
def ocr_tool(image: str) -> str:
    """
    A tool that performs OCR processing on an image.

    Args:
        image: path for the image
    """
    img = Image.open(image)
    text = pytesseract.image_to_string(img)
    return text
34
+
35
@tool
def read_csv(csv: str) -> str:
    """
    Reads a CSV file and returns its contents as a human-readable string.

    Args:
        csv: path for the csv file
    """
    # Consistency fix: mirror read_excel's error handling so a bad path or
    # malformed file is reported to the agent instead of crashing the run.
    try:
        frame = pd.read_csv(csv)
    except Exception as e:
        return f"Error reading CSV file: {e}"

    if frame.empty:
        return "The CSV file is empty."

    # Format the DataFrame with clear headers and a separator, no row index.
    separator = "--------------------------------"
    return f"CSV Data:\n{separator}\n{frame.to_string(index=False)}\n{separator}\n"
56
+
57
+
58
+
59
@tool
def read_excel(excel_path: str) -> str:
    """
    Reads an Excel file and returns its contents as a human-readable string.

    Args:
        excel_path: The path to the Excel file.
    """
    # Any failure (missing file, bad format, render error) is reported as a
    # string so the agent can react instead of crashing.
    try:
        return pd.read_excel(excel_path).to_string()
    except Exception as e:
        return f"Error reading Excel file: {e}"
72
+
73
+
tools/WebSearchTool.py CHANGED
@@ -1,75 +1,75 @@
1
- import time
2
- from typing import Optional
3
- import requests
4
- from bs4 import BeautifulSoup
5
- from langchain.tools import tool
6
 
7
- class WebSearchTool:
8
- def __init__(self):
9
- self.last_request_time = 0
10
- self.min_request_interval = 2.0 # Minimum time between requests in seconds
11
- self.max_retries = 10
12
 
13
- def search(self, query: str, domain: Optional[str] = None) -> str:
14
- """Perform web search with rate limiting and retries."""
15
- for attempt in range(self.max_retries):
16
- # Implement rate limiting
17
- current_time = time.time()
18
- time_since_last = current_time - self.last_request_time
19
- if time_since_last < self.min_request_interval:
20
- time.sleep(self.min_request_interval - time_since_last)
21
 
22
- try:
23
- # Make the search request
24
- results = self._do_search(query, domain)
25
- self.last_request_time = time.time()
26
- return results
27
- except Exception as e:
28
- if "202 Ratelimit" in str(e):
29
- if attempt < self.max_retries - 1:
30
- # Exponential backoff
31
- wait_time = (2 ** attempt) * self.min_request_interval
32
- time.sleep(wait_time)
33
- continue
34
- return f"Search failed after {self.max_retries} attempts: {str(e)}"
35
 
36
- return "Search failed due to rate limiting"
37
 
38
- def _do_search(self, query: str, domain: Optional[str] = None) -> str:
39
- """Perform the actual search request."""
40
- try:
41
- # Construct search URL
42
- base_url = "https://html.duckduckgo.com/html"
43
- params = {"q": query}
44
- if domain:
45
- params["q"] += f" site:{domain}"
46
 
47
- # Make request with increased timeout
48
- response = requests.get(base_url, params=params, timeout=10)
49
- response.raise_for_status()
50
 
51
- if response.status_code == 202:
52
- raise Exception("202 Ratelimit")
53
 
54
- # Extract search results
55
- results = []
56
- soup = BeautifulSoup(response.text, 'html.parser')
57
- for result in soup.find_all('div', {'class': 'result'}):
58
- title = result.find('a', {'class': 'result__a'})
59
- snippet = result.find('a', {'class': 'result__snippet'})
60
- if title and snippet:
61
- results.append({
62
- 'title': title.get_text(),
63
- 'snippet': snippet.get_text(),
64
- 'url': title.get('href')
65
- })
66
 
67
- # Format results
68
- formatted_results = []
69
- for r in results[:10]: # Limit to top 5 results
70
- formatted_results.append(f"[{r['title']}]({r['url']})\n{r['snippet']}\n")
71
 
72
- return "## Search Results\n\n" + "\n".join(formatted_results)
73
 
74
- except requests.RequestException as e:
75
- raise Exception(f"Search request failed: {str(e)}")
 
1
+ # import time
2
+ # from typing import Optional
3
+ # import requests
4
+ # from bs4 import BeautifulSoup
5
+ # from langchain.tools import tool
6
 
7
+ # class WebSearchTool:
8
+ # def __init__(self):
9
+ # self.last_request_time = 0
10
+ # self.min_request_interval = 2.0 # Minimum time between requests in seconds
11
+ # self.max_retries = 10
12
 
13
+ # def search(self, query: str, domain: Optional[str] = None) -> str:
14
+ # """Perform web search with rate limiting and retries."""
15
+ # for attempt in range(self.max_retries):
16
+ # # Implement rate limiting
17
+ # current_time = time.time()
18
+ # time_since_last = current_time - self.last_request_time
19
+ # if time_since_last < self.min_request_interval:
20
+ # time.sleep(self.min_request_interval - time_since_last)
21
 
22
+ # try:
23
+ # # Make the search request
24
+ # results = self._do_search(query, domain)
25
+ # self.last_request_time = time.time()
26
+ # return results
27
+ # except Exception as e:
28
+ # if "202 Ratelimit" in str(e):
29
+ # if attempt < self.max_retries - 1:
30
+ # # Exponential backoff
31
+ # wait_time = (2 ** attempt) * self.min_request_interval
32
+ # time.sleep(wait_time)
33
+ # continue
34
+ # return f"Search failed after {self.max_retries} attempts: {str(e)}"
35
 
36
+ # return "Search failed due to rate limiting"
37
 
38
+ # def _do_search(self, query: str, domain: Optional[str] = None) -> str:
39
+ # """Perform the actual search request."""
40
+ # try:
41
+ # # Construct search URL
42
+ # base_url = "https://html.duckduckgo.com/html"
43
+ # params = {"q": query}
44
+ # if domain:
45
+ # params["q"] += f" site:{domain}"
46
 
47
+ # # Make request with increased timeout
48
+ # response = requests.get(base_url, params=params, timeout=10)
49
+ # response.raise_for_status()
50
 
51
+ # if response.status_code == 202:
52
+ # raise Exception("202 Ratelimit")
53
 
54
+ # # Extract search results
55
+ # results = []
56
+ # soup = BeautifulSoup(response.text, 'html.parser')
57
+ # for result in soup.find_all('div', {'class': 'result'}):
58
+ # title = result.find('a', {'class': 'result__a'})
59
+ # snippet = result.find('a', {'class': 'result__snippet'})
60
+ # if title and snippet:
61
+ # results.append({
62
+ # 'title': title.get_text(),
63
+ # 'snippet': snippet.get_text(),
64
+ # 'url': title.get('href')
65
+ # })
66
 
67
+ # # Format results
68
+ # formatted_results = []
69
+ # for r in results[:10]: # Limit to top 5 results
70
+ # formatted_results.append(f"[{r['title']}]({r['url']})\n{r['snippet']}\n")
71
 
72
+ # return "## Search Results\n\n" + "\n".join(formatted_results)
73
 
74
+ # except requests.RequestException as e:
75
+ # raise Exception(f"Search request failed: {str(e)}")
tools/final_answer.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+ from smolagents.tools import Tool
3
+
4
class FinalAnswerTool(Tool):
    """smolagents tool that ends an agent run by returning the final answer."""

    name = "final_answer"
    description = "Provides a final answer to the given problem."
    inputs = {'answer': {'type': 'any', 'description': 'The final answer to the problem'}}
    output_type = "any"

    def __init__(self, *args, **kwargs):
        # Bug fix: the base Tool.__init__ was never called, so the setup it
        # performs (argument validation, internal state) was skipped entirely.
        super().__init__(*args, **kwargs)
        self.is_initialized = False

    def forward(self, answer: Any) -> Any:
        """Return *answer* unchanged; the agent loop treats it as terminal."""
        return answer
tools/visit_webpage.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+ from smolagents.tools import Tool
3
+ import requests
4
+ import markdownify
5
+ import smolagents
6
+
7
class VisitWebpageTool(Tool):
    """smolagents tool that fetches a URL and returns its content as markdown."""

    name = "visit_webpage"
    description = "Visits a webpage at the given url and reads its content as a markdown string. Use this to browse webpages."
    inputs = {'url': {'type': 'string', 'description': 'The url of the webpage to visit.'}}
    output_type = "string"

    def __init__(self, *args, **kwargs):
        # Bug fix: call the base Tool initializer (it was skipped before).
        super().__init__(*args, **kwargs)
        self.is_initialized = False

    def forward(self, url: str) -> str:
        """Fetch *url*, convert the HTML to markdown, and truncate to 10k chars."""
        try:
            import re  # bug fix: `re` is used below but was never imported
            import requests
            from markdownify import markdownify
            from requests.exceptions import RequestException

            from smolagents.utils import truncate_content
        except ImportError as e:
            raise ImportError(
                "You must install packages `markdownify` and `requests` to run this tool: for instance run `pip install markdownify requests`."
            ) from e
        try:
            # Send a GET request to the URL with a 20-second timeout
            response = requests.get(url, timeout=20)
            response.raise_for_status()  # Raise an exception for bad status codes

            # Convert the HTML content to Markdown
            markdown_content = markdownify(response.text).strip()

            # Remove multiple line breaks
            markdown_content = re.sub(r"\n{3,}", "\n\n", markdown_content)

            return truncate_content(markdown_content, 10000)

        except requests.exceptions.Timeout:
            return "The request timed out. Please try again later or check the URL."
        except RequestException as e:
            return f"Error fetching the webpage: {str(e)}"
        except Exception as e:
            return f"An unexpected error occurred: {str(e)}"
tools/web_search.py ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import Any, Optional
2
+ from smolagents.tools import Tool
3
+ import duckduckgo_search
4
+
5
class DuckDuckGoSearchTool(Tool):
    """smolagents tool wrapping duckduckgo_search text queries."""

    name = "web_search"
    description = "Performs a duckduckgo web search based on your query (think a Google search) then returns the top search results."
    inputs = {'query': {'type': 'string', 'description': 'The search query to perform.'}}
    output_type = "string"

    def __init__(self, max_results=10, **kwargs):
        super().__init__()
        self.max_results = max_results
        # Import lazily so a missing optional dependency fails with a clear,
        # actionable message at construction time.
        try:
            from duckduckgo_search import DDGS
        except ImportError as e:
            raise ImportError(
                "You must install package `duckduckgo_search` to run this tool: for instance run `pip install duckduckgo-search`."
            ) from e
        self.ddgs = DDGS(**kwargs)

    def forward(self, query: str) -> str:
        """Run the search and return the top hits as a markdown list."""
        hits = self.ddgs.text(query, max_results=self.max_results)
        if len(hits) == 0:
            raise Exception("No results found! Try a less restrictive/shorter query.")
        formatted = []
        for hit in hits:
            formatted.append(f"[{hit['title']}]({hit['href']})\n{hit['body']}")
        return "## Search Results\n\n" + "\n\n".join(formatted)