elokquence37 committed on
Commit
7aa7afd
·
verified ·
1 Parent(s): 2336062

Create tools.py

Browse files
Files changed (1) hide show
  1. tools.py +94 -0
tools.py ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from smolagents import Tool
2
+ import time
3
+ import random
4
+ from duckduckgo_search import DDGS
5
+ import requests
6
+ import os
7
+ import wikipedia
8
+ from bs4 import BeautifulSoup
9
+
10
+
11
class DuckDuckGoSearchToolWH(Tool):
    """DuckDuckGo web search tool with rotating User-Agent headers.

    Wraps ``duckduckgo_search.DDGS`` and returns the top hits formatted
    as a markdown list of ``[title](url)`` links followed by snippets.
    """

    name = "web_search"
    description = """Performs a DuckDuckGo web search based on your query (think a Google search) then returns the top search results."""
    inputs = {"query": {"type": "string", "description": "The search query to perform."}}
    output_type = "string"

    def __init__(self, max_results=10, **kwargs):
        """
        Args:
            max_results: Maximum number of search hits to return.
            **kwargs: Extra keyword arguments forwarded to ``DDGS()``.
                Any caller-supplied ``headers`` entry is dropped, because
                this tool always injects its own randomized User-Agent.
        """
        super().__init__()
        self.max_results = max_results
        # Pool of realistic desktop/mobile User-Agents; one is picked at
        # random per search to reduce the chance of being rate-limited.
        self.USER_AGENTS = [
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:122.0) Gecko/20100101 Firefox/122.0",
            "Mozilla/5.0 (Macintosh; Intel Mac OS X 13_0) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.1 Safari/605.1.15",
            "Mozilla/5.0 (Linux; Android 11; Pixel 5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Mobile Safari/537.36",
            "Mozilla/5.0 (iPhone; CPU iPhone OS 16_0 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.0 Mobile/15E148 Safari/604.1",
        ]
        self.kwargs = kwargs
        self.kwargs.pop('headers', None)  # our randomized header always wins

    def forward(self, query: str) -> str:
        """Run the search and return markdown-formatted results.

        Args:
            query: The search query string.

        Returns:
            A ``## Search Results`` markdown section, one entry per hit.

        Raises:
            Exception: If the search returns no results.
        """
        headers = {"User-Agent": random.choice(self.USER_AGENTS)}
        # Build the client per call so each request carries a fresh
        # User-Agent. Kept local — no need to mutate instance state.
        ddgs = DDGS(headers=headers, **self.kwargs)
        time.sleep(2.0)  # crude throttle between consecutive searches
        results = ddgs.text(query, max_results=self.max_results)
        if not results:
            raise Exception("No results found! Try a less restrictive/shorter query.")
        # .get() guards against hits missing a title/href/body key,
        # which would otherwise raise KeyError mid-formatting.
        postprocessed_results = [
            f"[{result.get('title', '')}]({result.get('href', '')})\n{result.get('body', '')}"
            for result in results
        ]
        return "## Search Results\n\n" + "\n\n".join(postprocessed_results)
43
+
44
class WikipediaSearchTool(Tool):
    """Searches Wikipedia and returns a short summary of the best match."""

    name = "wikipedia_search"
    description = "Searches Wikipedia and returns a short summary of the most relevant article."
    inputs = {
        "query": {"type": "string", "description": "The search term or topic to look up on Wikipedia."}
    }
    output_type = "string"

    def __init__(self, summary_sentences=3):
        """
        Args:
            summary_sentences: Number of sentences to include in the
                returned summary.
        """
        super().__init__()
        self.summary_sentences = summary_sentences

    def forward(self, query: str) -> str:
        """Look up *query* on Wikipedia and return a bounded summary.

        Returns:
            A markdown string ``**Title**\\n\\n<summary>``, or a
            human-readable error message on failure.
        """
        try:
            page_title = wikipedia.search(query)[0]
            # Honor summary_sentences (previously stored but unused):
            # return a short summary rather than the full article text,
            # matching what `description` promises.
            summary = wikipedia.summary(page_title, sentences=self.summary_sentences)
            return f"**{page_title}**\n\n{summary}"
        except IndexError:
            # wikipedia.search() returned an empty result list.
            return "No Wikipedia results found for that query."
        except Exception as e:
            # Broad catch on purpose: disambiguation/page errors become a
            # readable message instead of crashing the agent loop.
            return f"Error during Wikipedia search: {e}"
65
+
66
+
67
class WebpageReaderTool(Tool):
    """Downloads a webpage and returns its visible body text."""

    name = "read_webpage"
    description = "Fetches the text content from a given URL and returns the main body text."
    inputs = {
        "url": {"type": "string", "description": "The URL of the webpage to read."}
    }
    output_type = "string"

    def forward(self, url: str) -> str:
        """Fetch *url* and return up to 5,000 characters of visible text.

        On any failure (network error, bad status, parse problem) a
        human-readable error string is returned instead of raising.
        """
        request_headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                          "(KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36"
        }
        try:
            page = requests.get(url, headers=request_headers, timeout=10)
            page.raise_for_status()

            parsed = BeautifulSoup(page.text, "html.parser")

            # Drop elements that never render as visible text.
            for hidden in parsed(["script", "style", "noscript"]):
                hidden.extract()

            raw_lines = parsed.get_text(separator="\n").splitlines()
            visible = [line.strip() for line in raw_lines if line.strip()]

            return "\n".join(visible)[:5000]  # Optionally limit to 5,000 chars
        except Exception as e:
            return f"Error reading webpage: {e}"