File size: 5,636 Bytes
e19c5d9 799a766 e19c5d9 799a766 e19c5d9 799a766 e19c5d9 799a766 e19c5d9 799a766 e19c5d9 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
from duckduckgo_search import DDGS
import datetime
import requests
from bs4 import BeautifulSoup
class Agent:
    """Gather textual context (news, web, images, page text) for a query.

    Every public method returns a ready-to-print string and reports failures
    as an error line in that string instead of raising, so one failing source
    never prevents the remaining sections from being produced. Weather is
    intentionally not covered.
    """

    # (label, result-dict key) pairs describing how each result type is
    # rendered. Keys missing from a result are printed as 'N/A'.
    _TEXT_FIELDS = (("Title", "title"), ("Link", "href"), ("Snippet", "body"))
    _NEWS_FIELDS = (
        ("Title", "title"),
        ("Source", "source"),
        ("URL", "url"),
        ("Date", "date"),
        ("Snippet", "body"),
    )
    _IMAGE_FIELDS = (
        ("Title", "title"),
        ("Image URL", "image"),
        ("Source URL", "url"),
    )

    def __init__(self):
        """Create the DuckDuckGo client shared by all search methods."""
        self.ddgs = DDGS()

    @staticmethod
    def _format_records(records, fields):
        """Render result dicts as 'Label: value' lines, one blank line apart.

        Args:
            records (iterable of dict): Result dictionaries to render.
            fields (iterable of tuple): (label, key) pairs, in output order.

        Returns:
            str: The concatenated entries; each entry ends with a blank line.
        """
        return "".join(
            "".join(
                f"{label}: {record.get(key, 'N/A')}\n" for label, key in fields
            )
            + "\n"
            for record in records
        )

    def search(self, query, num_results=3):
        """Perform a web search using DuckDuckGo.

        Args:
            query (str): The search query.
            num_results (int): The maximum number of results to return.

        Returns:
            str: Formatted results, a "no results" line, or an error line.
        """
        # Broad catch is deliberate: any backend failure is reported inside
        # the returned text so callers never have to handle exceptions.
        try:
            hits = list(self.ddgs.text(query, max_results=num_results))
            if not hits:
                return "No search results found.\n"
            return self._format_records(hits, self._TEXT_FIELDS)
        except Exception as e:
            return f"An error occurred during search: {e}\n"

    def get_news(self, query="top stories", num_results=5):
        """Fetch the latest news headlines.

        Args:
            query (str): The news query (e.g., "technology", "business").
            num_results (int): The maximum number of articles to return.

        Returns:
            str: Formatted headlines, a "no news" line, or an error line.
        """
        try:
            articles = list(self.ddgs.news(query, max_results=num_results))
            if not articles:
                return "No news found.\n"
            return self._format_records(articles, self._NEWS_FIELDS)
        except Exception as e:
            return f"An error occurred while fetching news: {e}\n"

    def get_website_content(self, url, max_chars=2000, timeout=10):
        """Fetch a page and return a plain-text snippet of its content.

        Args:
            url (str): The URL of the website to scrape.
            max_chars (int): Maximum number of text characters to keep.
            timeout (float): Timeout in seconds passed to ``requests.get``.

        Returns:
            str: "Content of <url>:" followed by the extracted text, with a
            trailing "..." only when the text was actually truncated; or an
            error line if the request failed.
        """
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()  # Treat 4xx/5xx responses as errors.
            soup = BeautifulSoup(response.text, 'html.parser')
            # Scripts and stylesheets are not readable content; drop them.
            for script_or_style in soup(["script", "style"]):
                script_or_style.decompose()
            text = ' '.join(soup.stripped_strings)
            snippet = text[:max_chars]
            if len(text) > max_chars:
                # Mark truncation only when something was really cut off.
                snippet += "..."
            return f"Content of {url}:\n{snippet}\n"
        except requests.exceptions.RequestException as e:
            return f"Error fetching website content: {e}\n"

    def search_images(self, query, num_results=3):
        """Perform an image search using DuckDuckGo.

        Args:
            query (str): The search query for images.
            num_results (int): The maximum number of image results to return.

        Returns:
            str: Formatted image results, a "no results" line, or an error
            line.
        """
        try:
            images = list(self.ddgs.images(query, max_results=num_results))
            if not images:
                return "No image results found.\n"
            return self._format_records(images, self._IMAGE_FIELDS)
        except Exception as e:
            return f"An error occurred during image search: {e}\n"

    def GatherContext(self, query="today's news", num_results=3, target_url=None):
        """Gather a comprehensive context report from all sources.

        Args:
            query (str): The primary search query.
            num_results (int): The number of results for each search.
            target_url (str, optional): A specific URL to fetch content from.
                The website section is omitted when this is falsy.

        Returns:
            str: A single multiline string containing all gathered context.
        """
        # Read the clock once so the date and the time always describe the
        # same instant (two separate reads can straddle midnight).
        now = datetime.datetime.now()
        todaydate = now.strftime("%Y-%m-%d")
        current_time = now.strftime("%H:%M:%S")

        parts = [
            f"--- Context Gathered at {current_time} on {todaydate} ---\n\n",
            "--- System Time ---\n",
            f"Date: {todaydate}\n",
            f"Time: {current_time}\n\n",
            "--- Latest News ---\n",
            self.get_news(query, num_results) + "\n",
            "--- Web Search Results ---\n",
            self.search(query, num_results) + "\n",
            "--- Image Search Results ---\n",
            self.search_images(query, num_results) + "\n",
        ]
        if target_url:
            parts.append("--- Website Content ---\n")
            parts.append(self.get_website_content(target_url) + "\n")
        parts.append("--- End of Context ---")
        return "".join(parts)

    # PEP 8 spelling; the original name stays available for existing callers.
    gather_context = GatherContext
if __name__ == "__main__":
    # Demo: build one context report for a general query and print it under
    # a heading. The report is gathered before anything is printed.
    demo_agent = Agent()
    general_report = demo_agent.GatherContext(
        query="latest advancements in AI",
        num_results=3,
    )
    print("--- Example 1: General Query ---", general_report, sep="\n")
|