import datetime
from typing import Optional

import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
class Agent:
    """Gather textual context from several web sources (weather excluded).

    The agent wraps a DuckDuckGo search client (``DDGS``) for web, news and
    image searches, and uses ``requests`` + ``BeautifulSoup`` to pull the
    visible text of a single page.  Every public method returns a plain
    string that is ready to be concatenated into a larger context, and
    reports failures as text instead of raising.
    """

    # Upper bound on the number of page-text characters returned by
    # ``get_website_content``.
    MAX_CONTENT_CHARS = 2000

    # (label, result-key) pairs, in output order, for each kind of search.
    _SEARCH_FIELDS = (
        ("Title", "title"),
        ("Link", "href"),
        ("Snippet", "body"),
    )
    _NEWS_FIELDS = (
        ("Title", "title"),
        ("Source", "source"),
        ("URL", "url"),
        ("Date", "date"),
        ("Snippet", "body"),
    )
    _IMAGE_FIELDS = (
        ("Title", "title"),
        ("Image URL", "image"),
        ("Source URL", "url"),
    )

    def __init__(self):
        """Create the agent with a fresh ``DDGS`` search client."""
        self.ddgs = DDGS()

    @staticmethod
    def _format_results(items, fields):
        """Render result mappings as ``Label: value`` lines.

        Args:
            items: Iterable of mappings (anything offering ``.get``).
            fields: Sequence of ``(label, key)`` pairs; one output line is
                produced per pair, in order.  Missing keys render as ``N/A``.

        Returns:
            str: One block per item, each terminated by a blank line.
        """
        blocks = []
        for item in items:
            lines = [f"{label}: {item.get(key, 'N/A')}" for label, key in fields]
            blocks.append("\n".join(lines) + "\n\n")
        return "".join(blocks)

    def _collect(self, fetch, fields, empty_message, error_prefix):
        """Run one search and turn its outcome into display text.

        Args:
            fetch: Zero-argument callable that performs the search and
                returns an iterable of result mappings.  It is invoked
                inside the ``try`` so that any failure of the search call
                itself is reported as text.
            fields: ``(label, key)`` pairs passed to ``_format_results``.
            empty_message: Text returned when the search yields nothing.
            error_prefix: Text placed before the exception message.

        Returns:
            str: Formatted results, ``empty_message``, or an error line.
        """
        try:
            hits = list(fetch())
            if not hits:
                return empty_message
            return self._format_results(hits, fields)
        except Exception as exc:
            # Deliberately broad: callers embed the returned string in a
            # larger context, so one failing source must not abort the rest.
            return f"{error_prefix}: {exc}\n"

    def search(self, query: str, num_results: int = 3) -> str:
        """Perform a web search using DuckDuckGo.

        Args:
            query: The search query.
            num_results: The maximum number of results to request.

        Returns:
            str: Title / Link / Snippet lines for each result, a
            "no results" notice, or an error message.
        """
        return self._collect(
            lambda: self.ddgs.text(query, max_results=num_results),
            self._SEARCH_FIELDS,
            "No search results found.\n",
            "An error occurred during search",
        )

    def get_news(self, query: str = "top stories", num_results: int = 5) -> str:
        """Fetch news headlines for a query using DuckDuckGo.

        Args:
            query: The news query (e.g. "technology", "business").
            num_results: The maximum number of articles to request.

        Returns:
            str: Title / Source / URL / Date / Snippet lines for each
            article, a "no news" notice, or an error message.
        """
        return self._collect(
            lambda: self.ddgs.news(query, max_results=num_results),
            self._NEWS_FIELDS,
            "No news found.\n",
            "An error occurred while fetching news",
        )

    def get_website_content(self, url: str) -> str:
        """Fetch a page and return its visible text, truncated.

        ``<script>`` and ``<style>`` elements are removed before the text
        is extracted.  At most ``MAX_CONTENT_CHARS`` characters are kept;
        an ellipsis is appended only when text was actually cut off.

        Args:
            url: The URL of the page to fetch.

        Returns:
            str: ``"Content of <url>:"`` followed by the page text, or an
            error message if the HTTP request failed.
        """
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            html = response.text
        except requests.exceptions.RequestException as exc:
            return f"Error fetching website content: {exc}\n"

        soup = BeautifulSoup(html, "html.parser")
        # Scripts and stylesheets are not human-readable page content.
        for tag in soup(["script", "style"]):
            tag.decompose()
        text = " ".join(soup.stripped_strings)

        if len(text) > self.MAX_CONTENT_CHARS:
            text = text[: self.MAX_CONTENT_CHARS] + "..."
        return f"Content of {url}:\n{text}\n"

    def search_images(self, query: str, num_results: int = 3) -> str:
        """Perform an image search using DuckDuckGo.

        Args:
            query: The search query for images.
            num_results: The maximum number of image results to request.

        Returns:
            str: Title / Image URL / Source URL lines for each image, a
            "no results" notice, or an error message.
        """
        return self._collect(
            lambda: self.ddgs.images(query, max_results=num_results),
            self._IMAGE_FIELDS,
            "No image results found.\n",
            "An error occurred during image search",
        )

    def GatherContext(
        self,
        query: str = "today's news",
        num_results: int = 3,
        target_url: Optional[str] = None,
    ) -> str:
        """Gather context from every source into one multiline string.

        The sections appear in a fixed order: header, system time, news,
        web search, image search, optional website content, footer.

        Note: the non-PEP 8 method name is kept for compatibility with
        existing callers.

        Args:
            query: The query used for the news, web and image searches.
            num_results: The number of results requested from each search.
            target_url: If truthy, the page at this URL is fetched and its
                text is added as an extra section.

        Returns:
            str: All gathered sections joined into a single string.
        """
        # Take one clock reading so the date and time always agree, even
        # when the call happens right at midnight.
        now = datetime.datetime.now()
        today = now.strftime("%Y-%m-%d")
        current_time = now.strftime("%H:%M:%S")

        sections = [
            f"--- Context Gathered at {current_time} on {today} ---\n\n",
            "--- System Time ---\n",
            f"Date: {today}\n",
            f"Time: {current_time}\n\n",
            "--- Latest News ---\n",
            self.get_news(query, num_results) + "\n",
            "--- Web Search Results ---\n",
            self.search(query, num_results) + "\n",
            "--- Image Search Results ---\n",
            self.search_images(query, num_results) + "\n",
        ]

        if target_url:
            sections.append("--- Website Content ---\n")
            sections.append(self.get_website_content(target_url) + "\n")

        sections.append("--- End of Context ---")
        return "".join(sections)
if __name__ == "__main__":
    # Demo run: gather context for a general query and print it.
    # This performs live DuckDuckGo searches, so it needs network access.
    agent = Agent()
    context = agent.GatherContext(query="latest advancements in AI", num_results=3)
    print("--- Example 1: General Query ---")
    print(context)