import aiohttp from bs4 import BeautifulSoup import re from website_viewer import fetch_website_content async def search_n_browse(search_phrase: str) -> dict: url = "https://lite.duckduckgo.com/lite/" headers = { "Content-Type": "application/x-www-form-urlencoded", } data = { "q": search_phrase, } try: async with aiohttp.ClientSession() as session: async with session.post(url, data=data, headers=headers) as response: if response.status == 200: body = await response.text() soup = BeautifulSoup(body, 'html.parser') # Extract links links = [a['href'] for a in soup.find_all('a', href=True) if not a['href'].startswith('#')] links = [link if link.startswith('http') else '' for link in links] # Extract titles titles = [a.text.strip() for a in soup.select('tr > td > a')] # Extract snippets snippets = [td.text.strip() for td in soup.select('td.result-snippet')] # Extract link texts link_texts = [span.text for span in soup.select('td > span.link-text')] # Process results json_data = {} if snippets: for index, snippet in enumerate(snippets): json_data[f"Item_{index + 1}"] = { "title": titles[index] if index < len(titles) else "", "snippet": snippet, "linkText": link_texts[index] if index < len(link_texts) else "", } else: json_data["Result"] = { "title": "No results were found.", "snippet": "Our search engine could not find what you are looking for.", "linkText": "Thank you for using our search engine.", } return json_data else: raise Exception(f"Search API error. Status code: {response.status}") except Exception as e: raise Exception(f"Error during search: {str(e)}") async def search_with_content(search_phrase: str, top_n: int = 5) -> dict: """ Search for content and retrieve the content of the top N results """ try: # First, get the search results search_results = await search_n_browse(search_phrase) # Process only the top N results top_results = {} count = 0 for key, result in search_results.items(): if key.startswith("Item_") and count < top_n: try: # Get the URL from linkText url = result.get("linkText", "") if url: # Fetch the content of the page content = await fetch_website_content(url) # Add the content to the result top_results[key] = { "title": result.get("title", ""), "snippet": result.get("snippet", ""), "linkText": result.get("linkText", ""), "content": content } count += 1 except Exception as e: # If we can't fetch the content, still include the basic result top_results[key] = { "title": result.get("title", ""), "snippet": result.get("snippet", ""), "linkText": result.get("linkText", ""), "error": f"Could not fetch content: {str(e)}" } count += 1 return top_results except Exception as e: raise Exception(f"Error during search with content: {str(e)}")