import os
import time
import urllib.parse

from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager


class SearchEngine:
    """Run web searches by scraping DuckDuckGo's HTML endpoint with headless Chrome."""

    def __init__(self):
        """Build the Chrome options once so every search reuses them.

        If the ``CHROME_BIN`` environment variable is set, it is used as the
        Chrome binary location.
        """
        self.options = Options()
        self.options.add_argument("--headless")
        self.options.add_argument("--no-sandbox")
        self.options.add_argument("--disable-dev-shm-usage")

        chrome_bin = os.getenv("CHROME_BIN")
        if chrome_bin:
            self.options.binary_location = chrome_bin

    def semantic_search(self, query, max_results=8):
        """Search DuckDuckGo for *query* and return the hits as Markdown.

        Args:
            query: Search text; it is URL-encoded before being sent.
            max_results: Maximum number of results to include (default 8).

        Returns:
            A Markdown string: a header followed by one numbered entry per
            result, or a "❌ ..." message when nothing was found or the
            search failed. Exceptions are reported in the returned string
            rather than raised.
        """
        try:
            service = Service(ChromeDriverManager().install())
            driver = webdriver.Chrome(service=service, options=self.options)
            try:
                # Use the DuckDuckGo HTML version for cleaner scraping.
                encoded_query = urllib.parse.quote(query)
                url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
                driver.get(url)
                time.sleep(2)  # Wait for load
                html = driver.page_source
            finally:
                # Always shut the browser down, even when loading fails;
                # otherwise each failed search leaks a Chrome process.
                driver.quit()

            results = self._parse_results(html, max_results)
            if not results:
                return "❌ No results found. Try a different query."

            header = f"# 🌐 Search Results for '{query}'\n*(Source: DuckDuckGo)*\n\n"
            return header + "\n\n".join(results)
        except Exception as e:
            # Boundary handler: callers receive a message string, never an exception.
            return f"❌ Search Failed: {str(e)}"

    def _parse_results(self, html, max_results):
        """Extract up to *max_results* Markdown entries from a results page."""
        soup = BeautifulSoup(html, 'html.parser')
        # DDG HTML results usually have class 'result__a' for the link.
        links = soup.find_all('a', class_='result__a')

        results = []
        for i, link in enumerate(links[:max_results]):
            title = link.get_text()
            href = link.get('href')

            # The snippet, when present, is looked up as a 'result__snippet'
            # anchor inside the enclosing 'result__body' div.
            snippet = ""
            parent = link.find_parent('div', class_='result__body')
            if parent:
                snip_div = parent.find('a', class_='result__snippet')
                if snip_div:
                    snippet = snip_div.get_text()

            results.append(f"### {i+1}. [{title}]({href})\n{snippet}")
        return results