Spaces:
Paused
Paused
| import time | |
| import os | |
| from bs4 import BeautifulSoup | |
| from selenium import webdriver | |
| from selenium.webdriver.chrome.options import Options | |
| from selenium.webdriver.chrome.service import Service | |
| from webdriver_manager.chrome import ChromeDriverManager | |
| import urllib.parse | |
class SearchEngine:
    """Search DuckDuckGo's HTML endpoint through headless Chrome.

    Results are scraped from the returned page and rendered as a Markdown
    string; failures are reported as a string as well, never raised.
    """

    def __init__(self):
        """Build the Chrome options that every search reuses.

        Chrome is configured with ``--headless``, ``--no-sandbox`` and
        ``--disable-dev-shm-usage``. If the ``CHROME_BIN`` environment
        variable is set, it is used as the browser binary location.
        """
        # Initialize chrome options once
        self.options = Options()
        self.options.add_argument("--headless")
        self.options.add_argument("--no-sandbox")
        self.options.add_argument("--disable-dev-shm-usage")
        chrome_bin = os.getenv("CHROME_BIN")
        if chrome_bin:
            self.options.binary_location = chrome_bin

    def semantic_search(self, query, max_results=8):
        """Run *query* on DuckDuckGo and return the hits as Markdown.

        Args:
            query: Search text; it is URL-quoted before being sent.
            max_results: Maximum number of result links to include
                (defaults to 8, the previously hard-coded limit).

        Returns:
            A Markdown document with a header and one numbered entry per
            result, a "no results" message when nothing was found, or a
            "Search Failed" message carrying the error text when anything
            goes wrong. This method does not raise.
        """
        try:
            service = Service(ChromeDriverManager().install())
            driver = webdriver.Chrome(service=service, options=self.options)
            try:
                # Use DuckDuckGo HTML version for cleaner scraping
                encoded_query = urllib.parse.quote(query)
                url = f"https://html.duckduckgo.com/html/?q={encoded_query}"
                driver.get(url)
                time.sleep(2)  # Wait for load
                html = driver.page_source
            finally:
                # Always shut the browser down, even when navigation fails;
                # otherwise each failed search leaks a Chrome process.
                driver.quit()

            results = self._extract_results(html, max_results)
            if not results:
                return "β No results found. Try a different query."
            header = f"# π Search Results for '{query}'\n*(Source: DuckDuckGo)*\n\n"
            return header + "\n\n".join(results)
        except Exception as e:
            # Top-level boundary: callers expect a displayable string.
            return f"β Search Failed: {str(e)}"

    @staticmethod
    def _extract_results(html, max_results):
        """Turn a DuckDuckGo HTML results page into Markdown entries."""
        soup = BeautifulSoup(html, 'html.parser')
        results = []
        # DDG HTML results usually have class 'result__a' for the link
        links = soup.find_all('a', class_='result__a')
        for i, link in enumerate(links[:max_results]):
            title = link.get_text()
            href = link.get('href')
            snippet = ""
            # The snippet, when present, is an 'a.result__snippet' element
            # inside the same 'div.result__body' container as the link.
            parent = link.find_parent('div', class_='result__body')
            if parent:
                snip_div = parent.find('a', class_='result__snippet')
                if snip_div:
                    snippet = snip_div.get_text()
            results.append(f"### {i+1}. [{title}]({href})\n{snippet}")
        return results