Spaces:

ztcoco
/

Semantic-Bookmark

Paused

File size: 2,487 Bytes

840261a

import time
import os
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import urllib.parse

class SearchEngine:
    """Web search backed by DuckDuckGo's HTML endpoint and headless Chrome.

    Each call to :meth:`semantic_search` starts a fresh Chrome session,
    loads the DuckDuckGo HTML results page for the query, and formats the
    top hits as a Markdown string.
    """

    def __init__(self):
        """Build the Chrome options that every search session reuses.

        If the ``CHROME_BIN`` environment variable is set, its value is used
        as the location of the Chrome binary.
        """
        # Initialize chrome options once
        self.options = Options()
        self.options.add_argument("--headless")
        self.options.add_argument("--no-sandbox")
        self.options.add_argument("--disable-dev-shm-usage")

        chrome_bin = os.getenv("CHROME_BIN")
        if chrome_bin:
            self.options.binary_location = chrome_bin

    @staticmethod
    def _resolve_href(href):
        """Return a directly usable URL for a result link's ``href``.

        DuckDuckGo's HTML results wrap each target in a redirect of the form
        ``//duckduckgo.com/l/?uddg=<percent-encoded target>&...``. That
        protocol-relative redirect does not work as a Markdown link, so the
        real target is extracted from the ``uddg`` parameter. Any other
        protocol-relative URL gets an ``https:`` scheme; everything else is
        returned unchanged. A missing ``href`` yields an empty string.
        """
        if not href:
            return ""

        parsed = urllib.parse.urlparse(href)
        host = parsed.netloc
        is_ddg_host = host == "duckduckgo.com" or host.endswith(".duckduckgo.com")
        if is_ddg_host and parsed.path.rstrip("/") == "/l":
            # parse_qs already percent-decodes the parameter values.
            targets = urllib.parse.parse_qs(parsed.query).get("uddg")
            if targets:
                return targets[0]

        if href.startswith("//"):
            return "https:" + href
        return href

    def semantic_search(self, query):
        """Search DuckDuckGo for ``query`` and return the results as Markdown.

        Args:
            query: The search text; it is percent-encoded into the URL.

        Returns:
            A Markdown string with a header and up to eight numbered results
            (title link plus snippet when one is found), a "No results found"
            message when the page contains no result links, or a
            "Search Failed" message containing the error text if anything
            raises along the way. Exceptions are never propagated.
        """
        try:
            service = Service(ChromeDriverManager().install())
            driver = webdriver.Chrome(service=service, options=self.options)

            try:
                # Use DuckDuckGo HTML version for cleaner scraping
                encoded_query = urllib.parse.quote(query)
                url = f"https://html.duckduckgo.com/html/?q={encoded_query}"

                driver.get(url)
                time.sleep(2)  # Wait for load

                html = driver.page_source
            finally:
                # Always shut the browser down, even when loading fails;
                # otherwise every failed search leaks a Chrome process.
                driver.quit()

            soup = BeautifulSoup(html, 'html.parser')

            results = []

            # DDG HTML results usually have class 'result__a' for the link
            links = soup.find_all('a', class_='result__a')

            for rank, link in enumerate(links[:8], start=1):  # Top 8 results
                title = link.get_text()
                href = self._resolve_href(link.get('href'))
                snippet = ""

                # The snippet, when present, is an <a class="result__snippet">
                # inside the same 'result__body' container as the title link.
                parent = link.find_parent('div', class_='result__body')
                if parent:
                    snip_div = parent.find('a', class_='result__snippet')
                    if snip_div:
                        snippet = snip_div.get_text()

                results.append(f"### {rank}. [{title}]({href})\n{snippet}")

            if not results:
                return "❌ No results found. Try a different query."

            header = f"# 🌐 Search Results for '{query}'\n*(Source: DuckDuckGo)*\n\n"
            return header + "\n\n".join(results)

        except Exception as e:
            # Top-level boundary: callers receive a message, never an exception.
            return f"❌ Search Failed: {str(e)}"