File size: 2,487 Bytes
840261a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import time
import os
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.chrome.service import Service
from webdriver_manager.chrome import ChromeDriverManager
import urllib.parse

class SearchEngine:
    """Keyword web search backed by DuckDuckGo's HTML-only endpoint.

    Each search launches a headless Chrome session through Selenium, loads
    the results page, extracts the top hits with BeautifulSoup and renders
    them as a Markdown string. Despite the method name ``semantic_search``,
    no semantic ranking is done here: the query is passed to DuckDuckGo
    as-is.
    """

    # Maximum number of hits included in the rendered output.
    MAX_RESULTS = 8
    # Seconds to pause after navigation before reading the page source.
    PAGE_LOAD_WAIT = 2

    # Icons are written as escapes so this file stays pure ASCII; the
    # literals previously stored here were mis-decoded UTF-8 (mojibake).
    # NOTE(review): the header icon is assumed to have been the
    # "globe with meridians" emoji - confirm against the intended UI.
    _FAILURE_ICON = "\u274c"      # cross mark
    _HEADER_ICON = "\U0001f310"   # globe with meridians

    def __init__(self):
        """Build the Chrome options reused by every search.

        Chrome is run headless with ``--no-sandbox`` and
        ``--disable-dev-shm-usage`` (flags commonly needed when Chrome runs
        inside a container). If the ``CHROME_BIN`` environment variable is
        set, it is used as the Chrome binary location.
        """
        # Initialize chrome options once
        self.options = Options()
        self.options.add_argument("--headless")
        self.options.add_argument("--no-sandbox")
        self.options.add_argument("--disable-dev-shm-usage")

        chrome_bin = os.getenv("CHROME_BIN")
        if chrome_bin:
            self.options.binary_location = chrome_bin

    def semantic_search(self, query):
        """Search DuckDuckGo for *query* and return the hits as Markdown.

        Args:
            query: The search text.

        Returns:
            A Markdown string: a header followed by up to ``MAX_RESULTS``
            numbered entries (linked title plus snippet), a "no results"
            message when nothing was found, or a "Search Failed" message
            describing the error. This method never raises for ordinary
            errors; every ``Exception`` is converted to the failure string.
        """
        try:
            # Build the URL first so an unusable query fails before a
            # browser process is started.
            # Use DuckDuckGo HTML version for cleaner scraping
            encoded_query = urllib.parse.quote(query)
            url = f"https://html.duckduckgo.com/html/?q={encoded_query}"

            html = self._fetch_html(url)
            entries = self._parse_results(html, self.MAX_RESULTS)
            return self._render(query, entries)
        except Exception as e:
            return f"{self._FAILURE_ICON} Search Failed: {e}"

    def _fetch_html(self, url):
        """Load *url* in a fresh headless Chrome and return the page source."""
        service = Service(ChromeDriverManager().install())
        driver = webdriver.Chrome(service=service, options=self.options)
        try:
            driver.get(url)
            time.sleep(self.PAGE_LOAD_WAIT)  # Wait for load
            return driver.page_source
        finally:
            # Always shut the browser down, including on errors; otherwise
            # every failed search leaks a Chrome/chromedriver process.
            driver.quit()

    @classmethod
    def _parse_results(cls, html, limit):
        """Extract up to *limit* ``(title, href, snippet)`` tuples from *html*."""
        soup = BeautifulSoup(html, 'html.parser')
        entries = []

        # DDG HTML results usually have class 'result__a' for the link
        for link in soup.find_all('a', class_='result__a')[:limit]:
            # Collapse whitespace so embedded newlines cannot break the
            # Markdown link that the title is rendered into.
            title = " ".join(link.get_text().split())
            href = cls._resolve_href(link.get('href'))
            snippet = ""

            # The snippet, when present, is an 'a.result__snippet' inside
            # the same 'div.result__body' as the title link.
            parent = link.find_parent('div', class_='result__body')
            if parent:
                snippet_tag = parent.find('a', class_='result__snippet')
                if snippet_tag:
                    snippet = " ".join(snippet_tag.get_text().split())

            entries.append((title, href, snippet))
        return entries

    @staticmethod
    def _resolve_href(href):
        """Turn a DuckDuckGo result href into a directly usable URL.

        Protocol-relative links (``//host/...``) get an ``https:`` scheme,
        and DuckDuckGo redirect links (``.../l/?uddg=<encoded target>``) are
        unwrapped to their target. Anything else is returned unchanged; a
        missing href becomes an empty string.
        """
        if not href:
            return ""
        if href.startswith("//"):
            href = "https:" + href
        try:
            parts = urllib.parse.urlsplit(href)
        except ValueError:
            # Malformed URL: keep what we were given rather than fail the
            # whole search over one bad link.
            return href

        host = parts.hostname or ""
        is_ddg = (
            host in ("", "duckduckgo.com") or host.endswith(".duckduckgo.com")
        )
        if is_ddg and parts.path.rstrip("/") == "/l":
            # parse_qs percent-decodes the value for us.
            target = urllib.parse.parse_qs(parts.query).get("uddg")
            if target:
                return target[0]
        return href

    @classmethod
    def _render(cls, query, entries):
        """Format ``(title, href, snippet)`` tuples as the Markdown report."""
        if not entries:
            return f"{cls._FAILURE_ICON} No results found. Try a different query."

        blocks = [
            f"### {rank}. [{title}]({href})\n{snippet}"
            for rank, (title, href, snippet) in enumerate(entries, start=1)
        ]
        header = (
            f"# {cls._HEADER_ICON} Search Results for '{query}'\n"
            "*(Source: DuckDuckGo)*\n\n"
        )
        return header + "\n\n".join(blocks)