File size: 1,409 Bytes
723bbe6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
from langchain_community.tools import DuckDuckGoSearchResults
from langchain.tools import tool
from bs4 import BeautifulSoup
from langchain.agents import initialize_agent, AgentType
from services.llm_client import LLMClient
import requests, json

# Create the parametric search tool
class ParametricSearch:
    """Wire a web-page scraping tool and DuckDuckGo search into one agent.

    Attributes set by ``__init__``:
        llm: the language-model object handed to ``initialize_agent``.
        ddgsearch: a ``DuckDuckGoSearchResults`` tool instance.
        tools: the list of tools given to the agent (scraper + search).
        agent: the object returned by ``initialize_agent`` for the
            ``ZERO_SHOT_REACT_DESCRIPTION`` agent type.
    """

    def __init__(self, llm):
        """Build the tool list and the agent around ``llm``."""
        self.llm = llm
        self.ddgsearch = DuckDuckGoSearchResults()
        self.tools = [self.parametric_search, self.ddgsearch]
        self.agent = initialize_agent(
            self.tools,
            self.llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
        )

    # NOTE: this function deliberately takes no ``self``; it is wrapped by
    # ``@tool`` and placed in ``self.tools`` as-is. LangChain uses the
    # docstring below as the tool description shown to the LLM, so its
    # wording is part of runtime behavior -- edit it with care.
    @tool
    def parametric_search(url: str) -> str:
        """Scrape visible text content from a company webpage."""
        # Browser-like User-Agent sent with the request.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/123.0 Safari/537.36"
            )
        }
        try:
            res = requests.get(url, headers=headers, timeout=10)
            res.raise_for_status()
            # requests falls back to ISO-8859-1 for text/* responses whose
            # Content-Type carries no charset, which garbles UTF-8 pages that
            # only declare their encoding in a <meta> tag. Hand the raw bytes
            # to BeautifulSoup so it can detect the encoding itself, and only
            # pass the header's charset along when the server declared one.
            content_type = res.headers.get("Content-Type", "")
            declared_encoding = (
                res.encoding if "charset" in content_type.lower() else None
            )
            soup = BeautifulSoup(
                res.content, 'html.parser', from_encoding=declared_encoding
            )
            # Drop elements whose text is not visible page content.
            for tag in soup(["script", "style", "noscript"]):
                tag.extract()
            text = soup.get_text(separator="\n", strip=True)
            # Return at most the first 5000 characters of the page text.
            return text[:5000]
        except Exception as e:
            # Tool boundary: report the failure as the tool's output instead
            # of raising, so the caller receives a readable error string.
            return f"Error scraping the URL {url}: {e}"