SmartLead / src /services /parametricSearch.py
Subhajit Chakraborty
add imp files
723bbe6
from langchain_community.tools import DuckDuckGoSearchResults
from langchain.tools import tool
from bs4 import BeautifulSoup
from langchain.agents import initialize_agent, AgentType
from services.llm_client import LLMClient
import requests, json
# creating the parametrix search tool
class ParametricSearch:
def __init__(self, llm):
self.llm = llm
self.ddgsearch = DuckDuckGoSearchResults()
self.tools = [self.parametric_search, self.ddgsearch]
self.agent = initialize_agent(self.tools, self.llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)
@tool
def parametric_search(url: str) -> str:
"""Scrape visible text content from a company webpage."""
headers = {
"User-Agent": (
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
"AppleWebKit/537.36 (KHTML, like Gecko) "
"Chrome/123.0 Safari/537.36"
)
}
try:
res = requests.get(url, headers=headers, timeout=10)
res.raise_for_status()
soup = BeautifulSoup(res.text, 'html.parser')
for tag in soup(["script", "style", "noscript"]):
tag.extract()
text = soup.get_text(separator="\n", strip=True)
return text[:5000]
except Exception as e:
return f"Error scraping the URL {url}: {str(e)}"