Spaces:

Subhajit01
/

SmartLead

Running

SmartLead / src /services /parametricSearch.py

Subhajit Chakraborty

add imp files

723bbe6 4 months ago

1.41 kB

	from langchain_community.tools import DuckDuckGoSearchResults
	from langchain.tools import tool
	from bs4 import BeautifulSoup
	from langchain.agents import initialize_agent, AgentType
	from services.llm_client import LLMClient
	import requests, json

	# creating the parametrix search tool
	class ParametricSearch:
	def __init__(self, llm):
	self.llm = llm
	self.ddgsearch = DuckDuckGoSearchResults()
	self.tools = [self.parametric_search, self.ddgsearch]
	self.agent = initialize_agent(self.tools, self.llm, agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION, verbose=True)

	@tool
	def parametric_search(url: str) -> str:
	"""Scrape visible text content from a company webpage."""
	headers = {
	"User-Agent": (
	"Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
	"AppleWebKit/537.36 (KHTML, like Gecko) "
	"Chrome/123.0 Safari/537.36"
	)
	}
	try:
	res = requests.get(url, headers=headers, timeout=10)
	res.raise_for_status()
	soup = BeautifulSoup(res.text, 'html.parser')
	for tag in soup(["script", "style", "noscript"]):
	tag.extract()
	text = soup.get_text(separator="\n", strip=True)
	return text[:5000]
	except Exception as e:
	return f"Error scraping the URL {url}: {str(e)}"