Spaces:
Running
Running
File size: 1,409 Bytes
723bbe6 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
from langchain_community.tools import DuckDuckGoSearchResults
from langchain.tools import tool
from bs4 import BeautifulSoup
from langchain.agents import initialize_agent, AgentType
from services.llm_client import LLMClient
import requests, json
# Create the parametric search tool.
class ParametricSearch:
    """Agent wrapper that combines a web-page scraper with DuckDuckGo search.

    Attributes (all assigned in ``__init__``):
        llm: The language model object supplied by the caller.
        ddgsearch: A ``DuckDuckGoSearchResults`` tool instance.
        tools: The tools given to the agent: the scraper, then the search tool.
        agent: The agent returned by ``initialize_agent`` using
            ``AgentType.ZERO_SHOT_REACT_DESCRIPTION``.
    """

    def __init__(self, llm):
        """Build the tool list and the agent around ``llm``.

        Args:
            llm: Language model forwarded unchanged to ``initialize_agent``.
        """
        self.llm = llm
        self.ddgsearch = DuckDuckGoSearchResults()
        # ``parametric_search`` is wrapped by ``@tool`` in the class body, so
        # this attribute lookup is expected to yield the LangChain tool object
        # (not a bound method) -- which is why the function takes no ``self``.
        self.tools = [self.parametric_search, self.ddgsearch]
        self.agent = initialize_agent(
            self.tools,
            self.llm,
            agent=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
            verbose=True,
        )

    # NOTE: ``@tool`` uses the function's docstring as the tool description
    # that is shown to the LLM, so the docstring below is runtime text. Keep
    # maintainer-facing documentation in comments like this one instead.
    #
    # Contract: takes a single URL string, downloads the page, strips
    # <script>/<style>/<noscript> elements and returns at most the first 5000
    # characters of the remaining text (one text node per line). It never
    # raises: any failure is reported back to the agent as an error string.
    @tool
    def parametric_search(url: str) -> str:
        """Scrape visible text content from a company webpage."""
        # Browser-like User-Agent; presumably so that sites which reject the
        # default python-requests client still answer.
        headers = {
            "User-Agent": (
                "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
                "AppleWebKit/537.36 (KHTML, like Gecko) "
                "Chrome/123.0 Safari/537.36"
            )
        }
        try:
            res = requests.get(url, headers=headers, timeout=10)
            res.raise_for_status()

            # Requests falls back to ISO-8859-1 for text/* responses whose
            # Content-Type carries no charset, which garbles UTF-8 pages that
            # declare their encoding only in a <meta> tag. Parse the raw bytes
            # so BeautifulSoup can detect the encoding itself, and pass the
            # header charset along only when the server really declared one.
            content_type = res.headers.get("Content-Type", "")
            declared_encoding = (
                res.encoding if "charset" in content_type.lower() else None
            )
            soup = BeautifulSoup(
                res.content, "html.parser", from_encoding=declared_encoding
            )

            # Remove elements whose contents are not visible page text.
            for tag in soup(["script", "style", "noscript"]):
                tag.extract()

            text = soup.get_text(separator="\n", strip=True)
            # Cap the payload handed back to the agent at 5000 characters.
            return text[:5000]
        except Exception as e:
            # Deliberately broad: the agent should receive a readable error
            # message as the tool result instead of the run aborting.
            return f"Error scraping the URL {url}: {e}"
|