Trip-Planner / tools /browser_tools.py
msaifee's picture
Correct import
a542dfc
import json
import requests
import streamlit as st
from crewai.tools import BaseTool
from pydantic import BaseModel, Field
from unstructured.partition.html import partition_html
from crewai import Agent, Task
from crewai import LLM
class WebsiteInput(BaseModel):
website: str = Field(..., description="The website URL to scrape")
class BrowserTools(BaseTool):
name: str = "Scrape website content"
description: str = "Useful to scrape and summarize a website content"
args_schema: type[BaseModel] = WebsiteInput
browserless_api_key : str = Field(..., description="Browserless API Key")
def _run(self, website: str) -> str:
try:
url = f"https://chrome.browserless.io/content?token={self.browserless_api_key}"
payload = json.dumps({"url": website})
headers = {'cache-control': 'no-cache', 'content-type': 'application/json'}
response = requests.request("POST", url, headers=headers, data=payload)
if response.status_code != 200:
return f"Error: Failed to fetch website content. Status code: {response.status_code}"
elements = partition_html(text=response.text)
content = "\n\n".join([str(el) for el in elements])
content = [content[i:i + 8000] for i in range(0, len(content), 8000)]
summaries = []
#llm = LLM(model="groq/deepseek-r1-distill-llama-70b")
llm = LLM(model="gemini/gemini-2.0-flash")
for chunk in content:
agent = Agent(
role='Principal Researcher',
goal='Do amazing researches and summaries based on the content you are working with',
backstory="You're a Principal Researcher at a big company and you need to do a research about a given topic.",
allow_delegation=False,
llm=llm
)
task = Task(
description=f'Analyze and summarize the content below, make sure to include the most relevant information in the summary, return only the summary nothing else.\n\nCONTENT\n----------\n{chunk}',
agent=agent
)
summary = task.execute()
summaries.append(summary)
return "\n\n".join(summaries)
except Exception as e:
return f"Error while processing website: {str(e)}"
async def _arun(self, website: str) -> str:
raise NotImplementedError("Async not implemented")