Spaces:
Runtime error
Runtime error
| import os | |
| from langchain import PromptTemplate | |
| from langchain.agents import initialize_agent, Tool | |
| from langchain.agents import AgentType | |
| from langchain.chat_models import ChatOpenAI | |
| from langchain.prompts import MessagesPlaceholder | |
| from langchain.memory import ConversationSummaryBufferMemory | |
| from langchain.text_splitter import RecursiveCharacterTextSplitter | |
| from langchain.chains.summarize import load_summarize_chain | |
| from langchain.tools import BaseTool | |
| from pydantic import BaseModel, Field | |
| from typing import Type | |
| import requests | |
| import json | |
| from langchain.schema import SystemMessage | |
| import chainlit as cl | |
| from newsplease import NewsPlease | |
| import time | |
| from duckduckgo_search import DDGS | |
| from itertools import islice | |
def search(query, max_retries=5):
    """
    Search the given query using DuckDuckGo, retrying on failure.

    Args:
    - query (str): The search query.
    - max_retries (int): Maximum number of attempts before giving up.

    Returns:
    - list[dict] | None: Up to 20 results, each with 'title' and 'url' keys,
      or None when every attempt failed (or max_retries is not positive).
    """
    for attempt in range(max_retries):
        try:
            with DDGS() as ddgs:
                response = ddgs.text(query, region='wt-wt', safesearch='Off', timelimit='y')
                # Consume the response while the DDGS session is still open,
                # keeping only the first 20 hits.
                return [
                    {'title': r['title'], 'url': r['href']}
                    for r in islice(response, 20)
                ]
        except requests.RequestException as e:
            problem = f"Attempt {attempt + 1} raised an error: {e}."
        except Exception as e:
            # Best-effort tool: any other failure is reported and retried too.
            problem = f"An unexpected error occurred on attempt {attempt + 1}: {e}."

        # Shared back-off path for both failure kinds; only announce a retry
        # (and sleep) when another attempt will actually happen.
        if attempt < max_retries - 1:
            print(f"{problem} Retrying...")
            time.sleep(1)
        else:
            print(problem)

    # Single exit for exhaustion, regardless of which exception type was last.
    print("Max retries reached. Exiting...")
    return None
def scrape_website(objective: str, url: str):
    """
    Scrape and potentially summarize the content of a website based on a given objective.

    Args:
    - objective (str): The objective & task that users give to the agent.
    - url (str): The URL of the website to be scraped.

    Returns:
    - str | None: The extracted main text, a summary of it when it is longer
      than 10000 characters, or None when scraping failed or produced no text.
    """
    print("Scraping website...")
    try:
        # Use NewsPlease to scrape the website.
        article = NewsPlease.from_url(url)
        print(f'{article.title} - {article.url}')
        text = article.maintext
        if text is None:
            # Nothing was extracted; report it instead of relying on a
            # swallowed TypeError from len(None).
            print(f"No main text extracted from {url}")
            return None
        # Summarize if content is too large.
        if len(text) > 10000:
            return summary(objective, text)
        return text
    except Exception as e:
        # Scraping is best-effort: the caller treats None as "try the next
        # URL". Catch Exception (not a bare except) so KeyboardInterrupt and
        # SystemExit still propagate, and log why the scrape failed.
        print(f"Failed to scrape {url}: {e}")
        return None
def summary(objective, content):
    """
    Produce an objective-focused summary of a long text with a map-reduce chain.

    Args:
    - objective (str): What the summary should focus on; substituted into the prompt.
    - content (str): The text to summarize.

    Returns:
    - str: The output of the summarize chain.
    """
    chat_model = ChatOpenAI(
        temperature=0,
        model="gpt-3.5-turbo-16k-0613",
        streaming=True,
    )

    # Break the text into chunks of at most 10000 characters with a
    # 500-character overlap, splitting on blank lines first, then newlines.
    splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n"],
        chunk_size=10000,
        chunk_overlap=500,
    )
    chunks = splitter.create_documents([content])

    prompt_text = """
Write a summary of the following text for {objective}:
"{text}"
SUMMARY:
"""
    prompt = PromptTemplate(
        template=prompt_text,
        input_variables=["text", "objective"],
    )

    # The same prompt drives both the per-chunk (map) and the combine step.
    chain = load_summarize_chain(
        llm=chat_model,
        chain_type='map_reduce',
        map_prompt=prompt,
        combine_prompt=prompt,
        verbose=True,
    )
    return chain.run(input_documents=chunks, objective=objective)
class ScrapeWebsiteInput(BaseModel):
    """Inputs for scrape_website function."""

    # Both fields are required; `...` makes that explicit. The descriptions
    # are attached to the fields as schema metadata.
    objective: str = Field(
        ...,
        description="The objective & task that users give to the agent",
    )
    url: str = Field(
        ...,
        description="The url of the website to be scraped",
    )
class ScrapeWebsiteTool(BaseTool):
    """
    A tool that provides functionality to scrape a website.

    Wraps scrape_website so an agent can call it with the arguments described
    by ScrapeWebsiteInput. Only synchronous execution is supported.
    """

    # Annotated so the overrides are valid pydantic fields on both older and
    # newer LangChain releases.
    name: str = "scrape_website"
    description: str = "useful when you need to get data from a website url, passing both url and objective to the function; DO NOT make up any url, the url should only be from the search results"
    args_schema: Type[BaseModel] = ScrapeWebsiteInput

    def _run(self, objective: str, url: str):
        """Run scrape_website and return its result (text, summary, or None)."""
        return scrape_website(objective, url)

    async def _arun(self, objective: str, url: str):
        """Asynchronous version of _run. (Currently not implemented)

        Accepts the same arguments as _run so an async invocation fails with
        NotImplementedError rather than a TypeError for an unexpected argument.

        Raises:
        - NotImplementedError: always.
        """
        raise NotImplementedError("scrape_website does not support async execution")
def run():
    """
    Build the research agent.

    The agent is an OpenAI-functions agent with two tools (DuckDuckGo search
    and the website scraper), a fixed researcher system prompt, and a
    summary-buffer conversation memory exposed under the "memory" key.

    Returns:
    - The agent object returned by initialize_agent.
    """
    search_tool = Tool(
        name="Search",
        func=search,
        description="useful for when you need to answer questions about current events, data. You should ask targeted questions",
    )
    tools = [search_tool, ScrapeWebsiteTool()]

    # Prompt text is runtime behavior: kept verbatim.
    researcher_prompt = """You are a world class researcher, who can do detailed research on any topic and produce facts based results;
you do not make things up, you will try as hard as possible to gather facts & data to back up the research
Please make sure you complete the objective above with the following rules:
1/ You should do enough research to gather as much information as possible about the objective
2/ If scraping a URL returns "None" go to the next URL. Keep on iterating over URLs untill you find sunstantial information.
3/ If there are url of relevant links & articles, you will scrape it to gather more information
4/ After scraping & search, you should think "is there any new things i should search & scraping based on the data I collected to increase research quality?" If answer is yes, continue; But don't do this more than 3 iteratins
5/ You should not make things up, you should only write facts & data that you have gathered
6/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research
7/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research"""
    system_message = SystemMessage(content=researcher_prompt)

    # The placeholder name must match the memory_key used below.
    memory_slot = MessagesPlaceholder(variable_name="memory")
    agent_kwargs = dict(
        extra_prompt_messages=[memory_slot],
        system_message=system_message,
    )

    # One chat model is shared by the agent and by the memory summarizer.
    llm = ChatOpenAI(
        temperature=0,
        model="gpt-3.5-turbo-16k-0613",
        streaming=True,
    )
    memory = ConversationSummaryBufferMemory(
        memory_key="memory",
        return_messages=True,
        llm=llm,
    )

    agent = initialize_agent(
        tools,
        llm,
        agent=AgentType.OPENAI_FUNCTIONS,
        verbose=True,
        agent_kwargs=agent_kwargs,
        memory=memory,
    )
    return agent