Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -1,5 +1,4 @@
 import os
-
 from langchain import PromptTemplate
 from langchain.agents import initialize_agent, Tool
 from langchain.agents import AgentType
@@ -20,14 +19,23 @@ import time
 from duckduckgo_search import DDGS
 from itertools import islice
 
-# serper_api_key = os.environ.get('SERPER_API_KEY')
 
-# 1. Tool for search
 def search(query, max_retries=5):
+    """
+    Search the given query using DuckDuckGo.
+
+    Args:
+    - query (str): The search query.
+    - max_retries (int): Maximum number of retries in case of request failure.
+
+    Returns:
+    - list[dict]: A list of search results with 'title' and 'url'.
+    """
     for attempt in range(max_retries):
         try:
             result = []
 
+            # Initialize the DuckDuckGo search object.
             with DDGS() as ddgs:
                 response = ddgs.text(query, region='wt-wt', safesearch='Off', timelimit='y')
                 for r in islice(response, 20):
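The next hunk continues search(). For orientation, a standalone check of the DuckDuckGo call shown above could look like the sketch below; the keys on each hit ('title', 'href', 'body') are what duckduckgo_search returned around the time of this commit, so treat the exact result shape as an assumption rather than a guarantee:

    from itertools import islice
    from duckduckgo_search import DDGS

    # Same call as in search(); prints the first few hits.
    with DDGS() as ddgs:
        hits = ddgs.text("langchain agents", region='wt-wt', safesearch='Off', timelimit='y')
        for hit in islice(hits, 5):
            print(hit.get("title"), "-", hit.get("href"))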
@@ -35,68 +43,40 @@ def search(query, max_retries=5):
             return result
 
         except requests.RequestException as e:
+            # Handle request exceptions.
             print(f"Attempt {attempt + 1} raised an error: {e}. Retrying...")
-            if attempt < max_retries - 1:
+            if attempt < max_retries - 1:
                 time.sleep(1)
 
-        except Exception as e:
+        except Exception as e:
+            # Handle other exceptions.
             print(f"An unexpected error occurred on attempt {attempt + 1}: {e}. Retrying...")
             if attempt < max_retries - 1:
                 time.sleep(1)
 
     else:
+        # If max retries reached, exit the function.
         print("Max retries reached. Exiting...")
         return None
 
-
-
-# def search(query, max_retries=5):
-#     url = "https://google.serper.dev/search"
-
-#     payload = json.dumps({
-#         "q": query
-#     })
-
-#     headers = {
-#         'X-API-KEY': serper_api_key,
-#         'Content-Type': 'application/json'
-#     }
-
-#     for attempt in range(max_retries):
-#         try:
-#             response = requests.request("POST", url, headers=headers, data=payload, verify=False)
-
-#             # Check if response is successful (e.g., HTTP 200 OK)
-#             if response.status_code == 200:
-#                 print(response.text)
-#                 return response.text
-#             else:
-#                 print(f"Attempt {attempt + 1} failed with status code {response.status_code}. Retrying...")
-#                 if attempt < max_retries - 1:  # no need to sleep on the last attempt
-#                     time.sleep(1)
-#                 else:
-#                     print("Max retries reached. Exiting...")
-
-#         except requests.RequestException as e:
-#             print(f"Attempt {attempt + 1} raised an error: {e}. Retrying...")
-#             if attempt < max_retries - 1:  # no need to sleep on the last attempt
-#                 time.sleep(1)
-#             else:
-#                 print("Max retries reached. Exiting...")
-
-#     return None
-
-
-# 2. Tool for scraping
 def scrape_website(objective: str, url: str):
-
-
+    """
+    Scrape and potentially summarize the content of a website based on a given objective.
 
+    Args:
+    - objective (str): The objective & task that users give to the agent.
+    - url (str): The URL of the website to be scraped.
+
+    Returns:
+    - str: Extracted or summarized content of the website.
+    """
     print("Scraping website...")
     try:
+        # Use NewsPlease to scrape the website.
         article = NewsPlease.from_url(url)
         print(f'{article.title} - {article.url}')
         text = article.maintext
+        # Summarize if content is too large.
         if len(text) > 10000:
             output = summary(objective, text)
             return output
@@ -105,61 +85,67 @@ def scrape_website(objective: str, url: str):
     except:
         pass
 
-
 def summary(objective, content):
+    """
+    Generate a summary for a given content based on the objective.
+
+    Args:
+    - objective (str): The objective for the summary.
+    - content (str): The content to be summarized.
+
+    Returns:
+    - str: Summarized content.
+    """
     llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613", streaming=True)
 
-
-
+    # Split the content into manageable chunks.
+    text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500)
     docs = text_splitter.create_documents([content])
+
     map_prompt = """
     Write a summary of the following text for {objective}:
     "{text}"
     SUMMARY:
     """
-    map_prompt_template = PromptTemplate(
-        template=map_prompt, input_variables=["text", "objective"])
-
-    summary_chain = load_summarize_chain(
-        llm=llm,
-        chain_type='map_reduce',
-        map_prompt=map_prompt_template,
-        combine_prompt=map_prompt_template,
-        verbose=True
-    )
+    map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text", "objective"])
 
-
+    # Load the summary chain with necessary configurations.
+    summary_chain = load_summarize_chain(llm=llm, chain_type='map_reduce', map_prompt=map_prompt_template, combine_prompt=map_prompt_template, verbose=True)
 
+    output = summary_chain.run(input_documents=docs, objective=objective)
     return output
 
-
 class ScrapeWebsiteInput(BaseModel):
-    """Inputs for scrape_website"""
-    objective: str = Field(
-        description="The objective & task that users give to the agent")
+    """Inputs for scrape_website function."""
+    objective: str = Field(description="The objective & task that users give to the agent")
     url: str = Field(description="The url of the website to be scraped")
 
-
 class ScrapeWebsiteTool(BaseTool):
+    """
+    A tool that provides functionality to scrape a website.
+    """
     name = "scrape_website"
     description = "useful when you need to get data from a website url, passing both url and objective to the function; DO NOT make up any url, the url should only be from the search results"
     args_schema: Type[BaseModel] = ScrapeWebsiteInput
 
     def _run(self, objective: str, url: str):
+        """Runs the scrape_website function."""
         return scrape_website(objective, url)
 
     def _arun(self, url: str):
+        """Asynchronous version of _run. (Currently not implemented)"""
        raise NotImplementedError("error here")
 
 @cl.langchain_factory(use_async=False)
 def run():
-
+    """
+    Initialize and return a langchain agent with search and scraping tools.
+
+    Returns:
+    - Agent: Initialized langchain agent.
+    """
     tools = [
-        Tool(
-            name="Search",
-            func=search,
-            description="useful for when you need to answer questions about current events, data. You should ask targeted questions"
-        ),
+        Tool(name="Search", func=search, description="useful for when you need to answer questions about current events, data. You should ask targeted questions"),
         ScrapeWebsiteTool(),
     ]
 
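The rebuilt summary() above is the substance of this fix: the previous version referenced text_splitter and output without ever defining them, which would raise a NameError at runtime, while the new lines create the splitter and actually run the map_reduce chain. Its helpers are not visible in the truncated import block of this diff; in the LangChain releases this Space appears to target they would typically come from the paths below, so treat these as an assumption:

    # Imports assumed by the rebuilt summary(); not shown in the hunks above.
    from langchain.chat_models import ChatOpenAI
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.chains.summarize import load_summarize_chain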
@@ -175,21 +161,14 @@ def run():
     5/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research
     6/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research"""
     )
-
     agent_kwargs = {
         "extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
         "system_message": system_message,
     }
 
+    # Initialize the ChatOpenAI model.
     llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613", streaming=True)
-    memory = ConversationSummaryBufferMemory(
-
-
-    return initialize_agent(
-        tools,
-        llm,
-        agent=AgentType.OPENAI_FUNCTIONS,
-        verbose=True,
-        agent_kwargs=agent_kwargs,
-        memory=memory,
-    )
+    memory = ConversationSummaryBufferMemory(memory_key="memory", return_messages=True, llm=llm)
+
+    # Initialize the agent with tools and other configurations.
+    return initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True, agent_kwargs=agent_kwargs, memory=memory)
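In this last hunk, memory is now built with memory_key="memory", which matches the MessagesPlaceholder(variable_name="memory") wired into agent_kwargs, and the previously unfinished multi-line ConversationSummaryBufferMemory(...) and initialize_agent(...) calls are completed as single statements. With the @cl.langchain_factory entry point, a local smoke test would presumably go through the Chainlit CLI (on Spaces the platform launches the app itself); this assumes the dependencies are installed and OPENAI_API_KEY is set:

    # Hypothetical local run; not part of the commit.
    chainlit run app.py -w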
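For completeness, app.py pulls in langchain, openai (via ChatOpenAI), chainlit (cl), duckduckgo_search, news-please (NewsPlease) and requests. A requirements.txt for this Space would therefore presumably list at least the packages below; version pins are not visible in this commit, so this is only a sketch:

    # Dependencies implied by app.py's imports (versions unknown).
    langchain
    openai
    chainlit
    duckduckgo-search
    news-please
    requests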