Spaces:
Runtime error
Update app.py
app.py CHANGED
@@ -1,5 +1,4 @@
 import os
-
 from langchain import PromptTemplate
 from langchain.agents import initialize_agent, Tool
 from langchain.agents import AgentType
@@ -20,14 +19,23 @@ import time
 from duckduckgo_search import DDGS
 from itertools import islice
 
-# serper_api_key = os.environ.get('SERPER_API_KEY')
 
-# 1. Tool for search
 def search(query, max_retries=5):
+    """
+    Search the given query using DuckDuckGo.
+
+    Args:
+    - query (str): The search query.
+    - max_retries (int): Maximum number of retries in case of request failure.
+
+    Returns:
+    - list[dict]: A list of search results with 'title' and 'url'.
+    """
     for attempt in range(max_retries):
         try:
             result = []
 
+            # Initialize the DuckDuckGo search object.
             with DDGS() as ddgs:
                 response = ddgs.text(query, region='wt-wt', safesearch='Off', timelimit='y')
                 for r in islice(response, 20):
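The next hunk continues search(). For orientation, a standalone check of the DuckDuckGo call shown above could look like the sketch below; the keys on each hit ('title', 'href', 'body') are what duckduckgo_search returned around the time of this commit, so treat the exact result shape as an assumption rather than a guarantee:

    from itertools import islice
    from duckduckgo_search import DDGS

    # Same call as in search(); prints the first few hits.
    with DDGS() as ddgs:
        hits = ddgs.text("langchain agents", region='wt-wt', safesearch='Off', timelimit='y')
        for hit in islice(hits, 5):
            print(hit.get("title"), "-", hit.get("href"))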
@@ -35,68 +43,40 @@ def search(query, max_retries=5):
             return result
 
         except requests.RequestException as e:
+            # Handle request exceptions.
             print(f"Attempt {attempt + 1} raised an error: {e}. Retrying...")
-            if attempt < max_retries - 1:
+            if attempt < max_retries - 1:
                 time.sleep(1)
 
-        except Exception as e:
+        except Exception as e:
+            # Handle other exceptions.
             print(f"An unexpected error occurred on attempt {attempt + 1}: {e}. Retrying...")
             if attempt < max_retries - 1:
                 time.sleep(1)
 
     else:
+        # If max retries reached, exit the function.
         print("Max retries reached. Exiting...")
         return None
 
-
-
-# def search(query, max_retries=5):
-#     url = "https://google.serper.dev/search"
-
-#     payload = json.dumps({
-#         "q": query
-#     })
-
-#     headers = {
-#         'X-API-KEY': serper_api_key,
-#         'Content-Type': 'application/json'
-#     }
-
-#     for attempt in range(max_retries):
-#         try:
-#             response = requests.request("POST", url, headers=headers, data=payload, verify=False)
-
-#             # Check if response is successful (e.g., HTTP 200 OK)
-#             if response.status_code == 200:
-#                 print(response.text)
-#                 return response.text
-#             else:
-#                 print(f"Attempt {attempt + 1} failed with status code {response.status_code}. Retrying...")
-#                 if attempt < max_retries - 1:  # no need to sleep on the last attempt
-#                     time.sleep(1)
-#                 else:
-#                     print("Max retries reached. Exiting...")
-
-#         except requests.RequestException as e:
-#             print(f"Attempt {attempt + 1} raised an error: {e}. Retrying...")
-#             if attempt < max_retries - 1:  # no need to sleep on the last attempt
-#                 time.sleep(1)
-#             else:
-#                 print("Max retries reached. Exiting...")
-
-#     return None
-
-
-# 2. Tool for scraping
 def scrape_website(objective: str, url: str):
-
-
+    """
+    Scrape and potentially summarize the content of a website based on a given objective.
 
+    Args:
+    - objective (str): The objective & task that users give to the agent.
+    - url (str): The URL of the website to be scraped.
+
+    Returns:
+    - str: Extracted or summarized content of the website.
+    """
     print("Scraping website...")
     try:
+        # Use NewsPlease to scrape the website.
         article = NewsPlease.from_url(url)
         print(f'{article.title} - {article.url}')
         text = article.maintext
+        # Summarize if content is too large.
         if len(text) > 10000:
             output = summary(objective, text)
             return output
@@ -105,61 +85,67 @@ def scrape_website(objective: str, url: str):
     except:
         pass
 
-
 def summary(objective, content):
+    """
+    Generate a summary for a given content based on the objective.
+
+    Args:
+    - objective (str): The objective for the summary.
+    - content (str): The content to be summarized.
+
+    Returns:
+    - str: Summarized content.
+    """
     llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613", streaming=True)
 
-
-
+    # Split the content into manageable chunks.
+    text_splitter = RecursiveCharacterTextSplitter(separators=["\n\n", "\n"], chunk_size=10000, chunk_overlap=500)
     docs = text_splitter.create_documents([content])
+
     map_prompt = """
     Write a summary of the following text for {objective}:
     "{text}"
     SUMMARY:
     """
-    map_prompt_template = PromptTemplate(
-        template=map_prompt, input_variables=["text", "objective"])
-
-    summary_chain = load_summarize_chain(
-        llm=llm,
-        chain_type='map_reduce',
-        map_prompt=map_prompt_template,
-        combine_prompt=map_prompt_template,
-        verbose=True
-    )
+    map_prompt_template = PromptTemplate(template=map_prompt, input_variables=["text", "objective"])
 
-
+    # Load the summary chain with necessary configurations.
+    summary_chain = load_summarize_chain(llm=llm, chain_type='map_reduce', map_prompt=map_prompt_template, combine_prompt=map_prompt_template, verbose=True)
 
+    output = summary_chain.run(input_documents=docs, objective=objective)
     return output
 
-
 class ScrapeWebsiteInput(BaseModel):
-    """Inputs for scrape_website"""
-    objective: str = Field(
-        description="The objective & task that users give to the agent")
+    """Inputs for scrape_website function."""
+    objective: str = Field(description="The objective & task that users give to the agent")
     url: str = Field(description="The url of the website to be scraped")
 
-
 class ScrapeWebsiteTool(BaseTool):
+    """
+    A tool that provides functionality to scrape a website.
+    """
     name = "scrape_website"
     description = "useful when you need to get data from a website url, passing both url and objective to the function; DO NOT make up any url, the url should only be from the search results"
     args_schema: Type[BaseModel] = ScrapeWebsiteInput
 
     def _run(self, objective: str, url: str):
+        """Runs the scrape_website function."""
         return scrape_website(objective, url)
 
     def _arun(self, url: str):
+        """Asynchronous version of _run. (Currently not implemented)"""
        raise NotImplementedError("error here")
 
 @cl.langchain_factory(use_async=False)
 def run():
-
+    """
+    Initialize and return a langchain agent with search and scraping tools.
+
+    Returns:
+    - Agent: Initialized langchain agent.
+    """
     tools = [
-        Tool(
-            name="Search",
-            func=search,
-            description="useful for when you need to answer questions about current events, data. You should ask targeted questions"
-        ),
+        Tool(name="Search", func=search, description="useful for when you need to answer questions about current events, data. You should ask targeted questions"),
         ScrapeWebsiteTool(),
     ]
 
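The rebuilt summary() above is the substance of this fix: the previous version referenced text_splitter and output without ever defining them, which would raise a NameError at runtime, while the new lines create the splitter and actually run the map_reduce chain. Its helpers are not visible in the truncated import block of this diff; in the LangChain releases this Space appears to target they would typically come from the paths below, so treat these as an assumption:

    # Imports assumed by the rebuilt summary(); not shown in the hunks above.
    from langchain.chat_models import ChatOpenAI
    from langchain.text_splitter import RecursiveCharacterTextSplitter
    from langchain.chains.summarize import load_summarize_chain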
@@ -175,21 +161,14 @@ def run():
     5/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research
     6/ In the final output, You should include all reference data & links to back up your research; You should include all reference data & links to back up your research"""
     )
-
     agent_kwargs = {
         "extra_prompt_messages": [MessagesPlaceholder(variable_name="memory")],
         "system_message": system_message,
     }
 
+    # Initialize the ChatOpenAI model.
     llm = ChatOpenAI(temperature=0, model="gpt-3.5-turbo-16k-0613", streaming=True)
-    memory = ConversationSummaryBufferMemory(
-
-
-    return initialize_agent(
-        tools,
-        llm,
-        agent=AgentType.OPENAI_FUNCTIONS,
-        verbose=True,
-        agent_kwargs=agent_kwargs,
-        memory=memory,
-    )
+    memory = ConversationSummaryBufferMemory(memory_key="memory", return_messages=True, llm=llm)
+
+    # Initialize the agent with tools and other configurations.
+    return initialize_agent(tools, llm, agent=AgentType.OPENAI_FUNCTIONS, verbose=True, agent_kwargs=agent_kwargs, memory=memory)
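In this last hunk, memory is now built with memory_key="memory", which matches the MessagesPlaceholder(variable_name="memory") wired into agent_kwargs, and the previously unfinished multi-line ConversationSummaryBufferMemory(...) and initialize_agent(...) calls are completed as single statements. With the @cl.langchain_factory entry point, a local smoke test would presumably go through the Chainlit CLI (on Spaces the platform launches the app itself); this assumes the dependencies are installed and OPENAI_API_KEY is set:

    # Hypothetical local run; not part of the commit.
    chainlit run app.py -w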
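For completeness, app.py pulls in langchain, openai (via ChatOpenAI), chainlit (cl), duckduckgo_search, news-please (NewsPlease) and requests. A requirements.txt for this Space would therefore presumably list at least the packages below; version pins are not visible in this commit, so this is only a sketch:

    # Dependencies implied by app.py's imports (versions unknown).
    langchain
    openai
    chainlit
    duckduckgo-search
    news-please
    requests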