Spaces:
Configuration error
Configuration error
oremaz
committed on
Commit
·
8012613
1
Parent(s):
81a79f6
Update agent.py
Browse files
agent.py
CHANGED
|
@@ -33,7 +33,7 @@ from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
|
|
| 33 |
from llama_index.tools.arxiv import ArxivToolSpec
|
| 34 |
from llama_index.core.agent.workflow import AgentWorkflow
|
| 35 |
from llama_index.llms.vllm import Vllm
|
| 36 |
-
|
| 37 |
|
| 38 |
# Import all required official LlamaIndex Readers
|
| 39 |
from llama_index.readers.file import (
|
|
@@ -494,16 +494,23 @@ dynamic_qe_manager = DynamicQueryEngineManager()
|
|
| 494 |
# 1. Create the base DuckDuckGo search tool from the official spec.
|
| 495 |
# This tool returns text summaries of search results, not just URLs.
|
| 496 |
|
|
|
|
|
|
|
| 497 |
def search_and_extract_content_from_url(query: str) -> List[Document]:
|
| 498 |
"""
|
| 499 |
Searches web, gets top URL, and extracts both text content and images.
|
| 500 |
Returns a list of Document objects containing the extracted content.
|
| 501 |
"""
|
| 502 |
# Get URL from search
|
| 503 |
-
|
| 504 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 505 |
print(url)
|
| 506 |
-
|
| 507 |
documents = []
|
| 508 |
|
| 509 |
try:
|
|
|
|
| 33 |
from llama_index.tools.arxiv import ArxivToolSpec
|
| 34 |
from llama_index.core.agent.workflow import AgentWorkflow
|
| 35 |
from llama_index.llms.vllm import Vllm
|
| 36 |
+
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
|
| 37 |
|
| 38 |
# Import all required official LlamaIndex Readers
|
| 39 |
from llama_index.readers.file import (
|
|
|
|
| 494 |
# 1. Create the base DuckDuckGo search tool from the official spec.
|
| 495 |
# This tool returns text summaries of search results, not just URLs.
|
| 496 |
|
| 497 |
+
base_duckduckgo_tool = DuckDuckGoSearchToolSpec().to_tool_list()[1]
|
| 498 |
+
|
| 499 |
def search_and_extract_content_from_url(query: str) -> List[Document]:
|
| 500 |
"""
|
| 501 |
Searches web, gets top URL, and extracts both text content and images.
|
| 502 |
Returns a list of Document objects containing the extracted content.
|
| 503 |
"""
|
| 504 |
# Get URL from search
|
| 505 |
+
#search_results = base_duckduckgo_tool(query, max_results=1)
|
| 506 |
+
#url_match = re.search(r"https?://\S+", str(search_results))
|
| 507 |
+
|
| 508 |
+
#if not url_match:
|
| 509 |
+
#return [Document(text="No URL could be extracted from the search results.")]
|
| 510 |
+
|
| 511 |
+
#url = url_match.group(0)[:-2]
|
| 512 |
+
url = "https://en.wikipedia.org/wiki/Mercedes_Sosa"
|
| 513 |
print(url)
|
|
|
|
| 514 |
documents = []
|
| 515 |
|
| 516 |
try:
|