Spaces:
Configuration error
Configuration error
oremaz
committed on
Commit
·
e0b6833
1
Parent(s):
c361079
Update agent.py
Browse files
agent.py
CHANGED
|
@@ -31,7 +31,6 @@ from llama_index.readers.json import JSONReader
|
|
| 31 |
from llama_index.readers.web import BeautifulSoupWebReader
|
| 32 |
from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
|
| 33 |
from llama_index.tools.arxiv import ArxivToolSpec
|
| 34 |
-
from llama_index.tools.duckduckgo import DuckDuckGoSearchToolSpec
|
| 35 |
from llama_index.core.agent.workflow import AgentWorkflow
|
| 36 |
from llama_index.llms.vllm import Vllm
|
| 37 |
|
|
@@ -45,6 +44,7 @@ from llama_index.readers.file import (
|
|
| 45 |
VideoAudioReader # Adding VideoAudioReader for handling audio/video without API
|
| 46 |
)
|
| 47 |
from pydantic import PrivateAttr
|
|
|
|
| 48 |
|
| 49 |
# Optional API-based imports (conditionally loaded)
|
| 50 |
try:
|
|
@@ -486,7 +486,6 @@ dynamic_qe_manager = DynamicQueryEngineManager()
|
|
| 486 |
|
| 487 |
# 1. Create the base DuckDuckGo search tool from the official spec.
|
| 488 |
# This tool returns text summaries of search results, not just URLs.
|
| 489 |
-
base_duckduckgo_tool = DuckDuckGoSearchToolSpec().to_tool_list()[1]
|
| 490 |
|
| 491 |
def search_and_extract_content_from_url(query: str) -> List[Document]:
|
| 492 |
"""
|
|
@@ -494,14 +493,12 @@ def search_and_extract_content_from_url(query: str) -> List[Document]:
|
|
| 494 |
Returns a list of Document objects containing the extracted content.
|
| 495 |
"""
|
| 496 |
# Get URL from search
|
| 497 |
-
|
| 498 |
-
|
|
|
|
|
|
|
| 499 |
|
| 500 |
-
|
| 501 |
-
return [Document(text="No URL could be extracted from the search results.")]
|
| 502 |
-
|
| 503 |
-
url = url_match.group(0)[:-2]
|
| 504 |
-
print(url)
|
| 505 |
|
| 506 |
documents = []
|
| 507 |
|
|
|
|
| 31 |
from llama_index.readers.web import BeautifulSoupWebReader
|
| 32 |
from llama_index.readers.youtube_transcript import YoutubeTranscriptReader
|
| 33 |
from llama_index.tools.arxiv import ArxivToolSpec
|
|
|
|
| 34 |
from llama_index.core.agent.workflow import AgentWorkflow
|
| 35 |
from llama_index.llms.vllm import Vllm
|
| 36 |
|
|
|
|
| 44 |
VideoAudioReader # Adding VideoAudioReader for handling audio/video without API
|
| 45 |
)
|
| 46 |
from pydantic import PrivateAttr
|
| 47 |
+
from duckduckgo_search import DDGS
|
| 48 |
|
| 49 |
# Optional API-based imports (conditionally loaded)
|
| 50 |
try:
|
|
|
|
| 486 |
|
| 487 |
# 1. Create the base DuckDuckGo search tool from the official spec.
|
| 488 |
# This tool returns text summaries of search results, not just URLs.
|
|
|
|
| 489 |
|
| 490 |
def search_and_extract_content_from_url(query: str) -> List[Document]:
|
| 491 |
"""
|
|
|
|
| 493 |
Returns a list of Document objects containing the extracted content.
|
| 494 |
"""
|
| 495 |
# Get URL from search
|
| 496 |
+
with DDGS() as ddgs:
|
| 497 |
+
results = list(ddgs.text(query, max_results=1))
|
| 498 |
+
if not results:
|
| 499 |
+
return []
|
| 500 |
|
| 501 |
+
url = results[0]['href']
|
|
|
|
|
|
|
|
|
|
|
|
|
| 502 |
|
| 503 |
documents = []
|
| 504 |
|