# Hosting-page residue (not part of the module): "toni5rovic's picture",
# commit message "add solution", commit 0b7930f.
import json
from urllib.parse import parse_qs, urlparse

from langchain_community.document_loaders import WikipediaLoader
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.tools import tool
from youtube_transcript_api import YouTubeTranscriptApi
@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia using the query and return results.

    Args:
        query: The search query."""
    # NOTE(review): the docstring wording is left untouched on purpose; the
    # @tool decorator presumably exposes it as the tool description.
    print(f"Search Wikipedia for query '{query}'")

    # Load at most two matching articles.
    loader = WikipediaLoader(query=query, load_max_docs=2)

    # Render each article as a pseudo-XML <Document> section.
    rendered = []
    for article in loader.load():
        source = article.metadata["source"]
        page = article.metadata.get("page", "")
        rendered.append(
            f'<Document source="{source}" page="{page}"/>\n{article.page_content}\n</Document>'
        )

    # Sections are separated by a horizontal-rule style divider.
    return "\n\n---\n\n".join(rendered)
@tool
def web_search_duckduckgo(query: str) -> str:
    """Search DuckDuckGo for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        The results as plain text, one "Title / URL / Snippet" section per
        result separated by ``---`` lines, or "No results found." when the
        search yields nothing.
    """
    print(f"Search Web for query '{query}'")
    search_tool = DuckDuckGoSearchResults(output_format="json", max_results=3)
    raw_results = search_tool.invoke(query)

    # With output_format="json" the tool returns a JSON-encoded string rather
    # than a list of dicts; decode it first so we iterate over results instead
    # of over the characters of the string.
    if isinstance(raw_results, str):
        search_docs = json.loads(raw_results)
    else:
        search_docs = raw_results

    # Format into plain text. `.get` keeps one malformed result from aborting
    # the whole search.
    formatted_results = "\n\n---\n\n".join(
        f"Title: {doc.get('title', '')}\n"
        f"URL: {doc.get('link', '')}\n"
        f"Snippet: {doc.get('snippet', '')}"
        for doc in search_docs
    )
    return formatted_results or "No results found."
@tool
def web_search(query: str) -> str:
    """Search the web using Tavily and return the top 3 results with summaries.

    Args:
        query: The search query.

    Returns:
        The results as plain text, one "Title / URL / Summary" section per
        result separated by ``---`` lines; "No results found." when there are
        none; or a "Search failed: ..." message when the search tool reports
        an error instead of results.
    """
    search_tool = TavilySearchResults(max_results=3)
    search_docs = search_tool.invoke(query)

    # On an API failure the Tavily tool hands back the error text as a plain
    # string instead of a list of result dicts. Surface it rather than
    # crashing while trying to treat each character as a result.
    if isinstance(search_docs, str):
        return f"Search failed: {search_docs}"

    formatted = "\n\n---\n\n".join(
        f"Title: {doc.get('title', '')}\n"
        f"URL: {doc.get('url', '')}\n"
        f"Summary: {doc.get('content', '')}"
        for doc in search_docs
    )
    return formatted or "No results found."
def _extract_video_id(url: str) -> str:
    """Return the YouTube video ID found in *url*.

    Handles ``watch?v=<id>`` URLs, ``youtu.be/<id>`` short links and
    ``/shorts/<id>``, ``/embed/<id>``, ``/live/<id>``, ``/v/<id>`` paths.
    Anything else (for example a bare video ID) goes through the legacy
    split-based extraction.
    """
    parsed = urlparse(url.strip())
    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]

    # Standard watch URLs carry the ID in the ``v`` query parameter.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    # Short links: https://youtu.be/<id>
    if host.endswith("youtu.be") and path_parts:
        return path_parts[0]

    # Path-based forms: /shorts/<id>, /embed/<id>, /live/<id>, /v/<id>
    if len(path_parts) >= 2 and path_parts[0] in {"shorts", "embed", "live", "v"}:
        return path_parts[1]

    # Legacy behaviour, kept so inputs that worked before still work.
    return url.split("v=")[-1].split("&")[0]


@tool
def youtube_transcript(url: str) -> str:
    """Get transcript from a YouTube video by URL.

    Args:
        url: Link to the YouTube video.

    Returns:
        The transcript text joined with spaces and truncated to 2000
        characters, or an "Error retrieving transcript: ..." message if the
        transcript could not be fetched.
    """
    # Broad catch is deliberate: the tool reports failures as text to the
    # caller instead of raising.
    try:
        video_id = _extract_video_id(url)
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            # Older youtube-transcript-api releases: static helper returning
            # a list of dicts with a "text" key.
            segments = YouTubeTranscriptApi.get_transcript(video_id)
            texts = [segment["text"] for segment in segments]
        else:
            # Newer releases dropped the static helper; fetch through an
            # instance and read the ``text`` attribute of each snippet.
            fetched = YouTubeTranscriptApi().fetch(video_id)
            texts = [snippet.text for snippet in fetched]
        transcript = " ".join(texts)
        return transcript[:2000]  # limit for token safety
    except Exception as e:
        return f"Error retrieving transcript: {str(e)}"
# Tools exported from this module. web_search_duckduckgo and
# youtube_transcript are defined above but are not registered here.
tools = [wiki_search, web_search]