import json

from langchain_community.document_loaders import WikipediaLoader
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.tools import tool
from youtube_transcript_api import YouTubeTranscriptApi

# NOTE: the ``@tool`` decorator publishes each function's docstring as the
# tool description shown to the model, so docstrings below are kept short and
# implementation notes live in ``#`` comments instead.

# Visual divider placed between individual search hits in the returned text.
_RESULT_SEPARATOR = "\n\n---\n\n"

# Returned instead of an empty string so the caller always gets a usable answer.
_NO_RESULTS = "No results found."


@tool
def wiki_search(query: str) -> str:
    """Search Wikipedia using the query and return results.

    Args:
        query: The search query.
    """
    print(f"Search Wikipedia for query '{query}'")
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = _RESULT_SEPARATOR.join(
        f"\n{doc.page_content}\n" for doc in search_docs
    )
    return formatted_search_docs or _NO_RESULTS


@tool
def web_search_duckduckgo(query: str) -> str:
    """Search DuckDuckGo for a query and return maximum 3 results.

    Args:
        query: The search query.
    """
    print(f"Search Web for query '{query}'")
    search_tool = DuckDuckGoSearchResults(output_format="json", max_results=3)
    raw_results = search_tool.invoke(query)

    # With output_format="json" the tool hands back a JSON *string*; iterating
    # that directly would walk over single characters, so decode it first.
    if isinstance(raw_results, str):
        try:
            search_docs = json.loads(raw_results)
        except ValueError:
            # Not JSON after all (e.g. a plain-text message): pass it through.
            return raw_results or _NO_RESULTS
    else:
        search_docs = raw_results

    # Format into plain text, one "Title / URL / Snippet" triple per hit.
    formatted_results = _RESULT_SEPARATOR.join(
        f"Title: {doc.get('title', '')}\n"
        f"URL: {doc.get('link', '')}\n"
        f"Snippet: {doc.get('snippet', '')}"
        for doc in search_docs
    )
    return formatted_results or _NO_RESULTS


@tool
def web_search(query: str) -> str:
    """Search the web using Tavily and return the top 3 results with summaries.

    Args:
        query: The search query.
    """
    search_tool = TavilySearchResults(max_results=3)
    search_docs = search_tool.invoke(query)

    # NOTE(review): the Tavily tool appears to return an error *string* rather
    # than a list when the API call fails - surface it instead of crashing on
    # ``str.get``. Confirm against the installed langchain_community version.
    if isinstance(search_docs, str):
        return f"Error during web search: {search_docs}"

    formatted = _RESULT_SEPARATOR.join(
        f"Title: {doc.get('title', '')}\n"
        f"URL: {doc.get('url', '')}\n"
        f"Summary: {doc.get('content', '')}"
        for doc in search_docs
    )
    return formatted or _NO_RESULTS
# Upper bound on returned transcript characters (limit for token safety).
_MAX_TRANSCRIPT_CHARS = 2000


def _extract_video_id(url: str) -> str:
    """Return the YouTube video id contained in *url*.

    Handles ``watch?v=<id>``, ``youtu.be/<id>`` and ``/embed|shorts|live|v/<id>``
    links. Anything else (including a bare video id) falls back to the simple
    ``v=`` split so previously accepted inputs keep working.
    """
    # Local import: the file's top-level import block is outside this block.
    from urllib.parse import parse_qs, urlparse

    cleaned = url.strip()
    parsed = urlparse(cleaned)
    host = (parsed.hostname or "").lower()
    path_parts = [part for part in parsed.path.split("/") if part]

    # Short links carry the id as the first path segment.
    if host in {"youtu.be", "www.youtu.be"} and path_parts:
        return path_parts[0]

    # Regular watch URLs carry the id in the ``v`` query parameter.
    query_ids = parse_qs(parsed.query).get("v")
    if query_ids:
        return query_ids[0]

    # Embed / shorts / live style URLs: /<kind>/<id>.
    if len(path_parts) >= 2 and path_parts[0] in {"embed", "shorts", "live", "v"}:
        return path_parts[1]

    # Fallback: legacy extraction, which also passes a bare id through as-is.
    return cleaned.split("v=")[-1].split("&")[0]


@tool
def youtube_transcript(url: str) -> str:
    """Get transcript from a YouTube video by URL."""
    # Broad catch is deliberate: this is a tool boundary, and the failure is
    # reported back as text rather than raised.
    try:
        video_id = _extract_video_id(url)
        if hasattr(YouTubeTranscriptApi, "get_transcript"):
            transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        else:
            # NOTE(review): newer youtube_transcript_api releases replace the
            # static helper with an instance ``fetch`` API - confirm against
            # the installed version.
            transcript_list = YouTubeTranscriptApi().fetch(video_id).to_raw_data()
        transcript = " ".join(seg["text"] for seg in transcript_list)
        return transcript[:_MAX_TRANSCRIPT_CHARS]
    except Exception as e:
        return f"Error retrieving transcript: {str(e)}"


# Tools exposed to the agent; the transcript tool is currently disabled.
tools = [
    wiki_search,
    web_search,
    # youtube_transcript
]