Spaces:
Runtime error
Runtime error
import json
from urllib.parse import urlparse

from langchain_community.document_loaders import WikipediaLoader
from langchain_community.tools import DuckDuckGoSearchResults
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.tools import tool
from youtube_transcript_api import YouTubeTranscriptApi
def wiki_search(query: str) -> str:
    """Search Wikipedia using the query and return results.

    Args:
        query: The search query.

    Returns:
        The loaded pages (at most two are requested), each wrapped in a
        ``<Document ...>`` header carrying its source and page metadata,
        joined by ``---`` separator lines.
    """
    print(f"Search Wikipedia for query '{query}'")
    loader = WikipediaLoader(query=query, load_max_docs=2)

    sections = []
    for page in loader.load():
        meta = page.metadata
        # "source" is indexed directly (must be present); "page" is optional.
        header = f'<Document source="{meta["source"]}" page="{meta.get("page", "")}"/>'
        sections.append(f"{header}\n{page.page_content}\n</Document>")

    return "\n\n---\n\n".join(sections)
def web_search_duckduckgo(query: str) -> str:
    """Search DuckDuckGo for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        The hits rendered as "Title / URL / Snippet" text blocks separated
        by ``---`` lines, or "No results found." when there are no hits.
    """
    print(f"Search Web for query '{query}'")
    search_tool = DuckDuckGoSearchResults(output_format="json", max_results=3)
    search_docs = search_tool.invoke(query)

    # With output_format="json" the tool is expected to hand back a
    # JSON-encoded string rather than a list; decode it so each hit can be
    # indexed by key instead of iterating over single characters.
    if isinstance(search_docs, str):
        search_docs = json.loads(search_docs)

    # Format into plain text, one field per line.
    formatted_results = "\n\n---\n\n".join(
        f"Title: {doc['title']}\nURL: {doc['link']}\nSnippet: {doc['snippet']}"
        for doc in search_docs
    )
    # Same empty-result message as web_search.
    return formatted_results or "No results found."
def web_search(query: str) -> str:
    """Search the web using Tavily and return the top 3 results with summaries.

    Args:
        query: The search query.

    Returns:
        The hits rendered as "Title / URL / Summary" text blocks separated
        by ``---`` lines, "No results found." when there are no hits, or an
        "Error performing web search: ..." message when the tool returns an
        error string instead of a result list.
    """
    search_tool = TavilySearchResults(max_results=3)
    search_docs = search_tool.invoke(query)

    # The tool may hand back a plain string (an error description) instead of
    # a list of result dicts; report it rather than iterating its characters.
    if isinstance(search_docs, str):
        return f"Error performing web search: {search_docs}"

    formatted = "\n\n---\n\n".join(
        f"Title: {doc.get('title', '')}\n"
        f"URL: {doc.get('url', '')}\n"
        # .get keeps a hit without "content" from raising KeyError, matching
        # how the other fields are read.
        f"Summary: {doc.get('content', '')}"
        for doc in search_docs
    )
    return formatted or "No results found."
def youtube_transcript(url: str) -> str:
    """Get transcript from a YouTube video by URL.

    Args:
        url: A video URL. Both the ``...?v=<id>`` query form and forms that
            carry the id as the last path segment (e.g. ``youtu.be/<id>``)
            are accepted; a bare video id is passed through unchanged.

    Returns:
        The transcript segments joined by spaces and truncated to 2000
        characters, or an "Error retrieving transcript: ..." message if
        anything goes wrong.
    """
    try:
        if "v=" in url:
            # Query form: take what follows "v=", dropping any further
            # parameters and a trailing "#fragment".
            video_id = url.split("v=")[-1].split("&")[0].split("#")[0]
        else:
            # Path form: the id is the final path segment; urlparse strips
            # the query string and fragment.
            video_id = urlparse(url).path.rstrip("/").split("/")[-1]
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        transcript = " ".join(seg["text"] for seg in transcript_list)
        return transcript[:2000]  # limit for token safety
    except Exception as e:
        # Best-effort tool: report the failure as text instead of raising.
        return f"Error retrieving transcript: {e}"
# Tool callables collected for use elsewhere. youtube_transcript is defined
# in this file but is currently not included in the list.
tools = [wiki_search, web_search]