Add new tools and functionalities for audio transcription, code execution, document handling, image processing, and mathematical operations
d303e2f
| from langchain_core.tools import tool | |
| from langchain_community.tools.tavily_search import TavilySearchResults | |
| from langchain_community.document_loaders import WikipediaLoader | |
| from langchain_community.document_loaders import ArxivLoader | |
| from youtube_transcript_api import YouTubeTranscriptApi, TranscriptsDisabled, NoTranscriptFound # Added | |
| import os | |
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "wiki_results". Its value is one string
        holding every loaded page rendered as a <Document> block, with the
        blocks separated by a "---" line.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        # The opening tag is deliberately NOT self-closing: the page text and
        # an explicit </Document> closer follow it. Metadata is read with
        # .get() so a page lacking a "source" entry does not abort the search.
        f'<Document source="{doc.metadata.get("source", "")}" '
        f'page="{doc.metadata.get("page", "")}">\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "web_results". Its value is one string
        holding every hit rendered as a <Document> block (blocks separated by
        a "---" line), or the raw text returned by the search tool when it
        did not produce a list of hits.
    """
    search_docs = TavilySearchResults(max_results=3).invoke({"query": query})

    # NOTE(review): the Tavily tool can hand back a plain string instead of a
    # list of hits (presumably an error report -- confirm against the
    # installed version). Iterating a string would yield characters and
    # crash on .get(), so pass the text through unchanged.
    if isinstance(search_docs, str):
        return {"web_results": search_docs}

    formatted_search_docs = "\n\n---\n\n".join(
        # Prefer the "content" field and fall back to "snippet" when absent.
        f'<Document source="{doc.get("url", "")}">\n'
        f'{doc.get("content", doc.get("snippet", ""))}\n</Document>'
        for doc in search_docs
    )
    return {"web_results": formatted_search_docs}
def arxiv_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with the single key "arxiv_results". Its value is one string
        holding every loaded paper rendered as a <Document> block, with the
        blocks separated by a "---" line. Only the first 1000 characters of
        each paper's text are included.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()
    formatted_search_docs = "\n\n---\n\n".join(
        # The opening tag is deliberately NOT self-closing: the paper text
        # and an explicit </Document> closer follow it.
        f'<Document source="{doc.metadata.get("source", "N/A")}" '
        f'page="{doc.metadata.get("page", "")}">\n'
        f"{doc.page_content[:1000]}\n</Document>"
        for doc in search_docs
    )
    return {"arxiv_results": formatted_search_docs}
def _extract_youtube_video_id(youtube_url):
    """Return the video ID found in a YouTube URL, or None when there is none."""
    import re  # local import so this block does not depend on file-level imports

    patterns = (
        # watch URLs; "v" may follow other query parameters, "#..." is dropped
        r"watch\?(?:[^#]*&)?v=([^&#]+)",
        # short links
        r"youtu\.be/([^?&#/]+)",
        # path-style links
        r"/(?:shorts|embed|live)/([^?&#/]+)",
    )
    for pattern in patterns:
        match = re.search(pattern, youtube_url)
        if match:
            return match.group(1)
    return None


def _select_transcript(transcript_list):
    """Pick the best transcript from the list, or return None if there is none.

    Preference order: manual English, auto-generated English, manual in any
    language, auto-generated in any language.
    """
    # The available language codes are gathered by iterating the list and
    # reading each transcript's language_code.
    available_codes = [entry.language_code for entry in transcript_list]
    attempts = (
        (transcript_list.find_manually_created_transcript, ["en"]),
        (transcript_list.find_generated_transcript, ["en"]),
        (transcript_list.find_manually_created_transcript, available_codes),
        (transcript_list.find_generated_transcript, available_codes),
    )
    for finder, codes in attempts:
        try:
            return finder(codes)
        except NoTranscriptFound:
            continue
    return None


def get_youtube_transcript(youtube_url: str) -> str:
    """Fetches the transcript for a given YouTube video URL using youtube-transcript-api directly.
    If the video has no transcript, it will return an error message. Then use web_search to find the transcript.

    Args:
        youtube_url: The URL of the YouTube video.

    Returns:
        On success, a dict with the single key "youtube_transcript" holding
        the transcript text joined with spaces. On any failure, a plain
        string describing the problem (no exception is propagated).
    """
    try:
        video_id = _extract_youtube_video_id(youtube_url)
        if not video_id:
            return "Error: Could not parse YouTube video ID from URL."

        transcript_list = YouTubeTranscriptApi.list_transcripts(video_id)
        transcript = _select_transcript(transcript_list)
        if transcript is None:
            return "Error: No manual or auto-generated transcripts found for this video in any language."

        fetched_transcript = transcript.fetch()
        if not fetched_transcript:
            return "Could not retrieve transcript for the video. The video might not have transcripts available."

        # Snippets may be plain dicts or objects exposing a .text attribute,
        # depending on what fetch() yields; accept both.
        full_transcript = " ".join(
            item["text"] if isinstance(item, dict) else item.text
            for item in fetched_transcript
        )
        # Wrapped in a dict to mirror the shape returned by the search tools.
        return {"youtube_transcript": full_transcript}
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video."
    except NoTranscriptFound:
        return "Error: No transcripts found for this video (this should have been caught earlier, but good fallback)."
    except Exception as e:
        # Last-resort boundary: report network/API failures as text.
        if "HTTP Error 403" in str(e) or "Too Many Requests" in str(e):
            return f"Error: YouTube API request failed, possibly due to rate limiting or access restrictions: {str(e)}"
        return f"Error fetching YouTube transcript using youtube-transcript-api: {str(e)}"