"""Agent tool belt: arithmetic, web/knowledge-base search, file handling,
OCR, tabular analysis, and YouTube transcript retrieval.

All `@tool`-decorated callables are intended to be bound to a LangChain agent.
"""

from langchain_core.tools import tool
from langchain_community.utilities.duckduckgo_search import DuckDuckGoSearchAPIWrapper
from langchain_community.utilities.google_search import GoogleSearchAPIWrapper
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_community.document_loaders.wikipedia import WikipediaLoader
from langchain_community.document_loaders.arxiv import ArxivLoader
from langchain_community.document_loaders.pubmed import PubMedLoader
from langchain_community.tools.google_search.tool import GoogleSearchRun
from typing import Optional
import os
import tempfile
import requests
from urllib.parse import urlparse, parse_qs
import pytesseract
from PIL import Image
import pandas as pd
import uuid
from youtube_transcript_api import YouTubeTranscriptApi


## Simple algebra tools

@tool
def add(a: float, b: float) -> float:
    """Add two numbers.

    Args:
        a: first float
        b: second float
    """
    return a + b


@tool
def substract(a: float, b: float) -> float:
    """Subtract two numbers (returns a - b).

    NOTE(review): the name keeps the original spelling ("substract") for
    backward compatibility — agents may already reference it by name.

    Args:
        a: first float
        b: second float
    """
    return a - b


@tool
def multiply(a: float, b: float) -> float:
    """Multiply two numbers.

    Args:
        a: first float
        b: second float
    """
    return a * b


@tool
def divide(a: float, b: float) -> float:
    """Divide two numbers (returns a / b).

    Args:
        a: first float
        b: second float

    Raises:
        ValueError: if b is zero.
    """
    if b == 0:
        raise ValueError("Cannot divide any number by zero.")
    return a / b


## Search Tools

def _result_text(item) -> str:
    """Best-effort extraction of display text from a heterogeneous search
    result item (DuckDuckGo returns dicts with "snippet", Tavily returns
    dicts with "content", document loaders return objects with
    `.page_content`)."""
    if isinstance(item, dict):
        return item.get("snippet") or item.get("content") or str(item)
    return getattr(item, "page_content", str(item))


# NOTE(review): unlike the other search helpers this one is NOT decorated
# with @tool — presumably intentional (it may be wrapped elsewhere); confirm
# before adding the decorator, since that would change the object type.
def DuckDuckGoSearchTool(query: str) -> dict:
    """Search DuckDuckGo for a query and return at most 5 results.

    Args:
        query: The search query.

    Returns:
        Dict with a single "web_results" key holding the joined snippets.
    """
    results = DuckDuckGoSearchAPIWrapper().results(query=query, max_results=5)
    formatted = "\n\n---\n\n".join(f'\n{res["snippet"]}\n' for res in results)
    return {"web_results": formatted}


@tool
def TavilySearchTool(query: str) -> dict:
    """Search Tavily for a query and return at most 5 results.

    Args:
        query: The search query.

    Returns:
        Dict with a single "web_results" key holding the joined result text.
    """
    # BaseTool.invoke takes the input positionally; `invoke(query=query)`
    # raises TypeError. Tavily returns dicts, so extract text defensively.
    search_docs = TavilySearchResults(max_results=5).invoke(query)
    formatted = "\n\n---\n\n".join(f"\n{_result_text(doc)}\n" for doc in search_docs)
    return {"web_results": formatted}


@tool
def combined_web_search(query: str) -> dict:
    """Search Google, DuckDuckGo, and Tavily for a query and return combined
    results.

    Args:
        query: The search query.

    Returns:
        Dict with a single "web_results" key holding all sources joined by
        "---" separators.
    """
    # GoogleSearchAPIWrapper.run returns one formatted string, while the
    # other two backends return lists of result items — they cannot be
    # concatenated directly, so each source is flattened to text first.
    google_text = GoogleSearchAPIWrapper(k=5).run(query)
    duck_results = DuckDuckGoSearchAPIWrapper().results(query=query, max_results=5)
    tavily_results = TavilySearchResults(max_results=5).invoke(query)

    parts = [f"\n{google_text}\n"]
    parts.extend(f"\n{_result_text(item)}\n" for item in duck_results)
    parts.extend(f"\n{_result_text(item)}\n" for item in tavily_results)
    return {"web_results": "\n\n---\n\n".join(parts)}


@tool
def WikipediaSearchTool(query: str) -> dict:
    """Search Wikipedia for a query and return at most 5 results.

    Args:
        query: The search query.

    Returns:
        Dict with a single "wiki_results" key holding the joined page text.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=5).load()
    formatted = "\n\n---\n\n".join(f"\n{doc.page_content}\n" for doc in search_docs)
    return {"wiki_results": formatted}


@tool
def ArxivSearchTool(query: str) -> dict:
    """Search Arxiv for a query and return at most 5 results (each truncated
    to 1000 characters).

    Args:
        query: The search query.

    Returns:
        Dict with a single "arvix_results" key (key spelling kept for
        backward compatibility with downstream consumers).
    """
    search_docs = ArxivLoader(query=query, load_max_docs=5).load()
    formatted = "\n\n---\n\n".join(
        f"\n{doc.page_content[:1000]}\n" for doc in search_docs
    )
    return {"arvix_results": formatted}


@tool
def PubmedSearchTool(query: str) -> dict:
    """Search PubMed for a query and return at most 5 results (each truncated
    to 1000 characters).

    Args:
        query: The search query.

    Returns:
        Dict with a single "pubmed_results" key holding the joined abstracts.
    """
    search_docs = PubMedLoader(query=query, load_max_docs=5).load()
    formatted = "\n\n---\n\n".join(
        f"\n{doc.page_content[:1000]}\n" for doc in search_docs
    )
    return {"pubmed_results": formatted}


@tool
def save_and_read_file(content: str, filename: Optional[str] = None) -> str:
    """Save content to a file in the temp directory and return the path.

    Args:
        content (str): the content to save to the file
        filename (str, optional): the name of the file. If not provided, a
            random file name is generated.

    Returns:
        Human-readable message containing the saved file path.
    """
    temp_dir = tempfile.gettempdir()
    if filename is None:
        # mkstemp avoids leaving an open NamedTemporaryFile handle, which
        # would block reopening the path on Windows.
        fd, filepath = tempfile.mkstemp(dir=temp_dir)
        os.close(fd)
    else:
        filepath = os.path.join(temp_dir, filename)

    with open(filepath, "w", encoding="utf-8") as f:
        f.write(content)

    return f"File saved to {filepath}. You can read this file to process its contents."


@tool
def download_file_from_url(url: str, filename: Optional[str] = None) -> str:
    """Download a file from a URL and save it to a temporary location.

    Args:
        url (str): the URL of the file to download.
        filename (str, optional): the name of the file. If not provided, a
            name is derived from the URL or randomly generated.

    Returns:
        Human-readable message with the downloaded file path, or an error
        message on failure.
    """
    try:
        # Derive a filename from the URL path when none is given.
        if not filename:
            path = urlparse(url).path
            filename = os.path.basename(path)
            if not filename:
                filename = f"downloaded_{uuid.uuid4().hex[:8]}"

        temp_dir = tempfile.gettempdir()
        filepath = os.path.join(temp_dir, filename)

        # Timeout prevents an unresponsive server from hanging the agent.
        response = requests.get(url, stream=True, timeout=30)
        response.raise_for_status()

        with open(filepath, "wb") as f:
            for chunk in response.iter_content(chunk_size=8192):
                f.write(chunk)

        return f"File downloaded to {filepath}. You can read this file to process its contents."
    except Exception as e:
        # Best-effort tool: report the failure as text for the agent.
        return f"Error downloading file: {str(e)}"


@tool
def extract_text_from_image(image_path: str) -> str:
    """Extract text from an image using OCR library pytesseract (if available).

    Args:
        image_path (str): the path to the image file.

    Returns:
        The extracted text, or an error message on failure.
    """
    try:
        image = Image.open(image_path)
        text = pytesseract.image_to_string(image)
        return f"Extracted text from image:\n\n{text}"
    except Exception as e:
        return f"Error extracting text from image: {str(e)}"


@tool
def analyze_csv_file(file_path: str, query: str) -> str:
    """Analyze a CSV file using pandas and answer a question about it.

    Args:
        file_path (str): the path to the CSV file.
        query (str): Question about the data (currently unused; a generic
            summary is always returned).

    Returns:
        Row/column counts plus `describe()` statistics, or an error message.
    """
    try:
        df = pd.read_csv(file_path)

        result = f"CSV file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        result += f"Columns: {', '.join(df.columns)}\n\n"
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        return f"Error analyzing CSV file: {str(e)}"


@tool
def analyze_excel_file(file_path: str, query: str) -> str:
    """Analyze an Excel file using pandas and answer a question about it.

    Args:
        file_path (str): the path to the Excel file.
        query (str): Question about the data (currently unused; a generic
            summary is always returned).

    Returns:
        Row/column counts plus `describe()` statistics, or an error message.
    """
    try:
        df = pd.read_excel(file_path)

        result = (
            f"Excel file loaded with {len(df)} rows and {len(df.columns)} columns.\n"
        )
        result += f"Columns: {', '.join(df.columns)}\n\n"
        result += "Summary statistics:\n"
        result += str(df.describe())
        return result
    except Exception as e:
        return f"Error analyzing Excel file: {str(e)}"


## Analyze Youtube Transcript tools

def extract_video_id(youtube_url: str) -> str | None:
    """Extract the video ID from a YouTube URL.

    Supports standard, mobile, and shortened formats:
    - https://www.youtube.com/watch?v=VIDEO_ID
    - https://m.youtube.com/watch?v=VIDEO_ID
    - https://youtu.be/VIDEO_ID

    Returns:
        The video ID string, or None when the URL is not recognized.
    """
    try:
        parsed_url = urlparse(youtube_url)
        host = parsed_url.hostname
        if host in ("www.youtube.com", "youtube.com", "m.youtube.com"):
            return parse_qs(parsed_url.query).get("v", [None])[0]
        elif host == "youtu.be":
            return parsed_url.path.strip("/")
    except Exception:
        return None
    return None


@tool
def get_youtube_transcript(youtube_url: str) -> str:
    """Returns the transcript of a YouTube video as plain text.

    Use this tool to extract spoken words from videos for Q&A, summarization,
    or analysis. This does not include visual or on-screen content.

    Args:
        youtube_url: Any supported YouTube URL format.

    Returns:
        The transcript as one space-joined string, or an error message.
    """
    video_id = extract_video_id(youtube_url)
    if not video_id:
        return "Invalid or unsupported YouTube URL format."
    try:
        transcript = YouTubeTranscriptApi.get_transcript(video_id)
        return " ".join(entry["text"] for entry in transcript)
    except Exception as e:
        return f"Transcript unavailable: {str(e)}"