import base64 import pandas as pd from langchain_core.messages import AnyMessage, HumanMessage, AIMessage from langchain.tools import tool from langchain_community.tools.tavily_search import TavilySearchResults from langchain_community.document_loaders import WikipediaLoader from langchain_community.document_loaders import ArxivLoader @tool def read_excel(file_path: str) -> str: """ Extract readable text from an Excel file (.xlsx or .xls). Args: file_path: Path to the Excel file. Returns: A string representation of all sheets and their content. """ try: df_dict = pd.read_excel(file_path, sheet_name=None) # Read all sheets result = [] for sheet_name, sheet_df in df_dict.items(): sheet_text = sheet_df.to_string(index=False) result.append(f"Sheet: {sheet_name}\n{sheet_text}") return "\n\n".join(result) except Exception as e: return f"Error reading Excel file: {str(e)}" @tool def read_python(file_path: str) -> str: """ Extract source code from a Python (.py) file. Args: file_path: Path to the Python file. Returns: A string containing the full source code of the file. """ try: with open(file_path, "r", encoding="utf-8") as f: return f.read() except Exception as e: return f"Error reading Python file: {str(e)}" class ExtractTextFromImage: def __init__(self, vision_llm): self.vision_llm = vision_llm @tool def __call__(self, img_path: str) -> str: """ Extract text from an image file. Args: img_path: A string representing the path to an image (e.g., PNG, JPEG). Returns: A single string containing the concatenated text extracted from the image. """ all_text = "" try: # Read image and encode as base64 with open(img_path, "rb") as image_file: image_bytes = image_file.read() image_base64 = base64.b64encode(image_bytes).decode("utf-8") # Prepare the prompt including the base64 image data message = [ HumanMessage( content=[ { "type": "text", "text": ( "Extract all the text from this image. " "Return only the extracted text, no explanations." ), }, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_base64}" }, }, ] ) ] # Call the vision-capable model response = self.vision_llm.invoke(message) # Append extracted text all_text += response.content + "\n\n" return all_text.strip() except Exception as e: error_msg = f"Error extracting text: {str(e)}" print(error_msg) return "" class DescribeImage: def __init__(self, vision_llm): self.vision_llm = vision_llm @tool def __call__(self, img_path: str) -> str: """ Generate a detailed description of an image. This function reads a image from an url, encodes it, and sends it to a vision-capable language model to obtain a comprehensive, natural language description of the image's content, including its objects, actions, and context, following a specific query. Args: img_path: A string representing the path to an image (e.g., PNG, JPEG). query: Information to extract from the image. Returns: A single string containing a detailed description of the image. """ try: # Read image and encode as base64 with open(img_path, "rb") as image_file: image_bytes = image_file.read() image_base64 = base64.b64encode(image_bytes).decode("utf-8") # Prepare message payload message = [ HumanMessage( content=[ { "type": "text", "text": ( f"Describe this image in rich detail. Include objects, people, setting, background elements, and any inferred actions or context. Avoid technical jargon. In particular, extract the following information: {query}" ), }, { "type": "image_url", "image_url": { "url": f"data:image/png;base64,{image_base64}" }, }, ] ) ] response = self.vision_llm.invoke(message) return response.content.strip() except Exception as e: error_msg = f"Error describing image: {str(e)}" print(error_msg) return "" @tool def wiki_search(query: str) -> str: """Search Wikipedia for a query and return maximum 2 results. Args: query: The search query.""" search_docs = WikipediaLoader(query=query, load_max_docs=2).load() formatted_search_docs = "\n\n---\n\n".join( [ f'\n{doc.page_content}\n' for doc in search_docs ]) return {"wiki_results": formatted_search_docs} @tool def web_search(query: str) -> str: """Search Tavily for a query and return maximum 3 results. Args: query: The search query.""" search_docs = TavilySearchResults(max_results=3).invoke(query) formatted_search_docs = "\n\n---\n\n".join( [ f'\n{doc.page_content}\n' for doc in search_docs ]) return {"web_results": formatted_search_docs} @tool def arxiv_search(query: str) -> str: """Search Arxiv for a query and return maximum 3 result. Args: query: The search query.""" search_docs = ArxivLoader(query=query, load_max_docs=3).load() formatted_search_docs = "\n\n---\n\n".join( [ f'\n{doc.page_content[:1000]}\n' for doc in search_docs ]) return {"arvix_results": formatted_search_docs}