| | import base64 |
| |
|
| | import ffmpeg |
| | import pandas as pd |
| | import whisper |
| | import yt_dlp |
| | from langchain.tools import tool |
| | from langchain.tools.tavily_search import TavilySearchResults |
| | from langchain_community.document_loaders import ArxivLoader, WikipediaLoader |
| | from langchain_core.messages import HumanMessage |
| | from typing import List |
| | from functools import reduce |
| | import operator |
| | import contextlib |
| | import os |
| |
|
| |
|
@tool
def read_excel(file_path: str) -> str:
    """Extract readable text from an Excel file (.xlsx or .xls).

    Args:
        file_path: Path to the Excel file.

    Returns:
        A string representation of all sheets and their content, or an
        error message if the file cannot be read.
    """
    try:
        # sheet_name=None loads every sheet into a {name: DataFrame} dict.
        df_dict = pd.read_excel(file_path, sheet_name=None)
        # Emit "Sheet: <name>" followed by the sheet's rows as JSON for
        # each sheet, joined in one pass (replaces the original quadratic
        # `+=` build over an intermediate list of single-entry dicts).
        return "".join(
            f"Sheet: {sheet_name}\n"
            f"{sheet_df.to_json(orient='records', lines=False)}\n\n"
            for sheet_name, sheet_df in df_dict.items()
        )
    except Exception as e:
        return f"Error reading Excel file: {str(e)}"
| |
|
| |
|
@tool
def read_python(file_path: str) -> str:
    """Extract source code from a Python (.py) file.

    Args:
        file_path: Path to the Python file.

    Returns:
        A string containing the full source code of the file, or an
        error message if the file cannot be read.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as source_file:
            contents = source_file.read()
        return contents
    except Exception as e:
        return f"Error reading Python file: {str(e)}"
| |
|
| |
|
class ExtractTextFromImage:
    """Class to initialize the extract_text_from_image tool."""

    def __init__(self, multimodal_model):
        """Initialize multimodal model."""
        self.multimodal_model = multimodal_model

    def __call_extract_text_from_image__(self, img_path: str) -> str:
        """Extract text from an image file.

        Args:
            img_path: A string representing the path to an image (e.g., PNG, JPEG).

        Returns:
            A single string containing the concatenated text extracted from the image.
        """
        try:
            # Read the raw image bytes and base64-encode them so they can
            # be embedded in a data URL for the multimodal model.
            with open(img_path, "rb") as image_file:
                encoded = base64.b64encode(image_file.read()).decode("utf-8")

            prompt_text = (
                "Extract all the text from this image. "
                "Return only the extracted text, no explanations."
            )
            request = [
                HumanMessage(
                    content=[
                        {"type": "text", "text": prompt_text},
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{encoded}"
                            },
                        },
                    ]
                )
            ]
            response = self.multimodal_model.invoke(request)
            # The original appended "\n\n" into an accumulator and then
            # stripped; the net result is the stripped response content.
            return (response.content + "\n\n").strip()
        except Exception as e:
            print(f"Error extracting text: {str(e)}")
            return ""
| |
|
| |
|
class DescribeImage:
    """Class to initialize the describe_image tool."""

    def __init__(self, multimodal_model):
        """Initialize multimodal model."""
        self.multimodal_model = multimodal_model

    def __call_describe_image__(self, img_path: str, query: str) -> str:
        """Generate a detailed description of an image.

        This function reads an image from a path, encodes it, and sends it to a
        vision-capable language model to obtain a comprehensive, natural language
        description of the image's content, including its objects, actions, and
        context, following a specific query.

        Args:
            img_path: A string representing the path to an image (e.g., PNG, JPEG).
            query: Information to extract from the image.

        Returns:
            A single string containing a detailed description of the image.
        """
        try:
            # Encode the image as base64 for the data-URL payload.
            with open(img_path, "rb") as image_file:
                encoded = base64.b64encode(image_file.read()).decode("utf-8")

            request = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                f"Describe this image in rich detail. Include objects, people, setting, background elements, and any inferred actions or context. Avoid technical jargon. In particular, extract the following information: {query}"
                            ),
                        },
                        {
                            "type": "image_url",
                            "image_url": {
                                "url": f"data:image/png;base64,{encoded}"
                            },
                        },
                    ]
                )
            ]
            model_reply = self.multimodal_model.invoke(request)
            return model_reply.content.strip()
        except Exception as e:
            print(f"Error describing image: {str(e)}")
            return ""
| |
|
| |
|
@tool
def transcribe_audio(audio_path: str) -> str:
    """Transcribe an MP3 file.

    Args:
        audio_path: Path to the MP3 audio file.

    Returns:
        Transcribed text as a string, or an empty string on failure.
    """
    try:
        model = whisper.load_model("small")
        result = model.transcribe(audio_path)
        # whisper's transcribe() returns a dict; the signature and
        # docstring promise a string, so return only the text field
        # (the original returned the whole dict).
        return result["text"]
    except Exception as e:
        error_msg = f"Error transcribing audio: {str(e)}"
        print(error_msg)
        return ""
| |
|
| |
|
@tool
def download_youtube_video(youtube_url: str, output_path: str) -> str:
    """Download a YouTube video as an MP4 file.

    Args:
        youtube_url: The YouTube video URL.
        output_path: Desired output path for the downloaded MP4 file.

    Returns:
        Path to the saved video file.
    """
    ydl_opts = {
        "format": "bestvideo+bestaudio/best",
        "outtmpl": output_path,
        "merge_output_format": "mp4",
        "quiet": True,
    }
    # Open devnull in a context manager so its file handle is closed
    # (the original leaked it), then silence yt-dlp's stderr output.
    with open(os.devnull, "w") as devnull:
        with contextlib.redirect_stderr(devnull):
            with yt_dlp.YoutubeDL(ydl_opts) as ydl:
                ydl.download([youtube_url])
    return output_path
| |
|
| |
|
@tool
def extract_audio_from_video(video_path: str, audio_output: str) -> str:
    """Extracts audio from an MP4 video file and saves it as MP3.

    Note: only the first 60 seconds of audio are extracted (t=60 below).

    Args:
        video_path: Path to the input MP4 video file.
        audio_output: Path for the output MP3 file.

    Returns:
        Path to the audio file, or an empty string on failure.
    """
    try:
        (
            ffmpeg.input(video_path)
            # t=60 caps extraction at 60 seconds of audio.
            .output(
                audio_output, format="mp3", acodec="libmp3lame", t=60
            )
            .overwrite_output()
            .run(quiet=True)
        )
        return audio_output
    except Exception as e:
        # Fixed copy-paste bug: message previously said "transcribing".
        error_msg = f"Error extracting audio: {str(e)}"
        print(error_msg)
        return ""
| |
|
| |
|
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with key "wiki_results" mapping to the formatted documents.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc.metadata["source"]}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
            for doc in search_docs
        ]
    )
    # Return annotation corrected: the function returns a dict, not a str.
    return {"wiki_results": formatted_search_docs}
| |
|
| |
|
@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with key "web_results" mapping to the formatted documents.
    """
    search_docs = TavilySearchResults(max_results=3).invoke(query)
    formatted_search_docs = "\n\n---\n\n".join(
        [
            f'<Document source="{doc["url"]}" title="{doc["title"]}" score="{doc.get("score", "")}">\n{doc["content"]}\n</Document>'
            for doc in search_docs
        ]
    )
    # Return annotation corrected: the function returns a dict, not a str.
    return {"web_results": formatted_search_docs}
| |
|
| |
|
@tool
def arxiv_search(query: str) -> dict:
    """Search Arxiv for a paper.

    Args:
        query: The search query to retrieve a specific paper, consisting
            of title and/or authors name and/or year of publication.

    Returns:
        A dict with key "arvix_results" mapping to the formatted documents.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        [
            (
                f'<Document title="{doc.metadata.get("Title", "")}" '
                f'published="{doc.metadata.get("Published", "")}" '
                f'authors="{doc.metadata.get("Authors", "")}">\n'
                f'Summary: {doc.metadata.get("Summary", "")}\n\n'
                f"{doc.page_content}\n"
                f"</Document>"
            )
            for doc in search_docs
        ]
    )
    # NOTE(review): "arvix_results" looks like a typo for "arxiv_results",
    # but downstream consumers may depend on this exact key — confirm
    # before renaming.
    return {"arvix_results": formatted_search_docs}
| |
|
| |
|
@tool
def add(numbers: List[float]) -> float:
    """Calculates the sum of a list of numbers.

    Args:
        numbers: A list of numeric values to be summed.

    Returns:
        The sum of all numbers in the list (0 for an empty list).
    """
    # Fold with operator.add, starting at 0 — equivalent to sum().
    return reduce(operator.add, numbers, 0)
| |
|
| |
|
@tool
def multiply(numbers: List[float]) -> float:
    """Calculates the product of a list of numbers.

    Args:
        numbers: A list of numeric values to be multiplied.

    Returns:
        The product of all numbers in the list (1.0 for an empty list).
    """
    # Explicit accumulation loop, equivalent to reduce(operator.mul, ..., 1.0).
    product = 1.0
    for value in numbers:
        product *= value
    return product
| |
|
| |
|
@tool
def divide(a: int, b: int) -> float:
    """Divide a and b.

    Args:
        a: first number
        b: second number

    Returns:
        The true-division quotient a / b.
    """
    # True division via operator; raises ZeroDivisionError when b == 0,
    # same as the plain `a / b` expression.
    return operator.truediv(a, b)
| |
|