import base64
import mimetypes

import ffmpeg
import pandas as pd
import yt_dlp
from langchain.tools import tool
from langchain_community.document_loaders import ArxivLoader, WikipediaLoader
from langchain_community.tools.tavily_search import TavilySearchResults
from langchain_core.messages import HumanMessage
|
|
|
|
@tool
def read_excel(file_path: str) -> str:
    """
    Extract readable text from an Excel file (.xlsx or .xls).

    Every sheet in the workbook is rendered as plain text (without the
    row index) under a ``Sheet: <name>`` header; sheets are separated by
    a blank line.

    Args:
        file_path: Path to the Excel file.

    Returns:
        A string representation of all sheets and their content, or an
        ``Error reading Excel file: ...`` message if anything fails.
    """
    try:
        # sheet_name=None makes pandas load every sheet into a
        # {sheet name: DataFrame} mapping.
        workbook = pd.read_excel(file_path, sheet_name=None)
        rendered_sheets = (
            f"Sheet: {name}\n{frame.to_string(index=False)}"
            for name, frame in workbook.items()
        )
        return "\n\n".join(rendered_sheets)
    except Exception as exc:
        # Failures are reported as text rather than raised so the caller
        # always receives a string.
        return f"Error reading Excel file: {exc}"
|
|
|
|
@tool
def read_python(file_path: str) -> str:
    """
    Extract source code from a Python (.py) file.

    Args:
        file_path: Path to the Python file.

    Returns:
        A string containing the full source code of the file, or an
        ``Error reading Python file: ...`` message if it cannot be read.
    """
    try:
        with open(file_path, "r", encoding="utf-8") as source_file:
            source_code = source_file.read()
    except Exception as exc:
        # Failures are reported as text rather than raised so the caller
        # always receives a string.
        return f"Error reading Python file: {exc}"
    return source_code
|
|
| |
class ExtractTextFromImage:
    """Callable that asks a vision-capable chat model to read the text in an image."""

    def __init__(self, multimodal_model):
        # Only the model's ``invoke`` method is used by this class.
        self.multimodal_model = multimodal_model

    @staticmethod
    def _to_data_url(img_path: str) -> str:
        """Read the file at ``img_path`` and return it as a base64 ``data:`` URL."""
        # Derive the MIME type from the file extension so JPEG/GIF/WebP
        # inputs are not mislabelled; fall back to PNG (the previous
        # hard-coded value) when the extension is unknown or not an image.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        with open(img_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("utf-8")
        return f"data:{mime_type};base64,{encoded}"

    def __call__(self, img_path: str) -> str:
        """
        Extract text from an image file.

        Args:
            img_path: A string representing the path to an image (e.g., PNG, JPEG).

        Returns:
            A single string containing the text extracted from the image,
            stripped of surrounding whitespace. Returns an empty string
            (after printing the error) if reading the file or calling the
            model fails.
        """
        try:
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Extract all the text from this image. "
                                "Return only the extracted text, no explanations."
                            ),
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": self._to_data_url(img_path)},
                        },
                    ]
                )
            ]
            response = self.multimodal_model.invoke(message)
            return response.content.strip()
        except Exception as e:
            # Best-effort: report the problem and hand back an empty result
            # instead of propagating the exception.
            error_msg = f"Error extracting text: {str(e)}"
            print(error_msg)
            return ""
|
|
|
|
class DescribeImage:
    """Callable that asks a vision-capable chat model to describe an image."""

    def __init__(self, multimodal_model):
        # Only the model's ``invoke`` method is used by this class.
        self.multimodal_model = multimodal_model

    @staticmethod
    def _to_data_url(img_path: str) -> str:
        """Read the file at ``img_path`` and return it as a base64 ``data:`` URL."""
        # Derive the MIME type from the file extension so JPEG/GIF/WebP
        # inputs are not mislabelled; fall back to PNG (the previous
        # hard-coded value) when the extension is unknown or not an image.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        with open(img_path, "rb") as image_file:
            encoded = base64.b64encode(image_file.read()).decode("utf-8")
        return f"data:{mime_type};base64,{encoded}"

    def __call__(self, img_path: str, query: str) -> str:
        """
        Generate a detailed description of an image.
        This function reads an image from a local file path, base64-encodes
        it, and sends it to a vision-capable language model to obtain a
        comprehensive, natural language description of the image's content,
        including its objects, actions, and context, following a specific query.

        Args:
            img_path: A string representing the path to an image (e.g., PNG, JPEG).
            query: Information to extract from the image.

        Returns:
            A single string containing a detailed description of the image.
            Returns an empty string (after printing the error) if reading
            the file or calling the model fails.
        """
        try:
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Describe this image in rich detail. Include objects, "
                                "people, setting, background elements, and any inferred "
                                "actions or context. Avoid technical jargon. "
                                f"In particular, extract the following information: {query}"
                            ),
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": self._to_data_url(img_path)},
                        },
                    ]
                )
            ]
            response = self.multimodal_model.invoke(message)
            return response.content.strip()
        except Exception as e:
            # Best-effort: report the problem and hand back an empty result
            # instead of propagating the exception.
            error_msg = f"Error describing image: {str(e)}"
            print(error_msg)
            return ""
|
|
| |
class TranscribeAudio:
    """Callable that asks an audio-capable chat model to transcribe an audio file."""

    def __init__(self, multimodal_model):
        # Only the model's ``invoke`` method is used by this class.
        self.multimodal_model = multimodal_model

    @staticmethod
    def _audio_block(audio_path: str) -> dict:
        """Read ``audio_path`` and return it as a base64 audio content block."""
        # Guess the MIME type from the extension; default to MP3, which is
        # what this tool is documented to handle.
        mime_type, _ = mimetypes.guess_type(audio_path)
        if not mime_type or not mime_type.startswith("audio/"):
            mime_type = "audio/mpeg"

        with open(audio_path, "rb") as audio_file:
            encoded = base64.b64encode(audio_file.read()).decode("utf-8")

        # NOTE(review): this is the provider-agnostic base64 audio block
        # described in LangChain's multimodal how-to; confirm that the
        # configured model/integration accepts it.
        return {
            "type": "audio",
            "source_type": "base64",
            "mime_type": mime_type,
            "data": encoded,
        }

    def __call__(self, audio_path: str, query: str) -> str:
        """
        Transcribe an MP3 file.

        Args:
            audio_path: Path to the MP3 audio file.
            query: Accepted as part of the call signature; it is not used
                by the transcription prompt.

        Returns:
            Transcribed text as a string. Returns an empty string (after
            printing the error) if reading the file or calling the model
            fails.
        """
        try:
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Transcribe the speech from this audio file. "
                                "Return only the transcribed text, with no extra commentary."
                            ),
                        },
                        self._audio_block(audio_path),
                    ]
                )
            ]
            # Use the model stored by __init__; the previous attribute name
            # (``audio_llm``) was never assigned, so every call failed.
            response = self.multimodal_model.invoke(message)
            return response.content.strip()
        except Exception as e:
            # Best-effort: report the problem and hand back an empty result
            # instead of propagating the exception.
            error_msg = f"Error transcribing audio: {str(e)}"
            print(error_msg)
            return ""
|
|
|
|
@tool
def download_youtube_video(youtube_url: str, output_path: str) -> str:
    """
    Download a YouTube video as an MP4 file.

    Args:
        youtube_url: The YouTube video URL.
        output_path: Desired output path for the downloaded MP4 file.

    Returns:
        Path to the saved video file (the ``output_path`` that was passed in).
    """
    # Format selector: prefer separate MP4 video + M4A audio streams, then
    # a single MP4, then whatever is best; the result is merged to MP4.
    download_options = {
        "format": "bestvideo[ext=mp4]+bestaudio[ext=m4a]/best[ext=mp4]/best",
        "outtmpl": output_path,
        "merge_output_format": "mp4",
        "quiet": True,
    }
    urls = [youtube_url]
    with yt_dlp.YoutubeDL(download_options) as downloader:
        downloader.download(urls)
    return output_path
|
|
|
|
@tool
def extract_audio_from_video(
    video_path: str, audio_output: str, max_duration: int = 60
) -> str:
    """
    Extracts audio from an MP4 video file and saves it as MP3.

    Only the first ``max_duration`` seconds of audio are kept (60 by
    default). An existing file at ``audio_output`` is overwritten.

    Args:
        video_path: Path to the input MP4 video file.
        audio_output: Path for the output MP3 file.
        max_duration: Maximum number of seconds of audio to extract.
            A value of 0 or less disables the limit.

    Returns:
        Path to the audio file.

    Raises:
        RuntimeError: If ffmpeg fails; the message carries ffmpeg's stderr.
    """
    output_options = {"format": "mp3", "acodec": "libmp3lame"}
    if max_duration is not None and max_duration > 0:
        # ffmpeg's -t option: stop writing output after this many seconds.
        output_options["t"] = max_duration

    try:
        (
            ffmpeg
            .input(video_path)
            .output(audio_output, **output_options)
            .overwrite_output()
            .run(quiet=True)
        )
    except ffmpeg.Error as e:
        # Decode leniently so unusual bytes in ffmpeg's output cannot raise
        # a secondary UnicodeDecodeError that hides the real failure.
        stderr_text = e.stderr.decode(errors="replace") if e.stderr else ""
        raise RuntimeError(f"FFmpeg error: {stderr_text}") from e
    return audio_output
| |
| |
@tool
def wiki_search(query: str) -> dict:
    """Search Wikipedia for a query and return maximum 2 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single key, ``"wiki_results"``, whose value is the
        formatted text of all hits separated by ``---`` lines.
    """
    search_docs = WikipediaLoader(query=query, load_max_docs=2).load()
    formatted_search_docs = "\n\n---\n\n".join(
        # .get() keeps one document with incomplete metadata from failing
        # the whole search.
        f'<Document source="{doc.metadata.get("source", "")}" page="{doc.metadata.get("page", "")}"/>\n{doc.page_content}\n</Document>'
        for doc in search_docs
    )
    return {"wiki_results": formatted_search_docs}
|
|
|
|
@tool
def web_search(query: str) -> dict:
    """Search Tavily for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single key, ``"web_results"``, whose value is the
        formatted text of all hits separated by ``---`` lines (or the
        error text reported by the search tool).
    """
    search_results = TavilySearchResults(max_results=3).invoke(query)

    # Per the TavilySearchResults reference, a failed search is reported
    # as a plain string rather than raised; pass that text through.
    if isinstance(search_results, str):
        return {"web_results": search_results}

    # Successful results are plain dicts (``url`` / ``content``), not
    # Document objects, so they have no ``.metadata`` / ``.page_content``.
    formatted_search_docs = "\n\n---\n\n".join(
        f'<Document source="{hit.get("url", "")}" page=""/>\n{hit.get("content", "")}\n</Document>'
        for hit in search_results
    )
    return {"web_results": formatted_search_docs}
|
|
|
|
@tool
def arxiv_search(query: str) -> dict:
    """Search Arxiv for a query and return maximum 3 results.

    Args:
        query: The search query.

    Returns:
        A dict with a single key, ``"arvix_results"``, whose value is the
        formatted text of all hits separated by ``---`` lines. Each
        document body is truncated to its first 1000 characters.
    """
    search_docs = ArxivLoader(query=query, load_max_docs=3).load()

    formatted_entries = []
    for doc in search_docs:
        metadata = doc.metadata
        # NOTE(review): Arxiv documents may not carry a "source" key, which
        # made the previous hard lookup raise KeyError. Fall back to the
        # entry id or title when present — confirm against the loader's
        # metadata for the installed version.
        source = (
            metadata.get("source")
            or metadata.get("entry_id")
            or metadata.get("Title", "")
        )
        formatted_entries.append(
            f'<Document source="{source}" page="{metadata.get("page", "")}"/>\n{doc.page_content[:1000]}\n</Document>'
        )

    formatted_search_docs = "\n\n---\n\n".join(formatted_entries)
    # The key is spelled "arvix" (sic); kept unchanged because callers may
    # already read it.
    return {"arvix_results": formatted_search_docs}
|
|