Spaces:
Sleeping
Sleeping
import base64
import os

import pandas as pd
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain_community.tools import DuckDuckGoSearchRun, TavilySearchResults
from langchain_core.tools import tool
from langchain_google_genai import ChatGoogleGenerativeAI
#LLMs
# Shared Gemini chat model instance; every tool below routes its
# multimodal prompts through this single client.
google_llm = ChatGoogleGenerativeAI(model='gemini-2.0-flash-lite')
| #IMAGE_TOOLS | |
def extract_text(img_path: str) -> str:
    """
    Extract text from an image file using a multimodal model.

    Args:
        img_path: A local image file path (string).

    Returns:
        A single string with the text extracted from the image, or ""
        if reading the file or calling the model fails.
    """
    try:
        # Load the image and base64-encode it so it can be inlined
        # as a data URL in the message payload.
        with open(img_path, "rb") as fh:
            encoded = base64.b64encode(fh.read()).decode("utf-8")

        # One multimodal human message: the OCR instruction followed
        # by the inline image.
        instruction_part = {
            "type": "text",
            "text": (
                "Extract all the text from this image. "
                "Return only the extracted text, no explanations."
            ),
        }
        image_part = {
            "type": "image_url",
            "image_url": {"url": f"data:image/png;base64,{encoded}"},
        }
        prompt = [HumanMessage(content=[instruction_part, image_part])]

        # Ask the vision-capable model for the OCR result.
        reply = google_llm.invoke(prompt)
        return (reply.content + "\n\n").strip()
    except Exception as e:
        # Best-effort tool: log the failure and return an empty string
        # rather than raising.
        print(f"Error extracting text: {str(e)}")
        return ""
def describe_image(img_path: str) -> str:
    """
    Takes an image file path and returns a detailed description of the image.

    Args:
        img_path (str): Local file path to the image.

    Returns:
        str: A detailed description of the image content, or "" if reading
        the file or calling the model fails.
    """
    try:
        # Read image and encode as base64 so it can be inlined as a data URL.
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Prompt: the description instruction plus the inline base64 image.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Provide a detailed description from this image. "
                            "Return descriptive text only."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:image/png;base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model.
        response = google_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Bug fix: previously printed "Error extracting text" (copy-pasted
        # from extract_text); report the actual operation instead.
        error_msg = f"Error describing image: {str(e)}"
        print(error_msg)
        return ""
| #AUDIO_TOOLS | |
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribe audio from a file using a multimodal model.

    Args:
        audio_path: A local audio file path (string). The audio format is
            inferred from the file extension (e.g. ".mp3" -> "mp3");
            extension-less paths fall back to "wav", matching the old behavior.

    Returns:
        A single string containing the transcribed text, or "" if reading
        the file or calling the model fails.
    """
    try:
        # Read audio and encode as base64 for the inline payload.
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        audio_base64 = base64.b64encode(audio_bytes).decode()

        # Generalization: the format was previously hard-coded to "wav",
        # which mislabeled mp3/flac/etc. inputs. Infer it from the file
        # extension and keep "wav" as the fallback.
        ext = os.path.splitext(audio_path)[1].lstrip(".").lower()
        audio_format = ext or "wav"

        # Prompt: the transcription instruction plus the inline base64 audio.
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Transcribe the following audio input:"
                        ),
                    },
                    {
                        "type": "input_audio",
                        "input_audio": {
                            "data": audio_base64,
                            "format": audio_format,
                        },
                    },
                ]
            )
        ]

        # Call the audio-capable model (old comment wrongly said "vision").
        response = google_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Best-effort tool: log the failure and return an empty string.
        error_msg = f"Error transcribing audio: {str(e)}"
        print(error_msg)
        return ""
| #WEB_SEARCH_TOOL | |
| def web_search(query: str) -> str: | |
| """Perform a web search and return the top 5 results.""" | |
| #search_tool = DuckDuckGoSearchRun() | |
| search_tool = TavilySearchResults(searxch_depth='basic') | |
| result = search_tool.invoke(query) | |
| return result | |
| #FILE_PARSE_TOOL | |
def read_file(file_path: str) -> str:
    """
    Reads a text based file and returns its content as a string.

    Args:
        file_path (str): The path to the file.

    Returns:
        str: The content of the file. Tabular formats (.csv, .xlsx) are
        rendered via pandas ``to_string()``.

    Raises:
        ValueError: If the file extension is not one of .txt, .py, .csv, .xlsx.
    """
    # Case-insensitive extension check so e.g. "DATA.CSV" is accepted too.
    lowered = file_path.lower()
    if lowered.endswith(('.txt', '.py')):
        # Plain-text formats are returned verbatim (merged duplicate branches).
        with open(file_path, 'r') as file:
            return file.read()
    elif lowered.endswith('.csv'):
        return pd.read_csv(file_path).to_string()
    elif lowered.endswith('.xlsx'):
        return pd.read_excel(file_path).to_string()
    else:
        # Bug fix: the old message omitted .py even though it is supported.
        raise ValueError(
            "Unsupported file format. Only .txt, .py, .csv, and .xlsx are supported."
        )