CTPC's picture
Update tools.py
63e4748 verified
import pandas as pd
from langchain_community.tools import DuckDuckGoSearchRun, TavilySearchResults
from langchain_core.tools import tool
from langchain.schema import HumanMessage, AIMessage, SystemMessage
from langchain_google_genai import ChatGoogleGenerativeAI
import base64
# Shared Gemini chat model used by the image and audio tools below.
google_llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash-lite",
)
#IMAGE_TOOLS
@tool
def extract_text(img_path: str) -> str:
    """
    Extract text from an image file using a multimodal model.

    Args:
        img_path: Path to a local image file.

    Returns:
        The text the model extracted from the image, with surrounding
        whitespace removed. An empty string is returned when the file cannot
        be read or the model call fails (the error is printed).
    """
    import mimetypes

    try:
        # Read the image and encode it as base64 for the data URI.
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the file's actual image type instead of
        # always claiming PNG; fall back to PNG when the type is unknown.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the multimodal model and return its cleaned-up answer.
        response = google_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Best-effort tool: report the problem and hand back an empty result
        # rather than propagating the exception to the caller.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""
@tool
def describe_image(img_path: str) -> str:
    """
    Takes a local image file path and returns a detailed description of the image.

    Args:
        img_path (str): Path to a local image file. Remote URLs are not
            supported; the file is opened from disk.

    Returns:
        str: A detailed description of the image content, with surrounding
        whitespace removed. An empty string is returned when the file cannot
        be read or the model call fails (the error is printed).
    """
    import mimetypes

    try:
        # Read the image and encode it as base64 for the data URI.
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the file's actual image type instead of
        # always claiming PNG; fall back to PNG when the type is unknown.
        mime_type, _ = mimetypes.guess_type(img_path)
        if not mime_type or not mime_type.startswith("image/"):
            mime_type = "image/png"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Provide a detailed description from this image. "
                            "Return descriptive text only."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the multimodal model and return its cleaned-up answer.
        response = google_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Best-effort tool: report the problem and hand back an empty result
        # rather than propagating the exception to the caller.
        error_msg = f"Error describing image: {str(e)}"
        print(error_msg)
        return ""
#AUDIO_TOOLS
@tool
def transcribe_audio(audio_path: str) -> str:
    """
    Transcribe audio from a file using a multimodal model.

    Args:
        audio_path: Path to a local audio file.

    Returns:
        A single string containing the transcribed text, with surrounding
        whitespace removed. An empty string is returned when the file cannot
        be read or the model call fails (the error is printed).
    """
    import os

    try:
        # Read the audio and encode it as base64.
        with open(audio_path, "rb") as audio_file:
            audio_base64 = base64.b64encode(audio_file.read()).decode()

        # Declare the format that matches the file instead of always claiming
        # WAV. Unknown or missing extensions keep the previous "wav" default.
        # NOTE(review): the accepted set mirrors the audio types Gemini
        # documents; confirm against the deployed model if more are needed.
        extension = os.path.splitext(audio_path)[1].lstrip(".").lower()
        known_formats = {"wav", "mp3", "aiff", "aac", "ogg", "flac"}
        audio_format = extension if extension in known_formats else "wav"

        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Transcribe the following audio input:"
                        ),
                    },
                    {
                        "type": "input_audio",
                        "input_audio": {
                            "data": audio_base64,
                            "format": audio_format,
                        },
                    },
                ]
            )
        ]

        # Call the multimodal model and return its cleaned-up transcript.
        response = google_llm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Best-effort tool: report the problem and hand back an empty result
        # rather than propagating the exception to the caller.
        error_msg = f"Error transcribing audio: {str(e)}"
        print(error_msg)
        return ""
#WEB_SEARCH_TOOL
@tool
def web_search(query: str) -> str:
    """Perform a web search and return the top 5 results."""
    # `search_depth` was previously misspelled, so the requested "basic"
    # depth likely never reached the Tavily tool. `max_results` is spelled
    # out so the "top 5" promise above does not depend on a library default.
    search_tool = TavilySearchResults(max_results=5, search_depth="basic")
    # The Tavily tool's output is passed through unchanged.
    return search_tool.invoke(query)
#FILE_PARSE_TOOL
@tool
def read_file(file_path: str) -> str:
    """
    Reads a text based file and returns its content as a string.

    Supported formats are .txt, .py, .csv and .xlsx; the extension is matched
    case-insensitively.

    Args:
        file_path (str): The path to the file.

    Returns:
        str: The content of the file. Plain-text files (.txt, .py) are
        returned verbatim; .csv and .xlsx files are loaded with pandas and
        rendered with ``DataFrame.to_string()``.

    Raises:
        ValueError: If the file extension is not one of the supported formats.
    """
    # Compare against a lowercased copy so "DATA.CSV" is treated like
    # "data.csv", while the original path is still used to open the file.
    lowered_path = file_path.lower()

    if lowered_path.endswith((".txt", ".py")):
        # Explicit UTF-8 keeps the result independent of the platform locale.
        with open(file_path, "r", encoding="utf-8") as file:
            return file.read()
    if lowered_path.endswith(".csv"):
        return pd.read_csv(file_path).to_string()
    if lowered_path.endswith(".xlsx"):
        return pd.read_excel(file_path).to_string()

    raise ValueError(
        "Unsupported file format. Only .txt, .py, .csv, and .xlsx are supported."
    )