giulia-fontanella's picture
Create tools.py
9b2bab8 verified
raw
history blame
1.73 kB
import base64
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage
def _guess_image_mime(img_path: str) -> str:
    """Return the image MIME type implied by the extension of *img_path*.

    Falls back to ``image/png`` when the extension is unknown or does not
    map to an image type.
    """
    import mimetypes

    mime_type, _ = mimetypes.guess_type(img_path)
    if mime_type and mime_type.startswith("image/"):
        return mime_type
    return "image/png"


def _content_to_text(content) -> str:
    """Flatten a model response's content into a single string.

    A plain string is returned unchanged. For a list, string items and the
    ``"text"`` value of dict items whose ``"type"`` is ``"text"`` are
    concatenated; every other item is ignored.
    """
    if isinstance(content, str):
        return content
    pieces = []
    for part in content:
        if isinstance(part, str):
            pieces.append(part)
        elif isinstance(part, dict) and part.get("type") == "text":
            pieces.append(str(part.get("text", "")))
    return "".join(pieces)


def extract_text(img_path: str) -> str:
    """Extract text from an image file using a multimodal model.

    The file is read as bytes, base64-encoded, embedded in a data URL and
    sent to ``vision_llm`` together with an instruction to return only the
    text found in the image.

    Args:
        img_path: Path of the image file to read.

    Returns:
        The model's reply with surrounding whitespace stripped, or an empty
        string if anything fails (the error is printed, not raised).
    """
    try:
        # Read image and encode as base64
        with open(img_path, "rb") as image_file:
            image_base64 = base64.b64encode(image_file.read()).decode("utf-8")

        # Label the payload with the file's real type instead of always
        # claiming PNG, so JPEG/WebP/GIF inputs are described correctly.
        mime_type = _guess_image_mime(img_path)

        # Prepare the prompt including the base64 image data
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {
                            "url": f"data:{mime_type};base64,{image_base64}"
                        },
                    },
                ]
            )
        ]

        # Call the vision-capable model.
        # NOTE(review): vision_llm is not defined in the visible part of this
        # file; presumably it is provided elsewhere -- confirm. If it is
        # missing, the NameError is caught below and "" is returned.
        response = vision_llm.invoke(message)

        # Content may be a plain string or a list of parts; normalise it
        # before stripping so list-form replies are not lost.
        return _content_to_text(response.content).strip()
    except Exception as e:
        # Deliberate best-effort: report the failure and return no text.
        error_msg = f"Error extracting text: {str(e)}"
        print(error_msg)
        return ""
def web_search(query: str):
    """Perform a web search using SerpAPI.

    The API key is read from the ``SERPAPI_API_KEY`` environment variable;
    when it is not set, the original placeholder value is used.

    Args:
        query: The search query string.

    Returns:
        The value of the response's ``"organic_results"`` entry, or an empty
        list when the response contains no such entry.
    """
    import os

    # Prefer a key supplied by the environment over the in-source placeholder.
    api_key = os.environ.get("SERPAPI_API_KEY", "your_serpapi_key")

    # NOTE(review): GoogleSearch is not imported in the visible part of this
    # file; presumably it comes from the SerpAPI client package -- confirm
    # that the import exists.
    search = GoogleSearch({
        "q": query,
        "num": 5,
        "api_key": api_key,
    })
    payload = search.get_dict()

    results = payload.get("organic_results")
    if results is None:
        # No organic results in the response: report any error it carries
        # and return an empty list instead of raising KeyError.
        if "error" in payload:
            print(f"Error during web search: {payload['error']}")
        return []
    return results