Spaces:
Sleeping
Sleeping
Luigi D'Addona
committed on
Commit
·
e0568e5
1
Parent(s):
93c3b2a
aggiunto tool analyze_png_image
Browse files
agent.py
CHANGED
|
@@ -14,7 +14,8 @@ from langchain_google_genai import ChatGoogleGenerativeAI
|
|
| 14 |
|
| 15 |
# Local imports
|
| 16 |
from tools import get_search_tool, get_tavily_search_tool, get_wikipedia_tool, wikipedia_search, wikipedia_search_3,\
|
| 17 |
-
execute_python_code_from_file, download_taskid_file, analyze_excel_file, get_analyze_mp3_tool
|
|
|
|
| 18 |
|
| 19 |
# Nota: per i test in locale si usa il .env
|
| 20 |
# su HuggingFace invece si usano le variabili definite in Settings/"Variables and secrets"
|
|
@@ -59,8 +60,9 @@ chat = ChatGoogleGenerativeAI(
|
|
| 59 |
search_tool = get_tavily_search_tool()
|
| 60 |
#wikipedia_tool = get_wikipedia_tool()
|
| 61 |
analyze_mp3_tool = get_analyze_mp3_tool(chat)
|
|
|
|
| 62 |
|
| 63 |
-
tools = [search_tool, wikipedia_search_3, execute_python_code_from_file, download_taskid_file, analyze_excel_file, analyze_mp3_tool, arxiv_search]
|
| 64 |
|
| 65 |
# Bind tools to the model
|
| 66 |
chat_with_tools = chat.bind_tools(tools)
|
|
|
|
| 14 |
|
| 15 |
# Local imports
|
| 16 |
from tools import get_search_tool, get_tavily_search_tool, get_wikipedia_tool, wikipedia_search, wikipedia_search_3,\
|
| 17 |
+
execute_python_code_from_file, download_taskid_file, analyze_excel_file, get_analyze_mp3_tool,\
|
| 18 |
+
get_analyze_image_tool, arxiv_search
|
| 19 |
|
| 20 |
# Nota: per i test in locale si usa il .env
|
| 21 |
# su HuggingFace invece si usano le variabili definite in Settings/"Variables and secrets"
|
|
|
|
| 60 |
search_tool = get_tavily_search_tool()
|
| 61 |
#wikipedia_tool = get_wikipedia_tool()
|
| 62 |
analyze_mp3_tool = get_analyze_mp3_tool(chat)
|
| 63 |
+
analyze_png_tool = get_analyze_image_tool(chat)
|
| 64 |
|
| 65 |
+
tools = [search_tool, wikipedia_search_3, execute_python_code_from_file, download_taskid_file, analyze_excel_file, analyze_mp3_tool, analyze_png_tool, arxiv_search]
|
| 66 |
|
| 67 |
# Bind tools to the model
|
| 68 |
chat_with_tools = chat.bind_tools(tools)
|
tools.py
CHANGED
|
@@ -256,6 +256,51 @@ def get_analyze_mp3_tool(llm):
|
|
| 256 |
return analyze_mp3_file
|
| 257 |
|
| 258 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 259 |
@tool
|
| 260 |
def arxiv_search(query: str) -> str:
|
| 261 |
"""Search Arxiv for a query and return maximum 3 result.
|
|
|
|
| 256 |
return analyze_mp3_file
|
| 257 |
|
| 258 |
|
| 259 |
+
def get_analyze_image_tool(llm):
    """Build a LangChain tool that asks ``llm`` to describe an image file.

    The returned tool reads the file at ``image_path``, embeds it as a
    base64 ``data:image/png`` URL in a single ``HumanMessage`` and returns the
    model's textual answer. On any failure (unreadable file, model error) the
    error is printed and an empty string is returned, so the calling agent
    never sees an exception.

    Args:
        llm: Chat model exposing ``invoke(messages)``. It must accept image
            input, otherwise the call fails and the tool returns ``""``.

    Returns:
        The ``analyze_png_image`` tool, closed over ``llm``.
    """

    def _content_to_text(content):
        # Message content is not always a plain string: it may be a list of
        # parts (strings or {"type": "text", "text": ...} dicts). Calling
        # .strip() on such a list would raise and the answer would be lost.
        if isinstance(content, str):
            return content
        pieces = []
        for part in content or []:
            if isinstance(part, str):
                pieces.append(part)
            elif isinstance(part, dict) and part.get("type") == "text":
                pieces.append(part.get("text") or "")
        return "".join(pieces)

    # NOTE: the docstring below is kept verbatim on purpose; the @tool
    # decorator presumably exposes it to the model as the tool description.
    @tool
    def analyze_png_image(image_path: str) -> str:
        """
        Analyzes a PNG image and returns a detailed description of its content.
        This tool requires an LLM capable of processing images, such as Gemini 1.5 Pro or Gemini 2.0 Flash.
        """
        try:
            # Read image and encode as base64
            with open(image_path, "rb") as image_file:
                image_bytes = image_file.read()

            image_base64 = base64.b64encode(image_bytes).decode("utf-8")

            # Prepare the prompt including the base64 image data
            message = [
                HumanMessage(
                    content=[
                        {
                            "type": "text",
                            "text": (
                                "Provide a very detailed description of the content of this image. "
                                "Focus on objects, people, actions, text, and overall scene context. "
                                "Be as comprehensive as possible."
                            ),
                        },
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                        },
                    ]
                )
            ]

            # Call the vision-capable model
            response = llm.invoke(message)

            # Normalise str / list-of-parts content before stripping.
            return _content_to_text(response.content).strip()
        except Exception as e:
            # Deliberate best-effort: report the problem and hand the agent an
            # empty result instead of propagating the exception.
            print("Error analyzing image file:{} - {}".format(image_path, e))
            return ""

    return analyze_png_image
|
| 302 |
+
|
| 303 |
+
|
| 304 |
@tool
|
| 305 |
def arxiv_search(query: str) -> str:
|
| 306 |
"""Search Arxiv for a query and return maximum 3 result.
|