giulia-fontanella committed on
Commit
41f6453
·
verified ·
1 Parent(s): 07b040d

Update agent.py

Browse files
Files changed (1) hide show
  1. agent.py +16 -2
agent.py CHANGED
@@ -4,7 +4,7 @@ from langgraph.prebuilt import ToolNode
4
  from langgraph.graph import START, StateGraph
5
  from langgraph.prebuilt import tools_condition
6
  from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
7
- from tools import extract_text
8
  from langchain_community.tools import DuckDuckGoSearchRun
9
  from langchain_openai import ChatOpenAI
10
  from typing import TypedDict, Annotated, Optional
@@ -20,7 +20,7 @@ class BasicAgent():
20
 
21
  search_tool = DuckDuckGoSearchRun()
22
  vision_llm = ChatOpenAI(model="gpt-4o")
23
- self.tools = [extract_text, search_tool]
24
  self.chat_with_tools = chat.bind_tools(self.tools)
25
  self._initialize_graph()
26
  print("BasicAgent initialized.")
@@ -71,6 +71,20 @@ class BasicAgent():
71
 
72
  Returns:
73
  A single string containing the search result or summary.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  """
75
  sys_msg = SystemMessage(content=f"""
76
  You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
 
4
  from langgraph.graph import START, StateGraph
5
  from langgraph.prebuilt import tools_condition
6
  from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
7
+ from tools import extract_text, describe_image
8
  from langchain_community.tools import DuckDuckGoSearchRun
9
  from langchain_openai import ChatOpenAI
10
  from typing import TypedDict, Annotated, Optional
 
20
 
21
  search_tool = DuckDuckGoSearchRun()
22
  vision_llm = ChatOpenAI(model="gpt-4o")
23
+ self.tools = [extract_text, describe_image, search_tool]
24
  self.chat_with_tools = chat.bind_tools(self.tools)
25
  self._initialize_graph()
26
  print("BasicAgent initialized.")
 
71
 
72
  Returns:
73
  A single string containing the search result or summary.
74
+
75
+ describe_image(img_path: str, query: str) -> str:
76
+ Generate a detailed description of an image using a multimodal model.
77
+ This function reads a local image file, encodes it, and sends it to a
78
+ vision-capable language model to obtain a comprehensive, natural language
79
+ description of the image's content, including its objects, actions, and context,
80
+ following a specific query.
81
+
82
+ Args:
83
+ img_path: A string path to a local image file (e.g., PNG, JPEG).
84
+ query: Information to extract from the image
85
+
86
+ Returns:
87
+ A single string containing a detailed, human-readable description of the image.
88
  """
89
  sys_msg = SystemMessage(content=f"""
90
  You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].