Agent_Course_Final_Assignment

Sleeping

App Files Community

giulia-fontanella committed on Jun 4, 2025

Commit

41f6453

verified ·

1 Parent(s): 07b040d

Update agent.py

Browse files

Files changed (1) hide show

agent.py +16 -2

agent.py CHANGED Viewed

@@ -4,7 +4,7 @@ from langgraph.prebuilt import ToolNode
 from langgraph.graph import START, StateGraph
 from langgraph.prebuilt import tools_condition
 from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
-from tools import extract_text
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_openai import ChatOpenAI
 from typing import TypedDict, Annotated, Optional
@@ -20,7 +20,7 @@ class BasicAgent():
         search_tool = DuckDuckGoSearchRun()
         vision_llm = ChatOpenAI(model="gpt-4o")
-        self.tools = [extract_text, search_tool]
         self.chat_with_tools = chat.bind_tools(self.tools)
         self._initialize_graph()
         print("BasicAgent initialized.")
@@ -71,6 +71,20 @@ class BasicAgent():
             Returns:
                 A single string containing the search result or summary.
         """
         sys_msg = SystemMessage(content=f"""
         You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].

 from langgraph.graph import START, StateGraph
 from langgraph.prebuilt import tools_condition
 from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
+from tools import extract_text, describe_image
 from langchain_community.tools import DuckDuckGoSearchRun
 from langchain_openai import ChatOpenAI
 from typing import TypedDict, Annotated, Optional
         search_tool = DuckDuckGoSearchRun()
         vision_llm = ChatOpenAI(model="gpt-4o")
+        self.tools = [extract_text, describe_image, search_tool]
         self.chat_with_tools = chat.bind_tools(self.tools)
         self._initialize_graph()
         print("BasicAgent initialized.")
             Returns:
                 A single string containing the search result or summary.
+        describe_image(img_path: str, query: str) -> str:
+            Generate a detailed description of an image using a multimodal model.
+            This function reads a local image file, encodes it, and sends it to a
+            vision-capable language model to obtain a comprehensive, natural language
+            description of the image's content, including its objects, actions, and context,
+            following a specific query.
+            Args:
+                img_path: A string path to a local image file (e.g., PNG, JPEG).
+                query: Information to extract from the image
+            Returns:
+                A single string containing a detailed, human-readable description of the image.
         """
         sys_msg = SystemMessage(content=f"""
         You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].