giulia-fontanella's picture
Update agent.py
e205ec9 verified
raw
history blame
4.42 kB
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
from langgraph.prebuilt import ToolNode
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import tools_condition
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from tools import extract_text, describe_image
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_openai import ChatOpenAI
from typing import TypedDict, Annotated, Optional
class AgentState(TypedDict):
    """Shared state flowing through the LangGraph graph.

    The ``add_messages`` reducer merges each node's returned messages into
    the existing list (append semantics) rather than overwriting it.
    """

    # Conversation history (human, AI, tool messages); merged via add_messages.
    messages: Annotated[list[AnyMessage], add_messages]
class BasicAgent:
    """Tool-calling agent built on a two-node LangGraph loop.

    The graph is ``START -> assistant``, with conditional routing from the
    assistant to a ``tools`` node whenever the model emits tool calls, and an
    edge back from ``tools`` to the assistant. Calling the instance with a
    question runs the graph and returns the final message's text.
    """

    def __init__(self, llm):
        """Wire up the chat model, tools, and compiled graph.

        Args:
            llm: A HuggingFace LLM (e.g. ``HuggingFaceEndpoint``) to be
                wrapped by ``ChatHuggingFace``.
        """
        chat = ChatHuggingFace(llm=llm, verbose=True)
        search_tool = DuckDuckGoSearchRun()
        # NOTE(review): the original constructed an unused
        # ChatOpenAI(model="gpt-4o") local here; it was never bound to any
        # tool or stored on self, so the dead assignment was removed.
        self.tools = [extract_text, describe_image, search_tool]
        # Model handle with tool schemas attached so it can emit tool calls.
        self.chat_with_tools = chat.bind_tools(self.tools)
        self._initialize_graph()
        print("BasicAgent initialized.")

    def _initialize_graph(self):
        """Build and compile the assistant <-> tools LangGraph state machine."""
        builder = StateGraph(AgentState)
        # Define nodes
        builder.add_node("assistant", self.assistant)
        builder.add_node("tools", ToolNode(self.tools))
        # Define edges: tools_condition routes to "tools" when the last AI
        # message contains tool calls, otherwise ends the graph.
        builder.add_edge(START, "assistant")
        builder.add_conditional_edges("assistant", tools_condition)
        builder.add_edge("tools", "assistant")
        # Compile the graph
        self.agent = builder.compile()

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return the final answer text.

        Args:
            question: The user's question, passed as a single HumanMessage.

        Returns:
            The ``content`` of the last message produced by the graph
            (expected to end with the ``FINAL ANSWER: ...`` template —
            callers needing just the answer must strip the prefix).
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        messages = [HumanMessage(content=question)]
        response = self.agent.invoke({"messages": messages})
        answer = response['messages'][-1].content
        print(f"Agent returning answer: {answer}")
        return answer

    def assistant(self, state: AgentState):
        """Assistant node: invoke the tool-bound model on the conversation.

        Prepends a system prompt describing the answer format and the
        available tools, then returns the model's reply for add_messages to
        append to the state.
        """
        textual_description_of_tool="""
extract_text(img_path: str) -> str:
Extract text from an image file using a multimodal model.
Args:
img_path: A url pointing to an image (e.g., PNG, JPEG).
Returns:
A single string containing the concatenated text extracted from each image.
search_tool(query: str) -> str:
Search the web using the DuckDuckGoSearchRun to perform a search query and return a summarized textual result.
Args:
query: A string representing the search query.
Returns:
A single string containing the search result or summary.
describe_image(img_path: str, query: str) -> str:
Generate a detailed description of an image using a multimodal model.
This function reads a image from an url, encodes it, and sends it to a
vision-capable language model to obtain a comprehensive, natural language
description of the image's content, including its objects, actions, and context,
following a specific query.
Args:
img_path: A url pointing to an image (e.g., PNG, JPEG).
query: Information to extract from the image
Returns:
A single string containing a detailed, human-readable description of the image.
"""
        sys_msg = SystemMessage(content=f"""
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
You have access to the following tools:\n{textual_description_of_tool}\n""")
        return {
            "messages": [self.chat_with_tools.invoke([sys_msg] + state["messages"])],
        }