giulia-fontanella's picture
Update agent.py
e205ec9 verified
raw
history blame
4.42 kB
from langgraph.graph.message import add_messages
from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
from langgraph.prebuilt import ToolNode
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import tools_condition
from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
from tools import extract_text, describe_image
from langchain_community.tools import DuckDuckGoSearchRun
from langchain_openai import ChatOpenAI
from typing import TypedDict, Annotated, Optional
class AgentState(TypedDict):
    """Shared state flowing through the LangGraph graph.

    The ``add_messages`` reducer merges each node's returned messages into
    the existing list (append semantics) rather than overwriting it.
    """

    # Conversation history (human, AI, tool messages); merged via add_messages.
    messages: Annotated[list[AnyMessage], add_messages]
class BasicAgent:
    """Tool-calling agent built on a two-node LangGraph loop.

    The graph is ``START -> assistant``, with conditional routing from the
    assistant to a ``tools`` node whenever the model emits tool calls, and an
    edge back from ``tools`` to the assistant. Calling the instance with a
    question runs the graph and returns the final message's text.
    """

    def __init__(self, llm):
        """Wire up the chat model, tools, and compiled graph.

        Args:
            llm: A HuggingFace LLM (e.g. ``HuggingFaceEndpoint``) to be
                wrapped by ``ChatHuggingFace``.
        """
        chat = ChatHuggingFace(llm=llm, verbose=True)
        search_tool = DuckDuckGoSearchRun()
        # NOTE(review): the original constructed an unused
        # ChatOpenAI(model="gpt-4o") local here; it was never bound to any
        # tool or stored on self, so the dead assignment was removed.
        self.tools = [extract_text, describe_image, search_tool]
        # Model handle with tool schemas attached so it can emit tool calls.
        self.chat_with_tools = chat.bind_tools(self.tools)
        self._initialize_graph()
        print("BasicAgent initialized.")

    def _initialize_graph(self):
        """Build and compile the assistant <-> tools LangGraph state machine."""
        builder = StateGraph(AgentState)
        # Define nodes
        builder.add_node("assistant", self.assistant)
        builder.add_node("tools", ToolNode(self.tools))
        # Define edges: tools_condition routes to "tools" when the last AI
        # message contains tool calls, otherwise ends the graph.
        builder.add_edge(START, "assistant")
        builder.add_conditional_edges("assistant", tools_condition)
        builder.add_edge("tools", "assistant")
        # Compile the graph
        self.agent = builder.compile()

    def __call__(self, question: str) -> str:
        """Run the agent on one question and return the final answer text.

        Args:
            question: The user's question, passed as a single HumanMessage.

        Returns:
            The ``content`` of the last message produced by the graph
            (expected to end with the ``FINAL ANSWER: ...`` template —
            callers needing just the answer must strip the prefix).
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        messages = [HumanMessage(content=question)]
        response = self.agent.invoke({"messages": messages})
        answer = response['messages'][-1].content
        print(f"Agent returning answer: {answer}")
        return answer

    def assistant(self, state: AgentState):
        """Assistant node: invoke the tool-bound model on the conversation.

        Prepends a system prompt describing the answer format and the
        available tools, then returns the model's reply for add_messages to
        append to the state.
        """
        textual_description_of_tool="""
extract_text(img_path: str) -> str:
Extract text from an image file using a multimodal model.
Args:
img_path: A url pointing to an image (e.g., PNG, JPEG).
Returns:
A single string containing the concatenated text extracted from each image.
search_tool(query: str) -> str:
Search the web using the DuckDuckGoSearchRun to perform a search query and return a summarized textual result.
Args:
query: A string representing the search query.
Returns:
A single string containing the search result or summary.
describe_image(img_path: str, query: str) -> str:
Generate a detailed description of an image using a multimodal model.
This function reads a image from an url, encodes it, and sends it to a
vision-capable language model to obtain a comprehensive, natural language
description of the image's content, including its objects, actions, and context,
following a specific query.
Args:
img_path: A url pointing to an image (e.g., PNG, JPEG).
query: Information to extract from the image
Returns:
A single string containing a detailed, human-readable description of the image.
"""
        sys_msg = SystemMessage(content=f"""
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
You have access to the following tools:\n{textual_description_of_tool}\n""")
        return {
            "messages": [self.chat_with_tools.invoke([sys_msg] + state["messages"])],
        }