| from langgraph.graph.message import add_messages |
| from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage |
| from langgraph.prebuilt import ToolNode |
| from langgraph.graph import START, StateGraph |
| from langgraph.prebuilt import tools_condition |
| from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace |
| from tools import extract_text, describe_image |
| from langchain_community.tools import DuckDuckGoSearchRun |
| from langchain_openai import ChatOpenAI |
| from typing import TypedDict, Annotated, Optional |
|
|
|
|
class AgentState(TypedDict):
    """Graph state threaded through every node: the running conversation.

    The ``add_messages`` reducer appends (rather than replaces) new messages
    returned by a node, so each node only emits its delta.
    """
    # Full message history; merged via the add_messages reducer on each step.
    messages: Annotated[list[AnyMessage], add_messages]
|
|
|
|
class BasicAgent:
    """ReAct-style LangGraph agent.

    An ``assistant`` node (tool-bound chat model) alternates with a ``tools``
    node until the model stops emitting tool calls, then the final message
    content is returned to the caller.
    """

    # Plain-text tool documentation injected into the system prompt so the
    # model knows each tool's signature and contract. Hoisted to a class
    # constant so it is not rebuilt on every assistant invocation.
    _TOOL_DESCRIPTIONS = """
extract_text(img_path: str) -> str:
    Extract text from an image file using a multimodal model.

    Args:
        img_path: A url pointing to an image (e.g., PNG, JPEG).

    Returns:
        A single string containing the concatenated text extracted from each image.

search_tool(query: str) -> str:
    Search the web using the DuckDuckGoSearchRun to perform a search query and return a summarized textual result.

    Args:
        query: A string representing the search query.

    Returns:
        A single string containing the search result or summary.

describe_image(img_path: str, query: str) -> str:
    Generate a detailed description of an image using a multimodal model.
    This function reads a image from an url, encodes it, and sends it to a
    vision-capable language model to obtain a comprehensive, natural language
    description of the image's content, including its objects, actions, and context,
    following a specific query.

    Args:
        img_path: A url pointing to an image (e.g., PNG, JPEG).
        query: Information to extract from the image

    Returns:
        A single string containing a detailed, human-readable description of the image.
"""

    def __init__(self, llm):
        """Bind tools to *llm* and compile the agent graph.

        Args:
            llm: A HuggingFace endpoint (or compatible LLM) wrapped by
                ``ChatHuggingFace`` to drive the assistant node.
        """
        chat = ChatHuggingFace(llm=llm, verbose=True)
        search_tool = DuckDuckGoSearchRun()
        # NOTE(review): removed an unused `vision_llm = ChatOpenAI(model="gpt-4o")`
        # local — it instantiated an OpenAI client (needs OPENAI_API_KEY) that was
        # never referenced; the vision tools manage their own model access.
        self.tools = [extract_text, describe_image, search_tool]
        self.chat_with_tools = chat.bind_tools(self.tools)
        self._initialize_graph()
        print("BasicAgent initialized.")

    def _initialize_graph(self):
        """Build and compile the assistant <-> tools loop into ``self.agent``."""
        builder = StateGraph(AgentState)

        builder.add_node("assistant", self.assistant)
        builder.add_node("tools", ToolNode(self.tools))

        builder.add_edge(START, "assistant")
        # tools_condition routes to "tools" when the last AI message contains
        # tool calls, otherwise to END — this closes the ReAct loop.
        builder.add_conditional_edges("assistant", tools_condition)
        builder.add_edge("tools", "assistant")

        self.agent = builder.compile()

    def __call__(self, question: str) -> str:
        """Run the compiled graph on *question* and return the final answer text.

        Args:
            question: The user's question, sent as a single HumanMessage.

        Returns:
            The content of the last message produced by the graph.
        """
        print(f"Agent received question (first 50 chars): {question[:50]}...")
        messages = [HumanMessage(content=question)]
        response = self.agent.invoke({"messages": messages})
        answer = response['messages'][-1].content
        print(f"Agent returning answer: {answer}")
        return answer

    def assistant(self, state: AgentState):
        """Assistant node: invoke the tool-bound model on the system prompt
        plus the accumulated conversation, returning its reply as the state delta.
        """
        sys_msg = SystemMessage(content=f"""
You are a general AI assistant. I will ask you a question. Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings. If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise. If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise. If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
You have access to the following tools:\n{self._TOOL_DESCRIPTIONS}\n""")

        # add_messages reducer appends this reply to the running history.
        return {
            "messages": [self.chat_with_tools.invoke([sys_msg] + state["messages"])],
        }
|
|
| |