# Agent_Course_Final_Assignment
#
# Sleeping
#
# File size: 5,587 Bytes

import os
from typing import Annotated, TypedDict

from langchain.tools import tool
from langchain_core.messages import AnyMessage, HumanMessage, SystemMessage
from langchain_openai import ChatOpenAI
from langfuse import Langfuse
from langfuse.langchain import CallbackHandler
from langgraph.graph import START, StateGraph
from langgraph.graph.message import add_messages
from langgraph.prebuilt import ToolNode, tools_condition

# Import the tool set from the sibling ``tools`` module. The absolute form is
# tried first; if that import fails, fall back to the package-relative form.
# Only ImportError (which includes ModuleNotFoundError) triggers the fallback,
# so unrelated failures and interrupts are no longer silently swallowed.
try:
    from tools import (
        DescribeImage,
        ExtractTextFromImage,
        arxiv_search,
        download_youtube_video,
        extract_audio_from_video,
        read_excel,
        read_python,
        transcribe_audio,
        web_search,
        wiki_search,
        add,
        divide,
        multiply,
    )
except ImportError:
    from .tools import (
        DescribeImage,
        ExtractTextFromImage,
        arxiv_search,
        download_youtube_video,
        extract_audio_from_video,
        read_excel,
        read_python,
        transcribe_audio,
        web_search,
        wiki_search,
        add,
        divide,
        multiply,
    )


class AgentState(TypedDict):
    """State dictionary passed between the nodes of the agent graph."""

    # Conversation history. The ``add_messages`` metadata in the annotation is
    # what LangGraph uses as the reducer when a node writes to this key.
    messages: Annotated[list[AnyMessage], add_messages]


class SmartAgent:
    """Tool-calling question-answering agent built on a LangGraph loop.

    The compiled graph has two nodes: ``assistant`` (the chat model bound to
    the tool list) and ``tools`` (a ``ToolNode`` over the same list). Execution
    starts at the assistant, is routed by ``tools_condition``, and every pass
    through the tools node returns to the assistant. Each invocation is run
    with a Langfuse callback handler attached.
    """

    def __init__(self, chat):
        """Initialize agent, multimodal model and tools.

        Args:
            chat: Chat model object exposing ``bind_tools``; the bound model
                is what the ``assistant`` node invokes.
        """
        self.multimodal_model = ChatOpenAI(model="gpt-4o")

        # The two image helpers are constructed around the multimodal model and
        # their bound methods are wrapped as LangChain tools.
        extract_text_from_image = tool(
            ExtractTextFromImage(self.multimodal_model).__call_extract_text_from_image__
        )
        describe_image = tool(
            DescribeImage(self.multimodal_model).__call_describe_image__
        )

        self.tools = [
            extract_text_from_image,
            describe_image,
            transcribe_audio,
            read_excel,
            read_python,
            wiki_search,
            web_search,
            arxiv_search,
            download_youtube_video,
            extract_audio_from_video,
            add,
            divide,
            multiply,
        ]
        self.chat_with_tools = chat.bind_tools(self.tools)
        self._initialize_graph()
        self._initialize_telemetry()

    def _initialize_graph(self):
        """Initialize and compile the agent graph, storing it in ``self.agent``."""
        builder = StateGraph(AgentState)

        # Nodes: the model step and the tool-execution step.
        builder.add_node("assistant", self.assistant)
        builder.add_node("tools", ToolNode(self.tools))

        # Edges: start at the assistant, let ``tools_condition`` choose the
        # next step after each assistant turn, and always hand tool output
        # back to the assistant.
        builder.add_edge(START, "assistant")
        builder.add_conditional_edges("assistant", tools_condition)
        builder.add_edge("tools", "assistant")

        self.agent = builder.compile()
        print("Agent initialized.")

    def _initialize_telemetry(self):
        """Initialize langfuse telemetry using CallbackHandler.

        Credentials are read from the ``LANGFUSE_PUBLIC_KEY`` and
        ``LANGFUSE_SECRET_KEY`` environment variables. The host is taken from
        ``LANGFUSE_HOST`` when that variable is set and non-empty, otherwise
        ``https://cloud.langfuse.com`` is used.
        """
        # The client is constructed only for its configuration side effect, so
        # it is not bound to a name.
        # NOTE(review): this relies on the argument-less CallbackHandler picking
        # up the already-initialized client, as described for the Langfuse v3
        # SDK -- confirm against the installed SDK version.
        Langfuse(
            public_key=os.getenv("LANGFUSE_PUBLIC_KEY"),
            secret_key=os.getenv("LANGFUSE_SECRET_KEY"),
            host=os.getenv("LANGFUSE_HOST") or "https://cloud.langfuse.com",
        )

        self.langfuse_handler = CallbackHandler()
        print("Telemetry initialized.")

    def __call__(self, question: str, file_name: str | None = None) -> str:
        """Call the agent, passing system prompt and eventual file name.

        Args:
            question: The question text sent as the human message.
            file_name: Optional name of a file the agent may use. When it is
                ``None`` or empty, the question is sent unchanged; otherwise a
                sentence naming the file is appended to the question.

        Returns:
            The ``content`` of the last message in the graph's final state.
        """
        # NOTE(review): the prompt first requires the "FINAL ANSWER: ..." format
        # and later forbids the words 'FINAL ANSWER: '. The text is left
        # untouched because changing it changes model behavior -- confirm which
        # instruction is intended.
        sys_msg = SystemMessage(
            content="""You are a general AI assistant. You will be asked a factual question. 

                1. Reason step by step and search for the information using available tools if needed.
                2. Finish your response with this exact format:  
                FINAL ANSWER: [YOUR FINAL ANSWER]

                IMPORTANT RULES for [YOUR FINAL ANSWER]:
                - If the answer is a number, provide only the number, with no commas, units, or symbols, do not write it as a string.
                - If the answer is a string, provide only the core noun phrase with no articles or abbreviations.
                - If the answer is a list, return a comma-separated list applying the above rules per item.
                - DO NOT include any other text before or after the final answer.
                - DO NOT explain or justify the answer after it is given.
                - DO NOT repeat the question.
                - DO NOT include the words 'FINAL ANSWER: '.

                Strictly follow these formatting rules.
           """
        )

        print(f"Agent received question: {question}.")

        if file_name:
            print(f"Provided file: {file_name}.")
            user_text = f"{question}. The file you have access to is {file_name}."
        else:
            user_text = question
        messages = [sys_msg, HumanMessage(content=user_text)]

        response = self.agent.invoke(
            {"messages": messages}, config={"callbacks": [self.langfuse_handler]}
        )
        answer = response["messages"][-1].content
        print(f"Agent returning answer: {answer}")
        return answer

    def assistant(self, state: AgentState):
        """Assistant node which calls the model initialized with tools.

        Args:
            state: Current graph state; ``state["messages"]`` is passed to the
                tool-bound chat model.

        Returns:
            A state update containing only the model's new message. The
            ``messages`` key is declared with the ``add_messages`` reducer, so
            the update is merged into the existing history; re-sending the
            whole history is unnecessary.
        """
        response = self.chat_with_tools.invoke(state["messages"])
        return {"messages": [response]}