josecordeiro committed
Commit 39c0b3a · verified · 1 Parent(s): 81917a3

Upload 8 files

langgraph_agent.py ADDED
@@ -0,0 +1,23 @@
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition
from nodes import assistant, tools
from state import AgentState

## The graph
builder = StateGraph(AgentState)

# Define nodes: these do the work
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

# Define edges: these determine how the control flow moves
builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    # If the latest message requires a tool, route to tools;
    # otherwise, provide a direct response
    tools_condition,
)
builder.add_edge("tools", "assistant")
graph = builder.compile()
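
A minimal usage sketch (not part of this commit): once compiled, the graph is invoked with an initial messages list, optionally passing the Langfuse handler from models.py for tracing.

# Usage sketch; assumes the Gemini and Langfuse environment variables are set.
from langchain_core.messages import HumanMessage
from langgraph_agent import graph
from models import langfuse_handler

result = graph.invoke(
    {"messages": [HumanMessage(content="What is 6 * 7?")]},
    config={"callbacks": [langfuse_handler]},  # optional tracing
)
print(result["messages"][-1].content)
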
models.py ADDED
@@ -0,0 +1,28 @@
import os

from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langfuse.langchain import CallbackHandler

load_dotenv()

api_key = os.getenv("GEMINI_API_KEY")

# Initialize the Langfuse CallbackHandler for LangGraph/LangChain tracing
langfuse_handler = CallbackHandler()

# Vision/multimodal model used by the multimodal tools
vlm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
    max_retries=2,
    google_api_key=api_key,
)

# Main LLM driving the agent
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",  # alternatives: gemini-2.5-pro, gemini-2.5-flash-lite-preview-06-17
    temperature=0,
    max_retries=2,
    google_api_key=api_key,
    # thinking_budget=0,  # optionally disable thinking for lower latency
)
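
A quick smoke test of these clients might look like this (a sketch, not part of the commit; assumes GEMINI_API_KEY and the Langfuse credentials are present in .env):

from models import llm, langfuse_handler

reply = llm.invoke(
    "Reply with the single word: ok",
    config={"callbacks": [langfuse_handler]},
)
print(reply.content)
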
nodes.py ADDED
@@ -0,0 +1,38 @@
from langchain_core.messages import SystemMessage

from state import AgentState
from models import llm
from tools.multimodal_tools import extract_text, analyze_image_tool, analyze_audio_tool
from tools.math_tools import add, subtract, multiply, divide
from tools.search_tools import google_search_tool  # , search_tool, serpapi_search
from tools.youtube_tools import extract_youtube_transcript


tools = [
    extract_text,
    analyze_image_tool,
    analyze_audio_tool,
    extract_youtube_transcript,
    add,
    subtract,
    multiply,
    divide,
    # search_tool,
    google_search_tool,
]

llm_with_tools = llm.bind_tools(tools)


def assistant(state: AgentState):
    # A bare string in a message list is coerced to a human message,
    # so wrap the system prompt in an explicit SystemMessage.
    sys_msg = SystemMessage(content=(
        "You are a helpful assistant with access to tools. Understand user requests accurately. "
        "Use your tools when needed to answer effectively. Strictly follow all user instructions and constraints.\n"
        "Your final output should be a number, as few words as possible, or a comma-separated list "
        "of numbers and/or strings (no spaces after commas).\n"
        "If you are asked for a number, do not use commas as thousands separators, and do not use "
        "units such as $ or percent signs unless specified otherwise.\n"
        "If you are asked for a string, do not use articles, do not use abbreviations (e.g., for cities), "
        "and write digits in plain text unless specified otherwise.\n"
        "Write digits in full words only if asked.\n"
        "If you are asked for a comma-separated list, apply the above rules to each element.\n"
        "Never include reasoning, explanations, or extra words in your output.\n"
        "If the answer cannot be found, output 'unknown' unless instructed otherwise.\n"
        "IMPORTANT: your output must contain only the final answer in the specific format requested in "
        "the question, without any reasoning, explanations, or extra words. For example, if you are asked "
        "how many thousand and the result is 1000, your answer should be 1.\n"
    ))
    return {
        "messages": [llm_with_tools.invoke([sys_msg] + state["messages"])]
    }
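
A direct-invocation sketch of the node (hypothetical input, not part of the commit); since AgentState is a TypedDict, a plain dict works:

from langchain_core.messages import HumanMessage
from nodes import assistant

out = assistant({"messages": [HumanMessage(content="What is 2 + 2?")]})
# The returned AIMessage may carry tool calls instead of final text.
print(out["messages"][-1])
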
state.py ADDED
@@ -0,0 +1,7 @@
from typing import TypedDict, Annotated

from langchain_core.messages import AnyMessage
from langgraph.graph.message import add_messages


class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]
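
For reference, a small illustration (not part of the commit) of what the add_messages reducer does: updates returned by nodes are appended to the existing history rather than replacing it.

from langchain_core.messages import AIMessage, HumanMessage
from langgraph.graph.message import add_messages

history = [HumanMessage(content="hi")]
update = [AIMessage(content="hello")]
merged = add_messages(history, update)
print([m.content for m in merged])  # ['hi', 'hello']
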
tools/math_tools.py ADDED
@@ -0,0 +1,57 @@
from langchain_core.tools import tool
import operator


@tool("add_tool", parse_docstring=True)
def add(a: float, b: float) -> float:
    """Adds two numbers.

    Args:
        a (float): The first number.
        b (float): The second number.

    Returns:
        float: The sum of a and b.
    """
    return operator.add(a, b)


@tool("subtract_tool", parse_docstring=True)
def subtract(a: float, b: float) -> float:
    """Subtracts the second number from the first.

    Args:
        a (float): The first number (minuend).
        b (float): The second number (subtrahend).

    Returns:
        float: The result of subtracting b from a.
    """
    return operator.sub(a, b)


@tool("multiply_tool", parse_docstring=True)
def multiply(a: float, b: float) -> float:
    """Multiplies two numbers.

    Args:
        a (float): The first number.
        b (float): The second number.

    Returns:
        float: The product of a and b.
    """
    return operator.mul(a, b)


@tool("divide_tool", parse_docstring=True)
def divide(a: float, b: float) -> float | str:
    """Divides the first number by the second.

    Args:
        a (float): The numerator.
        b (float): The denominator.

    Returns:
        float | str: The result of dividing a by b, or an error message
            string if division by zero occurs.
    """
    if b == 0:
        return "Error: Cannot divide by zero."
    return operator.truediv(a, b)
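
Invocation sketch (not part of the commit): LangChain tools take a dict of arguments matching the parsed docstring schema.

from tools.math_tools import divide

print(divide.invoke({"a": 10, "b": 4}))  # 2.5
print(divide.invoke({"a": 1, "b": 0}))   # Error: Cannot divide by zero.
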
tools/multimodal_tools.py ADDED
@@ -0,0 +1,167 @@
import base64
import os

from models import vlm
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool


@tool("extract_text_tool", parse_docstring=True)
def extract_text(img_path: str) -> str:
    """Extract text from an image file using a multimodal model.

    Args:
        img_path (str): The path to the image file from which to extract text.

    Returns:
        str: The extracted text from the image, or an empty string if an error occurs.
    """
    try:
        # Read the image and encode it as base64
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Build a prompt that embeds the base64 image data
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = vlm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Log the error and fail soft with an empty string
        print(f"Error extracting text: {str(e)}")
        return ""


@tool("analyze_image_tool", parse_docstring=True)
def analyze_image_tool(user_query: str, img_path: str) -> str:
    """Answer a question by reasoning over the provided image.

    Args:
        user_query (str): The question to be answered based on the image.
        img_path (str): Path to the image file to be analyzed.

    Returns:
        str: The answer to the query based on the image content, or an empty string if an error occurs.
    """
    try:
        # Read the image and encode it as base64
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Build a prompt that embeds the base64 image data
        message = [
            HumanMessage(
                content=[
                    {"type": "text", "text": f"User query: {user_query}"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = vlm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Log the error and fail soft with an empty string
        print(f"Error analyzing image: {str(e)}")
        return ""


@tool("analyze_audio_tool", parse_docstring=True)
def analyze_audio_tool(user_query: str, audio_path: str) -> str:
    """Answer a question by reasoning over the provided audio file.

    Args:
        user_query (str): The question to be answered based on the audio content.
        audio_path (str): Path to the audio file (e.g., .mp3, .wav, .flac, .aac, .ogg).

    Returns:
        str: The answer to the query based on the audio content, or an error
            message/empty string if an error occurs.
    """
    try:
        # Determine the MIME type from the file extension
        _filename, file_extension = os.path.splitext(audio_path)
        file_extension = file_extension.lower()

        supported_formats = {
            ".mp3": "audio/mp3", ".wav": "audio/wav", ".flac": "audio/flac",
            ".aac": "audio/aac", ".ogg": "audio/ogg",
        }
        if file_extension not in supported_formats:
            return (f"Error: Unsupported audio file format '{file_extension}'. "
                    f"Supported extensions: {', '.join(supported_formats.keys())}.")
        mime_type = supported_formats[file_extension]

        # Read the audio file and encode it as base64
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

        # Build a prompt that embeds the base64 audio data
        message = [
            HumanMessage(
                content=[
                    {"type": "text", "text": f"User query: {user_query}"},
                    {
                        "type": "audio",
                        "source_type": "base64",
                        "mime_type": mime_type,
                        "data": audio_base64,
                    },
                ]
            )
        ]

        # Call the multimodal model
        response = vlm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Log the error and fail soft with an empty string
        print(f"Error analyzing audio: {str(e)}")
        return ""
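
Usage sketch with a hypothetical image path (not part of the commit):

from tools.multimodal_tools import analyze_image_tool

answer = analyze_image_tool.invoke({
    "user_query": "How many people are in this photo?",
    "img_path": "sample.png",  # hypothetical file
})
print(answer)
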
tools/search_tools.py ADDED
@@ -0,0 +1,45 @@
from dotenv import load_dotenv
from langchain_tavily import TavilySearch  # currently unused; for the disabled search_tool
from langchain_core.tools import tool
# import serpapi
from google import genai
from google.genai import types

load_dotenv()


@tool("google_search_tool", parse_docstring=True)
def google_search_tool(query: str) -> str:
    """Performs a Google Search using Gemini's grounding tool and returns the grounded response text.

    Args:
        query (str): The search query.

    Returns:
        str: The grounded response text from Gemini's Google Search tool, or an error message if it fails.
    """
    try:
        # Configure the client (reads the Gemini API key from the environment)
        client = genai.Client()
        # Define the grounding tool
        grounding_tool = types.Tool(
            google_search=types.GoogleSearch()
        )
        # Configure generation settings
        config = types.GenerateContentConfig(
            tools=[grounding_tool]
        )
        # Make the request
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=query,
            config=config,
        )

        # response.text can be None; fall back to an empty string
        text = response.text or ""
        print(f"\n\ngoogle_search_tool\n\nresponse.text:\n\n{text}\n\n")
        return text
    except Exception as e:
        return f"Error performing Google Search: {str(e)}"
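
Usage sketch (not part of the commit); assumes the Gemini API key is available to the google-genai client, and the query is illustrative:

from tools.search_tools import google_search_tool

print(google_search_tool.invoke({"query": "current CEO of Hugging Face"}))
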
tools/youtube_tools.py ADDED
@@ -0,0 +1,25 @@
from langchain_core.tools import tool
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled


@tool("youtube_transcript_extractor", parse_docstring=True)
def extract_youtube_transcript(youtube_url: str) -> str:
    """Extracts the transcript from a given YouTube video URL.

    Args:
        youtube_url (str): The URL of the YouTube video.

    Returns:
        str: The transcript as a single string, or an error message if the
            transcript cannot be found or an error occurs.
    """
    try:
        # Works for standard watch URLs of the form ...watch?v=<id>&...
        video_id = youtube_url.split("v=")[1].split("&")[0]
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        transcript = " ".join([item["text"] for item in transcript_list])
        return transcript
    except NoTranscriptFound:
        return "Error: No transcript found for this video. It might not be available in English."
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video."
    except Exception as e:
        return f"Error extracting transcript: {str(e)}"
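
Usage sketch with a hypothetical video URL (not part of the commit):

from tools.youtube_tools import extract_youtube_transcript

text = extract_youtube_transcript.invoke(
    {"youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"}
)
print(text[:200])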