josecordeiro committed
Commit 39c0b3a · verified · 1 Parent(s): 81917a3

Upload 8 files

langgraph_agent.py ADDED
@@ -0,0 +1,23 @@
from langgraph.graph import START, StateGraph
from langgraph.prebuilt import ToolNode, tools_condition
from nodes import assistant, tools
from state import AgentState

## The graph
builder = StateGraph(AgentState)

# Define nodes: these do the work
builder.add_node("assistant", assistant)
builder.add_node("tools", ToolNode(tools))

# Define edges: these determine how the control flow moves
builder.add_edge(START, "assistant")
builder.add_conditional_edges(
    "assistant",
    # If the latest message requires a tool, route to tools;
    # otherwise, provide a direct response
    tools_condition,
)
builder.add_edge("tools", "assistant")
graph = builder.compile()
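
A minimal usage sketch (not part of this commit): once compiled, the graph is invoked with an initial messages list, optionally passing the Langfuse handler from models.py for tracing.

# Usage sketch; assumes the Gemini and Langfuse environment variables are set.
from langchain_core.messages import HumanMessage
from langgraph_agent import graph
from models import langfuse_handler

result = graph.invoke(
    {"messages": [HumanMessage(content="What is 6 * 7?")]},
    config={"callbacks": [langfuse_handler]},  # optional tracing
)
print(result["messages"][-1].content)
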
models.py ADDED
@@ -0,0 +1,28 @@
import os

from dotenv import load_dotenv
from langchain_google_genai import ChatGoogleGenerativeAI
from langfuse.langchain import CallbackHandler

load_dotenv()

api_key = os.getenv("GEMINI_API_KEY")

# Initialize the Langfuse CallbackHandler for LangGraph/LangChain tracing
langfuse_handler = CallbackHandler()

# Vision/multimodal model used by the multimodal tools
vlm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",
    temperature=0,
    max_retries=2,
    google_api_key=api_key,
)

# Main LLM driving the agent
llm = ChatGoogleGenerativeAI(
    model="gemini-2.5-flash",  # alternatives: gemini-2.5-pro, gemini-2.5-flash-lite-preview-06-17
    temperature=0,
    max_retries=2,
    google_api_key=api_key,
    # thinking_budget=0,  # optionally disable thinking for lower latency
)
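
A quick smoke test of these clients might look like this (a sketch, not part of the commit; assumes GEMINI_API_KEY and the Langfuse credentials are present in .env):

from models import llm, langfuse_handler

reply = llm.invoke(
    "Reply with the single word: ok",
    config={"callbacks": [langfuse_handler]},
)
print(reply.content)
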
nodes.py ADDED
@@ -0,0 +1,38 @@
from langchain_core.messages import SystemMessage

from state import AgentState
from models import llm
from tools.multimodal_tools import extract_text, analyze_image_tool, analyze_audio_tool
from tools.math_tools import add, subtract, multiply, divide
from tools.search_tools import google_search_tool  # , search_tool, serpapi_search
from tools.youtube_tools import extract_youtube_transcript


tools = [
    extract_text,
    analyze_image_tool,
    analyze_audio_tool,
    extract_youtube_transcript,
    add,
    subtract,
    multiply,
    divide,
    # search_tool,
    google_search_tool,
]

llm_with_tools = llm.bind_tools(tools)


def assistant(state: AgentState):
    # A bare string in a message list is coerced to a human message,
    # so wrap the system prompt in an explicit SystemMessage.
    sys_msg = SystemMessage(content=(
        "You are a helpful assistant with access to tools. Understand user requests accurately. "
        "Use your tools when needed to answer effectively. Strictly follow all user instructions and constraints.\n"
        "Your final output should be a number, as few words as possible, or a comma-separated list "
        "of numbers and/or strings (no spaces after commas).\n"
        "If you are asked for a number, do not use commas as thousands separators, and do not use "
        "units such as $ or percent signs unless specified otherwise.\n"
        "If you are asked for a string, do not use articles, do not use abbreviations (e.g., for cities), "
        "and write digits in plain text unless specified otherwise.\n"
        "Write digits in full words only if asked.\n"
        "If you are asked for a comma-separated list, apply the above rules to each element.\n"
        "Never include reasoning, explanations, or extra words in your output.\n"
        "If the answer cannot be found, output 'unknown' unless instructed otherwise.\n"
        "IMPORTANT: your output must contain only the final answer in the specific format requested in "
        "the question, without any reasoning, explanations, or extra words. For example, if you are asked "
        "how many thousand and the result is 1000, your answer should be 1.\n"
    ))
    return {
        "messages": [llm_with_tools.invoke([sys_msg] + state["messages"])]
    }
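
A direct-invocation sketch of the node (hypothetical input, not part of the commit); since AgentState is a TypedDict, a plain dict works:

from langchain_core.messages import HumanMessage
from nodes import assistant

out = assistant({"messages": [HumanMessage(content="What is 2 + 2?")]})
# The returned AIMessage may carry tool calls instead of final text.
print(out["messages"][-1])
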
state.py ADDED
@@ -0,0 +1,7 @@
from typing import TypedDict, Annotated

from langchain_core.messages import AnyMessage
from langgraph.graph.message import add_messages


class AgentState(TypedDict):
    messages: Annotated[list[AnyMessage], add_messages]
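
For reference, a small illustration (not part of the commit) of what the add_messages reducer does: updates returned by nodes are appended to the existing history rather than replacing it.

from langchain_core.messages import AIMessage, HumanMessage
from langgraph.graph.message import add_messages

history = [HumanMessage(content="hi")]
update = [AIMessage(content="hello")]
merged = add_messages(history, update)
print([m.content for m in merged])  # ['hi', 'hello']
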
tools/math_tools.py ADDED
@@ -0,0 +1,57 @@
from langchain_core.tools import tool
import operator


@tool("add_tool", parse_docstring=True)
def add(a: float, b: float) -> float:
    """Adds two numbers.

    Args:
        a (float): The first number.
        b (float): The second number.

    Returns:
        float: The sum of a and b.
    """
    return operator.add(a, b)


@tool("subtract_tool", parse_docstring=True)
def subtract(a: float, b: float) -> float:
    """Subtracts the second number from the first.

    Args:
        a (float): The first number (minuend).
        b (float): The second number (subtrahend).

    Returns:
        float: The result of subtracting b from a.
    """
    return operator.sub(a, b)


@tool("multiply_tool", parse_docstring=True)
def multiply(a: float, b: float) -> float:
    """Multiplies two numbers.

    Args:
        a (float): The first number.
        b (float): The second number.

    Returns:
        float: The product of a and b.
    """
    return operator.mul(a, b)


@tool("divide_tool", parse_docstring=True)
def divide(a: float, b: float) -> float | str:
    """Divides the first number by the second.

    Args:
        a (float): The numerator.
        b (float): The denominator.

    Returns:
        float | str: The result of dividing a by b, or an error message
            string if division by zero occurs.
    """
    if b == 0:
        return "Error: Cannot divide by zero."
    return operator.truediv(a, b)
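
Invocation sketch (not part of the commit): LangChain tools take a dict of arguments matching the parsed docstring schema.

from tools.math_tools import divide

print(divide.invoke({"a": 10, "b": 4}))  # 2.5
print(divide.invoke({"a": 1, "b": 0}))   # Error: Cannot divide by zero.
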
tools/multimodal_tools.py ADDED
@@ -0,0 +1,167 @@
import base64
import os

from models import vlm
from langchain_core.messages import HumanMessage
from langchain_core.tools import tool


@tool("extract_text_tool", parse_docstring=True)
def extract_text(img_path: str) -> str:
    """Extract text from an image file using a multimodal model.

    Args:
        img_path (str): The path to the image file from which to extract text.

    Returns:
        str: The extracted text from the image, or an empty string if an error occurs.
    """
    try:
        # Read the image and encode it as base64
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Build a prompt that embeds the base64 image data
        message = [
            HumanMessage(
                content=[
                    {
                        "type": "text",
                        "text": (
                            "Extract all the text from this image. "
                            "Return only the extracted text, no explanations."
                        ),
                    },
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = vlm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Log the error and fail soft with an empty string
        print(f"Error extracting text: {str(e)}")
        return ""


@tool("analyze_image_tool", parse_docstring=True)
def analyze_image_tool(user_query: str, img_path: str) -> str:
    """Answer a question by reasoning over the provided image.

    Args:
        user_query (str): The question to be answered based on the image.
        img_path (str): Path to the image file to be analyzed.

    Returns:
        str: The answer to the query based on the image content, or an empty string if an error occurs.
    """
    try:
        # Read the image and encode it as base64
        with open(img_path, "rb") as image_file:
            image_bytes = image_file.read()
        image_base64 = base64.b64encode(image_bytes).decode("utf-8")

        # Build a prompt that embeds the base64 image data
        message = [
            HumanMessage(
                content=[
                    {"type": "text", "text": f"User query: {user_query}"},
                    {
                        "type": "image_url",
                        "image_url": {"url": f"data:image/png;base64,{image_base64}"},
                    },
                ]
            )
        ]

        # Call the vision-capable model
        response = vlm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Log the error and fail soft with an empty string
        print(f"Error analyzing image: {str(e)}")
        return ""


@tool("analyze_audio_tool", parse_docstring=True)
def analyze_audio_tool(user_query: str, audio_path: str) -> str:
    """Answer a question by reasoning over the provided audio file.

    Args:
        user_query (str): The question to be answered based on the audio content.
        audio_path (str): Path to the audio file (e.g., .mp3, .wav, .flac, .aac, .ogg).

    Returns:
        str: The answer to the query based on the audio content, or an error
            message/empty string if an error occurs.
    """
    try:
        # Determine the MIME type from the file extension
        _filename, file_extension = os.path.splitext(audio_path)
        file_extension = file_extension.lower()

        supported_formats = {
            ".mp3": "audio/mp3", ".wav": "audio/wav", ".flac": "audio/flac",
            ".aac": "audio/aac", ".ogg": "audio/ogg",
        }
        if file_extension not in supported_formats:
            return (f"Error: Unsupported audio file format '{file_extension}'. "
                    f"Supported extensions: {', '.join(supported_formats.keys())}.")
        mime_type = supported_formats[file_extension]

        # Read the audio file and encode it as base64
        with open(audio_path, "rb") as audio_file:
            audio_bytes = audio_file.read()
        audio_base64 = base64.b64encode(audio_bytes).decode("utf-8")

        # Build a prompt that embeds the base64 audio data
        message = [
            HumanMessage(
                content=[
                    {"type": "text", "text": f"User query: {user_query}"},
                    {
                        "type": "audio",
                        "source_type": "base64",
                        "mime_type": mime_type,
                        "data": audio_base64,
                    },
                ]
            )
        ]

        # Call the multimodal model
        response = vlm.invoke(message)
        return response.content.strip()
    except Exception as e:
        # Log the error and fail soft with an empty string
        print(f"Error analyzing audio: {str(e)}")
        return ""
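
Usage sketch with a hypothetical image path (not part of the commit):

from tools.multimodal_tools import analyze_image_tool

answer = analyze_image_tool.invoke({
    "user_query": "How many people are in this photo?",
    "img_path": "sample.png",  # hypothetical file
})
print(answer)
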
tools/search_tools.py ADDED
@@ -0,0 +1,45 @@
from dotenv import load_dotenv
from langchain_tavily import TavilySearch  # currently unused; for the disabled search_tool
from langchain_core.tools import tool
# import serpapi
from google import genai
from google.genai import types

load_dotenv()


@tool("google_search_tool", parse_docstring=True)
def google_search_tool(query: str) -> str:
    """Performs a Google Search using Gemini's grounding tool and returns the grounded response text.

    Args:
        query (str): The search query.

    Returns:
        str: The grounded response text from Gemini's Google Search tool, or an error message if it fails.
    """
    try:
        # Configure the client (reads the Gemini API key from the environment)
        client = genai.Client()
        # Define the grounding tool
        grounding_tool = types.Tool(
            google_search=types.GoogleSearch()
        )
        # Configure generation settings
        config = types.GenerateContentConfig(
            tools=[grounding_tool]
        )
        # Make the request
        response = client.models.generate_content(
            model="gemini-2.5-flash",
            contents=query,
            config=config,
        )

        # response.text can be None; fall back to an empty string
        text = response.text or ""
        print(f"\n\ngoogle_search_tool\n\nresponse.text:\n\n{text}\n\n")
        return text
    except Exception as e:
        return f"Error performing Google Search: {str(e)}"
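
Usage sketch (not part of the commit); assumes the Gemini API key is available to the google-genai client, and the query is illustrative:

from tools.search_tools import google_search_tool

print(google_search_tool.invoke({"query": "current CEO of Hugging Face"}))
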
tools/youtube_tools.py ADDED
@@ -0,0 +1,25 @@
from langchain_core.tools import tool
from youtube_transcript_api import YouTubeTranscriptApi, NoTranscriptFound, TranscriptsDisabled


@tool("youtube_transcript_extractor", parse_docstring=True)
def extract_youtube_transcript(youtube_url: str) -> str:
    """Extracts the transcript from a given YouTube video URL.

    Args:
        youtube_url (str): The URL of the YouTube video.

    Returns:
        str: The transcript as a single string, or an error message if the
            transcript cannot be found or an error occurs.
    """
    try:
        # Works for standard watch URLs of the form ...watch?v=<id>&...
        video_id = youtube_url.split("v=")[1].split("&")[0]
        transcript_list = YouTubeTranscriptApi.get_transcript(video_id)
        transcript = " ".join([item["text"] for item in transcript_list])
        return transcript
    except NoTranscriptFound:
        return "Error: No transcript found for this video. It might not be available in English."
    except TranscriptsDisabled:
        return "Error: Transcripts are disabled for this video."
    except Exception as e:
        return f"Error extracting transcript: {str(e)}"
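
Usage sketch with a hypothetical video URL (not part of the commit):

from tools.youtube_tools import extract_youtube_transcript

text = extract_youtube_transcript.invoke(
    {"youtube_url": "https://www.youtube.com/watch?v=dQw4w9WgXcQ"}
)
print(text[:200])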