Update langgraph_agent.py
Browse files- langgraph_agent.py +85 -268
langgraph_agent.py
CHANGED
|
@@ -1,268 +1,85 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
-
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
|
| 15 |
-
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
|
| 21 |
-
|
| 22 |
-
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
|
| 37 |
-
|
| 38 |
-
|
| 39 |
-
|
| 40 |
-
|
| 41 |
-
|
| 42 |
-
|
| 43 |
-
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
| 56 |
-
|
| 57 |
-
|
| 58 |
-
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
|
| 78 |
-
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
|
| 82 |
-
|
| 83 |
-
|
| 84 |
-
|
| 85 |
-
|
| 86 |
-
# Tool: classify a file by its extension and return either its text content
# (.txt/.py/.xlsx) or a placeholder message for media the model handles itself.
# Every outcome is reported as a dict; errors use the single key "file_error".
# NOTE(review): the docstring is kept verbatim because `@tool` presumably
# publishes it to the model as the tool description -- confirm before rewording.
@tool
def read_file_content(file_path: str) -> Dict[str, str]:
    """Reads the content of a file and returns its primary information. For text/code/excel, returns content. For media, indicates it's a blob for LLM processing."""
    # Media extension -> label used for "file_type" and inside the placeholder text.
    media_kinds = {
        ".mp4": "video",
        ".avi": "video",
        ".mov": "video",
        ".mkv": "video",
        ".webm": "video",
        ".mp3": "audio",
        ".jpeg": "image",
        ".jpg": "image",
        ".png": "image",
    }
    try:
        _, file_extension = os.path.splitext(file_path)
        file_extension = file_extension.lower()

        # Prioritize handling of video, audio, and image files for direct LLM processing
        media_kind = media_kinds.get(file_extension)
        if media_kind is not None:
            # Media files are never opened here, so a missing file must be
            # detected explicitly; otherwise a nonexistent path would be
            # reported as "detected" and the FileNotFoundError handler below
            # could never fire for media.
            if not os.path.isfile(file_path):
                raise FileNotFoundError(file_path)
            return {
                "file_type": media_kind,
                "file_name": file_path,
                "file_content": f"{media_kind.capitalize()} file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this {media_kind} content directly as a blob.",
            }

        # Handle text and code files
        if file_extension in (".txt", ".py"):
            with open(file_path, "r", encoding="utf-8") as f:
                content = f.read()
            return {"file_type": "text/code", "file_name": file_path, "file_content": content}

        # Handle Excel files
        if file_extension == ".xlsx":
            df = pd.read_excel(file_path)
            content = df.to_string()
            return {"file_type": "excel", "file_name": file_path, "file_content": content}

        return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3, .mp4, .avi, .mov, .mkv, .webm files are recognized."}

    except FileNotFoundError:
        return {"file_error": f"File not found: {file_path}. Please ensure the file exists in the environment."}
    except Exception as e:
        # Tool boundary: any other failure (decode error, bad workbook, ...) is
        # reported back to the agent instead of raising.
        return {"file_error": f"Error reading file {file_path}: {e}"}
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
# Tool: run a snippet of Python and report what it printed.
# Returns {"execution_result": <stripped stdout>} on success, or
# {"execution_error": "<ExceptionType>: <message>"} when the code raises.
# NOTE(review): the docstring is kept verbatim because `@tool` presumably
# publishes it to the model as the tool description -- confirm before rewording.
@tool
def python_interpreter(code: str) -> Dict[str, str]:
    """Executes Python code and returns its standard output. If there's an error during execution, it returns the error message."""
    # SECURITY: exec() runs the supplied code with this process's full
    # privileges (file system, network, environment variables). The code comes
    # from the model / user input, so this should only be used inside a sandbox.
    captured_stdout = io.StringIO()

    # A single dict is used as both globals and locals. With two separate
    # dicts, exec() behaves as if the code were inside a class body: top-level
    # functions cannot see other top-level names or imports and fail with
    # NameError. "__name__" is set so scripts with a main guard run as scripts.
    namespace = {"__name__": "__main__"}

    try:
        with contextlib.redirect_stdout(captured_stdout):
            exec(code, namespace)
    except Exception as e:
        # Include the exception type: str(e) alone can be uninformative
        # (e.g. str(KeyError("x")) is just "'x'").
        return {"execution_error": f"{type(e).__name__}: {e}"}

    return {"execution_result": captured_stdout.getvalue().strip()}
|
| 135 |
-
|
| 136 |
-
# --- Youtube Tool (Remains the same) ---
|
| 137 |
-
# Tool: simulated YouTube question answering. Any URL that looks like a YouTube
# link receives the same canned summary; everything else receives an error dict.
# NOTE(review): the docstring is kept verbatim because `@tool` presumably
# publishes it to the model as the tool description -- confirm before rewording.
@tool
def Youtube(url: str, question: str) -> Dict[str, str]:
    """
    Tells about the YouTube video identified by the given URL, answering a question about it.
    Note: This is a simulated response. In a real application, this would interact with a YouTube API
    or a video analysis service to get actual video information and transcripts.
    """
    print(f"Youtube called with URL: {url}, Question: {question}")

    # No real YouTube API is called; a real implementation would query the
    # YouTube Data API or a transcription/analysis service here.
    is_demo_video = "https://www.youtube.com/watch?v=1htKBjuUWec" in url
    if not (is_demo_video or re.search(r'youtube\.com/watch\?v=|youtu\.be/', url)):
        return {"error": "Invalid or unrecognized YouTube URL.", "url": url}

    # Canned content describing the demo video, keyed by timestamp.
    timeline = {
        "00:00:00": "Someone remarks, 'Wow this coffee's great I was just thinking that yeah is that cinnamon chicory tea oak'",
        "00:00:11": "Teal'c takes a large gulp from a black mug",
        "00:00:24": "Teal'c reacts strongly, someone asks 'isn't that hot'",
        "00:00:26": "Someone agrees, 'extremely'",
    }
    summary = "The video titled 'Teal'c coffee first time' shows a scene where several individuals are reacting to a beverage, presumably coffee, that Teal'c is trying for the first time. Key moments include: A person off-screen remarking, 'Wow this coffee's great'; another asking if it's 'cinnamon chicory tea oak'; and Teal'c reacting strongly to the taste or temperature, stating 'isn't that hot' indicating he finds it very warm."

    return {
        "video_url": url,
        "question_asked": question,
        "video_summary": summary,
        "details": timeline,
    }
|
| 165 |
-
|
| 166 |
-
# --- END YOUTUBE TOOL ---
|
| 167 |
-
|
| 168 |
-
# --- Credentials, read from the environment (None when a variable is unset) ---
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY")
API_KEY = os.environ.get("GEMINI_API_KEY")  # same variable under a second name
# Kept for potential future Hugging Face uses; not used for describe_image.
HF_API_TOKEN = os.environ.get("HF_SPACE_TOKEN")

# Tools offered to the model (describe_image and arvix_search were removed).
tools = [
    multiply,
    add,
    subtract,
    divide,
    modulus,
    wiki_search,
    google_web_search,
    read_file_content,
    python_interpreter,
    Youtube,
]

# The system prompt is loaded once at import time; the path is resolved
# relative to the current working directory.
with open("prompt.txt", encoding="utf-8") as f:
    system_prompt = f.read()

sys_msg = SystemMessage(content=system_prompt)
|
| 185 |
-
|
| 186 |
-
def build_graph(provider: str = "gemini"):
    """Build and compile the agent graph: an LLM "assistant" node plus a tool node.

    Args:
        provider: "gemini" to use ChatGoogleGenerativeAI, or "huggingface" to
            use ChatHuggingFace backed by a HuggingFaceEndpoint.

    Returns:
        The object returned by ``StateGraph(MessagesState).compile()``.

    Raises:
        ValueError: If ``provider`` is neither "gemini" nor "huggingface".
    """
    # Reject unknown providers up front, before any client is constructed.
    if provider not in ("gemini", "huggingface"):
        raise ValueError("Invalid provider. Choose 'gemini' or 'huggingface'.")

    if provider == "gemini":
        llm = ChatGoogleGenerativeAI(
            model=MODEL,
            temperature=1.0,
            max_retries=2,
            api_key=GEMINI_API_KEY,
            max_tokens=5000,
        )
    else:
        endpoint = HuggingFaceEndpoint(
            url="https://api-inference.huggingface.co/models/Meta-DeepLearning/llama-2-7b-chat-hf",
        )
        llm = ChatHuggingFace(llm=endpoint, temperature=0)

    llm_with_tools = llm.bind_tools(tools)

    def assistant(state: MessagesState):
        """Send the system prompt plus the conversation so far to the tool-bound LLM."""
        # NOTE ON MEDIA FILES: when read_file_content reports an image/audio/video
        # file, only its placeholder text reaches the model. No binary data is
        # attached here; the code relies on the framework / model to handle the
        # media. Attaching bytes explicitly would mean rebuilding the relevant
        # HumanMessage with base64-encoded content parts before invoking.
        conversation = [sys_msg] + state["messages"]

        # NOTE(review): "recursion_limit" is passed as the run config of the LLM
        # call, not of the graph -- presumably intended for graph.invoke; confirm.
        reply = llm_with_tools.invoke(conversation, {"recursion_limit": 25})
        print(f"LLM Raw Response: {reply}")
        return {"messages": [reply]}

    # Wiring: START -> assistant; assistant -> tools when tools_condition routes
    # there; tools -> assistant to continue the loop.
    graph = StateGraph(MessagesState)
    graph.add_node("assistant", assistant)
    graph.add_node("tools", ToolNode(tools))
    graph.add_edge(START, "assistant")
    graph.add_conditional_edges("assistant", tools_condition)
    graph.add_edge("tools", "assistant")

    return graph.compile()
|
| 266 |
-
|
| 267 |
-
if __name__ == "__main__":
    # Running this module as a script performs no extra work.
    pass
|
|
|
|
| 1 |
+
You are a highly capable and intelligent assistant designed to answer questions and perform tasks using the following tools:
|
| 2 |
+
|
| 3 |
+
Available Tools:
|
| 4 |
+
|
| 5 |
+
- multiply(a: int, b: int): Multiply two integers.
|
| 6 |
+
- add(a: int, b: int): Add two integers.
|
| 7 |
+
- subtract(a: int, b: int): Subtract the second integer from the first.
|
| 8 |
+
- divide(a: int, b: int): Divide the first integer by the second. Division by zero raises an error.
|
| 9 |
+
- modulus(a: int, b: int): Return the remainder of dividing the first integer by the second.
|
| 10 |
+
- wiki_search(query: str): Search Wikipedia for up to 2 relevant documents. Use for general knowledge or historical info. Extract the main subject from the user's question as the query.
|
| 11 |
+
- google_web_search(query: str): Perform a web search via Google Custom Search. Use for current events, specific facts, or academic/research topics (e.g., arXiv).
|
| 12 |
+
When using this tool:
|
| 13 |
+
- Simplify queries to core keywords only.
|
| 14 |
+
- Format and URL-encode queries properly.
|
| 15 |
+
- If initial search fails, try up to two alternative simplified or rephrased queries.
|
| 16 |
+
- If still unsuccessful, state inability to find the information.
|
| 17 |
+
- read_file_content(file_path: str): Read raw content of a specified file. Use when the user references files (e.g., "attached file", "this document", "file_name:"). You are responsible for interpreting the content regardless of file type (text, code, image, audio, video, Excel).
|
| 18 |
+
- python_interpreter(code: str): Execute Python code and return output. Use when user provides Python code or after reading Python code from a file.
|
| 19 |
+
- Youtube(url: str, question: str): Answer questions about a YouTube video given its URL. Use when the user query contains a YouTube link.
|
| 20 |
+
|
| 21 |
+
Instructions for Using Your Tools:
|
| 22 |
+
|
| 23 |
+
1. File Handling (Highest Priority):
|
| 24 |
+
- If the user references a file, immediately use read_file_content(file_path=<filename>).
|
| 25 |
+
- Do not attempt to answer from general knowledge before reading the file.
|
| 26 |
+
- After reading, process the file content to answer the question.
|
| 27 |
+
- If the file contains Python code and the user asks for execution, use python_interpreter with the code.
|
| 28 |
+
- For other file types, process the raw content natively.
|
| 29 |
+
- If file content is missing or unreadable, state that you need the content to proceed.
|
| 30 |
+
|
| 31 |
+
2. URL Handling (Second Priority):
|
| 32 |
+
- If the query contains a URL (e.g., YouTube), first try to answer from your knowledge or by processing the URL content.
|
| 33 |
+
- If unable to answer or if specific video info is requested, use the Youtube tool.
|
| 34 |
+
- When using the Youtube tool:
|
| 35 |
+
- Identify the YouTube URL pattern.
|
| 36 |
+
- Use the user's specific question about the video if provided; otherwise, use "Tell me about this video."
|
| 37 |
+
- Integrate returned info, including timestamps if relevant.
|
| 38 |
+
- If the video lacks requested info, clearly state what the video shows.
|
| 39 |
+
|
| 40 |
+
3. General Questions (Third Priority):
|
| 41 |
+
- For questions without files or URLs, first attempt a direct answer from your knowledge.
|
| 42 |
+
- If you can answer directly, respond immediately in the format:
|
| 43 |
+
FINAL ANSWER: "<direct answer>"
|
| 44 |
+
- If you cannot answer directly or if the question requires calculation or search, use the appropriate tool(s):
|
| 45 |
+
- Use math tools (multiply, add, subtract, divide, modulus) for calculations.
|
| 46 |
+
- Use wiki_search for general knowledge or historical facts.
|
| 47 |
+
- Use google_web_search for current events, specific data, or academic topics.
|
| 48 |
+
|
| 49 |
+
Tool Argument Extraction and Query Formulation:
|
| 50 |
+
|
| 51 |
+
- Extract only essential arguments from the user's query (e.g., numbers for math, keywords for searches, file paths, code snippets, URLs).
|
| 52 |
+
- Keep queries short and focused by removing filler words and unnecessary phrases.
|
| 53 |
+
|
| 54 |
+
Tool Execution and Output Processing:
|
| 55 |
+
|
| 56 |
+
- Execute selected tools with correct arguments.
|
| 57 |
+
- Analyze outputs carefully. If output is indirect or partial, formulate follow-up queries within tool attempt limits.
|
| 58 |
+
- If a tool returns an error or no answer after reasonable attempts, state inability to determine the answer.
|
| 59 |
+
|
| 60 |
+
Decision to Stop and Provide Answer:
|
| 61 |
+
|
| 62 |
+
- Once you have sufficient information to answer fully and accurately, stop and provide the final answer.
|
| 63 |
+
- Do not call additional tools unnecessarily.
|
| 64 |
+
|
| 65 |
+
Answer Formatting Rules:
|
| 66 |
+
|
| 67 |
+
- Provide answers ONLY in the format:
|
| 68 |
+
FINAL ANSWER: "<direct answer or result>"
|
| 69 |
+
- If unable to answer, respond with:
|
| 70 |
+
FINAL ANSWER: ""
|
| 71 |
+
- For numbers, do NOT use commas or units (e.g., $ or %) unless explicitly requested.
|
| 72 |
+
- For strings, avoid articles and abbreviations; write digits as plain text unless specified.
|
| 73 |
+
- For comma-separated lists, apply the above rules to each element.
|
| 74 |
+
|
| 75 |
+
Examples:
|
| 76 |
+
|
| 77 |
+
- "What is 25 times 13?" → Use multiply
|
| 78 |
+
- "Who is Marie Curie according to Wikipedia?" → Use wiki_search
|
| 79 |
+
- "What's the weather like in London tomorrow?" → Use google_web_search(query='weather in London tomorrow')
|
| 80 |
+
- "Calculate the remainder of 100 divided by 7." → Use modulus
|
| 81 |
+
- "Please summarize the attached file 'document.txt'." → Use read_file_content(file_path='document.txt')
|
| 82 |
+
- "What is the output of this Python code: print(2 + 2)" → Use python_interpreter
|
| 83 |
+
- "Analyze the image in 'chart.png'." → Use read_file_content(file_path='chart.png') and process natively
|
| 84 |
+
- "Listen to 'speech.mp3' and tell me what is said." → Use read_file_content(file_path='speech.mp3')
|
| 85 |
+
- "Tell me about this video: https://www.youtube.com/watch" → Use Youtube tool if needed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|