Update langgraph_agent.py
langgraph_agent.py  (+52 −57)
@@ -5,8 +5,8 @@ import pandas as pd
 from typing import Dict, List, Union
 import re
 
-from PIL import Image as PILImage
-from huggingface_hub import InferenceClient
+from PIL import Image as PILImage  # Keep PIL for potential future use or if other parts depend on it, but describe_image is removed.
+from huggingface_hub import InferenceClient  # Keep InferenceClient for other potential HF uses, but describe_image is removed.
 
 from langgraph.graph import START, StateGraph, MessagesState
 from langgraph.prebuilt import tools_condition, ToolNode
@@ -82,12 +82,14 @@ def arvix_search(query: str) -> dict:
     )
     return {"arvix_results": formatted}
 
+# HF_API_TOKEN is no longer directly needed for describe_image, as that tool is removed,
+# but the InferenceClient initialization is kept in case other HF tools are added later.
 HF_API_TOKEN = os.getenv("HF_API_TOKEN")
 HF_INFERENCE_CLIENT = None
 if HF_API_TOKEN:
     HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
 else:
-    print("WARNING: HF_API_TOKEN not set.
+    print("WARNING: HF_API_TOKEN not set. If any other HF tools are used, they might not function.")
 
 @tool
 def read_file_content(file_path: str) -> Dict[str, str]:
@@ -105,12 +107,10 @@ def read_file_content(file_path: str) -> Dict[str, str]:
         content = df.to_string()
         return {"file_type": "excel", "file_name": file_path, "file_content": content}
     elif file_extension in (".jpeg", ".jpg", ".png"):
-
+        # For images, we indicate it's an image file and expect the LLM to handle the blob directly.
+        return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this image content directly."}
     elif file_extension == ".mp3":
         # For MP3, we indicate it's an audio file and expect the LLM to handle the blob directly.
-        # In a real Langchain setup, you might actually read the bytes here and pass them
-        # as a part of the message content to the LLM if it supports direct binary upload.
-        # For now, this tool simply confirms its type for the agent.
         return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. The LLM (Gemini 2.5 Pro) can process this audio content directly."}
     else:
         return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3 files are recognized."}
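The image and audio branches above dispatch on a file_extension variable that is defined earlier in read_file_content, outside this hunk. A plausible sketch of that missing line (an assumption; the hunk does not show it):

    # Presumably defined near the top of read_file_content (not shown in this hunk):
    file_extension = os.path.splitext(file_path)[1].lower()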
@@ -133,21 +133,6 @@ def python_interpreter(code: str) -> Dict[str, str]:
     except Exception as e:
         return {"execution_error": str(e)}
 
-@tool
-def describe_image(image_path: str) -> Dict[str, str]:
-    """Generates a textual description for an image file (JPEG, JPG, PNG) using an image-to-text model from the Hugging Face Inference API. Requires HF_API_TOKEN environment variable to be set."""
-    if not HF_INFERENCE_CLIENT:
-        return {"error": "Hugging Face API token not configured for image description. Cannot use this tool."}
-    try:
-        with open(image_path, "rb") as f:
-            image_bytes = f.read()
-        description = HF_INFERENCE_CLIENT.image_to_text(image_bytes)
-        return {"image_description": description, "image_path": image_path}
-    except FileNotFoundError:
-        return {"error": f"Image file not found: {image_path}. Please ensure the file exists."}
-    except Exception as e:
-        return {"error": f"Error describing image {image_path}: {str(e)}"}
-
 # --- Youtube Tool (Remains the same) ---
 @tool
 def Youtube(url: str, question: str) -> Dict[str, str]:
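The removed describe_image tool wrapped huggingface_hub's InferenceClient.image_to_text. If that capability is still needed outside the agent, a minimal standalone sketch looks like this (assuming HF_API_TOKEN is set; the getattr hedges across huggingface_hub versions, which have returned either a plain string or an ImageToTextOutput object):

    import os
    from huggingface_hub import InferenceClient

    client = InferenceClient(token=os.getenv("HF_API_TOKEN"))
    out = client.image_to_text("photo.png")  # accepts a local path, raw bytes, or a URL
    print(getattr(out, "generated_text", out))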
@@ -181,10 +166,10 @@ def Youtube(url: str, question: str) -> Dict[str, str]:
 # --- END YOUTUBE TOOL ---
 
 API_KEY = os.getenv("GEMINI_API_KEY")
-HF_API_TOKEN = os.getenv("HF_SPACE_TOKEN")
+HF_API_TOKEN = os.getenv("HF_SPACE_TOKEN")  # Kept for potential future HF uses, but not for describe_image
 GEMINI_API_KEY = os.getenv("GEMINI_API_KEY")
 
-# Update the tools list (removed
+# Update the tools list (removed describe_image)
 tools = [
     multiply, add, subtract, divide, modulus,
     wiki_search,
@@ -192,8 +177,7 @@ tools = [
     arvix_search,
     read_file_content,
     python_interpreter,
-    describe_image,
-    Youtube,  # <-- transcribe_audio has been removed
+    Youtube,
 ]
 
 with open("prompt.txt", "r", encoding="utf-8") as f:
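The body of build_graph is not shown in this diff, but given the imports (StateGraph, MessagesState, tools_condition, ToolNode), a typical wiring of this tools list would look roughly like the sketch below; the model name, node labels, and bind_tools call are assumptions, not the commit's code:

    from langchain_google_genai import ChatGoogleGenerativeAI

    llm = ChatGoogleGenerativeAI(model="gemini-2.5-pro", google_api_key=GEMINI_API_KEY)
    llm_with_tools = llm.bind_tools(tools)

    builder = StateGraph(MessagesState)
    builder.add_node("assistant", assistant)    # the assistant node defined inside build_graph
    builder.add_node("tools", ToolNode(tools))  # executes whichever tool the LLM calls
    builder.add_edge(START, "assistant")
    builder.add_conditional_edges("assistant", tools_condition)
    builder.add_edge("tools", "assistant")
    graph = builder.compile()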
@@ -224,38 +208,49 @@ def build_graph(provider: str = "gemini"):
     def assistant(state: MessagesState):
         messages_to_send = [sys_msg] + state["messages"]
 
-        # new_messages_to_send = []
-        # for msg in
+        # --- IMPORTANT NOTE ON HANDLING BINARY BLOB DATA FOR MULTIMODAL LLMs ---
+        # When read_file_content returns a file_type of "image" or "audio",
+        # the agent should be able to send the actual binary data of that file
+        # as part of the message to the LLM. LangChain's ChatGoogleGenerativeAI
+        # supports this via content parts in HumanMessage.
+        #
+        # For this setup, we're assuming the framework (LangGraph/LangChain)
+        # will correctly handle passing the actual file content when read_file_content
+        # is called and its output indicates a media type.
+        #
+        # A more explicit implementation in the assistant node might look like this
+        # for real binary file handling if the framework doesn't do it implicitly:
+        #
+        # new_messages_to_send = []
+        # for msg in state["messages"]:
+        #     if isinstance(msg, AIMessage) and msg.tool_calls:
+        #         # If a tool call to read_file_content happened in the previous turn
+        #         # and it returned a media type, we might need to get the file data
+        #         # and append it to the message parts. This logic is complex and
+        #         # depends heavily on how tool outputs are structured and passed.
+        #         # For simplicity in this template, we assume direct handling by the LLM
+        #         # if the tool output indicates media, and the file itself is accessible
+        #         # via the environment.
+        #         pass  # Keep original message; the tool output will follow
+        #     elif isinstance(msg, HumanMessage) and any(part.get("file_type") in ["image", "audio"] for part in msg.content if isinstance(part, dict)):
+        #         # Conceptual example for when the HumanMessage itself contains file data
+        #         # or a reference that needs to be resolved into data.
+        #         # You'd need to load the actual file bytes here,
+        #         # e.g., if msg.content were: [{"type": "file_reference", "file_path": "image.png"}]
+        #         # with open(msg.content[0]["file_path"], "rb") as f:
+        #         #     file_bytes = f.read()
+        #         # new_messages_to_send.append(
+        #         #     HumanMessage(
+        #         #         content=[
+        #         #             {"type": "text", "text": "Here is the media content:"},
+        #         #             {"type": "image_data" if "image" in msg.content[0]["file_type"] else "audio_data",
+        #         #              "data": base64.b64encode(file_bytes).decode("utf-8"),
+        #         #              "media_type": "image/png" if "image" in msg.content[0]["file_type"] else "audio/mp3"},
+        #         #         ]
+        #         #     )
+        #         # )
+        #     else:
+        #         new_messages_to_send.append(msg)
+        # llm_response = llm_with_tools.invoke([sys_msg] + new_messages_to_send)
+        # --- END IMPORTANT NOTE ---
 
         llm_response = llm_with_tools.invoke(messages_to_send)  # For now, keep as is, rely on framework
         print(f"LLM Raw Response: {llm_response}")
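The commented-out block above only gestures at explicit binary handling. A minimal runnable sketch of the same idea, using LangChain's standard multimodal content parts (the file path and prompt are placeholders; ChatGoogleGenerativeAI accepts base64 data URLs in image_url parts):

    import base64
    from langchain_core.messages import HumanMessage

    with open("image.png", "rb") as f:  # placeholder path
        b64 = base64.b64encode(f.read()).decode("utf-8")

    multimodal_msg = HumanMessage(content=[
        {"type": "text", "text": "Describe this image."},
        {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}},
    ])
    llm_response = llm_with_tools.invoke([sys_msg, multimodal_msg])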