Update langgraph_agent.py
Browse files- langgraph_agent.py +24 -6
langgraph_agent.py
CHANGED
|
@@ -91,7 +91,7 @@ HF_INFERENCE_CLIENT = None
|
|
| 91 |
if HF_API_TOKEN:
|
| 92 |
HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
|
| 93 |
else:
|
| 94 |
-
print("WARNING: HF_API_TOKEN not set. Image tools will not function.")
|
| 95 |
|
| 96 |
@tool
|
| 97 |
def read_file_content(file_path: str) -> Dict[str, str]:
|
|
@@ -115,8 +115,8 @@ def read_file_content(file_path: str) -> Dict[str, str]:
|
|
| 115 |
# Indicate that it's an image and needs to be described by a specific tool
|
| 116 |
return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. Use 'describe_image' tool to get a textual description."}
|
| 117 |
elif file_extension == ".mp3":
|
| 118 |
-
# Indicate that it's an audio file and
|
| 119 |
-
return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected.
|
| 120 |
else:
|
| 121 |
return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3 files are recognized."}
|
| 122 |
except FileNotFoundError:
|
|
@@ -159,6 +159,24 @@ def describe_image(image_path: str) -> Dict[str, str]:
|
|
| 159 |
except Exception as e:
|
| 160 |
return {"error": f"Error describing image {image_path}: {str(e)}"}
|
| 161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
|
| 163 |
API_KEY = os.getenv("GEMINI_API_KEY")
|
| 164 |
HF_SPACE_TOKEN = os.getenv("HF_SPACE_TOKEN")
|
|
@@ -170,8 +188,8 @@ tools = [
|
|
| 170 |
wiki_search, web_search, arvix_search,
|
| 171 |
read_file_content,
|
| 172 |
python_interpreter,
|
| 173 |
-
describe_image,
|
| 174 |
-
|
| 175 |
]
|
| 176 |
|
| 177 |
|
|
@@ -184,7 +202,7 @@ def build_graph(provider: str = "gemini"):
|
|
| 184 |
"""Build the LangGraph agent with chosen LLM (default: Gemini)."""
|
| 185 |
if provider == "gemini":
|
| 186 |
llm = ChatGoogleGenerativeAI(
|
| 187 |
-
model= "gemini-1.5-flash-preview-05-20",
|
| 188 |
temperature=1.0,
|
| 189 |
max_retries=2,
|
| 190 |
api_key=GEMINI_API_KEY,
|
|
|
|
| 91 |
if HF_API_TOKEN:
|
| 92 |
HF_INFERENCE_CLIENT = InferenceClient(token=HF_API_TOKEN)
|
| 93 |
else:
|
| 94 |
+
print("WARNING: HF_API_TOKEN not set. Image and Audio tools will not function.")
|
| 95 |
|
| 96 |
@tool
|
| 97 |
def read_file_content(file_path: str) -> Dict[str, str]:
|
|
|
|
| 115 |
# Indicate that it's an image and needs to be described by a specific tool
|
| 116 |
return {"file_type": "image", "file_name": file_path, "file_content": f"Image file '{file_path}' detected. Use 'describe_image' tool to get a textual description."}
|
| 117 |
elif file_extension == ".mp3":
|
| 118 |
+
# Indicate that it's an audio file and needs to be transcribed by a specific tool
|
| 119 |
+
return {"file_type": "audio", "file_name": file_path, "file_content": f"Audio file '{file_path}' detected. Use 'transcribe_audio' tool to get the text transcription."}
|
| 120 |
else:
|
| 121 |
return {"file_type": "unsupported", "file_name": file_path, "file_content": f"Unsupported file type: {file_extension}. Only .txt, .py, .xlsx, .jpeg, .jpg, .png, .mp3 files are recognized."}
|
| 122 |
except FileNotFoundError:
|
|
|
|
| 159 |
except Exception as e:
|
| 160 |
return {"error": f"Error describing image {image_path}: {str(e)}"}
|
| 161 |
|
| 162 |
+
@tool
def transcribe_audio(audio_path: str) -> Dict[str, str]:
    """
    Transcribes an audio file (e.g., MP3) to text using an automatic speech recognition model
    from the Hugging Face Inference API. Requires HF_API_TOKEN environment variable to be set.

    Args:
        audio_path: Path to the audio file on local disk.

    Returns:
        On success, a dict with "audio_transcription" (the recognized text) and
        "audio_path". On failure, a dict with a single "error" key describing
        what went wrong; this function does not raise.
    """
    # The client is only created at import time when a token is available.
    if not HF_INFERENCE_CLIENT:
        return {"error": "Hugging Face API token not configured for audio transcription. Cannot use this tool."}
    try:
        with open(audio_path, "rb") as f:
            audio_bytes = f.read()
        result = HF_INFERENCE_CLIENT.automatic_speech_recognition(audio_bytes)

        # Newer huggingface_hub releases return a structured output object whose
        # transcript lives in `.text`; older releases returned the plain string.
        # Normalise to a string so the declared Dict[str, str] contract holds.
        text = getattr(result, "text", None)
        if text is None and isinstance(result, dict):
            text = result.get("text")
        if text is None:
            text = result
        return {"audio_transcription": str(text), "audio_path": audio_path}
    except FileNotFoundError:
        return {"error": f"Audio file not found: {audio_path}. Please ensure the file exists."}
    except Exception as e:
        # Tool boundary: report any API/network failure to the agent as data
        # rather than letting it propagate.
        return {"error": f"Error transcribing audio {audio_path}: {str(e)}"}
|
| 179 |
+
|
| 180 |
|
| 181 |
API_KEY = os.getenv("GEMINI_API_KEY")
|
| 182 |
HF_SPACE_TOKEN = os.getenv("HF_SPACE_TOKEN")
|
|
|
|
| 188 |
wiki_search, web_search, arvix_search,
|
| 189 |
read_file_content,
|
| 190 |
python_interpreter,
|
| 191 |
+
describe_image,
|
| 192 |
+
transcribe_audio, # Re-added tool
|
| 193 |
]
|
| 194 |
|
| 195 |
|
|
|
|
| 202 |
"""Build the LangGraph agent with chosen LLM (default: Gemini)."""
|
| 203 |
if provider == "gemini":
|
| 204 |
llm = ChatGoogleGenerativeAI(
|
| 205 |
+
model= "gemini-1.5-flash-preview-05-20",
|
| 206 |
temperature=1.0,
|
| 207 |
max_retries=2,
|
| 208 |
api_key=GEMINI_API_KEY,
|