rqueraud committed on
Commit
2099ec7
·
1 Parent(s): 4d5f444
Files changed (4) hide show
  1. poetry.lock +28 -9
  2. pyproject.toml +3 -1
  3. src/flexible_agent.py +137 -307
  4. src/tools.py +144 -15
poetry.lock CHANGED
@@ -5935,6 +5935,18 @@ files = [
5935
  {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"},
5936
  ]
5937
 
 
 
 
 
 
 
 
 
 
 
 
 
5938
  [[package]]
5939
  name = "six"
5940
  version = "1.17.0"
@@ -7317,19 +7329,26 @@ multidict = ">=4.0"
7317
  propcache = ">=0.2.1"
7318
 
7319
  [[package]]
7320
- name = "youtube-search-python"
7321
- version = "1.6.6"
7322
- description = "Search for YouTube videos, channels & playlists & get video information using link WITHOUT YouTube Data API v3"
7323
  optional = false
7324
- python-versions = ">=3.6"
7325
  groups = ["main"]
7326
  files = [
7327
- {file = "youtube-search-python-1.6.6.tar.gz", hash = "sha256:4568d1d769ecd7eb4bb8365f04eec6e364c5f70eec7b3765f543daebb135fcf5"},
7328
- {file = "youtube_search_python-1.6.6-py3-none-any.whl", hash = "sha256:f0d835278bc32335f2ded48ba119bef39cafb290d98648a64deb22f6c4a705f2"},
7329
  ]
7330
 
7331
- [package.dependencies]
7332
- httpx = ">=0.14.2"
 
 
 
 
 
 
 
7333
 
7334
  [[package]]
7335
  name = "zipp"
@@ -7466,4 +7485,4 @@ cffi = ["cffi (>=1.17,<2.0) ; platform_python_implementation != \"PyPy\" and pyt
7466
  [metadata]
7467
  lock-version = "2.1"
7468
  python-versions = ">=3.11,<3.12"
7469
- content-hash = "2905668a22145f92f7e5e93be056bef26befc214f63d5a75b4cd908b651c50ab"
 
5935
  {file = "shellingham-1.5.4.tar.gz", hash = "sha256:8dbca0739d487e5bd35ab3ca4b36e11c4078f3a234bfce294b0a0291363404de"},
5936
  ]
5937
 
5938
+ [[package]]
5939
+ name = "simpleeval"
5940
+ version = "1.0.3"
5941
+ description = "A simple, safe single expression evaluator library."
5942
+ optional = false
5943
+ python-versions = ">=3.9"
5944
+ groups = ["main"]
5945
+ files = [
5946
+ {file = "simpleeval-1.0.3-py3-none-any.whl", hash = "sha256:e3bdbb8c82c26297c9a153902d0fd1858a6c3774bf53ff4f134788c3f2035c38"},
5947
+ {file = "simpleeval-1.0.3.tar.gz", hash = "sha256:67bbf246040ac3b57c29cf048657b9cf31d4e7b9d6659684daa08ca8f1e45829"},
5948
+ ]
5949
+
5950
  [[package]]
5951
  name = "six"
5952
  version = "1.17.0"
 
7329
  propcache = ">=0.2.1"
7330
 
7331
  [[package]]
7332
+ name = "yt-dlp"
7333
+ version = "2025.9.26"
7334
+ description = "A feature-rich command-line audio/video downloader"
7335
  optional = false
7336
+ python-versions = ">=3.9"
7337
  groups = ["main"]
7338
  files = [
7339
+ {file = "yt_dlp-2025.9.26-py3-none-any.whl", hash = "sha256:36f5fbc153600f759abd48d257231f0e0a547a115ac7ffb05d5b64e5c7fdf8a2"},
7340
+ {file = "yt_dlp-2025.9.26.tar.gz", hash = "sha256:c148ae8233ac4ce6c5fbf6f70fcc390f13a00f59da3776d373cf88c5370bda86"},
7341
  ]
7342
 
7343
+ [package.extras]
7344
+ build = ["build", "hatchling (>=1.27.0)", "pip", "setuptools (>=71.0.2,<81)", "wheel"]
7345
+ curl-cffi = ["curl-cffi (>=0.5.10,<0.6.dev0 || >=0.10.dev0,<0.14) ; implementation_name == \"cpython\""]
7346
+ default = ["brotli ; implementation_name == \"cpython\"", "brotlicffi ; implementation_name != \"cpython\"", "certifi", "mutagen", "pycryptodomex", "requests (>=2.32.2,<3)", "urllib3 (>=2.0.2,<3)", "websockets (>=13.0)"]
7347
+ dev = ["autopep8 (>=2.0,<3.0)", "pre-commit", "pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)", "ruff (>=0.13.0,<0.14.0)"]
7348
+ pyinstaller = ["pyinstaller (>=6.13.0)"]
7349
+ secretstorage = ["cffi", "secretstorage"]
7350
+ static-analysis = ["autopep8 (>=2.0,<3.0)", "ruff (>=0.13.0,<0.14.0)"]
7351
+ test = ["pytest (>=8.1,<9.0)", "pytest-rerunfailures (>=14.0,<15.0)"]
7352
 
7353
  [[package]]
7354
  name = "zipp"
 
7485
  [metadata]
7486
  lock-version = "2.1"
7487
  python-versions = ">=3.11,<3.12"
7488
+ content-hash = "1da4648fb3d6c5c1af29971f0aa8c9eb7defb6895ab8962aedd627d6d8344ce4"
pyproject.toml CHANGED
@@ -16,7 +16,8 @@ langchain_huggingface = "*"
16
  langchain_community = "*"
17
  langchain_google_genai = "*"
18
  wikipedia = "*"
19
- youtube-search-python = "*"
 
20
  pillow = "*"
21
  langchain_experimental = "*"
22
  langchain-tavily = ">=0.2.11,<0.3.0"
@@ -32,6 +33,7 @@ langchain = "*"
32
  tesseract = ">=0.1.3,<0.2.0"
33
  unstructured = {extras = ["all-docs"], version = "*"}
34
  langchain-google-community = "^2.0.10"
 
35
 
36
 
37
  [build-system]
 
16
  langchain_community = "*"
17
  langchain_google_genai = "*"
18
  wikipedia = "*"
19
+ yt-dlp = "*"
20
+ youtube-transcript-api = "*"
21
  pillow = "*"
22
  langchain_experimental = "*"
23
  langchain-tavily = ">=0.2.11,<0.3.0"
 
33
  tesseract = ">=0.1.3,<0.2.0"
34
  unstructured = {extras = ["all-docs"], version = "*"}
35
  langchain-google-community = "^2.0.10"
36
+ simpleeval = "^1.0.3"
37
 
38
 
39
  [build-system]
src/flexible_agent.py CHANGED
@@ -13,13 +13,13 @@ try:
13
  # Try relative imports first (when used as package)
14
  from .tools import (
15
  wikipedia_search, youtube_search, decode_text,
16
- download_and_process_file, web_search
17
  )
18
  except ImportError:
19
  # Fall back to absolute imports (when run directly)
20
  from tools import (
21
  wikipedia_search, youtube_search, decode_text,
22
- download_and_process_file, web_search
23
  )
24
 
25
  from langchain_google_genai import ChatGoogleGenerativeAI
@@ -27,8 +27,8 @@ from langchain_google_genai import ChatGoogleGenerativeAI
27
 
28
  # --- Agent State following LangGraph pattern ---
29
  class AgentState(TypedDict):
30
- # The original question from the user
31
- question: str
32
 
33
  # Task ID for file downloads
34
  task_id: Optional[str]
@@ -36,17 +36,8 @@ class AgentState(TypedDict):
36
  # File classification results
37
  requires_file: Optional[bool]
38
 
39
- # File content if downloaded and processed
40
- file_content: Optional[str]
41
-
42
- # Search attempt counter to prevent infinite loops
43
- search_attempts: int
44
-
45
  # Final answer
46
  final_answer: Optional[str]
47
-
48
- # Messages for LLM interactions (for logging)
49
- messages: Annotated[List[BaseMessage], add_messages]
50
 
51
 
52
  # --- Flexible Tool-Based Agent ---
@@ -55,16 +46,19 @@ class FlexibleAgent:
55
 
56
  # Initialize Gemini chat model for LangChain integration
57
  self.chat = ChatGoogleGenerativeAI(
58
- # google_api_key=os.getenv("GEMINI_API_KEY"),
59
- # model="gemini-2.0-flash-lite",
60
- model="gemini-2.5-flash-lite",
61
  temperature=0.0,
62
  max_tokens=None
63
  )
64
 
65
- # Define available tools (excluding file detection - now handled by graph nodes)
66
  self.tools = [
67
- wikipedia_search, youtube_search, decode_text, web_search
 
 
 
 
 
68
  ]
69
 
70
  # Bind tools to the LLM
@@ -141,270 +135,112 @@ class FlexibleAgent:
141
  print(f"Logged full conversation to: {filename}")
142
 
143
  def classify_file_requirement(self, state: AgentState):
144
- """LLM-based classification of whether the question requires a file attachment"""
145
- question = state["question"]
146
 
147
- # For the first message, include the question
148
- if not state.get("messages"):
149
- # Initial message with question
150
- first_message = HumanMessage(content=question)
151
-
152
- # Classification prompt - no need to repeat the question
153
- classification_prompt = """
154
- Analyze the question above and determine if it requires accessing an attached file.
155
-
156
- Determine if the question mentions attached files (like "I've attached", "attached as", "see attached", etc.)
157
-
158
- If the question requires a file, answer "yes". If not, answer "no".
159
- If a url is provided, answer "no".
160
- """
161
-
162
- # Call the LLM with both messages
163
- messages = [first_message, HumanMessage(content=classification_prompt)]
164
- response = self.chat.invoke(messages)
165
-
166
- # Update messages for tracking
167
- new_messages = [first_message, HumanMessage(content=classification_prompt), response]
168
- else:
169
- # Subsequent call - messages already exist
170
- classification_prompt = """
171
- Analyze the question and determine if it requires accessing an attached file.
172
 
173
- If the question requires a file, answer "yes". If not, answer "no".
174
- If a url is provided, answer "no".
175
- """
176
-
177
- # Call the LLM
178
- messages = state["messages"] + [HumanMessage(content=classification_prompt)]
179
- response = self.chat.invoke(messages)
180
 
181
- # Update messages for tracking
182
- new_messages = state.get("messages", []) + [
183
- HumanMessage(content=classification_prompt),
184
- response
185
- ]
186
-
187
- # Parse the response to determine if file is required
188
- response_text = response.content.lower()
189
- requires_file = response_text == "yes"
190
-
191
- # Return state updates
192
- return {
193
- "requires_file": requires_file,
194
- "messages": new_messages
195
- }
196
 
197
  def download_file_content(self, state: AgentState):
198
- """Download and process the file content"""
199
- task_id = state["task_id"]
200
 
201
  if not task_id:
202
- error_msg = "Error: No task_id provided for file download"
203
- # Add error message to conversation
204
- new_messages = state.get("messages", []) + [
205
- HumanMessage(content=error_msg)
206
- ]
207
  return {
208
- "file_content": error_msg,
209
- "messages": new_messages
210
  }
211
 
212
  try:
213
- # Use the download tool (but call it directly instead of as a tool)
214
- file_result = download_and_process_file(task_id)
215
-
216
- # Add file content to conversation without repeating the question
217
- file_message = f"File Content:\n{file_result}"
218
-
219
- new_messages = state.get("messages", []) + [
220
- HumanMessage(content=file_message)
221
- ]
222
 
 
223
  return {
224
- "file_content": file_result,
225
- "messages": new_messages
226
  }
227
 
228
  except Exception as e:
229
- error_msg = f"Error downloading file: {str(e)}"
230
- new_messages = state.get("messages", []) + [
231
- HumanMessage(content=error_msg)
232
- ]
233
  return {
234
- "file_content": error_msg,
235
- "messages": new_messages
236
  }
237
 
238
- def answer_with_tools(self, state: AgentState):
239
- """Use tools to answer the question (with or without file content)"""
240
- # Increment search attempts
241
- search_attempts = state.get("search_attempts", 0) + 1
242
-
243
- # Create system prompt for tool usage - question is already in conversation
244
- system_prompt = f"""
245
- Use your tools to answer the question above.
246
- """
247
-
248
- # Use existing conversation context
249
- messages = state.get("messages", []) + [HumanMessage(content=system_prompt)]
250
-
251
- # Let the LLM decide what tools to use
252
  response = self.chat_with_tools.invoke(messages)
253
-
254
- # Update messages for tracking
255
- new_messages = state.get("messages", []) + [
256
- HumanMessage(content=system_prompt),
257
- response
258
- ]
259
-
260
- return {"messages": new_messages, "search_attempts": search_attempts}
261
 
262
- def plan_approach(self, state: AgentState):
263
- """Decide whether to use tools or answer directly"""
264
- # Create system prompt for decision making - no need to repeat the question
265
- planning_prompt = """Now you need to decide how to answer the question above.
266
 
267
- Should you use tools to answer this question? Respond with ONLY "tools" or "direct":
 
 
268
 
269
- - ALWAYS use "tools" if:
270
- * The user explicitly mentions "search", "Wikipedia", "YouTube", or any tool name
271
- * The question asks about factual information that would benefit from Wikipedia search
272
- * The question mentions YouTube videos or asks about video content
273
- * The question provides image URLs to analyze
274
- * The question involves encoded/backwards text
275
- * The user specifically requests using external sources
276
 
277
- - Use "direct" if:
278
- * It's a simple math calculation AND no search is requested
279
- * It's a general knowledge question you can answer confidently AND no search is requested
280
- * It's asking for an opinion or creative content
281
- * No tools would significantly improve the answer AND no search is requested
282
- """
283
-
284
- # Get LLM decision using existing conversation context
285
- messages = state.get("messages", []) + [HumanMessage(content=planning_prompt)]
286
- response = self.chat.invoke(messages)
287
-
288
- # Update messages for tracking
289
- new_messages = state.get("messages", []) + [
290
- HumanMessage(content=planning_prompt),
291
- response
292
- ]
293
-
294
- return {"messages": new_messages}
295
 
296
- def answer_directly(self, state: AgentState):
297
- """Answer the question directly without tools"""
298
- # Create system prompt - question is already in conversation
299
- system_prompt = "You are a helpful assistant. Answer the question above directly and accurately."
300
-
301
- # Use existing conversation context
302
- messages = state.get("messages", []) + [AIMessage(content=system_prompt)]
303
 
304
- # Get response
305
  response = self.chat.invoke(messages)
306
 
307
- # Update messages for tracking
308
- new_messages = state.get("messages", []) + [
309
- AIMessage(content=system_prompt),
310
- response
311
- ]
312
-
313
- return {"messages": new_messages}
314
 
315
- def provide_final_answer(self, state: AgentState):
316
- """Provide a final answer based on tool results, or request more searches if needed"""
317
- search_attempts = state.get("search_attempts", 0)
318
-
319
- # If we've reached the search limit, force a final answer
320
- if search_attempts >= 5:
321
- final_prompt = """You have reached the maximum number of search attempts (5).
322
-
323
- Based on all the information gathered in this conversation, provide the best possible answer to the original question.
324
- If you could not find the specific information requested, clearly state that the information could not be found."""
325
-
326
- # Use regular chat (without tools) to force a final answer
327
- messages = state.get("messages", []) + [HumanMessage(content=final_prompt)]
328
- response = self.chat.invoke(messages)
329
-
330
- new_messages = state.get("messages", []) + [
331
- HumanMessage(content=final_prompt),
332
- response
333
- ]
334
-
335
- return {"messages": new_messages}
336
- else:
337
- # Allow more searches if under the limit
338
- final_prompt = f"""Based on the conversation above and any tool results, either:
339
-
340
- 1. Provide a clear and direct answer to the original question if you have enough information, OR
341
- 2. Use additional tools to search for missing information
342
-
343
- SEARCH ATTEMPTS: {search_attempts}/5 (Maximum 5 attempts)
344
-
345
- SEARCH STRATEGY FOR COMPLEX QUESTIONS:
346
- - If you couldn't find information with one search, try breaking it down:
347
- * For questions about actors in different shows, search each show/movie separately
348
- * For questions about adaptations, search for the original work first, then the adaptation
349
- * Use simpler, more specific search terms
350
- * Try different keyword combinations if first search fails
351
-
352
- CURRENT SITUATION:
353
- - Review what searches you've already tried
354
- - If previous searches failed, try different, simpler search terms
355
- - Break complex questions into their component parts and search each separately
356
-
357
- If you need more information, use the tools. If you have enough information, provide the final answer."""
358
-
359
- # Use the chat with tools so it can decide to search more
360
- messages = state.get("messages", []) + [HumanMessage(content=final_prompt)]
361
- response = self.chat_with_tools.invoke(messages)
362
-
363
- # Update messages for tracking
364
- new_messages = state.get("messages", []) + [
365
- HumanMessage(content=final_prompt),
366
- response
367
- ]
368
-
369
- return {"messages": new_messages}
370
-
371
- def route_after_classification(self, state: AgentState) -> str:
372
- """Determine the next step based on file requirement classification"""
373
- if state["requires_file"]:
374
- return "file_required"
375
- else:
376
- return "no_file_required"
377
-
378
- def route_after_planning(self, state: AgentState) -> str:
379
- """Determine whether to use tools or answer directly based on LLM decision"""
380
- messages = state.get("messages", [])
381
-
382
- # Get the last AI message (the planning decision)
383
- for msg in reversed(messages):
384
- if isinstance(msg, AIMessage):
385
- decision = msg.content.lower().strip()
386
- if "tools" in decision:
387
- return "use_tools"
388
- elif "direct" in decision:
389
- return "answer_direct"
390
- break
391
-
392
- # Default to direct if unclear
393
- return "answer_direct"
394
 
395
  def extract_final_answer(self, state: AgentState):
396
  """Extract ONLY the final answer from the conversation"""
397
  # Create a dedicated extraction prompt that looks at the entire conversation
398
- extraction_prompt = """Look at the entire conversation above and extract ONLY the final answer to the original question.
399
- Return just the answer with no extra words, explanations, or formatting.
400
 
401
- If the answer is a number, write it in digits.
 
 
 
 
402
 
403
  Examples:
404
- - If the conversation concludes "The capital is Paris", return: Paris
405
- - If the conversation concludes "2 + 2 equals 4", return: 4
406
- - If the conversation concludes "The opposite of left is right", return: right
407
- - If the conversation concludes "Based on search results, the answer is 42", return: 42
408
 
409
  Final answer only:"""
410
 
@@ -413,110 +249,96 @@ Final answer only:"""
413
  messages = state["messages"] + [HumanMessage(content=extraction_prompt)]
414
  response = self.chat.invoke(messages)
415
  answer = response.content.strip()
416
- return answer
 
417
  except Exception as e:
418
  print(f"Answer extraction error: {e}")
419
  # Fallback: get the last AI message content
420
  messages = state["messages"]
421
  for msg in reversed(messages):
422
  if isinstance(msg, AIMessage) and not getattr(msg, 'tool_calls', None):
423
- return msg.content.strip()
424
- return "No answer found"
 
 
 
 
 
 
 
425
 
426
  def _build_graph(self):
427
- """Build the LangGraph workflow with proper planning approach"""
428
  graph = StateGraph(AgentState)
429
 
430
  # Add nodes
431
- graph.add_node("classify_file_requirement", self.classify_file_requirement)
432
- graph.add_node("download_file_content", self.download_file_content)
433
- graph.add_node("plan_approach", self.plan_approach)
434
- graph.add_node("answer_with_tools", self.answer_with_tools)
435
- graph.add_node("answer_directly", self.answer_directly)
436
  graph.add_node("tools", ToolNode(self.tools))
 
 
437
 
438
- # Define the flow - Start with file classification
439
- graph.add_edge(START, "classify_file_requirement")
440
 
441
- # Add conditional branching after classification
442
  graph.add_conditional_edges(
443
- "classify_file_requirement",
444
  self.route_after_classification,
445
  {
446
- "file_required": "download_file_content",
447
- "no_file_required": "plan_approach"
448
  }
449
  )
450
 
451
- # After downloading file, plan the approach
452
- graph.add_edge("download_file_content", "plan_approach")
453
 
454
- # After planning, decide whether to use tools or answer directly
455
  graph.add_conditional_edges(
456
- "plan_approach",
457
- self.route_after_planning,
458
  {
459
- "use_tools": "answer_with_tools",
460
- "answer_direct": "answer_directly"
461
  }
462
  )
463
 
464
- # From answer_with_tools, either use tools or end
465
- graph.add_conditional_edges(
466
- "answer_with_tools",
467
- tools_condition,
468
- {
469
- "tools": "tools",
470
- END: END,
471
- }
472
- )
473
 
474
- # From answer_directly, just end (no tool checking after direct answer)
475
- graph.add_edge("answer_directly", END)
476
-
477
- # After tools, check if more tools are needed or provide final answer
478
- graph.add_node("provide_final_answer", self.provide_final_answer)
479
  graph.add_conditional_edges(
480
- "tools",
481
- tools_condition,
482
  {
483
- "tools": "tools", # Allow multiple tool cycles
484
- END: "provide_final_answer",
485
  }
486
  )
487
 
488
- # Allow provide_final_answer to also use more tools if needed
489
- graph.add_conditional_edges(
490
- "provide_final_answer",
491
- tools_condition,
492
- {
493
- "tools": "tools", # Can go back to tools for more searches
494
- END: END,
495
- }
496
- )
497
 
498
  # Compile the graph
499
  self.compiled_graph = graph.compile()
500
- # self.compiled_graph.get_graph().draw_mermaid_png()
501
 
502
  def __call__(self, question: str, task_id: Optional[str] = None) -> str:
503
  """Process question using LangGraph workflow"""
504
  print(f"Processing: {question[:50]}...")
505
 
506
- # Create initial state following the new structure
507
  initial_state = {
508
- "question": question,
509
  "task_id": task_id,
510
  "requires_file": None,
511
- "file_content": None,
512
- "search_attempts": 0,
513
- "final_answer": None,
514
- "messages": []
515
  }
516
 
517
  try:
518
- # Run the graph with recursion limit configuration and Langfuse tracing
519
- config = {"recursion_limit": 25} # Higher limit for multiple tool usage
520
 
521
  # Add Langfuse callback handler if available
522
  if self.langfuse_handler:
@@ -525,8 +347,8 @@ Final answer only:"""
525
 
526
  result = self.compiled_graph.invoke(initial_state, config=config)
527
 
528
- # Extract the final answer
529
- answer = self.extract_final_answer(result)
530
  print(f"Answer: {answer[:50]}...")
531
 
532
  # Log the complete conversation for review
@@ -577,10 +399,18 @@ if __name__ == "__main__":
577
  print("✅ Test passed! The agent correctly answered the question.")
578
  else:
579
  print("❌ Test failed. Expected the answer to contain 'Louvrier'.")
 
 
 
 
 
 
 
 
580
 
581
 
582
  except Exception as e:
583
  import traceback
584
  print(f"❌ Test failed with error: {e}")
585
  print("Full traceback:")
586
- traceback.print_exc()
 
13
  # Try relative imports first (when used as package)
14
  from .tools import (
15
  wikipedia_search, youtube_search, decode_text,
16
+ download_and_process_file, web_search, evaluate_computation
17
  )
18
  except ImportError:
19
  # Fall back to absolute imports (when run directly)
20
  from tools import (
21
  wikipedia_search, youtube_search, decode_text,
22
+ download_and_process_file, web_search, evaluate_computation
23
  )
24
 
25
  from langchain_google_genai import ChatGoogleGenerativeAI
 
27
 
28
  # --- Agent State following LangGraph pattern ---
29
  class AgentState(TypedDict):
30
+ # Messages for LLM interactions (includes question and all conversation)
31
+ messages: Annotated[List[BaseMessage], add_messages]
32
 
33
  # Task ID for file downloads
34
  task_id: Optional[str]
 
36
  # File classification results
37
  requires_file: Optional[bool]
38
 
 
 
 
 
 
 
39
  # Final answer
40
  final_answer: Optional[str]
 
 
 
41
 
42
 
43
  # --- Flexible Tool-Based Agent ---
 
46
 
47
  # Initialize Gemini chat model for LangChain integration
48
  self.chat = ChatGoogleGenerativeAI(
49
+ model="gemini-2.5-flash",
 
 
50
  temperature=0.0,
51
  max_tokens=None
52
  )
53
 
54
+ # Define available tools
55
  self.tools = [
56
+ wikipedia_search,
57
+ youtube_search,
58
+ decode_text,
59
+ web_search,
60
+ download_and_process_file,
61
+ evaluate_computation
62
  ]
63
 
64
  # Bind tools to the LLM
 
135
  print(f"Logged full conversation to: {filename}")
136
 
137
  def classify_file_requirement(self, state: AgentState):
138
+ """Check if question mentions an attached file"""
139
+ messages = state["messages"]
140
 
141
+ # Get the original question from first message
142
+ if messages and isinstance(messages[0], HumanMessage):
143
+ question = messages[0].content.lower()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
144
 
145
+ # Simple keyword check for file attachments
146
+ file_keywords = ["attached", "attachment", "see the file", "in the file",
147
+ "i've attached", "attached as", "attached file"]
148
+ requires_file = any(keyword in question for keyword in file_keywords)
 
 
 
149
 
150
+ return {"requires_file": requires_file}
151
+
152
+ return {"requires_file": False}
 
 
 
 
 
 
 
 
 
 
 
 
153
 
154
  def download_file_content(self, state: AgentState):
155
+ """Download and add file content to messages"""
156
+ task_id = state.get("task_id")
157
 
158
  if not task_id:
159
+ # Add error message
 
 
 
 
160
  return {
161
+ "messages": [HumanMessage(content="Error: No task_id provided for file download")]
 
162
  }
163
 
164
  try:
165
+ # Use the download tool directly
166
+ file_result = download_and_process_file.invoke({"task_id": task_id})
 
 
 
 
 
 
 
167
 
168
+ # Add file content as a system message
169
  return {
170
+ "messages": [HumanMessage(content=f"File content:\n{file_result}")]
 
171
  }
172
 
173
  except Exception as e:
 
 
 
 
174
  return {
175
+ "messages": [HumanMessage(content=f"Error downloading file: {str(e)}")]
 
176
  }
177
 
178
+ def call_model(self, state: AgentState):
179
+ """Call the model with tools - it will decide what to do"""
180
+ messages = state["messages"]
 
 
 
 
 
 
 
 
 
 
 
181
  response = self.chat_with_tools.invoke(messages)
182
+ return {"messages": [response]}
 
 
 
 
 
 
 
183
 
184
+ def analyze_tool_results(self, state: AgentState):
185
+ """Analyze if tool results are sufficient to answer the question"""
186
+ analysis_prompt = """Based on the tool results above, think through the following:
 
187
 
188
+ 1. Do you have enough information to answer the original question?
189
+ 2. Are the tool results relevant and helpful?
190
+ 3. Do you need to use another tool to get more information?
191
 
192
+ If you consider that you don't need to use another tool, then try to answer the question based on what infos you have, the best you can.
193
+ Think about the fact that the answer may formulated using synonyms or similar words to the ones used in the question.
194
+ Even if you are not able to youtube video, the result may be in the description of the video.
 
 
 
 
195
 
196
+ Provide your reasoning and conclude with either:
197
+ - "READY_TO_ANSWER" if you have sufficient information
198
+ - "NEED_MORE_TOOLS" if you need additional tool calls
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
199
 
200
+ Format your response as:
201
+ REASONING: [your analysis here]
202
+ CONCLUSION: [READY_TO_ANSWER or NEED_MORE_TOOLS]"""
 
 
 
 
203
 
204
+ messages = state["messages"] + [HumanMessage(content=analysis_prompt)]
205
  response = self.chat.invoke(messages)
206
 
207
+ # Add the analysis to messages
208
+ return {"messages": [response]}
 
 
 
 
 
209
 
210
+ def route_after_analysis(self, state: AgentState) -> str:
211
+ """Route based on whether we can answer or need more tools"""
212
+ messages = state["messages"]
213
+
214
+ # Get the last message (should be the analysis)
215
+ if messages:
216
+ last_message = messages[-1]
217
+ if isinstance(last_message, AIMessage):
218
+ content = last_message.content.upper()
219
+
220
+ # Check if ready to answer
221
+ if "READY_TO_ANSWER" in content:
222
+ return "extract_answer"
223
+ elif "NEED_MORE_TOOLS" in content:
224
+ return "call_model"
225
+
226
+ # Default: try to answer
227
+ return "extract_answer"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
228
 
229
  def extract_final_answer(self, state: AgentState):
230
  """Extract ONLY the final answer from the conversation"""
231
  # Create a dedicated extraction prompt that looks at the entire conversation
232
+ extraction_prompt = """Based on all the information gathered above, provide ONLY the final answer to the original question.
 
233
 
234
+ Rules:
235
+ - Return ONLY the answer with NO explanations, sentences, or extra words
236
+ - If the answer is a number, write it in digits only
237
+ - No punctuation unless it's part of the answer
238
+ - No phrases like "The answer is" or "Based on..."
239
 
240
  Examples:
241
+ - Question: "What is the capital of France?" → Answer: Paris
242
+ - Question: "How much is 2+2?" Answer: 4
243
+ - Question: "What is the opposite of left?" Answer: right
 
244
 
245
  Final answer only:"""
246
 
 
249
  messages = state["messages"] + [HumanMessage(content=extraction_prompt)]
250
  response = self.chat.invoke(messages)
251
  answer = response.content.strip()
252
+ # Return dict to update state (LangGraph requirement)
253
+ return {"final_answer": answer}
254
  except Exception as e:
255
  print(f"Answer extraction error: {e}")
256
  # Fallback: get the last AI message content
257
  messages = state["messages"]
258
  for msg in reversed(messages):
259
  if isinstance(msg, AIMessage) and not getattr(msg, 'tool_calls', None):
260
+ return {"final_answer": msg.content.strip()}
261
+ return {"final_answer": "No answer found"}
262
+
263
+ def route_after_classification(self, state: AgentState) -> str:
264
+ """Route based on file requirement"""
265
+ if state.get("requires_file"):
266
+ return "download_file"
267
+ else:
268
+ return "call_model"
269
 
270
  def _build_graph(self):
271
+ """Build LangGraph workflow with reasoning/analysis step"""
272
  graph = StateGraph(AgentState)
273
 
274
  # Add nodes
275
+ graph.add_node("classify_file", self.classify_file_requirement)
276
+ graph.add_node("download_file", self.download_file_content)
277
+ graph.add_node("call_model", self.call_model)
 
 
278
  graph.add_node("tools", ToolNode(self.tools))
279
+ graph.add_node("analyze_results", self.analyze_tool_results)
280
+ graph.add_node("extract_answer", self.extract_final_answer)
281
 
282
+ # Define the flow
283
+ graph.add_edge(START, "classify_file")
284
 
285
+ # After classification, either download file or go to model
286
  graph.add_conditional_edges(
287
+ "classify_file",
288
  self.route_after_classification,
289
  {
290
+ "download_file": "download_file",
291
+ "call_model": "call_model"
292
  }
293
  )
294
 
295
+ # After downloading file, call model
296
+ graph.add_edge("download_file", "call_model")
297
 
298
+ # After model call, check if tools were called
299
  graph.add_conditional_edges(
300
+ "call_model",
301
+ tools_condition, # Built-in function that checks for tool calls
302
  {
303
+ "tools": "tools", # If tools called, execute them
304
+ END: "extract_answer", # No tools, go straight to answer
305
  }
306
  )
307
 
308
+ # After tools execute, analyze the results
309
+ graph.add_edge("tools", "analyze_results")
 
 
 
 
 
 
 
310
 
311
+ # After analysis, decide next step
 
 
 
 
312
  graph.add_conditional_edges(
313
+ "analyze_results",
314
+ self.route_after_analysis,
315
  {
316
+ "extract_answer": "extract_answer", # Ready to answer
317
+ "call_model": "call_model", # Need more tools
318
  }
319
  )
320
 
321
+ # After extracting answer, we're done
322
+ graph.add_edge("extract_answer", END)
 
 
 
 
 
 
 
323
 
324
  # Compile the graph
325
  self.compiled_graph = graph.compile()
 
326
 
327
  def __call__(self, question: str, task_id: Optional[str] = None) -> str:
328
  """Process question using LangGraph workflow"""
329
  print(f"Processing: {question[:50]}...")
330
 
331
+ # Create initial state with just the question as a message
332
  initial_state = {
333
+ "messages": [HumanMessage(content=question)],
334
  "task_id": task_id,
335
  "requires_file": None,
336
+ "final_answer": None
 
 
 
337
  }
338
 
339
  try:
340
+ # Run the graph with Langfuse tracing
341
+ config = {"recursion_limit": 25}
342
 
343
  # Add Langfuse callback handler if available
344
  if self.langfuse_handler:
 
347
 
348
  result = self.compiled_graph.invoke(initial_state, config=config)
349
 
350
+ # Extract the final answer from the state
351
+ answer = result.get("final_answer", "No answer found")
352
  print(f"Answer: {answer[:50]}...")
353
 
354
  # Log the complete conversation for review
 
399
  print("✅ Test passed! The agent correctly answered the question.")
400
  else:
401
  print("❌ Test failed. Expected the answer to contain 'Louvrier'.")
402
+
403
+ answer = agent("In the video https://www.youtube.com/watch?v=L1vXCYZAYYM, what is the highest number of bird species to be on camera simultaneously?")
404
+ print(f"Answer: {answer}")
405
+
406
+ if answer == "3":
407
+ print("✅ Test passed! The agent correctly answered the question.")
408
+ else:
409
+ print("❌ Test failed. Expected the answer to be '3'.")
410
 
411
 
412
  except Exception as e:
413
  import traceback
414
  print(f"❌ Test failed with error: {e}")
415
  print("Full traceback:")
416
+ traceback.print_exc()
src/tools.py CHANGED
@@ -28,11 +28,19 @@ from langchain_core.tools import Tool
28
  from langchain_google_community import GoogleSearchAPIWrapper
29
  from langchain_community.tools import DuckDuckGoSearchResults
30
  from langchain_community.document_loaders import WebBaseLoader
 
31
 
32
 
33
  @tool
34
  def wikipedia_search(query: str) -> str:
35
- """Search Wikipedia for information. Use this for factual information and encyclopedic content.
 
 
 
 
 
 
 
36
 
37
  Args:
38
  query: The search query."""
@@ -60,25 +68,98 @@ def wikipedia_search(query: str) -> str:
60
 
61
  @tool
62
  def youtube_search(query: str) -> str:
63
- """Search YouTube for videos and get video information. Use this when you need YouTube-specific content."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
64
  try:
65
- from youtubesearchpython import VideosSearch
66
- search = VideosSearch(query, limit=3)
67
- results = search.result()
68
-
69
- output = f"YouTube search results for '{query}':\n"
70
- for video in results['result']:
71
- output += f"- {video['title']} by {video['channel']['name']}\n"
72
- output += f" Duration: {video['duration']}, Views: {video['viewCount']['text']}\n"
73
- output += f" URL: {video['link']}\n\n"
74
 
75
- return output
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  except Exception as e:
77
  return f"YouTube search failed: {str(e)}"
78
 
79
  @tool
80
  def web_search(query: str) -> str:
81
- """Search the web for a query and return the first results.
 
 
 
 
 
 
 
 
 
 
 
 
82
  Args:
83
  query: The search query."""
84
 
@@ -101,7 +182,16 @@ def web_search(query: str) -> str:
101
 
102
  @tool
103
  def decode_text(text: str) -> str:
104
- """Decode or reverse text that might be encoded backwards or in other ways."""
 
 
 
 
 
 
 
 
 
105
  try:
106
  # Try reversing words
107
  words = text.split()
@@ -116,10 +206,49 @@ def decode_text(text: str) -> str:
116
  return f"Text decoding failed: {str(e)}"
117
 
118
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
119
  @tool
120
  def download_and_process_file(task_id: str) -> str:
121
  """Download and process a file from the GAIA API using the task_id.
122
- Use this tool when detect_file_requirement indicates a file is needed."""
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  api_url = "https://agents-course-unit4-scoring.hf.space"
124
  try:
125
  # Download file from API
 
28
  from langchain_google_community import GoogleSearchAPIWrapper
29
  from langchain_community.tools import DuckDuckGoSearchResults
30
  from langchain_community.document_loaders import WebBaseLoader
31
+ from simpleeval import simple_eval
32
 
33
 
34
  @tool
35
  def wikipedia_search(query: str) -> str:
36
+ """Search Wikipedia for factual information and encyclopedic content.
37
+
38
+ Use this tool when you need:
39
+ - Historical facts, dates, or events
40
+ - Biographical information about people
41
+ - Definitions and explanations of concepts
42
+ - General factual knowledge
43
+ - Information about places, organizations, or scientific topics
44
 
45
  Args:
46
  query: The search query."""
 
68
 
69
def _yt_value(info, key, default='N/A'):
    """Return info[key], or default when the key is missing OR explicitly None.

    yt-dlp frequently populates keys with None (e.g. 'duration' for live
    streams), in which case dict.get(key, default) would not fall back.
    """
    value = info.get(key)
    return default if value is None else value


def _format_video_info(info):
    """Render the metadata dict of a single video as the tool's text report."""
    channel = info.get('uploader') or info.get('channel') or 'N/A'
    parts = [
        "YouTube Video Information:\n",
        f"Title: {_yt_value(info, 'title')}\n",
        f"Channel: {channel}\n",
        f"Duration: {_yt_value(info, 'duration', 0)} seconds\n",
        f"Views: {_yt_value(info, 'view_count')}\n",
        f"Upload Date: {_yt_value(info, 'upload_date')}\n\n",
    ]

    # The description usually carries the key information about the content.
    description = info.get('description')
    if description and description != 'N/A':
        parts.append(f"Description:\n{description}\n\n")
    else:
        parts.append("Description: Not available\n\n")

    # Tags help identify the content; cap at 10 to keep the output short.
    tags = info.get('tags') or []
    if tags:
        parts.append(f"Tags: {', '.join(str(tag) for tag in tags[:10])}\n")

    return "".join(parts)


def _format_search_results(query, results):
    """Render a yt-dlp search result dict as the tool's text listing."""
    # Entries may be missing or contain None placeholders for failed items.
    entries = [entry for entry in ((results or {}).get('entries') or []) if entry]

    parts = [f"YouTube search results for '{query}':\n"]
    if not entries:
        parts.append("No results found.\n")

    for entry in entries:
        channel = entry.get('uploader') or entry.get('channel') or 'N/A'
        url = entry.get('url') or entry.get('webpage_url') or 'N/A'
        parts.append(f"- {_yt_value(entry, 'title')} by {channel}\n")
        parts.append(f"  Duration: {_yt_value(entry, 'duration', 0)} seconds\n")
        parts.append(f"  URL: {url}\n\n")

    return "".join(parts)


@tool
def youtube_search(query: str) -> str:
    """Search YouTube for videos and get video information, or extract information from a specific YouTube URL.

    Use this tool when:
    - The question explicitly mentions YouTube or videos
    - You need to find video content on a specific topic
    - You have a YouTube URL and need to get information about it
    - Looking for tutorials, demonstrations, or visual content
    - The user asks about video creators or channels

    When analyzing a YouTube URL, this tool provides:
    - Video title, channel, duration, views, upload date
    - Full description (contains key information about video content)
    - Tags (keywords related to the video)

    IMPORTANT: Use the title, description, and tags to answer questions about the video content.
    The description often contains detailed information about what happens in the video.

    Args:
        query: The YouTube search query or direct YouTube URL."""
    # NOTE: the docstring above is what the @tool decorator exposes to the
    # model as the tool description, so it is kept verbatim.
    try:
        # Imported lazily so a missing yt_dlp is reported as a tool failure
        # instead of breaking the import of this module.
        import yt_dlp

        query = (query or "").strip()
        if not query:
            return "YouTube search failed: empty query"

        is_direct_url = 'youtube.com' in query or 'youtu.be' in query

        ydl_opts = {
            'quiet': True,
            'no_warnings': True,
            # Full extraction for a single video; flat (metadata-only)
            # extraction is enough, and much faster, for search listings.
            'extract_flat': not is_direct_url,
        }
        target = query if is_direct_url else f"ytsearch3:{query}"

        with yt_dlp.YoutubeDL(ydl_opts) as ydl:
            info = ydl.extract_info(target, download=False)

        if is_direct_url:
            return _format_video_info(info or {})
        return _format_search_results(query, info)
    except Exception as e:
        # Tool boundary: report the failure as text so the agent can react
        # to it rather than aborting the whole run.
        return f"YouTube search failed: {str(e)}"
147
 
148
  @tool
149
  def web_search(query: str) -> str:
150
+ """Search the web for current information and load full webpage content.
151
+
152
+ Use this tool when:
153
+ - You need current/recent information not available in Wikipedia
154
+ - Looking for news, updates, or time-sensitive content
155
+ - Wikipedia doesn't have the specific information
156
+ - Need detailed content from specific web pages
157
+ - Looking for niche or specialized information
158
+
159
+ This tool performs a web search and loads the full content of the top 3 results.
160
+
161
+ If the question refers to an article, use this tool to query for the specific article mentioned in the question.
162
+
163
  Args:
164
  query: The search query."""
165
 
 
182
 
183
  @tool
184
  def decode_text(text: str) -> str:
185
+ """Decode or reverse text that might be encoded backwards or in other ways.
186
+
187
+ Use this tool when:
188
+ - Text appears to be reversed or encoded
189
+ - Words are spelled backwards
190
+ - The question mentions "decode", "reverse", or "backwards"
191
+ - Text looks scrambled or encoded
192
+
193
+ Args:
194
+ text: The text to decode or reverse."""
195
  try:
196
  # Try reversing words
197
  words = text.split()
 
206
  return f"Text decoding failed: {str(e)}"
207
 
208
 
209
@tool
def evaluate_computation(expression: str) -> str:
    """Safely evaluate mathematical expressions and computations.

    Use this tool when:
    - You need to perform mathematical calculations
    - The question involves arithmetic operations (+, -, *, /, **, %)
    - You need to evaluate numeric expressions
    - Computing formulas or mathematical operations

    Supports:
    - Basic arithmetic: +, -, *, /, **, %
    - Mathematical functions: abs, max, min, round, sum
    - Comparison operators: <, <=, >, >=, ==, !=
    - Logical operators: and, or, not
    - Constants: True, False, None

    Args:
        expression: The mathematical expression to evaluate (e.g., "2 + 2", "3.14 * 5**2")."""
    # NOTE: the docstring above is what the @tool decorator exposes to the
    # model as the tool description, so it is kept verbatim.
    try:
        # Local import: only `simple_eval` is imported at module level.
        from simpleeval import DEFAULT_FUNCTIONS, EvalWithCompoundTypes

        # simpleeval's default function table does not include the functions
        # advertised in the docstring, so register them explicitly while
        # keeping the library defaults available.
        functions = {
            **DEFAULT_FUNCTIONS,
            "abs": abs,
            "max": max,
            "min": min,
            "round": round,
            "sum": sum,
        }

        # The compound-type evaluator also accepts list/tuple literals, which
        # calls such as sum([1, 2, 3]) or max([4, 7]) need in order to work.
        evaluator = EvalWithCompoundTypes(functions=functions)
        result = evaluator.eval(expression)
        return f"Result of '{expression}': {result}"
    except Exception as e:
        # Tool boundary: report the failure as text so the agent can retry
        # with a corrected expression.
        return f"Computation failed for '{expression}': {str(e)}"
233
+
234
+
235
  @tool
236
  def download_and_process_file(task_id: str) -> str:
237
  """Download and process a file from the GAIA API using the task_id.
238
+
239
+ Use this tool when:
240
+ - The question explicitly mentions an "attached file" or "attachment"
241
+ - The question says "see the attached", "I've attached", "attached as", etc.
242
+ - A task_id has been provided for file access
243
+
244
+ This tool downloads and processes various file types including:
245
+ - PDF, Word, PowerPoint, Excel documents
246
+ - Images (extracts text via OCR)
247
+ - Audio files (transcribes speech to text)
248
+ - CSV, text, and markdown files
249
+
250
+ Args:
251
+ task_id: The GAIA task ID used to download the file."""
252
  api_url = "https://agents-course-unit4-scoring.hf.space"
253
  try:
254
  # Download file from API