ernani committed on
Commit
794ea68
·
1 Parent(s): 24b20b9

code refactor - first test

Browse files
Files changed (4) hide show
  1. app.py +3 -3
  2. manage_agents.py +1057 -346
  3. requirements.txt +3 -0
  4. tools.py +2 -2
app.py CHANGED
@@ -3,7 +3,7 @@ import gradio as gr
3
  import requests
4
  import inspect
5
  import pandas as pd
6
- from manage_agents import MainAgent
7
  from dotenv import load_dotenv
8
 
9
  # Load environment variables
@@ -15,8 +15,8 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
15
  # --- Agent Implementation ---
16
  class SearchAgent:
17
  def __init__(self):
18
- self.agent = MainAgent()
19
- print("SearchAgent initialized with MainAgent.")
20
 
21
  def __call__(self, task_id: str, question: str, file_name: str = "") -> str:
22
  print(f"Processing question: {question[:100]}...")
 
3
  import requests
4
  import inspect
5
  import pandas as pd
6
+ from manage_agents import StateGraphAgent
7
  from dotenv import load_dotenv
8
 
9
  # Load environment variables
 
15
  # --- Agent Implementation ---
16
  class SearchAgent:
17
  def __init__(self):
18
+ self.agent = StateGraphAgent()
19
+ print("SearchAgent initialized with StateGraphAgent.")
20
 
21
  def __call__(self, task_id: str, question: str, file_name: str = "") -> str:
22
  print(f"Processing question: {question[:100]}...")
manage_agents.py CHANGED
@@ -1,6 +1,7 @@
1
  from typing import Dict, List, Optional, Tuple
2
  from langchain.agents import AgentExecutor
3
  from langchain_openai import ChatOpenAI
 
4
  from langchain.memory import ConversationBufferMemory
5
  from langchain.chains import LLMChain
6
  from langchain.prompts import PromptTemplate
@@ -19,6 +20,16 @@ from tools import (
19
  ContentProcessingError
20
  )
21
  import logging
 
 
 
 
 
 
 
 
 
 
22
 
23
  class ContentTypeAgent:
24
  """Agent responsible for identifying content type and selecting appropriate tool"""
@@ -178,6 +189,8 @@ class ProcessContentAgent:
178
  Do not include explanations, steps, reasoning, or additional text.
179
  Be direct and specific. GAIA benchmark requires exact matching answers.
180
  For example, if asked "What is the capital of France?", respond simply with "Paris".
 
 
181
  """
182
  )
183
 
@@ -264,11 +277,20 @@ class ContentTranslateAgent:
264
  result = self.chain.invoke(question).strip()
265
  return result
266
 
267
- class MainAgent:
268
- """Main agent orchestrating the workflow"""
269
 
270
  def __init__(self):
271
- self.llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
 
 
 
 
 
 
 
 
 
272
 
273
  # Initialize tools
274
  self.wikipedia_tool = WikipediaTool()
@@ -290,398 +312,1087 @@ class MainAgent:
290
  "python": self.python_tool,
291
  }
292
 
293
- # Initialize special agents
294
- self.content_translate = ContentTranslateAgent(self.llm)
295
- self.content_type_agent = ContentTypeAgent(self.llm)
296
-
297
- # Create LLM with tools bound for tool-using capabilities
298
- self.general_tools = [self.wikipedia_tool, self.web_search_tool]
299
- self.llm_with_tools = self.llm.bind_tools(self.general_tools)
300
-
301
  # Tool usage tracking
302
  self.last_used_tool = None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
303
 
304
- def _format_question(self, question: str) -> str:
305
- """Format the question to be more specific and clear"""
306
- prompt = f"""You are an expert in transforming user questions into clear, specific, and search-optimized queries.
307
-
308
- Rewrite the following question with the following goals:
309
- - Add any necessary missing context to make it fully unambiguous
310
- - Make the question as specific as possible for retrieval by a search engine or knowledge base
311
- - Ensure the query is effective for retrieving the exact information needed
312
- - Query should use the context and not the entire question
313
-
314
- Question: {question}
315
-
316
- Example:
317
- Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
318
- Query: Mercedes Sosa musician
319
-
320
- Question: Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.
321
- Query: "Everybody Loves Raymond" actor Polish version Magda M.
322
-
323
- Question: Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.
324
- Query: Taishō Tamai baseball player
325
-
326
- Return only the rewritten query, no extra commentary.
327
- The query should be highly optimized for retrieving the exact information needed.
328
- """
329
- response = self.llm.invoke(prompt)
330
- formatted_query = response.content if hasattr(response, 'content') else str(response)
331
 
332
- return formatted_query
 
 
 
 
 
 
 
 
333
 
334
-
335
- def _get_answer_using_tools(self, question: str) -> str:
336
- """Get answer using tools bound directly to the LLM"""
337
- prompt = f"""Answer the following question using the provided tools when necessary.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
338
  Question: {question}
339
 
340
- Instructions:
341
- 1. Use the wikipedia tool for questions about facts, history, people, places, etc.
342
- 2. Use the web_search tool for current events or specific detailed information not typically found in an encyclopedia.
343
- 3. Provide a direct, concise answer based on the information you find.
344
- 4. If the search tools don't provide enough information, acknowledge what's missing.
345
- 5. Important: Be specific and direct with your answer. If asked for a name, provide only the name.
346
- 6. If asked for a specific piece of information (like a number, date, or code), provide exactly that.
347
- 7. Do not include explanations unless specifically asked.
348
-
349
- When answering, provide ONLY the precise answer requested.
350
- Do not include explanations, steps, reasoning, or additional text.
351
- Be direct and specific. GAIA benchmark requires exact matching answers.
352
- For example, if asked "What is the color of the sky?", respond simply with "blue".
353
- """
354
-
355
- response = self.llm_with_tools.invoke(prompt)
356
  answer = response.content if hasattr(response, 'content') else str(response)
357
- return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
358
 
359
  def process_question(self, task_id: str, question: str, file_name: str = "") -> str:
 
360
  try:
361
  # Reset tool tracking
362
  self.last_used_tool = None
363
 
364
- # First check if we can answer this directly without tools
365
- direct_answer = self.content_translate.answer_or_flag(question)
366
- if direct_answer != "TOOLS_REQUIRED":
367
  self.last_used_tool = "direct"
368
  return direct_answer
369
 
370
- # If we have a file to process, use specialized tools
 
 
 
371
  if file_name:
372
- # Identify content type based on file extension
373
- content_type, parameter, task_id = self.content_type_agent.identify_content_type(question, file_name, task_id)
374
- self.last_used_tool = content_type
375
-
376
- if content_type in self.tools:
377
- tool = self.tools[content_type]
 
 
 
 
 
 
378
 
379
- try:
380
- if content_type == "excel":
381
- result = tool._run(task_id, question=question)
382
- excel_data = result.page_content if hasattr(result, 'page_content') else str(result)
383
-
384
- # Use specialized prompt for Excel analysis
385
- excel_analysis_prompt = f"""
386
- Analyze this Excel data and provide an extremely concise answer:
387
-
388
- Question: {question}
389
-
390
- Excel Data:
391
- {excel_data}
392
-
393
- Instructions:
394
- 1. Focus only on answering the specific question being asked
395
- 2. If the question asks for a calculation or total, compute it precisely
396
- 3. Format currency values properly (like $123.45) if requested
397
- 4. Provide ONLY the answer in a clear, concise format - no additional explanations
398
- 5. If the answer is a number or calculation result, verify it's correct before responding
399
-
400
- When answering, provide ONLY the precise answer requested.
401
- Do not include explanations, steps, reasoning, or additional text.
402
- Be direct and specific. GAIA benchmark requires exact matching answers.
403
- For example, if asked "What is the total revenue?", respond simply with the exact number, like "$1,234.56".
404
- """
405
-
406
- response = self.llm.invoke(excel_analysis_prompt)
407
- return response.content if hasattr(response, 'content') else str(response)
408
- elif content_type == "python":
409
- result = tool._run(task_id, question=question)
410
- python_code = result[0].page_content if result and hasattr(result[0], 'page_content') else str(result)
411
-
412
- # Use specialized prompt for Python code analysis
413
- python_analysis_prompt = f"""
414
- Analyze this Python code and provide an extremely concise answer:
415
-
416
- Question: {question}
417
-
418
- Python Code:
419
- {python_code}
420
-
421
- Instructions:
422
- 1. If asked about the output or result of the code, mentally trace through the execution
423
- 2. Pay close attention to loops, conditionals, and mathematical operations
424
- 3. Provide ONLY the final output/answer without extra explanation
425
- 4. If the question asks for a specific value or number, provide just that value
426
-
427
- When answering, provide ONLY the precise answer requested.
428
- Do not include explanations, steps, reasoning, or additional text.
429
- Be direct and specific. GAIA benchmark requires exact matching answers.
430
- For example, if asked "What is the output of this code?", respond simply with the exact output value.
431
- """
432
-
433
- response = self.llm.invoke(python_analysis_prompt)
434
- return response.content if hasattr(response, 'content') else str(response)
435
- elif content_type == "image":
436
- # Image tool needs both task_id and question
437
- result = tool._run(task_id, question=question)
438
- return result.page_content if hasattr(result, 'page_content') else str(result)
439
- elif content_type == "audio":
440
- # Audio tool needs both task_id and question
441
- documents = tool._run(task_id, question)
442
- audio_transcript = documents[0].page_content if documents and hasattr(documents[0], 'page_content') else str(documents)
443
-
444
- # Use specialized prompt for audio analysis
445
- audio_analysis_prompt = f"""
446
- Provide an extremely concise answer based on this transcript:
447
-
448
- Question: {question}
449
-
450
- Audio Transcript:
451
- {audio_transcript}
452
-
453
- Instructions:
454
- 1. Pay careful attention to the specific format requested in the question
455
- 2. Extract only the information needed to answer the question
456
- 3. Format your answer exactly as requested (comma-separated list, alphabetical order, etc.)
457
- 4. Do not include any explanations or extra text in your answer
458
- 5. If asked to provide specific items (like ingredients), be sure to list ALL of them
459
-
460
- When answering, provide ONLY the precise answer requested.
461
- Do not include explanations, steps, reasoning, or additional text.
462
- Be direct and specific. GAIA benchmark requires exact matching answers.
463
- For example, if asked "What is the color of the sky?", respond simply with "blue".
464
- """
465
-
466
- response = self.llm.invoke(audio_analysis_prompt)
467
- return response.content if hasattr(response, 'content') else str(response)
468
- elif content_type == "youtube":
469
- result = tool._run(task_id, question=question)
470
-
471
- # Use specialized prompt for YouTube analysis
472
- youtube_analysis_prompt = f"""
473
- Analyze this YouTube video and provide an extremely concise answer:
474
-
475
- Question: {question}
476
-
477
- YouTube Video:
478
- {result}
479
-
480
- Instructions:
481
- 1. Pay careful attention to the specific format requested in the question
482
- 2. Extract only the information needed to answer the question
483
-
484
- When answering, provide ONLY the precise answer requested.
485
- Do not include explanations, steps, reasoning, or additional text.
486
- Be direct and specific. GAIA benchmark requires exact matching answers.
487
- For example, if asked "What is the color of the sky?", respond simply with "blue".
488
- """
489
-
490
- response = self.llm.invoke(youtube_analysis_prompt)
491
- return response.content if hasattr(response, 'content') else str(response)
492
- else:
493
- # Even for other tools, pass the question if the method accepts it
494
- try:
495
- # Try with question parameter first
496
- documents = tool._run(task_id, question)
497
- except TypeError:
498
- # Fall back to just task_id if question isn't accepted
499
- documents = tool._run(task_id)
500
-
501
- # Use specialized prompt for answer
502
- documents = documents[0].page_content if documents and hasattr(documents[0], 'page_content') else str(documents)
503
- answer_analysis_prompt = f"""
504
- Analyze the following and provide an extremely concise answer:
505
-
506
- Question: {question}
507
-
508
- Raw Answer: {documents}
509
-
510
- Instructions:
511
- 1. Pay careful attention to the specific format requested in the question
512
- 2. Extract only the information needed to answer the question
513
- 3. Format your answer exactly as requested (comma-separated list, alphabetical order, etc.)
514
- 4. Do not include any explanations or extra text in your answer
515
- 5. If asked to provide specific items or information, be sure to include ALL of them
516
-
517
- When answering, provide ONLY the precise answer requested.
518
- Do not include explanations, steps, reasoning, or additional text.
519
- Be direct and specific. GAIA benchmark requires exact matching answers.
520
- For example, if asked "What is the capital of France?", respond simply with "Paris".
521
- """
522
-
523
- response = self.llm.invoke(answer_analysis_prompt)
524
- return response.content if hasattr(response, 'content') else str(response)
525
- except Exception as e:
526
- return f"Error processing file: {str(e)}"
527
  else:
528
  return f"Unsupported file type: {content_type}"
529
 
530
- # For general questions (no files), check for special content types first
531
- # This is important for things like YouTube URLs that don't have a file
532
- content_type, parameter, _ = self.content_type_agent.identify_content_type(question, "", task_id)
533
-
534
- # Handle YouTube URLs in general questions
535
  if content_type == "youtube":
536
- self.last_used_tool = "youtube"
537
- # Extract YouTube URL properly
538
- if parameter.startswith("http"):
539
- youtube_url = parameter
540
- else:
541
- # Try to extract URL from question if parameter doesn't have one
542
- youtube_url = self._extract_youtube_url(question)
543
- if not youtube_url:
544
- # If no URL found, we can't process the YouTube video
545
- return "Error: No valid YouTube URL found in the question."
546
 
547
- result = self.youtube_tool._run(youtube_url, question=question)
 
548
 
549
- # Use specialized prompt for YouTube analysis
550
- youtube_analysis_prompt = f"""
551
- Analyze this YouTube video and provide an extremely concise answer:
552
-
553
  Question: {question}
554
-
555
- YouTube Video:
556
- {result}
557
-
558
- Instructions:
559
- 1. Pay careful attention to the specific format requested in the question
560
- 2. Extract only the information needed to answer the question
561
 
562
- When answering, provide ONLY the precise answer requested.
563
- Do not include explanations, steps, reasoning, or additional text.
564
- Be direct and specific. GAIA benchmark requires exact matching answers.
565
- For example, if asked "What is the color of the sky?", respond simply with "blue".
566
- """
567
 
568
- response = self.llm.invoke(youtube_analysis_prompt)
 
 
 
 
 
569
  return response.content if hasattr(response, 'content') else str(response)
570
 
571
- # For general questions (no files), use improved search strategy
572
- question_lower = question.lower()
573
- answer = None
574
-
575
- # Check for Wikipedia specific questions first
576
- if "wikipedia" in question_lower:
577
  self.last_used_tool = "wiki"
578
- wiki_query = self._format_question(question)
579
- wiki_result = self.wikipedia_tool._run(wiki_query)
580
- answer = self._generate_answer_from_context(question, wiki_result)
581
- if self._is_valid_answer(answer):
582
- return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
583
 
584
- # Use general web search with improved query formatting
585
- self.last_used_tool = "web"
586
- web_query = self._format_question(question)
587
- web_result = self.web_search_tool._run(web_query)
588
- answer = self._generate_answer_from_context(question, web_result)
589
 
590
- if self._is_valid_answer(answer):
591
- return answer
 
592
 
593
- # If no good answer from web search, try with Wikipedia as a last resource
594
- if "wikipedia" not in question_lower: # Only if not already tried
595
- self.last_used_tool = "wiki"
596
- wiki_query = self._format_question(question)
597
- wiki_result = self.wikipedia_tool._run(wiki_query)
598
- answer = self._generate_answer_from_context(question, wiki_result)
599
- if self._is_valid_answer(answer):
600
- return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
601
 
602
- # If we still don't have a good answer, use the general tools approach
603
- self.last_used_tool = "general"
604
- answer = self._get_answer_using_tools(question)
605
 
606
- return answer
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
607
 
608
  except Exception as e:
609
  return f"An unexpected error occurred: {str(e)}"
 
 
 
610
 
611
- def _generate_answer_from_context(self, question: str, context: str) -> str:
612
- """Generate an answer based on the question and context"""
613
- # Create a more effective prompt for answer generation
614
- answer_prompt = f"""Based on the following information, provide an extremely concise answer:
615
- Question: {question}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
616
 
617
- Information: {context}
 
 
 
618
 
619
- Instructions:
620
- 1. Read the question carefully and identify exactly what is being asked for
621
- 2. Pay close attention to any formatting requirements in the question (e.g., "give only the city name", "without abbreviations", etc.)
622
- 3. Find the specific information in the context that directly answers the question
623
- 4. Format your answer exactly as requested - if asked for just a name, number, or code, provide only that
624
- 5. For numerical answers, double-check your calculation or counting
625
- 6. For names or places, ensure correct and complete spelling
626
- 7. If asked for a specific format like "comma-separated list" or "alphabetical order", follow that exactly
627
- 8. If asked for just a specific piece of information, do not include any other details
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
628
 
629
- Example:
630
- Question: what's the capital of france?
631
- Answer: Paris
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
632
 
633
- If your question asks for without abreviations:
634
- city name: st. petersburg
635
- Answer: Saint Petersburg
 
 
 
 
 
 
 
 
636
 
637
- If your answer is a number, provide only the number.
638
- Example:
639
- Question: how many wheels does a car have?
640
- Answer: 4
641
 
642
- When answering, provide ONLY the precise answer requested.
643
- Do not include explanations, steps, reasoning, or additional text.
644
- Be direct and specific. GAIA benchmark requires exact matching answers.
645
- """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
646
 
647
- try:
648
- # Send prompt to LLM and get response
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
649
  response = self.llm.invoke(answer_prompt)
650
  answer = response.content if hasattr(response, 'content') else str(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
651
 
652
- # Post-process to ensure answer meets formatting requirements
653
- answer = answer.strip()
654
 
655
- return answer
656
- except Exception as e:
657
- return f"Could not generate an answer due to an error: {str(e)}"
658
-
659
- def _is_valid_answer(self, answer: str) -> bool:
660
- """Check if the answer appears to be valid and informative"""
661
- if not answer:
662
- return False
663
-
664
- # Check for common patterns in invalid answers
665
- invalid_patterns = [
666
- "i don't have enough information",
667
- "i cannot find",
668
- "cannot be determined",
669
- "is not provided in",
670
- "not mentioned in",
671
- "not specified in",
672
- "not included in",
673
- "not found in",
674
- "not stated in",
675
- "not given in",
676
- "no information about",
677
- "no specific information",
678
- "information is not available",
679
- "information is missing",
680
- "unable to determine"
681
- ]
682
-
683
- # If the answer contains any invalid patterns, consider it invalid
684
- if any(pattern in answer.lower() for pattern in invalid_patterns):
685
- return False
686
 
687
- return True
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  from typing import Dict, List, Optional, Tuple
2
  from langchain.agents import AgentExecutor
3
  from langchain_openai import ChatOpenAI
4
+ from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
5
  from langchain.memory import ConversationBufferMemory
6
  from langchain.chains import LLMChain
7
  from langchain.prompts import PromptTemplate
 
20
  ContentProcessingError
21
  )
22
  import logging
23
+ from langchain_core.messages import HumanMessage, AIMessage, FunctionMessage
24
+ # Import langgraph instead of langchain.graphs
25
+ from langgraph.graph import StateGraph, END, START
26
+ from langgraph.prebuilt import ToolNode
27
+ from langgraph.graph import MessagesState
28
+ from dotenv import load_dotenv
29
+ import os
30
+
31
+ load_dotenv()
32
+ HF_TOKEN = os.getenv("HF_TOKEN")
33
 
34
  class ContentTypeAgent:
35
  """Agent responsible for identifying content type and selecting appropriate tool"""
 
189
  Do not include explanations, steps, reasoning, or additional text.
190
  Be direct and specific. GAIA benchmark requires exact matching answers.
191
  For example, if asked "What is the capital of France?", respond simply with "Paris".
192
+ Answer format:
193
+ <answer>
194
  """
195
  )
196
 
 
277
  result = self.chain.invoke(question).strip()
278
  return result
279
 
280
+ class StateGraphAgent:
281
+ """Modern implementation of MainAgent for tool orchestration"""
282
 
283
  def __init__(self):
284
+ self.llm = ChatOpenAI(temperature=0.2, model="gpt-4o-mini")
285
+ # llm = HuggingFaceEndpoint(
286
+ # repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
287
+ # #repo_id="meta-llama/Llama-3.3-70B-Instruct",
288
+
289
+ # huggingfacehub_api_token=HF_TOKEN,
290
+ # )
291
+
292
+ # self.llm = ChatHuggingFace(llm=llm, verbose=True)
293
+
294
 
295
  # Initialize tools
296
  self.wikipedia_tool = WikipediaTool()
 
312
  "python": self.python_tool,
313
  }
314
 
 
 
 
 
 
 
 
 
315
  # Tool usage tracking
316
  self.last_used_tool = None
317
+
318
+ # Create LLM with generic tools bound for general purpose use
319
+ self.general_tools = [
320
+ {
321
+ "type": "function",
322
+ "function": {
323
+ "name": "search_wikipedia",
324
+ "description": "Search Wikipedia for information on a topic",
325
+ "parameters": {
326
+ "type": "object",
327
+ "properties": {
328
+ "query": {
329
+ "type": "string",
330
+ "description": "The search query to find information on Wikipedia"
331
+ }
332
+ },
333
+ "required": ["query"]
334
+ }
335
+ }
336
+ },
337
+ {
338
+ "type": "function",
339
+ "function": {
340
+ "name": "search_web",
341
+ "description": "Search the web for information on a topic",
342
+ "parameters": {
343
+ "type": "object",
344
+ "properties": {
345
+ "query": {
346
+ "type": "string",
347
+ "description": "The search query to find information on the web"
348
+ }
349
+ },
350
+ "required": ["query"]
351
+ }
352
+ }
353
+ },
354
+ {
355
+ "type": "function",
356
+ "function": {
357
+ "name": "analyze_youtube",
358
+ "description": "Analyze a YouTube video for information",
359
+ "parameters": {
360
+ "type": "object",
361
+ "properties": {
362
+ "url": {
363
+ "type": "string",
364
+ "description": "The YouTube URL to analyze"
365
+ },
366
+ "question": {
367
+ "type": "string",
368
+ "description": "The specific question to answer about the video"
369
+ }
370
+ },
371
+ "required": ["question"]
372
+ }
373
+ }
374
+ }
375
+ ]
376
+
377
+ # Create specific tools for file types
378
+ self.file_tools = {
379
+ "audio": {
380
+ "type": "function",
381
+ "function": {
382
+ "name": "process_audio",
383
+ "description": "Process an audio file to extract information",
384
+ "parameters": {
385
+ "type": "object",
386
+ "properties": {
387
+ "task_id": {
388
+ "type": "string",
389
+ "description": "The task ID associated with the file"
390
+ },
391
+ "question": {
392
+ "type": "string",
393
+ "description": "The specific question to answer about the file"
394
+ }
395
+ },
396
+ "required": ["task_id", "question"]
397
+ }
398
+ }
399
+ },
400
+ "image": {
401
+ "type": "function",
402
+ "function": {
403
+ "name": "analyze_image",
404
+ "description": "Analyze an image to extract information",
405
+ "parameters": {
406
+ "type": "object",
407
+ "properties": {
408
+ "task_id": {
409
+ "type": "string",
410
+ "description": "The task ID associated with the file"
411
+ },
412
+ "question": {
413
+ "type": "string",
414
+ "description": "The specific question to answer about the file"
415
+ }
416
+ },
417
+ "required": ["task_id", "question"]
418
+ }
419
+ }
420
+ },
421
+ "excel": {
422
+ "type": "function",
423
+ "function": {
424
+ "name": "analyze_excel",
425
+ "description": "Analyze an Excel file to extract information",
426
+ "parameters": {
427
+ "type": "object",
428
+ "properties": {
429
+ "task_id": {
430
+ "type": "string",
431
+ "description": "The task ID associated with the file"
432
+ },
433
+ "question": {
434
+ "type": "string",
435
+ "description": "The specific question to answer about the file"
436
+ }
437
+ },
438
+ "required": ["task_id", "question"]
439
+ }
440
+ }
441
+ },
442
+ "python": {
443
+ "type": "function",
444
+ "function": {
445
+ "name": "run_python",
446
+ "description": "Run and analyze Python code",
447
+ "parameters": {
448
+ "type": "object",
449
+ "properties": {
450
+ "task_id": {
451
+ "type": "string",
452
+ "description": "The task ID associated with the file"
453
+ },
454
+ "question": {
455
+ "type": "string",
456
+ "description": "The specific question to answer about the file"
457
+ }
458
+ },
459
+ "required": ["task_id", "question"]
460
+ }
461
+ }
462
+ }
463
+ }
464
 
465
+ def _identify_content_type(self, question, file_name, task_id):
466
+ """Identify the content type based on question and file_name"""
467
+ # Simple parsing for file detection
468
+ if file_name:
469
+ extension = file_name.split('.')[-1].lower()
470
+ extension_map = {
471
+ 'mp3': 'audio',
472
+ 'wav': 'audio',
473
+ 'png': 'image',
474
+ 'jpg': 'image',
475
+ 'jpeg': 'image',
476
+ 'xlsx': 'excel',
477
+ 'xls': 'excel',
478
+ 'csv': 'excel',
479
+ 'py': 'python'
480
+ }
481
+
482
+ if extension in extension_map:
483
+ return extension_map[extension], file_name
484
+
485
+ # Check for YouTube URLs
486
+ question_lower = question.lower()
487
+ youtube_pattern = r'https?://(?:www\.)?youtube\.com/watch\?v=[^\s\.,!?]+'
488
+ youtube_short_pattern = r'https?://(?:www\.)?youtu\.be/[^\s\.,!?]+'
489
+
490
+ is_youtube = "youtube" in question_lower or "video" in question_lower
491
+ has_youtube_url = re.search(youtube_pattern, question) or re.search(youtube_short_pattern, question)
492
 
493
+ if is_youtube or has_youtube_url:
494
+ return "youtube", question
495
+
496
+ # Check for Wikipedia references
497
+ if "wikipedia" in question_lower:
498
+ return "wiki", question
499
+
500
+ # Default to web search for general questions
501
+ return "web", question
502
 
503
+ def _extract_youtube_url(self, question):
504
+ """Extract YouTube URL from question if present"""
505
+ # First try exact pattern for watch URLs
506
+ youtube_pattern = r'https?://(?:www\.)?youtube\.com/watch\?v=[a-zA-Z0-9_-]{11}'
507
+ match = re.search(youtube_pattern, question)
508
+ if match:
509
+ return match.group(0)
510
+
511
+ # Then try youtu.be URLs
512
+ youtube_short_pattern = r'https?://(?:www\.)?youtu\.be/[a-zA-Z0-9_-]{11}'
513
+ match = re.search(youtube_short_pattern, question)
514
+ if match:
515
+ return match.group(0)
516
+
517
+ # Finally try a more lenient pattern
518
+ youtube_lenient_pattern = r'https?://(?:www\.)?youtube\.com/watch\?v=[^\s\.,!?]+'
519
+ match = re.search(youtube_lenient_pattern, question)
520
+ if match:
521
+ url = match.group(0).strip().rstrip('.,!?')
522
+ return url
523
+
524
+ return None
525
+
526
+ def _execute_tool(self, tool_name, args):
527
+ """Execute a tool based on name and arguments"""
528
+ result = None
529
+
530
+ if tool_name == "search_wikipedia":
531
+ query = args.get("query", "")
532
+ self.last_used_tool = "wiki"
533
+ result = self.wikipedia_tool._run(query)
534
+ elif tool_name == "search_web":
535
+ query = args.get("query", "")
536
+ self.last_used_tool = "web"
537
+ result = self.web_search_tool._run(query)
538
+ elif tool_name == "analyze_youtube":
539
+ url = args.get("url", "")
540
+ question = args.get("question", "")
541
+ self.last_used_tool = "youtube"
542
+ if not url and ("youtube.com" in question or "youtu.be" in question):
543
+ # Extract URL from question
544
+ url = self._extract_youtube_url(question)
545
+
546
+ # Use the extracted URL or full question if no URL
547
+ video_param = url if url else question
548
+ result = self.youtube_tool._run(video_param, question=question)
549
+ elif tool_name == "process_audio":
550
+ task_id = args.get("task_id", "")
551
+ question = args.get("question", "")
552
+ self.last_used_tool = "audio"
553
+ result = self.audio_tool._run(task_id, question)
554
+ elif tool_name == "analyze_image":
555
+ task_id = args.get("task_id", "")
556
+ question = args.get("question", "")
557
+ self.last_used_tool = "image"
558
+ result = self.image_tool._run(task_id, question=question)
559
+ elif tool_name == "analyze_excel":
560
+ task_id = args.get("task_id", "")
561
+ question = args.get("question", "")
562
+ self.last_used_tool = "excel"
563
+ result = self.excel_tool._run(task_id, question=question)
564
+ elif tool_name == "run_python":
565
+ task_id = args.get("task_id", "")
566
+ question = args.get("question", "")
567
+ self.last_used_tool = "python"
568
+ result = self.python_tool._run(task_id, question=question)
569
+ else:
570
+ result = f"Unknown tool: {tool_name}"
571
+
572
+ # Format the result for the assistant
573
+ if isinstance(result, list) and len(result) > 0 and hasattr(result[0], "page_content"):
574
+ content = result[0].page_content
575
+ elif hasattr(result, "page_content"):
576
+ content = result.page_content
577
+ else:
578
+ content = str(result)
579
+
580
+ return content
581
+
582
+ def _direct_answer_attempt(self, question):
583
+ """Try to answer directly without tools"""
584
+ direct_query = f"""Can you answer this question directly without using any tools?
585
  Question: {question}
586
 
587
+ If you can answer this directly (like math, text reversal, etc), provide the answer.
588
+ Your answer should be concise and direct. Focus only on answering the question.
589
+ No additional words or explanations.
590
+ Format:
591
+ <answer>
592
+ Otherwise respond with 'TOOLS_REQUIRED'."""
593
+
594
+ response = self.llm.invoke(direct_query)
 
 
 
 
 
 
 
 
595
  answer = response.content if hasattr(response, 'content') else str(response)
596
+
597
+ return answer if "TOOLS_REQUIRED" not in answer else None
598
+
599
+ def _optimize_query(self, question):
600
+ """Create an optimized search query for the question"""
601
+ query_prompt = f"""You are an agent that needs to understand user questions and formulate optimized search queries.
602
+
603
+ Question: {question}
604
+
605
+ Your task is to create an optimized search query that will retrieve the most relevant information.
606
+ Focus on extracting key entities, relationships, and constraints from the question.
607
+
608
+ Return only the optimized search query."""
609
+
610
+ response = self.llm.invoke(query_prompt)
611
+ return response.content if hasattr(response, 'content') else str(response)
612
 
613
  def process_question(self, task_id: str, question: str, file_name: str = "") -> str:
614
+ """Process a question using a multi-step approach with tools"""
615
  try:
616
  # Reset tool tracking
617
  self.last_used_tool = None
618
 
619
+ # Try answering directly first
620
+ direct_answer = self._direct_answer_attempt(question)
621
+ if direct_answer:
622
  self.last_used_tool = "direct"
623
  return direct_answer
624
 
625
+ # Identify content type
626
+ content_type, content_parameter = self._identify_content_type(question, file_name, task_id)
627
+
628
+ # For file-based questions, use the appropriate tool directly
629
  if file_name:
630
+ if content_type in self.file_tools:
631
+ tool_spec = self.file_tools[content_type]
632
+ tool_name = tool_spec["function"]["name"]
633
+ args = {"task_id": task_id, "question": question}
634
+ content = self._execute_tool(tool_name, args)
635
+
636
+ # Generate final answer
637
+ answer_prompt = f"""Based on the processed file information, answer the question precisely.
638
+
639
+ Question: {question}
640
+
641
+ File information: {content}
642
 
643
+ Your answer should be concise and direct. Focus only on answering the question.
644
+ No additional words or explanations.
645
+ Format:
646
+ <answer>
647
+ """
648
+
649
+ response = self.llm.invoke(answer_prompt)
650
+ return response.content if hasattr(response, 'content') else str(response)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
651
  else:
652
  return f"Unsupported file type: {content_type}"
653
 
654
+ # For YouTube content, use the specialized YouTube tool
 
 
 
 
655
  if content_type == "youtube":
656
+ youtube_url = self._extract_youtube_url(question)
657
+ args = {"url": youtube_url, "question": question}
658
+ content = self._execute_tool("analyze_youtube", args)
 
 
 
 
 
 
 
659
 
660
+ # Generate final answer
661
+ answer_prompt = f"""Based on the YouTube video content, answer the question precisely.
662
 
 
 
 
 
663
  Question: {question}
 
 
 
 
 
 
 
664
 
665
+ Video information: {content}
666
+
667
+ Your answer should be concise and direct. Focus only on answering the question.
 
 
668
 
669
+ No additional words or explanations.
670
+ Format:
671
+ <answer>
672
+ """
673
+
674
+ response = self.llm.invoke(answer_prompt)
675
  return response.content if hasattr(response, 'content') else str(response)
676
 
677
+ # Handle wiki and web searches directly
678
+ if content_type == "wiki":
679
+ # Direct wiki search
 
 
 
680
  self.last_used_tool = "wiki"
681
+ optimized_query = self._optimize_query(question)
682
+ wiki_result = self.wikipedia_tool._run(optimized_query)
683
+
684
+ # Format answer
685
+ answer_prompt = f"""Based on the following Wikipedia information, answer the question precisely:
686
+
687
+ Question: {question}
688
+
689
+ Information: {wiki_result}
690
+
691
+ Your answer should be concise and direct. Focus only on answering the question.
692
+ Pay careful attention to any formatting requirements in the question.
693
+ If asked for a city name without abbreviations, make sure to provide the full name (e.g., "Saint Petersburg" instead of "St. Petersburg").
694
+ No additional words or explanations.
695
+ Example:
696
+ Question: What is the capital of France?
697
+ Answer: Paris
698
+ Format:
699
+ <answer>
700
+ """
701
+
702
+ response = self.llm.invoke(answer_prompt)
703
+ return response.content if hasattr(response, 'content') else str(response)
704
+
705
+ if content_type == "web":
706
+ # Direct web search
707
+ self.last_used_tool = "web"
708
+ optimized_query = self._optimize_query(question)
709
+ web_result = self.web_search_tool._run(optimized_query)
710
+
711
+ # Format answer
712
+ answer_prompt = f"""Based on the following web search results, answer the question precisely:
713
+
714
+ Question: {question}
715
+
716
+ Information: {web_result}
717
+
718
+ Your answer should be concise and direct. Focus only on answering the question.
719
+ Pay careful attention to any formatting requirements in the question.
720
+ If asked for a city name without abbreviations, make sure to provide the full name (e.g., "Saint Petersburg" instead of "St. Petersburg").
721
+ If asked for only a first name or a code, provide only that specific information.
722
+ No additional words or explanations.
723
+ Format:
724
+ <answer>
725
+ """
726
+
727
+ response = self.llm.invoke(answer_prompt)
728
+ answer = response.content if hasattr(response, 'content') else str(response)
729
+
730
+ # Remove common verbose prefixes and patterns
731
+ answer = re.sub(
732
+ r'^(final answer:|answer:|the answer is|the final answer is|the final numeric output is|the vegetables are:|the answer to the question is|the correct answer is|output:|result:|response:)',
733
+ '', answer, flags=re.IGNORECASE
734
+ ).strip()
735
+
736
+ return answer
737
 
738
+ # For general questions, optimize the query and use a tool-equipped LLM
739
+ optimized_query = self._optimize_query(question)
 
 
 
740
 
741
+ # Create a LLM with appropriate tools for this question type
742
+ tools_to_use = self.general_tools
743
+ tool_equipped_llm = self.llm.bind_tools(tools_to_use)
744
 
745
+ # Formulate the system prompt based on content type
746
+ if content_type == "wiki":
747
+ system_prompt = f"""Answer this question using Wikipedia information.
748
+
749
+ Question: {question}
750
+
751
+ Optimized query: {optimized_query}
752
+
753
+ Use the search_wikipedia tool to find relevant information. Be concise and direct.
754
+ No additional words or explanations.
755
+ Format:
756
+ <answer>
757
+ """
758
+ else:
759
+ system_prompt = f"""Answer this question using web search or other appropriate tools.
760
+
761
+ Question: {question}
762
+
763
+ Optimized query: {optimized_query}
764
+
765
+ Use the most appropriate tool to find the information needed. Be concise and direct.
766
+ No additional words or explanations.
767
+ Format:
768
+ <answer>
769
+ """
770
 
771
+ # Get response from tool-equipped LLM
772
+ tool_response = tool_equipped_llm.invoke(system_prompt)
 
773
 
774
+ # Check if we got a function call
775
+ if hasattr(tool_response, 'additional_kwargs') and 'function_call' in tool_response.additional_kwargs:
776
+ # Extract tool info
777
+ function_call = tool_response.additional_kwargs['function_call']
778
+ tool_name = function_call['name']
779
+ try:
780
+ import json
781
+ args = json.loads(function_call['arguments'])
782
+ except:
783
+ args = {}
784
+
785
+ # Execute the tool
786
+ tool_result = self._execute_tool(tool_name, args)
787
+
788
+ # Generate final answer with tool result
789
+ answer_prompt = f"""Based on the following information, answer the question precisely.
790
+
791
+ Question: {question}
792
+
793
+ Information: {tool_result}
794
+
795
+ Your answer should be concise and direct. Focus only on answering the question.
796
+ No additional words or explanations.
797
+ Example:
798
+ Question: What is the capital of France?
799
+ Answer: Paris
800
+ Format:
801
+ <answer>
802
+ """
803
+
804
+ final_response = self.llm.invoke(answer_prompt)
805
+ return final_response.content if hasattr(final_response, 'content') else str(final_response)
806
+
807
+ # If no function call, return the direct response
808
+ return tool_response.content if hasattr(tool_response, 'content') else str(tool_response)
809
 
810
  except Exception as e:
811
  return f"An unexpected error occurred: {str(e)}"
812
+
813
class MainAgent:
    """Main agent orchestrating the workflow using StateGraph.

    Graph layout::

        START -> direct_answer -> END                      (answered directly)
                               -> retriever -> assistant -> END
                                                         -> tools -> synthesize_answer -> END

    Nodes receive the graph state and return a partial update of the form
    ``{"messages": [...]}``.
    """

    # Long-form URLs are checked before short ``youtu.be`` links.
    _YOUTUBE_URL_PATTERNS = (
        r'https?://(?:www\.)?youtube\.com/watch\?v=[^\s\.,!?]+',
        r'https?://(?:www\.)?youtu\.be/[^\s\.,!?]+',
    )

    # File extension -> content type handled by a file-based tool.
    _EXTENSION_TYPES = {
        'mp3': 'audio',
        'wav': 'audio',
        'png': 'image',
        'jpg': 'image',
        'jpeg': 'image',
        'xlsx': 'excel',
        'xls': 'excel',
        'csv': 'excel',
        'py': 'python',
    }

    def __init__(self):
        self.llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")

        # Initialize tools
        self.wikipedia_tool = WikipediaTool()
        self.web_search_tool = WebSearchTool()
        self.youtube_tool = YouTubeVideoTool()
        self.image_tool = ImageTool()
        self.audio_tool = AudioTool()
        self.excel_tool = ExcelTool()
        self.python_tool = PythonTool()

        # Create a dictionary of tools for easy access
        self.tools = {
            "wiki": self.wikipedia_tool,
            "web": self.web_search_tool,
            "youtube": self.youtube_tool,
            "image": self.image_tool,
            "audio": self.audio_tool,
            "excel": self.excel_tool,
            "python": self.python_tool,
        }

        # Tool usage tracking
        self.last_used_tool = None

        # Create StateGraph for orchestration
        self.graph = self._build_graph()

    # ------------------------------------------------------------------
    # Small helpers
    # ------------------------------------------------------------------
    @staticmethod
    def _messages_of(state):
        """Return the message list of ``state`` (mapping- or attribute-style)."""
        if isinstance(state, dict):
            return state.get("messages", [])
        return getattr(state, "messages", [])

    @staticmethod
    def _requested_tool(message):
        """Return ``(tool_name, args)`` requested by ``message``, or ``None``.

        Supports both ``tool_calls`` (produced when tools are bound with
        ``bind_tools``) and the legacy ``additional_kwargs['function_call']``
        form, whose arguments are a JSON string.
        """
        import json  # function-scope import: only the legacy path needs it

        calls = getattr(message, "tool_calls", None)
        if calls:
            first = calls[0]
            return first["name"], dict(first.get("args") or {})

        legacy = (getattr(message, "additional_kwargs", None) or {}).get("function_call")
        if not legacy:
            return None
        try:
            parsed = json.loads(legacy.get("arguments") or "{}")
        except (ValueError, TypeError):
            # Malformed arguments: run the tool with defaults rather than fail.
            parsed = {}
        if not isinstance(parsed, dict):
            parsed = {}
        return legacy["name"], parsed

    @staticmethod
    def _clean_answer(text):
        """Strip ``<answer>`` tags and a leading ``Answer:`` style prefix."""
        without_tags = re.sub(r'</?answer>', '', text, flags=re.IGNORECASE).strip()
        return re.sub(r'^(final answer:|answer:)', '', without_tags, flags=re.IGNORECASE).strip()

    @classmethod
    def _find_youtube_url(cls, text):
        """Return the first YouTube URL found in ``text``, or ``""``."""
        for pattern in cls._YOUTUBE_URL_PATTERNS:
            found = re.search(pattern, text)
            if found:
                return found.group(0)
        return ""

    @staticmethod
    def _function_spec(name, description, properties, required):
        """Build one function-tool schema in the dict format bound to the model."""
        return {
            "type": "function",
            "function": {
                "name": name,
                "description": description,
                "parameters": {
                    "type": "object",
                    "properties": {
                        key: {"type": "string", "description": text}
                        for key, text in properties.items()
                    },
                    "required": list(required),
                },
            },
        }

    # ------------------------------------------------------------------
    # Routing / classification
    # ------------------------------------------------------------------
    def _tools_condition(self, state):
        """Route to ``"tools"`` when the last AI message requests a tool, else ``END``."""
        messages = self._messages_of(state)
        if messages and isinstance(messages[-1], AIMessage) and self._requested_tool(messages[-1]):
            return "tools"
        return END

    def _identify_content_type(self, question, file_name, task_id):
        """Identify the content type based on question and file_name.

        Returns:
            ``(content_type, parameter)``: for a recognised file extension the
            mapped type and the file name; otherwise ``"youtube"``, ``"wiki"``
            or ``"web"`` together with the question. ``task_id`` is accepted
            for interface compatibility and is not used.
        """
        if file_name:
            extension = file_name.split('.')[-1].lower()
            if extension in self._EXTENSION_TYPES:
                return self._EXTENSION_TYPES[extension], file_name

        question_lower = question.lower()
        # Keyword heuristic: any mention of "youtube" or "video" counts.
        is_youtube = "youtube" in question_lower or "video" in question_lower
        if is_youtube or self._find_youtube_url(question):
            return "youtube", question

        if "wikipedia" in question_lower:
            return "wiki", question

        # Default to web search for general questions
        return "web", question

    def _create_tool_functions(self):
        """Create the list of function-tool schemas offered to the LLM."""
        tool_list = [
            self._function_spec(
                "search_wikipedia",
                "Search Wikipedia for information on a topic",
                {"query": "The search query to find information on Wikipedia"},
                ["query"],
            ),
            self._function_spec(
                "search_web",
                "Search the web for information on a topic",
                {"query": "The search query to find information on the web"},
                ["query"],
            ),
            self._function_spec(
                "analyze_youtube",
                "Analyze a YouTube video for information",
                {
                    "url": "The YouTube URL to analyze",
                    "question": "The specific question to answer about the video",
                },
                ["question"],
            ),
        ]

        # Audio, image, excel and python tools share one parameter schema.
        for tool_name, description in [
            ("process_audio", "Process an audio file to extract information"),
            ("analyze_image", "Analyze an image to extract information"),
            ("analyze_excel", "Analyze an Excel file to extract information"),
            ("run_python", "Run and analyze Python code"),
        ]:
            tool_list.append(self._function_spec(
                tool_name,
                description,
                {
                    "task_id": "The task ID associated with the file",
                    "question": "The specific question to answer about the file",
                },
                ["task_id", "question"],
            ))

        return tool_list

    # ------------------------------------------------------------------
    # Tool execution
    # ------------------------------------------------------------------
    def _execute_tool(self, tool_name, args):
        """Run the tool named ``tool_name`` with ``args`` and return text output.

        Records ``tool_name`` in ``last_used_tool``. Document-like results are
        reduced to ``page_content`` (first element for a non-empty list); any
        other result is converted with ``str``.
        """
        self.last_used_tool = tool_name

        if tool_name == "search_wikipedia":
            result = self.wikipedia_tool._run(args.get("query", ""))
        elif tool_name == "search_web":
            result = self.web_search_tool._run(args.get("query", ""))
        elif tool_name == "analyze_youtube":
            url = args.get("url", "")
            question = args.get("question", "")
            # Parenthesised on purpose: an explicitly supplied URL must never
            # be replaced by one scraped from the question text.
            if not url and ("youtube.com" in question or "youtu.be" in question):
                url = self._find_youtube_url(question)
            # Use the extracted URL or full question if no URL
            result = self.youtube_tool._run(url or question, question=question)
        elif tool_name == "process_audio":
            result = self.audio_tool._run(args.get("task_id", ""), args.get("question", ""))
        elif tool_name == "analyze_image":
            result = self.image_tool._run(args.get("task_id", ""), question=args.get("question", ""))
        elif tool_name == "analyze_excel":
            result = self.excel_tool._run(args.get("task_id", ""), question=args.get("question", ""))
        elif tool_name == "run_python":
            result = self.python_tool._run(args.get("task_id", ""), question=args.get("question", ""))
        else:
            result = f"Unknown tool: {tool_name}"

        if isinstance(result, list) and result and hasattr(result[0], "page_content"):
            return result[0].page_content
        if hasattr(result, "page_content"):
            return result.page_content
        return str(result)

    # ------------------------------------------------------------------
    # Graph construction
    # ------------------------------------------------------------------
    def _build_graph(self):
        """Build the state graph for orchestrating the agent workflow."""
        # One schema list is shared by the assistant binding (previously the
        # list was built here, left unused, and duplicated inline).
        tool_functions = self._create_tool_functions()

        retriever_prompt = PromptTemplate.from_template("\n".join([
            "You are an agent that needs to understand user questions and formulate optimized search queries.",
            "",
            "Question: {question}",
            "",
            "Your task is to create an optimized search query that will retrieve the most relevant information.",
            "Focus on extracting key entities, relationships, and constraints from the question.",
            "",
            "If the question is about searching something on the web, use the search_web tool or wikipedia tool.",
            "",
            "Example:",
            "Question: What is the capital of France?",
            'Optimized query: search_wikipedia("capital of France")',
            "",
            "Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.?",
            'Optimized query: search_web("Mercedes Sosa musician")',
            "",
            "Question: Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.",
            'Optimized query: search_web("Taishō Tamai\'s baseball player")',
            "",
            "",
            "Return only the optimized search query.",
        ]))

        retriever = (
            retriever_prompt
            | self.llm.with_config({"tags": ["retriever"]})
            | StrOutputParser()
        )

        assistant = self.llm.bind_tools(tools=tool_functions).with_config({"tags": ["assistant"]})

        def _first_question(state):
            """Return the content of the first human message, or ``None``."""
            for message in self._messages_of(state):
                if isinstance(message, HumanMessage):
                    return message.content
            return None

        def _direct_answer(state):
            """Attempt to answer the question directly without tools."""
            messages = self._messages_of(state)
            if not messages or not isinstance(messages[0], HumanMessage):
                return {"messages": []}

            question = messages[0].content
            query = "\n".join([
                "Analyze the question, understand the instructions, the context.",
                "If you can answer this directly (like math, text reversal, etc), provide the answer.",
                "Otherwise respond with 'TOOLS_REQUIRED'",
                "",
                f"Question: {question}",
                "",
                "Return your answer in the following format:",
                "Be concise, and to the point.",
                "Do not include any other text or comments, just the answer.",
                "If the question is:",
                '"What is the capital of France?"',
                "Your answer should be:",
                '"Paris"',
                "Format:",
                "<answer>",
                "",
            ])

            response = self.llm.invoke(query)
            answer = response.content if hasattr(response, 'content') else str(response)

            if "TOOLS_REQUIRED" not in answer:
                self.last_used_tool = "direct"
                return {"messages": [AIMessage(content=answer)]}
            # Nothing to add; routing continues to the retriever.
            return {"messages": []}

        def _after_direct_answer(state):
            """Finish when a direct answer was produced, else go on to retrieval."""
            messages = self._messages_of(state)
            if messages and isinstance(messages[-1], AIMessage):
                return END
            return "retriever"

        def _retrieve(state):
            """Turn the question into an optimized query and hand it to the assistant."""
            optimized = retriever.invoke({"question": _first_question(state) or ""})
            return {"messages": [HumanMessage(content=f"Suggested search query: {optimized}")]}

        def _assist(state):
            """Let the tool-equipped model answer or request a tool."""
            return {"messages": [assistant.invoke(self._messages_of(state))]}

        def _run_tool(state):
            """Run the tool requested by the most recent AI message."""
            messages = self._messages_of(state)
            request = self._requested_tool(messages[-1]) if messages else None
            if request is None:
                return {"messages": []}
            tool_name, args = request
            content = self._execute_tool(tool_name, args)
            return {"messages": [FunctionMessage(name=tool_name, content=content)]}

        def _synthesize_answer(state):
            """Condense the latest tool output into a bare answer."""
            question = _first_question(state)
            tool_output = None
            for msg in self._messages_of(state):
                if isinstance(msg, FunctionMessage):
                    tool_output = msg.content  # keep the latest tool output
            if not question or not tool_output:
                return {"messages": []}  # Defensive: should not happen

            answer_prompt = "\n".join([
                "You are a helpful AI assistant. Use the following context to answer the question as directly and concisely as possible.",
                "",
                f"Context: {tool_output}",
                "",
                f"Question: {question}",
                "",
                "Instructions:",
                "- Output ONLY the answer, with no extra words, no sentences, no restatement, no quotes, and no explanations.",
                "- Do NOT repeat or rephrase the question.",
                "- Do NOT include any introductory or closing phrases.",
                "- If the answer is a single word, number, or phrase, output only that.",
                "- If the answer is a list, output only the list as requested (e.g., comma-separated, one per line, etc.).",
                '- If the answer is not present in the context, output "NOT FOUND".',
                "",
                "Examples of correct answers:",
                "Q: What is the capital of France?",
                "A: Paris",
                "",
                "Q: What does Teal'c say in response to the question \"Isn't that hot?\"",
                "A: extremely",
                "",
                "Q: List the ingredients.",
                "A: salt, flour, eggs",
                "",
                "Examples of incorrect answers (do NOT do this):",
                "- The answer is Paris.",
                "- The final numeric output is 0.",
                "- The vegetables are: acorns, bell pepper, ...",
                "- Answer: extremely",
                "",
                "Now, output ONLY the answer.",
                "Output Format:",
                "<answer>",
                "",
            ])

            response = self.llm.invoke(answer_prompt)
            answer = response.content if hasattr(response, 'content') else str(response)
            return {"messages": [AIMessage(content=self._clean_answer(answer))]}

        builder = StateGraph(MessagesState)
        builder.add_node("direct_answer", _direct_answer)
        builder.add_node("retriever", _retrieve)
        builder.add_node("assistant", _assist)
        builder.add_node("tools", _run_tool)
        builder.add_node("synthesize_answer", _synthesize_answer)

        builder.add_edge(START, "direct_answer")
        builder.add_conditional_edges(
            "direct_answer",
            _after_direct_answer,
            {
                "retriever": "retriever",
                END: END
            }
        )
        builder.add_edge("retriever", "assistant")
        builder.add_conditional_edges(
            "assistant",
            self._tools_condition,
            {
                "tools": "tools",
                END: END
            }
        )
        builder.add_edge("tools", "synthesize_answer")
        builder.add_edge("synthesize_answer", END)

        return builder.compile()

    def process_question(self, task_id: str, question: str, file_name: str = "") -> str:
        """Process a question using the StateGraph.

        Args:
            task_id: Identifier of the task; mentioned to the model when a file
                is attached so file tools can be called with it.
            question: The user's question.
            file_name: Name of an attached file, or ``""`` when there is none.

        Returns:
            The content of the final AI message, a fixed notice when the graph
            produced no AI message, or ``"An unexpected error occurred: ..."``
            when an exception was raised (the traceback is logged).
        """
        try:
            # Reset tool tracking
            self.last_used_tool = None

            prompt_text = question
            if file_name:
                # Tell the model which kind of file is attached and its task id.
                content_type, _ = self._identify_content_type(question, file_name, task_id)
                prompt_text = f"{question}\n\nThis question involves a {content_type} file with task_id: {task_id}"

            result = self.graph.invoke({"messages": [HumanMessage(content=prompt_text)]})

            final_messages = result.get("messages", [])
            if not final_messages:
                return "No answer generated."

            final_message = final_messages[-1]
            if isinstance(final_message, AIMessage):
                return final_message.content

            # If the last message isn't from the AI, something went wrong
            return "Error: No AI response generated."

        except Exception as e:
            logging.getLogger(__name__).exception("MainAgent.process_question failed")
            return f"An unexpected error occurred: {str(e)}"
requirements.txt CHANGED
@@ -4,7 +4,10 @@ duckduckgo-search>=3.0.0
4
  gradio>=4.0.0
5
  langchain>=0.1.0
6
  langchain_community>=0.1.0
 
 
7
  langchain_openai>=0.1.0
 
8
  librosa>=0.10.0
9
  openai>=1.3.0
10
  openpyxl
 
4
  gradio>=4.0.0
5
  langchain>=0.1.0
6
  langchain_community>=0.1.0
7
+ langchain-core
8
+ langchain-huggingface
9
  langchain_openai>=0.1.0
10
+ langgraph
11
  librosa>=0.10.0
12
  openai>=1.3.0
13
  openpyxl
tools.py CHANGED
@@ -95,7 +95,7 @@ class WikipediaTool(BaseTool):
95
  wikipedia_tool: WikipediaQueryRun = Field(default_factory=lambda: WikipediaQueryRun(
96
  api_wrapper=WikipediaAPIWrapper(top_k_results=5)
97
  ))
98
-
99
  def _run(self, question: str) -> str:
100
  """Search Wikipedia and return the result as a string"""
101
  try:
@@ -697,7 +697,7 @@ class WebSearchTool(BaseTool):
697
  name: str = "web_search"
698
  description: str = "Search the web for information. Useful for questions about current events, specific facts, or topics not covered in Wikipedia."
699
  search_tool: DuckDuckGoSearchResults = Field(default_factory=DuckDuckGoSearchResults)
700
-
701
  def _extract_links_from_results(self, search_result: str) -> list:
702
  """Extract links from search results using string splitting"""
703
  links = []
 
95
  wikipedia_tool: WikipediaQueryRun = Field(default_factory=lambda: WikipediaQueryRun(
96
  api_wrapper=WikipediaAPIWrapper(top_k_results=5)
97
  ))
98
+ print("WikipediaTool initialized")
99
  def _run(self, question: str) -> str:
100
  """Search Wikipedia and return the result as a string"""
101
  try:
 
697
  name: str = "web_search"
698
  description: str = "Search the web for information. Useful for questions about current events, specific facts, or topics not covered in Wikipedia."
699
  search_tool: DuckDuckGoSearchResults = Field(default_factory=DuckDuckGoSearchResults)
700
+ print("WebSearchTool initialized")
701
  def _extract_links_from_results(self, search_result: str) -> list:
702
  """Extract links from search results using string splitting"""
703
  links = []