Final_Assignment_Agents

Build error

App Files Community

ernani committed on Apr 30, 2025

Commit

794ea68

1 Parent(s): 24b20b9

code refactor - first test

Browse files

Files changed (4) hide show

app.py +3 -3
manage_agents.py +1057 -346
requirements.txt +3 -0
tools.py +2 -2

app.py CHANGED Viewed

@@ -3,7 +3,7 @@ import gradio as gr
 import requests
 import inspect
 import pandas as pd
-from manage_agents import MainAgent
 from dotenv import load_dotenv
 # Load environment variables
@@ -15,8 +15,8 @@ DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 # --- Agent Implementation ---
 class SearchAgent:
     def __init__(self):
-        self.agent = MainAgent()
-        print("SearchAgent initialized with MainAgent.")
     def __call__(self, task_id: str, question: str, file_name: str = "") -> str:
         print(f"Processing question: {question[:100]}...")

 import requests
 import inspect
 import pandas as pd
+from manage_agents import StateGraphAgent
 from dotenv import load_dotenv
 # Load environment variables
 # --- Agent Implementation ---
 class SearchAgent:
     def __init__(self):
+        self.agent = StateGraphAgent()
+        print("SearchAgent initialized with StateGraphAgent.")
     def __call__(self, task_id: str, question: str, file_name: str = "") -> str:
         print(f"Processing question: {question[:100]}...")

manage_agents.py CHANGED Viewed

@@ -1,6 +1,7 @@
 from typing import Dict, List, Optional, Tuple
 from langchain.agents import AgentExecutor
 from langchain_openai import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
@@ -19,6 +20,16 @@ from tools import (
     ContentProcessingError
 )
 import logging
 class ContentTypeAgent:
     """Agent responsible for identifying content type and selecting appropriate tool"""
@@ -178,6 +189,8 @@ class ProcessContentAgent:
             Do not include explanations, steps, reasoning, or additional text.
             Be direct and specific. GAIA benchmark requires exact matching answers.
             For example, if asked "What is the capital of France?", respond simply with "Paris".
             """
         )
@@ -264,11 +277,20 @@ class ContentTranslateAgent:
         result = self.chain.invoke(question).strip()
         return result
-class MainAgent:
-    """Main agent orchestrating the workflow"""
     def __init__(self):
-        self.llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
         # Initialize tools
         self.wikipedia_tool = WikipediaTool()
@@ -290,398 +312,1087 @@ class MainAgent:
             "python": self.python_tool,
         }
-        # Initialize special agents
-        self.content_translate = ContentTranslateAgent(self.llm)
-        self.content_type_agent = ContentTypeAgent(self.llm)
-        # Create LLM with tools bound for tool-using capabilities
-        self.general_tools = [self.wikipedia_tool, self.web_search_tool]
-        self.llm_with_tools = self.llm.bind_tools(self.general_tools)
         # Tool usage tracking
         self.last_used_tool = None
-    def _format_question(self, question: str) -> str:
-        """Format the question to be more specific and clear"""
-        prompt = f"""You are an expert in transforming user questions into clear, specific, and search-optimized queries.
-        Rewrite the following question with the following goals:
-        - Add any necessary missing context to make it fully unambiguous
-        - Make the question as specific as possible for retrieval by a search engine or knowledge base
-        - Ensure the query is effective for retrieving the exact information needed
-        - Query should use the context and not the entire question
-        Question: {question}
-        Example:
-        Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.
-        Query: Mercedes Sosa musician
-        Question: Who did the actor who played Ray in the Polish-language version of Everybody Loves Raymond play in Magda M.? Give only the first name.
-        Query: "Everybody Loves Raymond" actor Polish version Magda M.
-        Question: Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.
-        Query: Taishō Tamai baseball player
-        Return only the rewritten query, no extra commentary.
-        The query should be highly optimized for retrieving the exact information needed.
-        """
-        response = self.llm.invoke(prompt)
-        formatted_query = response.content if hasattr(response, 'content') else str(response)
-        return formatted_query
-    def _get_answer_using_tools(self, question: str) -> str:
-        """Get answer using tools bound directly to the LLM"""
-        prompt = f"""Answer the following question using the provided tools when necessary.
         Question: {question}
-        Instructions:
-        1. Use the wikipedia tool for questions about facts, history, people, places, etc.
-        2. Use the web_search tool for current events or specific detailed information not typically found in an encyclopedia.
-        3. Provide a direct, concise answer based on the information you find.
-        4. If the search tools don't provide enough information, acknowledge what's missing.
-        5. Important: Be specific and direct with your answer. If asked for a name, provide only the name.
-        6. If asked for a specific piece of information (like a number, date, or code), provide exactly that.
-        7. Do not include explanations unless specifically asked.
-        When answering, provide ONLY the precise answer requested.
-        Do not include explanations, steps, reasoning, or additional text.
-        Be direct and specific. GAIA benchmark requires exact matching answers.
-        For example, if asked "What is the color of the sky?", respond simply with "blue".
-        """
-        response = self.llm_with_tools.invoke(prompt)
         answer = response.content if hasattr(response, 'content') else str(response)
-        return answer
     def process_question(self, task_id: str, question: str, file_name: str = "") -> str:
         try:
             # Reset tool tracking
             self.last_used_tool = None
-            # First check if we can answer this directly without tools
-            direct_answer = self.content_translate.answer_or_flag(question)
-            if direct_answer != "TOOLS_REQUIRED":
                 self.last_used_tool = "direct"
                 return direct_answer
-            # If we have a file to process, use specialized tools
             if file_name:
-                # Identify content type based on file extension
-                content_type, parameter, task_id = self.content_type_agent.identify_content_type(question, file_name, task_id)
-                self.last_used_tool = content_type
-                if content_type in self.tools:
-                    tool = self.tools[content_type]
-                    try:
-                        if content_type == "excel":
-                            result = tool._run(task_id, question=question)
-                            excel_data = result.page_content if hasattr(result, 'page_content') else str(result)
-                            # Use specialized prompt for Excel analysis
-                            excel_analysis_prompt = f"""
-                            Analyze this Excel data and provide an extremely concise answer:
-                            Question: {question}
-                            Excel Data:
-                            {excel_data}
-                            Instructions:
-                            1. Focus only on answering the specific question being asked
-                            2. If the question asks for a calculation or total, compute it precisely
-                            3. Format currency values properly (like $123.45) if requested
-                            4. Provide ONLY the answer in a clear, concise format - no additional explanations
-                            5. If the answer is a number or calculation result, verify it's correct before responding
-                            When answering, provide ONLY the precise answer requested.
-                            Do not include explanations, steps, reasoning, or additional text.
-                            Be direct and specific. GAIA benchmark requires exact matching answers.
-                            For example, if asked "What is the total revenue?", respond simply with the exact number, like "$1,234.56".
-                            """
-                            response = self.llm.invoke(excel_analysis_prompt)
-                            return response.content if hasattr(response, 'content') else str(response)
-                        elif content_type == "python":
-                            result = tool._run(task_id, question=question)
-                            python_code = result[0].page_content if result and hasattr(result[0], 'page_content') else str(result)
-                            # Use specialized prompt for Python code analysis
-                            python_analysis_prompt = f"""
-                            Analyze this Python code and provide an extremely concise answer:
-                            Question: {question}
-                            Python Code:
-                            {python_code}
-                            Instructions:
-                            1. If asked about the output or result of the code, mentally trace through the execution
-                            2. Pay close attention to loops, conditionals, and mathematical operations
-                            3. Provide ONLY the final output/answer without extra explanation
-                            4. If the question asks for a specific value or number, provide just that value
-                            When answering, provide ONLY the precise answer requested.
-                            Do not include explanations, steps, reasoning, or additional text.
-                            Be direct and specific. GAIA benchmark requires exact matching answers.
-                            For example, if asked "What is the output of this code?", respond simply with the exact output value.
-                            """
-                            response = self.llm.invoke(python_analysis_prompt)
-                            return response.content if hasattr(response, 'content') else str(response)
-                        elif content_type == "image":
-                            # Image tool needs both task_id and question
-                            result = tool._run(task_id, question=question)
-                            return result.page_content if hasattr(result, 'page_content') else str(result)
-                        elif content_type == "audio":
-                            # Audio tool needs both task_id and question
-                            documents = tool._run(task_id, question)
-                            audio_transcript = documents[0].page_content if documents and hasattr(documents[0], 'page_content') else str(documents)
-                            # Use specialized prompt for audio analysis
-                            audio_analysis_prompt = f"""
-                            Provide an extremely concise answer based on this transcript:
-                            Question: {question}
-                            Audio Transcript:
-                            {audio_transcript}
-                            Instructions:
-                            1. Pay careful attention to the specific format requested in the question
-                            2. Extract only the information needed to answer the question
-                            3. Format your answer exactly as requested (comma-separated list, alphabetical order, etc.)
-                            4. Do not include any explanations or extra text in your answer
-                            5. If asked to provide specific items (like ingredients), be sure to list ALL of them
-                            When answering, provide ONLY the precise answer requested.
-                            Do not include explanations, steps, reasoning, or additional text.
-                            Be direct and specific. GAIA benchmark requires exact matching answers.
-                            For example, if asked "What is the color of the sky?", respond simply with "blue".
-                            """
-                            response = self.llm.invoke(audio_analysis_prompt)
-                            return response.content if hasattr(response, 'content') else str(response)
-                        elif content_type == "youtube":
-                            result = tool._run(task_id, question=question)
-                            # Use specialized prompt for YouTube analysis
-                            youtube_analysis_prompt = f"""
-                            Analyze this YouTube video and provide an extremely concise answer:
-                            Question: {question}
-                            YouTube Video:
-                            {result}
-                            Instructions:
-                            1. Pay careful attention to the specific format requested in the question
-                            2. Extract only the information needed to answer the question
-                            When answering, provide ONLY the precise answer requested.
-                            Do not include explanations, steps, reasoning, or additional text.
-                            Be direct and specific. GAIA benchmark requires exact matching answers.
-                            For example, if asked "What is the color of the sky?", respond simply with "blue".
-                            """
-                            response = self.llm.invoke(youtube_analysis_prompt)
-                            return response.content if hasattr(response, 'content') else str(response)
-                        else:
-                            # Even for other tools, pass the question if the method accepts it
-                            try:
-                                # Try with question parameter first
-                                documents = tool._run(task_id, question)
-                            except TypeError:
-                                # Fall back to just task_id if question isn't accepted
-                                documents = tool._run(task_id)
-                            # Use specialized prompt for answer
-                            documents = documents[0].page_content if documents and hasattr(documents[0], 'page_content') else str(documents)
-                            answer_analysis_prompt = f"""
-                            Analyze the following and provide an extremely concise answer:
-                            Question: {question}
-                            Raw Answer: {documents}
-                            Instructions:
-                            1. Pay careful attention to the specific format requested in the question
-                            2. Extract only the information needed to answer the question
-                            3. Format your answer exactly as requested (comma-separated list, alphabetical order, etc.)
-                            4. Do not include any explanations or extra text in your answer
-                            5. If asked to provide specific items or information, be sure to include ALL of them
-                            When answering, provide ONLY the precise answer requested.
-                            Do not include explanations, steps, reasoning, or additional text.
-                            Be direct and specific. GAIA benchmark requires exact matching answers.
-                            For example, if asked "What is the capital of France?", respond simply with "Paris".
-                            """
-                            response = self.llm.invoke(answer_analysis_prompt)
-                            return response.content if hasattr(response, 'content') else str(response)
-                    except Exception as e:
-                        return f"Error processing file: {str(e)}"
                 else:
                     return f"Unsupported file type: {content_type}"
-            # For general questions (no files), check for special content types first
-            # This is important for things like YouTube URLs that don't have a file
-            content_type, parameter, _ = self.content_type_agent.identify_content_type(question, "", task_id)
-            # Handle YouTube URLs in general questions
             if content_type == "youtube":
-                self.last_used_tool = "youtube"
-                # Extract YouTube URL properly
-                if parameter.startswith("http"):
-                    youtube_url = parameter
-                else:
-                    # Try to extract URL from question if parameter doesn't have one
-                    youtube_url = self._extract_youtube_url(question)
-                    if not youtube_url:
-                        # If no URL found, we can't process the YouTube video
-                        return "Error: No valid YouTube URL found in the question."
-                result = self.youtube_tool._run(youtube_url, question=question)
-                # Use specialized prompt for YouTube analysis
-                youtube_analysis_prompt = f"""
-                Analyze this YouTube video and provide an extremely concise answer:
                 Question: {question}
-                YouTube Video:
-                {result}
-                Instructions:
-                1. Pay careful attention to the specific format requested in the question
-                2. Extract only the information needed to answer the question
-                When answering, provide ONLY the precise answer requested.
-                Do not include explanations, steps, reasoning, or additional text.
-                Be direct and specific. GAIA benchmark requires exact matching answers.
-                For example, if asked "What is the color of the sky?", respond simply with "blue".
-                """
-                response = self.llm.invoke(youtube_analysis_prompt)
                 return response.content if hasattr(response, 'content') else str(response)
-            # For general questions (no files), use improved search strategy
-            question_lower = question.lower()
-            answer = None
-            # Check for Wikipedia specific questions first
-            if "wikipedia" in question_lower:
                 self.last_used_tool = "wiki"
-                wiki_query = self._format_question(question)
-                wiki_result = self.wikipedia_tool._run(wiki_query)
-                answer = self._generate_answer_from_context(question, wiki_result)
-                if self._is_valid_answer(answer):
-                    return answer
-            # Use general web search with improved query formatting
-            self.last_used_tool = "web"
-            web_query = self._format_question(question)
-            web_result = self.web_search_tool._run(web_query)
-            answer = self._generate_answer_from_context(question, web_result)
-            if self._is_valid_answer(answer):
-                return answer
-            # If no good answer from web search, try with Wikipedia as a last resource
-            if "wikipedia" not in question_lower:  # Only if not already tried
-                self.last_used_tool = "wiki"
-                wiki_query = self._format_question(question)
-                wiki_result = self.wikipedia_tool._run(wiki_query)
-                answer = self._generate_answer_from_context(question, wiki_result)
-                if self._is_valid_answer(answer):
-                    return answer
-            # If we still don't have a good answer, use the general tools approach
-            self.last_used_tool = "general"
-            answer = self._get_answer_using_tools(question)
-            return answer
         except Exception as e:
             return f"An unexpected error occurred: {str(e)}"
-    def _generate_answer_from_context(self, question: str, context: str) -> str:
-        """Generate an answer based on the question and context"""
-        # Create a more effective prompt for answer generation
-        answer_prompt = f"""Based on the following information, provide an extremely concise answer:
-        Question: {question}
-        Information: {context}
-        Instructions:
-        1. Read the question carefully and identify exactly what is being asked for
-        2. Pay close attention to any formatting requirements in the question (e.g., "give only the city name", "without abbreviations", etc.)
-        3. Find the specific information in the context that directly answers the question
-        4. Format your answer exactly as requested - if asked for just a name, number, or code, provide only that
-        5. For numerical answers, double-check your calculation or counting
-        6. For names or places, ensure correct and complete spelling
-        7. If asked for a specific format like "comma-separated list" or "alphabetical order", follow that exactly
-        8. If asked for just a specific piece of information, do not include any other details
-        Example:
-        Question: what's the capital of france?
-        Answer: Paris
-        If your question asks for without abreviations:
-        city name: st. petersburg
-        Answer: Saint Petersburg
-        If your answer is a number, provide only the number.
-        Example:
-        Question: how many wheels does a car have?
-        Answer: 4
-        When answering, provide ONLY the precise answer requested.
-        Do not include explanations, steps, reasoning, or additional text.
-        Be direct and specific. GAIA benchmark requires exact matching answers.
-        """
-        try:
-            # Send prompt to LLM and get response
             response = self.llm.invoke(answer_prompt)
             answer = response.content if hasattr(response, 'content') else str(response)
-            # Post-process to ensure answer meets formatting requirements
-            answer = answer.strip()
-            return answer
-        except Exception as e:
-            return f"Could not generate an answer due to an error: {str(e)}"
-    def _is_valid_answer(self, answer: str) -> bool:
-        """Check if the answer appears to be valid and informative"""
-        if not answer:
-            return False
-        # Check for common patterns in invalid answers
-        invalid_patterns = [
-            "i don't have enough information",
-            "i cannot find",
-            "cannot be determined",
-            "is not provided in",
-            "not mentioned in",
-            "not specified in",
-            "not included in",
-            "not found in",
-            "not stated in",
-            "not given in",
-            "no information about",
-            "no specific information",
-            "information is not available",
-            "information is missing",
-            "unable to determine"
-        ]
-        # If the answer contains any invalid patterns, consider it invalid
-        if any(pattern in answer.lower() for pattern in invalid_patterns):
-            return False
-        return True

 from typing import Dict, List, Optional, Tuple
 from langchain.agents import AgentExecutor
 from langchain_openai import ChatOpenAI
+from langchain_huggingface import HuggingFaceEndpoint, ChatHuggingFace
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import LLMChain
 from langchain.prompts import PromptTemplate
     ContentProcessingError
 )
 import logging
+from langchain_core.messages import HumanMessage, AIMessage, FunctionMessage
+# Import langgraph instead of langchain.graphs
+from langgraph.graph import StateGraph, END, START
+from langgraph.prebuilt import ToolNode
+from langgraph.graph import MessagesState
+from dotenv import load_dotenv
+import os
+load_dotenv()
+HF_TOKEN = os.getenv("HF_TOKEN")
 class ContentTypeAgent:
     """Agent responsible for identifying content type and selecting appropriate tool"""
             Do not include explanations, steps, reasoning, or additional text.
             Be direct and specific. GAIA benchmark requires exact matching answers.
             For example, if asked "What is the capital of France?", respond simply with "Paris".
+            Answer format:
+            <answer>
             """
         )
         result = self.chain.invoke(question).strip()
         return result
+class StateGraphAgent:
+    """Modern implementation of MainAgent for tool orchestration"""
     def __init__(self):
+        self.llm = ChatOpenAI(temperature=0.2, model="gpt-4o-mini")
+        # llm = HuggingFaceEndpoint(
+        #     repo_id="Qwen/Qwen2.5-Coder-32B-Instruct",
+        #     #repo_id="meta-llama/Llama-3.3-70B-Instruct",
+        #     huggingfacehub_api_token=HF_TOKEN,
+        # )
+        # self.llm = ChatHuggingFace(llm=llm, verbose=True)
         # Initialize tools
         self.wikipedia_tool = WikipediaTool()
             "python": self.python_tool,
         }
         # Tool usage tracking
         self.last_used_tool = None
+        # Create LLM with generic tools bound for general purpose use
+        self.general_tools = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "search_wikipedia",
+                    "description": "Search Wikipedia for information on a topic",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "query": {
+                                "type": "string",
+                                "description": "The search query to find information on Wikipedia"
+                            }
+                        },
+                        "required": ["query"]
+                    }
+                }
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "search_web",
+                    "description": "Search the web for information on a topic",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "query": {
+                                "type": "string",
+                                "description": "The search query to find information on the web"
+                            }
+                        },
+                        "required": ["query"]
+                    }
+                }
+            },
+            {
+                "type": "function",
+                "function": {
+                    "name": "analyze_youtube",
+                    "description": "Analyze a YouTube video for information",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "url": {
+                                "type": "string",
+                                "description": "The YouTube URL to analyze"
+                            },
+                            "question": {
+                                "type": "string",
+                                "description": "The specific question to answer about the video"
+                            }
+                        },
+                        "required": ["question"]
+                    }
+                }
+            }
+        ]
+        # Create specific tools for file types
+        self.file_tools = {
+            "audio": {
+                "type": "function",
+                "function": {
+                    "name": "process_audio",
+                    "description": "Process an audio file to extract information",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "task_id": {
+                                "type": "string",
+                                "description": "The task ID associated with the file"
+                            },
+                            "question": {
+                                "type": "string",
+                                "description": "The specific question to answer about the file"
+                            }
+                        },
+                        "required": ["task_id", "question"]
+                    }
+                }
+            },
+            "image": {
+                "type": "function",
+                "function": {
+                    "name": "analyze_image",
+                    "description": "Analyze an image to extract information",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "task_id": {
+                                "type": "string",
+                                "description": "The task ID associated with the file"
+                            },
+                            "question": {
+                                "type": "string",
+                                "description": "The specific question to answer about the file"
+                            }
+                        },
+                        "required": ["task_id", "question"]
+                    }
+                }
+            },
+            "excel": {
+                "type": "function",
+                "function": {
+                    "name": "analyze_excel",
+                    "description": "Analyze an Excel file to extract information",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "task_id": {
+                                "type": "string",
+                                "description": "The task ID associated with the file"
+                            },
+                            "question": {
+                                "type": "string",
+                                "description": "The specific question to answer about the file"
+                            }
+                        },
+                        "required": ["task_id", "question"]
+                    }
+                }
+            },
+            "python": {
+                "type": "function",
+                "function": {
+                    "name": "run_python",
+                    "description": "Run and analyze Python code",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "task_id": {
+                                "type": "string",
+                                "description": "The task ID associated with the file"
+                            },
+                            "question": {
+                                "type": "string",
+                                "description": "The specific question to answer about the file"
+                            }
+                        },
+                        "required": ["task_id", "question"]
+                    }
+                }
+            }
+        }
+    def _identify_content_type(self, question, file_name, task_id):
+        """Classify a request as a file, YouTube, Wikipedia or web question.
+
+        A recognised file extension takes priority and is returned together
+        with ``file_name``. Otherwise the question text decides between
+        "youtube", "wiki" and the default "web", and the question itself is
+        returned as the second element. ``task_id`` is accepted but unused.
+        """
+        kinds_by_extension = {
+            'mp3': 'audio',
+            'wav': 'audio',
+            'png': 'image',
+            'jpg': 'image',
+            'jpeg': 'image',
+            'xlsx': 'excel',
+            'xls': 'excel',
+            'csv': 'excel',
+            'py': 'python'
+        }
+        if file_name:
+            # Text after the last dot (the whole name when there is no dot).
+            suffix = file_name.rsplit('.', 1)[-1].lower()
+            kind = kinds_by_extension.get(suffix)
+            if kind is not None:
+                return kind, file_name
+        lowered = question.lower()
+        url_patterns = (
+            r'https?://(?:www\.)?youtube\.com/watch\?v=[^\s\.,!?]+',
+            r'https?://(?:www\.)?youtu\.be/[^\s\.,!?]+',
+        )
+        # The bare words "youtube" or "video" are enough to pick the YouTube route.
+        mentions_video = "youtube" in lowered or "video" in lowered
+        if mentions_video or any(re.search(pattern, question) for pattern in url_patterns):
+            return "youtube", question
+        if "wikipedia" in lowered:
+            return "wiki", question
+        # Anything else is treated as a general web search.
+        return "web", question
+    def _extract_youtube_url(self, question):
+        """Return the first YouTube URL found in ``question``, or ``None``.
+
+        Patterns are tried in order: a ``youtube.com/watch?v=`` link with an
+        11-character video id, a ``youtu.be/`` link with an 11-character id,
+        and finally a ``youtube.com/watch?v=`` link whose id may have any
+        length and stops at whitespace or one of ``. , ! ?``.
+        """
+        strict_patterns = (
+            r'https?://(?:www\.)?youtube\.com/watch\?v=[a-zA-Z0-9_-]{11}',
+            r'https?://(?:www\.)?youtu\.be/[a-zA-Z0-9_-]{11}',
+        )
+        for pattern in strict_patterns:
+            found = re.search(pattern, question)
+            if found:
+                return found.group(0)
+        lenient = re.search(r'https?://(?:www\.)?youtube\.com/watch\?v=[^\s\.,!?]+', question)
+        if not lenient:
+            return None
+        # Trim surrounding whitespace and trailing sentence punctuation.
+        return lenient.group(0).strip().rstrip('.,!?')
+    def _execute_tool(self, tool_name, args):
+        """Run the tool called ``tool_name`` with ``args`` and return text.
+
+        ``self.last_used_tool`` is set to the short label of the tool before
+        it runs; an unknown name leaves it untouched and yields an
+        "Unknown tool" message. A result exposing ``page_content`` (directly,
+        or on the first element of a non-empty list) is reduced to that
+        content; anything else is converted with ``str``.
+        """
+        # Tools invoked as ``_run(task_id, question=...)``:
+        # tool name -> (tracking label, attribute holding the tool).
+        keyword_file_tools = {
+            "analyze_image": ("image", "image_tool"),
+            "analyze_excel": ("excel", "excel_tool"),
+            "run_python": ("python", "python_tool"),
+        }
+        if tool_name == "search_wikipedia":
+            search_text = args.get("query", "")
+            self.last_used_tool = "wiki"
+            raw = self.wikipedia_tool._run(search_text)
+        elif tool_name == "search_web":
+            search_text = args.get("query", "")
+            self.last_used_tool = "web"
+            raw = self.web_search_tool._run(search_text)
+        elif tool_name == "analyze_youtube":
+            video_url = args.get("url", "")
+            prompt = args.get("question", "")
+            self.last_used_tool = "youtube"
+            if not video_url and ("youtube.com" in prompt or "youtu.be" in prompt):
+                # No explicit URL was given, so look for one in the question.
+                video_url = self._extract_youtube_url(prompt)
+            # Fall back to the whole question when no URL is available.
+            raw = self.youtube_tool._run(video_url or prompt, question=prompt)
+        elif tool_name == "process_audio":
+            file_id = args.get("task_id", "")
+            prompt = args.get("question", "")
+            self.last_used_tool = "audio"
+            # The audio tool receives the question positionally.
+            raw = self.audio_tool._run(file_id, prompt)
+        elif tool_name in keyword_file_tools:
+            label, attribute = keyword_file_tools[tool_name]
+            file_id = args.get("task_id", "")
+            prompt = args.get("question", "")
+            self.last_used_tool = label
+            raw = getattr(self, attribute)._run(file_id, question=prompt)
+        else:
+            raw = f"Unknown tool: {tool_name}"
+        # Format the result for the assistant
+        if isinstance(raw, list) and raw and hasattr(raw[0], "page_content"):
+            return raw[0].page_content
+        if hasattr(raw, "page_content"):
+            return raw.page_content
+        return str(raw)
+    def _direct_answer_attempt(self, question):
+        """Ask the LLM to answer without tools.
+
+        Returns the model's reply text, or ``None`` when the reply contains
+        the marker ``TOOLS_REQUIRED``.
+        """
+        prompt = f"""Can you answer this question directly without using any tools?
+        Question: {question}
+        If you can answer this directly (like math, text reversal, etc), provide the answer.
+        Your answer should be concise and direct. Focus only on answering the question.
+        No additional words or explanations.
+        Format:
+        <answer>
+        Otherwise respond with 'TOOLS_REQUIRED'."""
+        reply = self.llm.invoke(prompt)
+        # Objects without a ``content`` attribute are stringified.
+        text = reply.content if hasattr(reply, 'content') else str(reply)
+        if "TOOLS_REQUIRED" in text:
+            return None
+        return text
+    def _optimize_query(self, question):
+        """Have the LLM rewrite ``question`` as a search query and return its reply text."""
+        prompt = f"""You are an agent that needs to understand user questions and formulate optimized search queries.
+        Question: {question}
+        Your task is to create an optimized search query that will retrieve the most relevant information.
+        Focus on extracting key entities, relationships, and constraints from the question.
+        Return only the optimized search query."""
+        reply = self.llm.invoke(prompt)
+        if hasattr(reply, 'content'):
+            return reply.content
+        # Objects without a ``content`` attribute are stringified.
+        return str(reply)
     def process_question(self, task_id: str, question: str, file_name: str = "") -> str:
+        """Answer ``question``, delegating to a tool when the LLM cannot answer alone.
+
+        The steps run in this order:
+          1. Ask for a direct answer via ``_direct_answer_attempt``.
+          2. Classify the request with ``_identify_content_type``.
+          3. If a file is attached, run the matching ``self.file_tools`` entry.
+          4. Otherwise serve "youtube", "wiki" and "web" content with their
+             dedicated tool and have the LLM phrase the final answer.
+          5. For any other content type, let a tool-bound LLM pick a tool.
+
+        Args:
+            task_id: Identifier passed to the file tools.
+            question: The question text.
+            file_name: Name of the attached file; empty when there is none.
+
+        Returns:
+            The answer text. Exceptions are not raised; they are reported as
+            ``"An unexpected error occurred: ..."``.
+        """
+        def _text_of(message):
+            """Return ``message.content`` when present, otherwise ``str(message)``."""
+            return message.content if hasattr(message, 'content') else str(message)
+        try:
+            # Reset tool tracking
+            self.last_used_tool = None
+            # Try answering directly first
+            direct_answer = self._direct_answer_attempt(question)
+            if direct_answer:
+                self.last_used_tool = "direct"
+                return direct_answer
+            # Identify content type
+            content_type, content_parameter = self._identify_content_type(question, file_name, task_id)
+            # For file-based questions, use the appropriate tool directly
+            if file_name:
+                if content_type in self.file_tools:
+                    tool_spec = self.file_tools[content_type]
+                    tool_name = tool_spec["function"]["name"]
+                    args = {"task_id": task_id, "question": question}
+                    content = self._execute_tool(tool_name, args)
+                    # Generate final answer
+                    answer_prompt = f"""Based on the processed file information, answer the question precisely.
+                    Question: {question}
+                    File information: {content}
+                    Your answer should be concise and direct. Focus only on answering the question.
+                    No additional words or explanations.
+                    Format:
+                    <answer>
+                    """
+                    return _text_of(self.llm.invoke(answer_prompt))
+                else:
+                    # NOTE: content_type may be a question-derived label such as
+                    # "web" when the file extension was not recognised.
+                    return f"Unsupported file type: {content_type}"
+            # For YouTube content, use the specialized YouTube tool
+            if content_type == "youtube":
+                youtube_url = self._extract_youtube_url(question)
+                args = {"url": youtube_url, "question": question}
+                content = self._execute_tool("analyze_youtube", args)
+                # Generate final answer
+                answer_prompt = f"""Based on the YouTube video content, answer the question precisely.
+                Question: {question}
+                Video information: {content}
+                Your answer should be concise and direct. Focus only on answering the question.
+                No additional words or explanations.
+                Format:
+                <answer>
+                """
+                return _text_of(self.llm.invoke(answer_prompt))
+            # Handle wiki and web searches directly
+            if content_type == "wiki":
+                # Direct wiki search
+                self.last_used_tool = "wiki"
+                optimized_query = self._optimize_query(question)
+                wiki_result = self.wikipedia_tool._run(optimized_query)
+                # Format answer
+                answer_prompt = f"""Based on the following Wikipedia information, answer the question precisely:
+                Question: {question}
+                Information: {wiki_result}
+                Your answer should be concise and direct. Focus only on answering the question.
+                Pay careful attention to any formatting requirements in the question.
+                If asked for a city name without abbreviations, make sure to provide the full name (e.g., "Saint Petersburg" instead of "St. Petersburg").
+                No additional words or explanations.
+                Example:
+                Question: What is the capital of France?
+                Answer: Paris
+                Format:
+                <answer>
+                """
+                return _text_of(self.llm.invoke(answer_prompt))
+            if content_type == "web":
+                # Direct web search
+                self.last_used_tool = "web"
+                optimized_query = self._optimize_query(question)
+                web_result = self.web_search_tool._run(optimized_query)
+                # Format answer
+                answer_prompt = f"""Based on the following web search results, answer the question precisely:
+                Question: {question}
+                Information: {web_result}
+                Your answer should be concise and direct. Focus only on answering the question.
+                Pay careful attention to any formatting requirements in the question.
+                If asked for a city name without abbreviations, make sure to provide the full name (e.g., "Saint Petersburg" instead of "St. Petersburg").
+                If asked for only a first name or a code, provide only that specific information.
+                No additional words or explanations.
+                Format:
+                <answer>
+                """
+                answer = _text_of(self.llm.invoke(answer_prompt))
+                # Remove common verbose prefixes and patterns
+                answer = re.sub(
+                    r'^(final answer:|answer:|the answer is|the final answer is|the final numeric output is|the vegetables are:|the answer to the question is|the correct answer is|output:|result:|response:)',
+                    '', answer, flags=re.IGNORECASE
+                ).strip()
+                return answer
+            # For general questions, optimize the query and use a tool-equipped LLM
+            optimized_query = self._optimize_query(question)
+            # Create a LLM with appropriate tools for this question type
+            tool_equipped_llm = self.llm.bind_tools(self.general_tools)
+            # "wiki" has already returned above, so only the generic prompt can apply here.
+            system_prompt = f"""Answer this question using web search or other appropriate tools.
+                Question: {question}
+                Optimized query: {optimized_query}
+                Use the most appropriate tool to find the information needed. Be concise and direct.
+                No additional words or explanations.
+                Format:
+                <answer>
+                """
+            # Get response from tool-equipped LLM
+            tool_response = tool_equipped_llm.invoke(system_prompt)
+            # Check if we got a function call
+            # NOTE(review): only the legacy "function_call" key is inspected; confirm
+            # whether the bound model reports calls under "tool_calls" instead.
+            if hasattr(tool_response, 'additional_kwargs') and 'function_call' in tool_response.additional_kwargs:
+                # Extract tool info
+                function_call = tool_response.additional_kwargs['function_call']
+                tool_name = function_call['name']
+                import json
+                try:
+                    args = json.loads(function_call['arguments'])
+                except (KeyError, TypeError, ValueError):
+                    # Missing or malformed arguments: run the tool with none.
+                    args = {}
+                # Execute the tool
+                tool_result = self._execute_tool(tool_name, args)
+                # Generate final answer with tool result
+                answer_prompt = f"""Based on the following information, answer the question precisely.
+                Question: {question}
+                Information: {tool_result}
+                Your answer should be concise and direct. Focus only on answering the question.
+                No additional words or explanations.
+                Example:
+                Question: What is the capital of France?
+                Answer: Paris
+                Format:
+                <answer>
+                """
+                return _text_of(self.llm.invoke(answer_prompt))
+            # If no function call, return the direct response
+            return _text_of(tool_response)
+        except Exception as e:
+            # Top-level boundary: report the failure as the answer instead of raising.
+            return f"An unexpected error occurred: {str(e)}"
+class MainAgent:
+    """Main agent orchestrating the workflow using StateGraph"""
+    def __init__(self):
+        """Create the chat model, instantiate every tool and build the graph."""
+        self.llm = ChatOpenAI(temperature=0, model="gpt-4o-mini")
+        # One instance per tool, keyed by its short content-type label.
+        self.tools = {
+            "wiki": WikipediaTool(),
+            "web": WebSearchTool(),
+            "youtube": YouTubeVideoTool(),
+            "image": ImageTool(),
+            "audio": AudioTool(),
+            "excel": ExcelTool(),
+            "python": PythonTool(),
+        }
+        # The same instances are also reachable through dedicated attributes.
+        self.wikipedia_tool = self.tools["wiki"]
+        self.web_search_tool = self.tools["web"]
+        self.youtube_tool = self.tools["youtube"]
+        self.image_tool = self.tools["image"]
+        self.audio_tool = self.tools["audio"]
+        self.excel_tool = self.tools["excel"]
+        self.python_tool = self.tools["python"]
+        # Name of the most recently used tool; nothing has run yet.
+        self.last_used_tool = None
+        # Built last: graph construction reads the attributes set above.
+        self.graph = self._build_graph()
+    def _tools_condition(self, state):
+        """Route to "tools" when the latest AI message requests a function call, else END."""
+        # NOTE(review): messages are read as an attribute of ``state``; confirm
+        # the graph state object exposes them that way.
+        if len(state.messages) == 0:
+            return END
+        latest = state.messages[-1]
+        if not isinstance(latest, AIMessage):
+            return END
+        if latest.additional_kwargs.get("function_call"):
+            return "tools"
+        return END
+    def _identify_content_type(self, question, file_name, task_id):
+        """Decide which kind of content a question refers to.
+
+        Returns ``(content_type, parameter)``. When ``file_name`` ends in a
+        known extension the type comes from that extension and the parameter
+        is the file name. Otherwise the type is "youtube", "wiki" or "web"
+        based on the question text, and the parameter is the question.
+        ``task_id`` is not used.
+        """
+        if file_name:
+            # Last dot-separated piece of the name, compared case-insensitively.
+            *_, ending = file_name.split('.')
+            file_kind = {
+                'mp3': 'audio',
+                'wav': 'audio',
+                'png': 'image',
+                'jpg': 'image',
+                'jpeg': 'image',
+                'xlsx': 'excel',
+                'xls': 'excel',
+                'csv': 'excel',
+                'py': 'python'
+            }.get(ending.lower())
+            if file_kind:
+                return file_kind, file_name
+        text = question.lower()
+        # Either keyword alone selects the YouTube route, even without a link.
+        has_keyword = any(word in text for word in ("youtube", "video"))
+        has_link = (
+            re.search(r'https?://(?:www\.)?youtube\.com/watch\?v=[^\s\.,!?]+', question)
+            or re.search(r'https?://(?:www\.)?youtu\.be/[^\s\.,!?]+', question)
+        )
+        if has_keyword or has_link:
+            return "youtube", question
+        # Wikipedia when named explicitly, web search for everything else.
+        return ("wiki" if "wikipedia" in text else "web"), question
+    def _create_tool_functions(self):
+        """Build the function-calling specs for every tool the assistant may use.
+
+        Returns a list of seven dicts shaped as
+        ``{"type": "function", "function": {name, description, parameters}}``:
+        two search tools taking a required ``query``, the YouTube tool taking
+        an optional ``url`` and a required ``question``, and four file tools
+        taking required ``task_id`` and ``question``.
+        """
+        def text_field(description):
+            """Schema entry for a single string parameter."""
+            return {"type": "string", "description": description}
+        def spec(name, description, properties, required):
+            """Wrap one tool's name, description and parameters in the common envelope."""
+            return {
+                "type": "function",
+                "function": {
+                    "name": name,
+                    "description": description,
+                    "parameters": {
+                        "type": "object",
+                        "properties": properties,
+                        "required": required,
+                    },
+                },
+            }
+        specs = [
+            spec(
+                "search_wikipedia",
+                "Search Wikipedia for information on a topic",
+                {"query": text_field("The search query to find information on Wikipedia")},
+                ["query"],
+            ),
+            spec(
+                "search_web",
+                "Search the web for information on a topic",
+                {"query": text_field("The search query to find information on the web")},
+                ["query"],
+            ),
+            spec(
+                "analyze_youtube",
+                "Analyze a YouTube video for information",
+                {
+                    "url": text_field("The YouTube URL to analyze"),
+                    "question": text_field("The specific question to answer about the video"),
+                },
+                ["question"],
+            ),
+        ]
+        # The file-based tools share one parameter schema and differ only in
+        # name and description.
+        file_based = (
+            ("process_audio", "Process an audio file to extract information"),
+            ("analyze_image", "Analyze an image to extract information"),
+            ("analyze_excel", "Analyze an Excel file to extract information"),
+            ("run_python", "Run and analyze Python code"),
+        )
+        specs.extend(
+            spec(
+                name,
+                summary,
+                {
+                    "task_id": text_field("The task ID associated with the file"),
+                    "question": text_field("The specific question to answer about the file"),
+                },
+                ["task_id", "question"],
+            )
+            for name, summary in file_based
+        )
+        return specs
+    def _build_graph(self):
+        """Build the state graph for orchestrating the agent workflow"""
+        # Create the tool functions for the assistant
+        tool_functions = self._create_tool_functions()
+        # Create the retriever node
+        retriever_prompt = PromptTemplate.from_template(
+            """You are an agent that needs to understand user questions and formulate optimized search queries.
+            Question: {question}
+            Your task is to create an optimized search query that will retrieve the most relevant information.
+            Focus on extracting key entities, relationships, and constraints from the question.
+            If the question is about searching something on the web, use the search_web tool or wikipedia tool.
+            Example:
+            Question: What is the capital of France?
+            Optimized query: search_wikipedia("capital of France")
+            Question: How many studio albums were published by Mercedes Sosa between 2000 and 2009 (included)? You can use the latest 2022 version of english wikipedia.?
+            Optimized query: search_web("Mercedes Sosa musician")
+            Question: Who are the pitchers with the number before and after Taishō Tamai's number as of July 2023? Give them to me in the form Pitcher Before, Pitcher After, use their last names only, in Roman characters.
+            Optimized query: search_web("Taishō Tamai's baseball player")
+            Return only the optimized search query."""
+        )
+        retriever = (
+            retriever_prompt
+            | self.llm.with_config({"tags": ["retriever"]})
+            | StrOutputParser()
+        )
+        # Create the assistant node with tools
+        assistant = self.llm.bind_tools(
+            tools=[
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "search_wikipedia",
+                        "description": "Search Wikipedia for information on a topic",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "query": {
+                                    "type": "string",
+                                    "description": "The search query to find information on Wikipedia"
+                                }
+                            },
+                            "required": ["query"]
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "search_web",
+                        "description": "Search the web for information on a topic",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "query": {
+                                    "type": "string",
+                                    "description": "The search query to find information on the web"
+                                }
+                            },
+                            "required": ["query"]
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "analyze_youtube",
+                        "description": "Analyze a YouTube video for information",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "url": {
+                                    "type": "string",
+                                    "description": "The YouTube URL to analyze"
+                                },
+                                "question": {
+                                    "type": "string",
+                                    "description": "The specific question to answer about the video"
+                                }
+                            },
+                            "required": ["question"]
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "process_audio",
+                        "description": "Process an audio file to extract information",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "task_id": {
+                                    "type": "string",
+                                    "description": "The task ID associated with the file"
+                                },
+                                "question": {
+                                    "type": "string",
+                                    "description": "The specific question to answer about the file"
+                                }
+                            },
+                            "required": ["task_id", "question"]
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "analyze_image",
+                        "description": "Analyze an image to extract information",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "task_id": {
+                                    "type": "string",
+                                    "description": "The task ID associated with the file"
+                                },
+                                "question": {
+                                    "type": "string",
+                                    "description": "The specific question to answer about the file"
+                                }
+                            },
+                            "required": ["task_id", "question"]
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "analyze_excel",
+                        "description": "Analyze an Excel file to extract information",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "task_id": {
+                                    "type": "string",
+                                    "description": "The task ID associated with the file"
+                                },
+                                "question": {
+                                    "type": "string",
+                                    "description": "The specific question to answer about the file"
+                                }
+                            },
+                            "required": ["task_id", "question"]
+                        }
+                    }
+                },
+                {
+                    "type": "function",
+                    "function": {
+                        "name": "run_python",
+                        "description": "Run and analyze Python code",
+                        "parameters": {
+                            "type": "object",
+                            "properties": {
+                                "task_id": {
+                                    "type": "string",
+                                    "description": "The task ID associated with the file"
+                                },
+                                "question": {
+                                    "type": "string",
+                                    "description": "The specific question to answer about the file"
+                                }
+                            },
+                            "required": ["task_id", "question"]
+                        }
+                    }
+                }
+            ]
+        ).with_config({"tags": ["assistant"]})
+        # Create the tools node
+        def _run_tool(state):
+            """Run the appropriate tool based on the function call"""
+            # Get the most recent AI message
+            last_message = state.messages[-1]
+            if not hasattr(last_message, "additional_kwargs") or "function_call" not in last_message.additional_kwargs:
+                return state
+            function_call = last_message.additional_kwargs["function_call"]
+            tool_name = function_call["name"]
+            arguments = function_call.get("arguments", "{}")
+            # Parse the arguments
+            import json
+            try:
+                args = json.loads(arguments)
+            except:
+                args = {}
+            # Track the tool used
+            self.last_used_tool = tool_name
+            result = ""
+            # Execute the correct tool
+            if tool_name == "search_wikipedia":
+                query = args.get("query", "")
+                result = self.wikipedia_tool._run(query)
+            elif tool_name == "search_web":
+                query = args.get("query", "")
+                result = self.web_search_tool._run(query)
+            elif tool_name == "analyze_youtube":
+                url = args.get("url", "")
+                question = args.get("question", "")
+                if not url and "youtube.com" in question or "youtu.be" in question:
+                    # Extract URL from question
+                    youtube_pattern = r'https?://(?:www\.)?youtube\.com/watch\?v=[^\s\.,!?]+'
+                    youtube_short_pattern = r'https?://(?:www\.)?youtu\.be/[^\s\.,!?]+'
+                    match = re.search(youtube_pattern, question) or re.search(youtube_short_pattern, question)
+                    if match:
+                        url = match.group(0)
+                # Use the extracted URL or full question if no URL
+                video_param = url if url else question
+                result = self.youtube_tool._run(video_param, question=question)
+            elif tool_name == "process_audio":
+                task_id = args.get("task_id", "")
+                question = args.get("question", "")
+                result = self.audio_tool._run(task_id, question)
+            elif tool_name == "analyze_image":
+                task_id = args.get("task_id", "")
+                question = args.get("question", "")
+                result = self.image_tool._run(task_id, question=question)
+            elif tool_name == "analyze_excel":
+                task_id = args.get("task_id", "")
+                question = args.get("question", "")
+                result = self.excel_tool._run(task_id, question=question)
+            elif tool_name == "run_python":
+                task_id = args.get("task_id", "")
+                question = args.get("question", "")
+                result = self.python_tool._run(task_id, question=question)
+            else:
+                result = f"Unknown tool: {tool_name}"
+            # Format the result for the assistant
+            if isinstance(result, list) and len(result) > 0 and hasattr(result[0], "page_content"):
+                content = result[0].page_content
+            elif hasattr(result, "page_content"):
+                content = result.page_content
+            else:
+                content = str(result)
+            # Add the function result to the state
+            state.messages.append(
+                FunctionMessage(
+                    name=tool_name,
+                    content=content
+                )
+            )
+            return state
+        # Build the graph
+        builder = StateGraph(MessagesState)
+        # Create a direct answer node
+        def _direct_answer(state):
+            """Attempt to answer the question directly without tools"""
+            # Get the question from the HumanMessage
+            if not state.messages or not isinstance(state.messages[0], HumanMessage):
+                return state
+            question = state.messages[0].content
+            query = f"""Analyze the question, understand the instructions, the context.
+            If you can answer this directly (like math, text reversal, etc), provide the answer.
+            Otherwise respond with 'TOOLS_REQUIRED'
+            Question: {question}
+            Return your answer in the following format:
+            Be concise, and to the point.
+            Do not include any other text or comments, just the answer.
+            If the question is:
+            "What is the capital of France?"
+            Your answer should be:
+            "Paris"
+            Format:
+            <answer>
+            """
+            response = self.llm.invoke(query)
+            answer = response.content if hasattr(response, 'content') else str(response)
+            if "TOOLS_REQUIRED" not in answer:
+                # We can answer directly
+                self.last_used_tool = "direct"
+                state.messages.append(AIMessage(content=answer))
+                return state
+            # Otherwise, continue to the retriever
+            return state
+        builder.add_node("direct_answer", _direct_answer)
+        builder.add_node("retriever", retriever)
+        builder.add_node("assistant", assistant)
+        builder.add_node("tools", _run_tool)
+        # Add synthesize_answer node
+        def _synthesize_answer(state):
+            import re
+            # Find the original question and the latest FunctionMessage (tool output)
+            question = None
+            tool_output = None
+            for msg in state.messages:
+                if isinstance(msg, HumanMessage):
+                    question = msg.content
+                if isinstance(msg, FunctionMessage):
+                    tool_output = msg.content
+            if not question or not tool_output:
+                return state  # Defensive: should not happen
+            # Compose the answer prompt
+            answer_prompt = f"""You are a helpful AI assistant. Use the following context to answer the question as directly and concisely as possible.
+            Context: {tool_output}
+            Question: {question}
+            Instructions:
+            - Output ONLY the answer, with no extra words, no sentences, no restatement, no quotes, and no explanations.
+            - Do NOT repeat or rephrase the question.
+            - Do NOT include any introductory or closing phrases.
+            - If the answer is a single word, number, or phrase, output only that.
+            - If the answer is a list, output only the list as requested (e.g., comma-separated, one per line, etc.).
+            - If the answer is not present in the context, output "NOT FOUND".
+            Examples of correct answers:
+            Q: What is the capital of France?
+            A: Paris
+            Q: What does Teal'c say in response to the question \"Isn't that hot?\"
+            A: extremely
+            Q: List the ingredients.
+            A: salt, flour, eggs
+            Examples of incorrect answers (do NOT do this):
+            - The answer is Paris.
+            - The final numeric output is 0.
+            - The vegetables are: acorns, bell pepper, ...
+            - Answer: extremely
+            Now, output ONLY the answer.
+            Output Format:
+            <answer>
+            """
             response = self.llm.invoke(answer_prompt)
             answer = response.content if hasattr(response, 'content') else str(response)
+            # Remove any prefix like "Final Answer:" or "Answer:" and strip whitespace
+            answer = re.sub(r'^(final answer:|answer:|<answer>|</answer>)', '', answer, flags=re.IGNORECASE).strip()
+            state.messages.append(AIMessage(content=answer))
+            return state
+        builder.add_node("synthesize_answer", _synthesize_answer)
+        # Add edges
+        builder.add_edge(START, "direct_answer")
+        builder.add_edge("direct_answer", "retriever")
+        builder.add_edge("retriever", "assistant")
+        builder.add_conditional_edges(
+            "assistant",
+            self._tools_condition,
+            {
+                "tools": "tools",
+                END: END
+            }
+        )
+        builder.add_edge("tools", "synthesize_answer")
+        builder.add_edge("synthesize_answer", END)
+        return builder.compile()
+    def process_question(self, task_id: str, question: str, file_name: str = "") -> str:
+        """Process a question using the StateGraph"""
+        try:
+            # Reset tool tracking
+            self.last_used_tool = None
+            # Prepare the initial state
+            initial_state = {"messages": [HumanMessage(content=question)]}
+            # Add file information to the question if necessary
+            if file_name:
+                content_type, parameter = self._identify_content_type(question, file_name, task_id)
+                question_with_context = f"{question}\n\nThis question involves a {content_type} file with task_id: {task_id}"
+                initial_state = {"messages": [HumanMessage(content=question_with_context)]}
+            # Run the graph
+            result = self.graph.invoke(initial_state)
+            # Extract the final answer
+            final_messages = result.get("messages", [])
+            if not final_messages:
+                return "No answer generated."
+            final_message = final_messages[-1]
+            if isinstance(final_message, AIMessage):
+                return final_message.content
+            # If the last message isn't from the AI, something went wrong
+            return "Error: No AI response generated."
+        except Exception as e:
+            return f"An unexpected error occurred: {str(e)}"

requirements.txt CHANGED Viewed

@@ -4,7 +4,10 @@ duckduckgo-search>=3.0.0
 gradio>=4.0.0
 langchain>=0.1.0
 langchain_community>=0.1.0
 langchain_openai>=0.1.0
 librosa>=0.10.0
 openai>=1.3.0
 openpyxl

 gradio>=4.0.0
 langchain>=0.1.0
 langchain_community>=0.1.0
+langchain-core
+langchain-huggingface
 langchain_openai>=0.1.0
+langgraph
 librosa>=0.10.0
 openai>=1.3.0
 openpyxl

tools.py CHANGED Viewed

@@ -95,7 +95,7 @@ class WikipediaTool(BaseTool):
     wikipedia_tool: WikipediaQueryRun = Field(default_factory=lambda: WikipediaQueryRun(
         api_wrapper=WikipediaAPIWrapper(top_k_results=5)
     ))
     def _run(self, question: str) -> str:
         """Search Wikipedia and return the result as a string"""
         try:
@@ -697,7 +697,7 @@ class WebSearchTool(BaseTool):
     name: str = "web_search"
     description: str = "Search the web for information. Useful for questions about current events, specific facts, or topics not covered in Wikipedia."
     search_tool: DuckDuckGoSearchResults = Field(default_factory=DuckDuckGoSearchResults)
     def _extract_links_from_results(self, search_result: str) -> list:
         """Extract links from search results using string splitting"""
         links = []

     wikipedia_tool: WikipediaQueryRun = Field(default_factory=lambda: WikipediaQueryRun(
         api_wrapper=WikipediaAPIWrapper(top_k_results=5)
     ))
+    print("WikipediaTool initialized")
     def _run(self, question: str) -> str:
         """Search Wikipedia and return the result as a string"""
         try:
     name: str = "web_search"
     description: str = "Search the web for information. Useful for questions about current events, specific facts, or topics not covered in Wikipedia."
     search_tool: DuckDuckGoSearchResults = Field(default_factory=DuckDuckGoSearchResults)
+    print("WebSearchTool initialized")
     def _extract_links_from_results(self, search_result: str) -> list:
         """Extract links from search results using string splitting"""
         links = []