Final_Assignment_Template

Sleeping

App Files Files Community

DrekFretson committed on May 31, 2025

Commit

3872031

verified ·

1 Parent(s): 03caaac

Update agent.py

Browse files

Files changed (1) hide show

agent.py +53 -350

agent.py CHANGED Viewed

@@ -1,353 +1,56 @@
-import os
-import time
-from langchain.tools import Tool, tool
-from typing import Tuple, List
-from typing_extensions import TypedDict, Annotated, Optional
-from langgraph.graph.message import add_messages
-from langchain_core.messages import AnyMessage, HumanMessage, AIMessage, SystemMessage
-from langgraph.graph import START, StateGraph, END
-from langgraph.prebuilt import ToolNode, tools_condition
-from langchain_litellm import ChatLiteLLM
-from IPython.display import Image, display
-import asyncio
-from tools import (search_tool,
-                    download_tool,
-                    get_web_page,
-                    add,
-                    subtract,
-                    multiply,
-                    divide,
-                    power,
-                    square_root,
-                    get_information_from_wikipedia,
-                    get_information_from_arxiv,
-                    get_information_from_youtube,
-                    python_tool,
-                    get_information_from_json,
-                    get_information_from_audio,
-                    get_information_from_xml,
-                    get_information_from_docx,
-                    get_information_from_txt,
-                    get_information_from_pdf,
-                    get_information_from_csv,
-                    get_information_from_excel,
-                    get_information_from_pdb,
-                    get_information_from_image,
-                    get_information_from_pptx,
-                    get_all_files_from_zip,
-                    get_information_from_python)
-DELAY = 5
-TIME_SLEEP = 60/15 + DELAY
-GEMINI_API_KEY_1 = os.getenv("GOOGLE_API_KEY_1")
-GEMINI_API_KEY_2 = os.getenv("GOOGLE_API_KEY_2")
-GEMINI_API_KEY_3 = os.getenv("GOOGLE_API_KEY_3")
-chat_model_1 = ChatLiteLLM(model="gemini/gemini-2.0-flash",
-                         temperature=0,
-                         api_key=GEMINI_API_KEY_1,
-                         max_retries=10,
-                         verbose=True)
-chat_model_2 = ChatLiteLLM(model="gemini/gemini-2.0-flash",
-                         temperature=0,
-                         api_key=GEMINI_API_KEY_2,
-                         max_retries=10,
-                         verbose=True)
-chat_model_3 = ChatLiteLLM(model="gemini/gemini-2.0-flash",
-                         temperature=0,
-                         api_key=GEMINI_API_KEY_3,
-                         max_retries=10,
-                         verbose=True)
-class AgentState(TypedDict):
-    messages: Annotated[list[AnyMessage], add_messages]
-    question: Optional[str]
-    file_path: Optional[str]
-    task_id: Optional[str]
-    new_messages: Optional[int]
-    final_answer: Optional[str]
-    attempt: Optional[int]
-    chat_model: Optional[int]
-class MyAgent:
-    def __init__(self, web_tools=None):
-        print("MyAgent initialized.")
-        self.chat_1 = chat_model_1
-        self.chat_2 = chat_model_2
-        self.chat_3 = chat_model_3
-        self.tools = [search_tool,
-                    download_tool,
-                    get_web_page,
-                    add,
-                    subtract,
-                    multiply,
-                    divide,
-                    power,
-                    square_root,
-                    get_information_from_wikipedia,
-                    get_information_from_arxiv,
-                    get_information_from_youtube,
-                    python_tool,
-                    get_information_from_json,
-                    get_information_from_audio,
-                    get_information_from_xml,
-                    get_information_from_docx,
-                    get_information_from_txt,
-                    get_information_from_pdf,
-                    get_information_from_csv,
-                    get_information_from_excel,
-                    get_information_from_pdb,
-                    get_information_from_image,
-                    get_information_from_pptx,
-                    get_all_files_from_zip] + web_tools
-        self.chat_with_tools_1 = self.chat_1.bind_tools(self.tools, verbose=True)
-        self.chat_with_tools_2 = self.chat_2.bind_tools(self.tools, verbose=True)
-        self.chat_with_tools_3 = self.chat_3.bind_tools(self.tools, verbose=True)
-        self.chats = [self.chat_with_tools_1, self.chat_with_tools_2, self.chat_with_tools_3]
-        self.builder = StateGraph(AgentState)
-        self.builder.add_node("assistant", self.assistant)
-        self.builder.add_node("tools", ToolNode(self.tools))
-        self.builder.add_node("extract_data_from_file", self.extract_data_from_file)
-        self.builder.add_node("postprocess", self.postprocess)
-        self.builder.add_edge(START, "extract_data_from_file")
-        self.builder.add_edge("extract_data_from_file", "assistant")
-        self.builder.add_conditional_edges(
-            "assistant",
-            self.assistant_router,
-            {
-                "tools": "tools",
-                "postprocess": "postprocess"
-            }
         )
-        self.builder.add_edge("tools", "assistant")
-        self.builder.add_conditional_edges(
-            "postprocess",
-            self.answer_evaluation,
-            {
-                "RETRY": "assistant",
-                "END": END
-            }
-        )
-        self.agent = self.builder.compile()
-    async def __call__(self, question: str, file_path: str, task_id: str) -> str:
-        print("\033[1m\033[93m"+"="*150+"\033[0m")
-        print(f"QUESTION: {question}")
-        print(f"File: {file_path}")
-        prompt = f"""You are a general AI assistant. You will receive a user question and extracted data from associated files.
-Follow this process:
-1. Identify the required output type (e.g., number, string, list) and key concepts in the question.
-2. Before using any tools, check if the answer can be deduced or recalled directly. If yes, answer immediately. Never guess.
-3. If tools are needed:
-   - Create a plan with:
-     - The reasoning approach and tool sequence.
-     - A rephrased version of the question optimized for search engines (DuckDuckGo or Google).
-     - Search queries must:
-       - Be keyword-focused (avoid full sentences).
-       - Use advanced operators if helpful: `site:` for domains, `inurl:` for internal paths, `filetype:` for formats.
-       - Avoid punctuation, commas, quotes, or special characters.
-       - Cover multiple query angles if needed.
-4. Do not run any tool until the plan is complete.
-5. If a tool fails or returns no useful result:
-   - Reformulate the query with synonyms or tighter context.
-   - Retry or use a fallback tool.
-6. Analyze tool results carefully. If multiple source links appear, use `navigate_browser` to explore and extract relevant information from each.
-Report your thoughts, and finish your answer with the following template: FINAL ANSWER: [YOUR FINAL ANSWER].
-YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
-If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
-If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
-If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string."""
-        user_message = f"""Question: {question}
-Filepath: {file_path}"""
-        messages = [SystemMessage(content=prompt, name="SYSTEM"),
-                    HumanMessage(content=user_message, name="USER")]
-        response = await self.agent.ainvoke({"messages": messages,
-                                             "question": question,
-                                             "file_path": file_path,
-                                             "task_id": task_id,
-                                             "new_messages": 1,
-                                             "chat_model": 0,
-                                             "final_answer": "",
-                                             "attempt": 0},
-                                              {"recursion_limit": 100})
-        print("\033[1m\033[93m"+"="*150+"\033[0m")
-        return response['final_answer']
-    async def call_chat(self, chat, state: AgentState, max_retries=5):
-        from google.api_core.exceptions import GoogleAPICallError
-        for i in range(max_retries):
-            try:
-                return await chat.ainvoke(state["messages"])
-            except GoogleAPICallError as e:
-                if "503" in str(e) or "UNAVAILABLE" in str(e):
-                    wait = 2 ** i
-                    print(f"[Gemini] Overloaded (attempt {i+1}), retrying in {wait:.1f}s...")
-                    await asyncio.sleep(wait)
-                else:
-                    raise e
-        raise RuntimeError("Gemini failed after multiple retries")
-    async def assistant(self, state: AgentState):
-        new_messages = state["new_messages"]
-        for i in reversed(range(1, new_messages+1)):
-            print("\033[1m\033[92m"+"+"*150+"\033[0m")
-            name = state["messages"][-i].name
-            content = state["messages"][-i].content
-            print(f'\033[1m\033[96m{name}\033[0m: {content if len(content) < 5000 else content[:5000]}')
-        chat = self.chats[state["chat_model"]]
-        result = await self.call_chat(chat=chat, state=state)
-        state["chat_model"] += 1
-        state["chat_model"] %= len(self.chats)
-        result.name="ASSISTANT"
-        await asyncio.sleep(TIME_SLEEP)
-        print("\033[1m\033[92m"+"+"*150+"\033[0m")
-        content = result.content[:-2] if result.content[-2:] == '\n\n' else result.content
-        print(f'\033[1m\033[96m{result.name}\033[0m: {content}')
-        state["new_messages"] = 1
-        state["messages"].append(result)
-        return state
-    def extract_data_from_file(self, state: AgentState) -> str:
-        path = state["file_path"]
-        new_messages = state["new_messages"]
-        prompt = ""
-        messages = []
-        if path and "." in path:
-            ext = path.strip().split(".")[-1].lower()
-            print(f"Extension detected: {ext}")
-            if ext == "zip":
-                files, prompt = get_all_files_from_zip(path)
-                name = "get_all_file_from_zip"
-                messages.append(AIMessage(content=prompt, name=name))
-            else:
-                files = [path]
-            for file_path in files:
-                ext = file_path.strip().split(".")[-1].lower()
-                print(f"Extension detected: {ext}")
-                prompt = f"Information extracted from {file_path}.\n\n"
-                match ext:
-                    case "csv":
-                        content = get_information_from_csv.invoke(file_path)
-                        name = "get_information_from_csv"
-                    case "txt":
-                        content = get_information_from_txt.invoke(file_path)
-                        name = "get_information_from_txt"
-                    case "pdf":
-                        content = get_information_from_pdf.invoke(file_path)
-                        name = "get_information_from_pdf"
-                    case "json":
-                        content = get_information_from_json.invoke(file_path)
-                        name = "get_information_from_json"
-                    case "jsonld":
-                        content = get_information_from_json.invoke(file_path)
-                        name = "get_information_from_json"
-                    case "xml":
-                        content = get_information_from_xml.invoke(file_path)
-                        name = "get_information_from_xml"
-                    case "pdb":
-                        content = get_information_from_pdb.invoke(file_path)
-                        name = "get_information_from_pdb"
-                    case "mp3":
-                        content = get_information_from_audio.invoke(file_path)
-                        name = "get_information_from_audio"
-                    case "m4a":
-                        content = get_information_from_audio.invoke(file_path)
-                        name = "get_information_from_audio"
-                    case "docx":
-                        content = get_information_from_docx.invoke(file_path)
-                        name = "get_information_from_docx"
-                    case "xlsx":
-                        content = get_information_from_excel.invoke(file_path)
-                        name = "get_information_from_excel"
-                    case "xls":
-                        content = get_information_from_excel.invoke(file_path)
-                        name = "get_information_from_excel"
-                    case "png":
-                        content = get_information_from_image.invoke({"file_path": file_path, "question": state["question"]})
-                        name = "get_information_from_image"
-                    case "jpg":
-                        content = get_information_from_image.invoke({"file_path": file_path, "question": state["question"]})
-                        name = "get_information_from_image"
-                    case "py":
-                        content = get_information_from_python.invoke(file_path)
-                        name = "get_information_from_python"
-                    case "pptx":
-                        content = get_information_from_pptx.invoke(file_path)
-                        name = "get_information_from_pptx"
-                    case _:
-                        content = "Try to use some available tool to answer the user question."
-                        name = "handle_no_file"
-                prompt += f"{content}"
-                messages.append(AIMessage(content=prompt, name=name))
-                new_messages += 1
-        else:
-            prompt = "The question doesn't have an attached file."
-            name = "handle_no_file"
-        return {"messages": messages, "new_messages": new_messages}
-    def assistant_router(self, state: AgentState) -> str:
-        tool_decision = tools_condition(state)
-        if tool_decision == "tools":
-            return "tools"
-        else:
-            return "postprocess"
-    def postprocess(self, state: AgentState) -> AgentState:
-        last_msg = state["messages"][-1]
-        content = last_msg.content
-        index = content.find("FINAL ANSWER: ")
-        if index != -1:
-            content = content[index+len("FINAL ANSWER: "):].replace("\n", "")
-            state["final_answer"] = content
-            return state
-        else:
-            state["attempt"] += 1
-            prompt = f"""You were unable to find a satisfactory answer to the user's question.
-Now, try again, but use a different approach. You may:
-- Focus on a different angle of the question,
-- Reformulate it using alternative terminology,
-- Search for related concepts,
-- Or use a different reasoning path.
-Be creative and precise. Your goal is to uncover useful information that may have been missed previously.
-Original question:
-{state["question"]}"""
-            state["messages"].append(AIMessage(content=prompt, name="ASSISTANT"))
-            return state
-    def answer_evaluation(self, state: AgentState):
-        if state["final_answer"] != "":
-            return "END"
-        elif state["attempt"] >= 3:
-            state["final_answer"] = "Unable to find the answer."
-            return "END"
-        else:
-            return "RETRY"
-    def draw_graph(self):
-        display(Image(self.agent.get_graph().draw_mermaid_png()))
-        return

+from typing import Optional
+from smolagents import (
+    CodeAgent,
+    DuckDuckGoSearchTool,
+    InferenceClientModel,
+    VisitWebpageTool,
+)
+from tools import describe_image, transcribe_mp3, extract_data
+class SmolAgent:
+    def __init__(self):
+        print("SmolAgent initialized.")
+        # Initialize a simple CodeAgent with a DuckDuckGo search tool
+        self.search_tool = DuckDuckGoSearchTool()
+        self.visit_web_tool = VisitWebpageTool()
+        self.agent = CodeAgent(
+            name="SmolAgent",
+            description="An agent that can solve GAIA challenges using web search and code execution.",
+            tools=[
+                self.search_tool,
+                self.visit_web_tool,
+                describe_image,
+                transcribe_mp3,
+                extract_data,
+            ],
+            add_base_tools=True,
+            model=InferenceClientModel(),  # or another available model
+            additional_authorized_imports=["requests", "json", "pandas", "numpy"],
+            max_steps=5,
         )
+    def run(self, question: str, file_path: Optional[str] = None) -> str:
+        print(f"Agent received question (first 50 chars): {question[:50]}...")
+        # Use the CodeAgent to answer the question
+        file_prompt = ""
+        if file_path:
+            file_prompt = f"You can find the provided fiel at {file_path}"
+        prompt = f"""
+            You are a general AI assistant. I will ask you a question. And I want you to reply with just your final answer.
+            YOUR FINAL ANSWER should be a number OR as few words as possible OR a comma separated list of numbers and/or strings.
+            If you are asked for a number, don't use comma to write your number neither use units such as $ or percent sign unless specified otherwise.
+            If you are asked for a string, don't use articles, neither abbreviations (e.g. for cities), and write the digits in plain text unless specified otherwise.
+            If you are asked for a comma separated list, apply the above rules depending of whether the element to be put in the list is a number or a string.
+            Question: {question}
+            {file_prompt}
+        """
+        answer = self.agent.run(prompt)
+        print(f"Agent returning answer: {answer}")
+        return answer