Agent_Course_Final_Assignment

Runtime error

App Files Files Community

lwant committed on Jun 29, 2025

Commit

5ab1e4f

1 Parent(s): cfb1f9f

fixup! Add file handling support in `hf_submission_api`, include file save logic, and static file directory setup

Browse files

Files changed (3) hide show

.gitattributes +2 -0
src/gaia_solving_agent/agent.py +107 -31
src/gaia_solving_agent/hf_submission_api.py +42 -4

.gitattributes CHANGED Viewed

@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+*.mp3 filter=lfs diff=lfs merge=lfs -text
+*.psd filter=lfs diff=lfs merge=lfs -text

src/gaia_solving_agent/agent.py CHANGED Viewed

@@ -1,18 +1,24 @@
-import re
 from pathlib import Path
-from typing import Any
 from llama_index.core.agent.workflow import FunctionAgent, AgentWorkflow
 from llama_index.core.prompts import RichPromptTemplate
 from llama_index.llms.nebius import NebiusLLM
 from llama_index.tools.requests import RequestsToolSpec
 from llama_index.tools.wikipedia import WikipediaToolSpec
-from workflows import Workflow, step
 from workflows.events import StartEvent, Event, StopEvent
 from gaia_solving_agent import NEBIUS_API_KEY
 from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
-from gaia_solving_agent.tools import tavily_search_web, wikipedia_tool_spec
 # Choice of the model
 cheap_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
@@ -31,12 +37,12 @@ def get_llm(model_name=cheap_model_name):
         max_retries=5,
     )
 class QueryEvent(Event):
-    query: str
-    additional_file: Any | None
-    additional_file_path: str | Path | None = None
-    plan: str
 class AnswerEvent(Event):
     plan: str
@@ -45,44 +51,99 @@ class AnswerEvent(Event):
 class GaiaWorkflow(Workflow):
     @step
-    async def setup(self, ev: StartEvent) -> QueryEvent:
         llm = get_llm(reasoning_model_name)
         prompt_template = RichPromptTemplate(PLANING_PROMPT)
-        file_extension = Path(ev.additional_file_path).suffix if ev.additional_file_path else ""
-        plan = llm.complete(prompt_template.format(
-            user_request=ev.user_msg,
             additional_file_extension=file_extension,
-        ))
-        return QueryEvent(
-            query=ev.user_msg,
-            additional_file=ev.additional_file,
-            additional_file_path=ev.additional_file_path,
-            plan=plan.text,
         )
     @step()
-    async def multi_agent_process(self, ev: QueryEvent) -> AnswerEvent:
         # Cheap trick to avoid Error 400 errors from OpenAPI
         from llama_index.core.memory import ChatMemoryBuffer
         memory = ChatMemoryBuffer.from_defaults(token_limit=100000)
         agent_output = await gaia_solving_agent.run(
-            user_msg=ev.plan,
             memory=memory,
-            additional_file=ev.additional_file,
-            additional_file_path=ev.additional_file_path,
         )
-        return AnswerEvent(plan=ev.plan, answer=str(agent_output))
     @step
-    async def parse_answer(self, ev: AnswerEvent) -> StopEvent:
         llm = get_llm(balanced_model_name)
         prompt_template = RichPromptTemplate(FORMAT_ANSWER)
-        pattern = r"<Question> :\s*(.*)[\n$]"
-        search = re.search(pattern, ev.plan)
-        question = search.group(1) if search else ""
-        result = llm.complete(prompt_template.format(question=question))
-        return StopEvent(result=result)
 tavily_search_engine = FunctionAgent(
@@ -100,9 +161,10 @@ tavily_search_engine = FunctionAgent(
     description="Agent that makes web searches to answer questions."
 )
-visit_website = FunctionAgent(
     tools=[
         *RequestsToolSpec().to_tool_list(),
     ],
     llm=get_llm(),
     system_prompt="""
@@ -119,7 +181,7 @@ visit_website = FunctionAgent(
 )
 wikipedia_agent = FunctionAgent(
-    tools=[*WikipediaToolSpec().to_tool_list()],
     llm=get_llm(),
     system_prompt="""
 You are a helpful assistant that searches Wikipedia and visit Wikipedia pages.
@@ -139,16 +201,30 @@ You are a helpful assistant that searches Wikipedia and visit Wikipedia pages.
 #     num_concurrent_runs=1,
 # )
 gaia_solving_agent = FunctionAgent(
     tools = [
         tavily_search_web,
         *load_and_search_tools_from_toolspec(WikipediaToolSpec()),
         *simple_web_page_reader_toolspec.to_tool_list(),
         *RequestsToolSpec().to_tool_list(),
     ],
     llm=get_llm(balanced_model_name),
     system_prompt="""
     You are a helpful assistant that uses tools to browse additional information and resources on the web to answer questions.
     """,
     name="gaia_solving_agent",
     description="Agent that browse additional information and resources on the web.",

 from pathlib import Path
+from typing import Literal
 from llama_index.core.agent.workflow import FunctionAgent, AgentWorkflow
 from llama_index.core.prompts import RichPromptTemplate
 from llama_index.llms.nebius import NebiusLLM
 from llama_index.tools.requests import RequestsToolSpec
 from llama_index.tools.wikipedia import WikipediaToolSpec
+from workflows import Workflow, step, Context
 from workflows.events import StartEvent, Event, StopEvent
 from gaia_solving_agent import NEBIUS_API_KEY
 from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
+from gaia_solving_agent.tools import (
+    tavily_search_web,
+    load_and_search_tools_from_toolspec,
+    simple_web_page_reader_toolspec,
+    vllm_ask_image_tool,
+    youtube_transcript_reader_toolspec,
+)
+from gaia_solving_agent.utils import extract_pattern
 # Choice of the model
 cheap_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
         max_retries=5,
     )
+class PlanEvent(Event):
+    """Ask the ``make_plan`` step to produce (or repair) the plan."""
+    # What make_plan should do: "Initialize" builds the first plan, "Format"
+    # asks the model to reformat a plan whose sections could not be extracted,
+    # "Replan" is currently a placeholder branch in make_plan.
+    to_do: Literal["Initialize", "Format", "Replan"] = "Initialize"
+    # Text of the rejected plan; make_plan sets it when it emits a "Format" event.
+    plan: str | None = None
 class QueryEvent(Event):
+    """Payload-free signal emitted by ``make_plan`` once the plan sections were extracted.
+
+    It triggers ``multi_agent_process``, which reads the plan, question,
+    known facts and sub-tasks from the workflow context rather than from
+    this event.
+    """
+    pass
 class AnswerEvent(Event):
     plan: str
 class GaiaWorkflow(Workflow):
     @step
+    async def setup(self, ctx: Context, ev: StartEvent) -> PlanEvent:
+        await ctx.set("user_msg", ev.user_msg)
+        await ctx.set("additional_file", ev.additional_file)
+        await ctx.set("additional_file_path", ev.additional_file_path)
+        return PlanEvent()
+    @step
+    async def make_plan(self, ctx: Context, ev: PlanEvent) -> PlanEvent | QueryEvent:
+        additional_file_path = await ctx.get("additional_file_path")
+        user_msg = await ctx.get("user_msg")
         llm = get_llm(reasoning_model_name)
         prompt_template = RichPromptTemplate(PLANING_PROMPT)
+        file_extension = Path(additional_file_path).suffix if additional_file_path else ""
+        prompt = prompt_template.format(
+            user_request=user_msg,
             additional_file_extension=file_extension,
         )
+        if ev.to_do == "Replan":
+            ...
+            # TODO : Placeholder for future update
+        elif ev.to_do == "Format":
+            prompt = f"""
+The original plan is not in the correct format.
+There is the query and constraints you must respect :
+{prompt}
+There is the original plan you must reformat :
+{ev.plan}
+Stick strictly to the formatting constraints !
+"""
+        plan = llm.complete(prompt)
+        await ctx.set("plan", plan.text)
+        question = extract_pattern(pattern=r"<Question> :\s*([\s\S]*?)\s*</Question>", text=plan.text)
+        known_facts = extract_pattern(pattern=r"<Known facts> :\s*([\s\S]*?)\s*</Known facts>", text=plan.text)
+        sub_tasks = extract_pattern(pattern=r"<Sub-tasks> :\s*([\s\S]*?)\s*<\/Sub-tasks>", text=plan.text)
+        if any(
+            extracted is None
+            for extracted in [question, known_facts, sub_tasks]
+        ):
+            return PlanEvent(to_do="Format", plan=plan.text)
+        else:
+            await ctx.set("question", question if question is not None else "")
+            await ctx.set("known_facts", known_facts if known_facts is not None else "")
+            await ctx.set("sub_tasks", sub_tasks if sub_tasks is not None else "")
+            return QueryEvent()
     @step()
+    async def multi_agent_process(self, ctx: Context, ev: QueryEvent) -> AnswerEvent:
+        plan = await ctx.get("plan")
+        additional_file = await ctx.get("additional_file")
+        additional_file_path = await ctx.get("additional_file_path")
+        question = await ctx.get("question")
+        known_facts = await ctx.get("known_facts")
+        sub_tasks = await ctx.get("sub_tasks")
+        prompt = f"""
+The question is : {question}
+The known facts are :
+{known_facts}
+The sub-tasks are :
+{sub_tasks}
+"""
         # Cheap trick to avoid Error 400 errors from OpenAPI
         from llama_index.core.memory import ChatMemoryBuffer
         memory = ChatMemoryBuffer.from_defaults(token_limit=100000)
         agent_output = await gaia_solving_agent.run(
+            user_msg=plan,
             memory=memory,
+            additional_file=additional_file,
+            additional_file_path=additional_file_path,
         )
+        return AnswerEvent(plan=plan, answer=str(agent_output))
     @step
+    async def parse_answer(self, ctx: Context, ev: AnswerEvent) -> StopEvent:
         llm = get_llm(balanced_model_name)
         prompt_template = RichPromptTemplate(FORMAT_ANSWER)
+        question = await ctx.get("question")
+        prompt = prompt_template.format(question=question, answer=ev.answer)
+        result = llm.complete(prompt)
+        return StopEvent(result=result.text, reasoning=ev.plan)
 tavily_search_engine = FunctionAgent(
     description="Agent that makes web searches to answer questions."
 )
+visit_web_page_agent = FunctionAgent(
     tools=[
         *RequestsToolSpec().to_tool_list(),
+        *simple_web_page_reader_toolspec.to_tool_list(),
     ],
     llm=get_llm(),
     system_prompt="""
 )
 wikipedia_agent = FunctionAgent(
+    tools=[*load_and_search_tools_from_toolspec(WikipediaToolSpec())],
     llm=get_llm(),
     system_prompt="""
 You are a helpful assistant that searches Wikipedia and visit Wikipedia pages.
 #     num_concurrent_runs=1,
 # )
 gaia_solving_agent = FunctionAgent(
     tools = [
+        vllm_ask_image_tool,
         tavily_search_web,
         *load_and_search_tools_from_toolspec(WikipediaToolSpec()),
         *simple_web_page_reader_toolspec.to_tool_list(),
         *RequestsToolSpec().to_tool_list(),
+        *youtube_transcript_reader_toolspec.to_tool_list(),
     ],
     llm=get_llm(balanced_model_name),
     system_prompt="""
     You are a helpful assistant that uses tools to browse additional information and resources on the web to answer questions.
+    Tools you have are of three types:
+    - External resources getter: get text, images, video, etc. from the internet
+    - Resource querier and transformer: query, summarize or transform a resource into a more digestible format.
+    - Analyse or compute : specialized tools to provide a specific analysis or computation.
+    Try to get resources before querying them.
+    If the analysis require a new external resource get it first.(e.g. a set of rules or a process)
+    You will be provided a question, some known facts summarizing the user provided context and some sub-tasks to complete.
+    You should follow the order of the sub-tasks.
     """,
     name="gaia_solving_agent",
     description="Agent that browse additional information and resources on the web.",

src/gaia_solving_agent/hf_submission_api.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import re
 from functools import lru_cache
 import pandas as pd
 import requests
@@ -11,7 +12,7 @@ from gaia_solving_agent.agent import GaiaWorkflow
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
 def instantiate_agent(space_id: str, **agent_kwargs):
     try:
@@ -52,10 +53,41 @@ def fetching_questions(api_url: str = DEFAULT_API_URL):
     return questions_data
-async def run_agent(agent, questions_data):
-    results_log = []
-    answers_payload = []
     # First, ensure that all complementary files are in the FILE_DIR
     for item in questions_data:
         if item.get("file_name"):
@@ -65,6 +97,12 @@ async def run_agent(agent, questions_data):
         str(file_path): document
         for file_path, document in zip(file_reader.list_resources(), file_reader.load_data())
     }
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data:

 import re
 from functools import lru_cache
+from pathlib import Path
 import pandas as pd
 import requests
 DEFAULT_API_URL = "https://agents-course-unit4-scoring.hf.space"
+FILE_DIR = Path(__file__).parent / "static" / "files"
 def instantiate_agent(space_id: str, **agent_kwargs):
     try:
     return questions_data
+def get_or_save_file_associated_with_task(file_name: str, api_url: str = DEFAULT_API_URL) -> Path | None:
+    file_path = FILE_DIR / file_name
+    task_id = file_path.stem
+    if file_path.exists() and file_path.is_file():
+        print(f"File already exists: {file_path}")
+        return file_path
+    file_url = f"{api_url}/files/{task_id}"
+    print(f"Getting file associated to task: {task_id}")
+    try:
+        response = requests.get(file_url, timeout=15)
+        response.raise_for_status()
+        file_data = response.content
+        if not file_data:
+             print("Fetched file is empty.")
+        print(f"Fetched the file.")
+    except requests.exceptions.RequestException as e:
+        print(f"Error fetching file: {e}")
+    except requests.exceptions.JSONDecodeError as e:
+         print(f"Error decoding JSON response from file endpoint: {e}")
+         print(f"Response text: {response.text[:500]}")
+    except Exception as e:
+        print(f"An unexpected error occurred fetching file: {e}")
+    try:  # Save the file
+        with open(file_path, 'wb') as f:
+            f.write(response.content)
+        return file_path
+    except Exception as e:
+        print(f"Error saving file: {e}")
+        return None
+def ensure_files_are_loaded(questions_data):
     # First, ensure that all complementary files are in the FILE_DIR
     for item in questions_data:
         if item.get("file_name"):
         str(file_path): document
         for file_path, document in zip(file_reader.list_resources(), file_reader.load_data())
     }
+    return additional_files
+async def run_agent(agent, questions_data, additional_files):
+    results_log = []
+    answers_payload = []
     print(f"Running agent on {len(questions_data)} questions...")
     for item in questions_data: