"""GAIA-solving workflow.

Pipeline: plan the question with a reasoning LLM, execute the plan with a
tool-using FunctionAgent, then reformat the raw answer into the expected
final output format.
"""

from pathlib import Path
from typing import Literal

from llama_index.core.agent.workflow import FunctionAgent
from llama_index.core.prompts import RichPromptTemplate
from llama_index.llms.nebius import NebiusLLM
from llama_index.llms.mistralai import MistralAI
from llama_index.llms.openai import OpenAI
from workflows import Workflow, step, Context
from workflows.events import StartEvent, Event, StopEvent

from gaia_solving_agent import NEBIUS_API_KEY, MISTRAL_API_KEY, OPENAI_API_KEY
from gaia_solving_agent.prompts import PLANING_PROMPT, FORMAT_ANSWER
from gaia_solving_agent.tools import (
    tavily_search_web,
    simple_web_page_reader_toolspec,
    vllm_ask_image_tool,
    youtube_transcript_reader_toolspec,
    text_content_analysis,
    research_paper_reader_toolspec,
    get_text_representation_of_additional_file,
    wikipedia_toolspec,
)
from gaia_solving_agent.utils import extract_pattern

# Choice of the model (Nebius-hosted model names, picked per step by cost/capability)
cheap_model_name = "meta-llama/Meta-Llama-3.1-8B-Instruct"
light_model_name = "Qwen/Qwen2.5-32B-Instruct"
balanced_model_name = "meta-llama/Meta-Llama-3.1-70B-Instruct"
reasoning_model_name = "Qwen/Qwen3-235B-A22B"
vlm_model_name = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"  # For VLM needs

# Strong OpenAI model used for planning.
openai_reasoning = OpenAI(
    model="gpt-4.1",
    api_key=OPENAI_API_KEY,
    temperature=.1,
    max_retries=5,
)

# Cheap OpenAI model kept available for light tasks.
openai_llm = OpenAI(
    model="gpt-4.1-nano",
    api_key=OPENAI_API_KEY,
    temperature=.1,
    max_retries=5,
)

mistral_llm = MistralAI(
    model="mistral-small-latest",
    api_key=MISTRAL_API_KEY,
    temperature=.1,
    max_retries=5,
    # is_function_calling_model=True,
)


def get_llm(model_name=cheap_model_name):
    """Build a Nebius-hosted LLM client for *model_name* with shared defaults."""
    return NebiusLLM(
        model=model_name,
        api_key=NEBIUS_API_KEY,
        is_function_calling_model=True,
        max_completion_tokens=10000,
        context_window=80000,  # max = 128000 for "meta-llama/Meta-Llama-3.1-8B-Instruct"
        temperature=.1,
        max_retries=5,
    )


class PlanEvent(Event):
    """Triggers (re)planning.

    ``to_do`` is "Initialize" for the first plan, "Format" to ask the LLM to
    reformat a plan that failed parsing, and "Replan" (reserved, not yet
    implemented).
    """

    to_do: Literal["Initialize", "Format", "Replan"] = "Initialize"
    plan: str | None = None  # previous plan text, set when reformatting
    n_retries: int = 0  # formatting attempts so far


class QueryEvent(Event):
    """Signals that a parsed plan is stored and the agent can run."""

    pass


class AnswerEvent(Event):
    """Carries the agent's raw answer together with the plan that produced it."""

    plan: str
    answer: str


class GaiaWorkflow(Workflow):
    """setup -> make_plan -> multi_agent_process -> parse_answer."""

    @step
    async def setup(self, ctx: Context, ev: StartEvent) -> PlanEvent:
        """Stash the user request and optional attachment in the workflow store."""
        await ctx.store.set("user_msg", ev.user_msg)
        await ctx.store.set("additional_file", ev.additional_file)
        await ctx.store.set("additional_file_path", ev.additional_file_path)
        return PlanEvent()

    @step
    async def make_plan(
        self, ctx: Context, ev: PlanEvent
    ) -> PlanEvent | QueryEvent | StopEvent:
        """Ask the reasoning LLM for a structured plan and parse its sections.

        Emits QueryEvent on success, PlanEvent(to_do="Format") when parsing
        fails, and StopEvent after more than 3 failed formatting retries.
        """
        additional_file_path = await ctx.store.get("additional_file_path")
        user_msg = await ctx.store.get("user_msg")
        llm = openai_reasoning
        prompt_template = RichPromptTemplate(PLANING_PROMPT)
        file_extension = Path(additional_file_path).suffix if additional_file_path else ""
        prompt = prompt_template.format(
            user_request=user_msg,
            additional_file_extension=file_extension,
        )
        if ev.to_do == "Replan":
            ...  # TODO : Placeholder for future update
        elif ev.to_do == "Format":
            if ev.n_retries > 3:
                return StopEvent(result="Cannot provide a plan. Format may be wrong.", reasoning=ev.plan)
            prompt = f"""
            The original plan is not in the correct format.
            ______________
            There is the query and constraints you must respect : {prompt}
            ______________
            There is the original plan you must reformat : {ev.plan}
            ______________
            Ask yourself what you did wrong and fix it. Stick strictly to the formatting constraints !
            """
        # NOTE(review): synchronous .complete() inside an async step blocks the
        # event loop; consider `await llm.acomplete(prompt)`.
        plan = llm.complete(prompt)
        await ctx.store.set("plan", plan.text)
        # BUGFIX: the extraction patterns had lost their XML tag anchors
        # (they read `r" :\s*([\s\S]*?)\s*"`), so the lazy group matched an
        # empty string and the `is None` validation below never fired. The
        # tag names were reconstructed from the surviving escaped closing tag
        # `<\/Sub-tasks>` — TODO confirm they match PLANING_PROMPT's format.
        question = extract_pattern(
            pattern=r"<Question> :\s*([\s\S]*?)\s*</Question>", text=plan.text
        )
        known_facts = extract_pattern(
            pattern=r"<Known facts> :\s*([\s\S]*?)\s*</Known facts>", text=plan.text
        )
        sub_tasks = extract_pattern(
            pattern=r"<Sub-tasks> :\s*([\s\S]*?)\s*<\/Sub-tasks>", text=plan.text
        )
        if any(
            extracted is None for extracted in [question, known_facts, sub_tasks]
        ):
            # Parsing failed: ask the LLM to reformat its own plan.
            return PlanEvent(to_do="Format", plan=plan.text, n_retries=ev.n_retries + 1)
        else:
            # All three sections parsed (none is None), so store them directly.
            await ctx.store.set("question", question)
            await ctx.store.set("known_facts", known_facts)
            await ctx.store.set("sub_tasks", sub_tasks)
            return QueryEvent()

    @step
    async def multi_agent_process(self, ctx: Context, ev: QueryEvent) -> AnswerEvent:
        """Run the tool-using agent on the parsed plan sections."""
        plan = await ctx.store.get("plan")
        additional_file = await ctx.store.get("additional_file")
        question = await ctx.store.get("question")
        known_facts = await ctx.store.get("known_facts")
        sub_tasks = await ctx.store.get("sub_tasks")
        prompt = f"""
        The question is : {question}
        The known facts are : {known_facts}
        The sub-tasks are : {sub_tasks}
        """
        # Cheap trick to avoid Error 400 errors from OpenAPI
        from llama_index.core.memory import ChatMemoryBuffer
        memory = ChatMemoryBuffer.from_defaults(token_limit=100000)
        agent_ctx = Context(gaia_solving_agent)
        # Expose the raw attachment to the agent's tools via its context store.
        await agent_ctx.store.set("additional_file", additional_file)
        agent_output = await gaia_solving_agent.run(
            user_msg=prompt,
            memory=memory,
            ctx=agent_ctx,
        )
        return AnswerEvent(plan=plan, answer=str(agent_output))

    @step
    async def parse_answer(self, ctx: Context, ev: AnswerEvent) -> StopEvent:
        """Reformat the agent's raw answer into the expected final format."""
        llm = get_llm(balanced_model_name)
        prompt_template = RichPromptTemplate(FORMAT_ANSWER)
        question = await ctx.store.get("question")
        prompt = prompt_template.format(question=question, answer=ev.answer)
        result = llm.complete(prompt)
        return StopEvent(result=result.text, reasoning=ev.plan)


# Tool-using agent shared by the workflow (referenced by multi_agent_process).
gaia_solving_agent = FunctionAgent(
    tools=[
        get_text_representation_of_additional_file,
        vllm_ask_image_tool,
        tavily_search_web,
        *wikipedia_toolspec,
        *simple_web_page_reader_toolspec.to_tool_list(),
        *youtube_transcript_reader_toolspec.to_tool_list(),
        *research_paper_reader_toolspec.to_tool_list(),
        text_content_analysis,
    ],
    llm=get_llm(reasoning_model_name),
    system_prompt="""
    You are a helpful assistant that uses tools to browse additional information and resources on the web to answer questions.
    Tools you have are of three types:
    - External resources getter: get text, images, video, etc. from the internet
    - Resource querier and transformer: query, summarize or transform a resource into a more digestible format.
    - Analyse or compute : specialized tools to provide a specific analysis or computation.
    Try to get resources before querying them.
    If it is an additional file, you can access its content through the get_text_representation_of_additional_file tool. If you need the original Document, you can use the llamaindex context with ctx.store.get("additional_file").
    If the analysis require a new external resource get it first.(e.g. a set of rules or a process)
    You will be provided a question, some known facts summarizing the user provided context and some sub-tasks to complete. You should follow the order of the sub-tasks.
    If the tools provides facts that go against your knowledge, you should not use them.
    """,
    name="gaia_solving_agent",
    description="Agent that browse additional information and resources on the web.",
    allow_parallel_tool_calls=False,
)