jpuri's picture
update
4e4c4d1
import asyncio
from llama_index.core.agent.workflow import AgentWorkflow, ReActAgent
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
# Tool function handed to an agent via ``tools=[add]``. The docstring below is
# kept short and verbatim on purpose: llama_index derives the tool description
# shown to the LLM from it (FunctionTool.from_defaults), so rewording it would
# change the prompt.
def add(a: int, b: int) -> int:
    """Add two numbers."""
    total = a + b
    return total
# Tool function handed to an agent via ``tools=[multiply]``. The docstring
# below is kept short and verbatim on purpose: llama_index derives the tool
# description shown to the LLM from it (FunctionTool.from_defaults), so
# rewording it would change the prompt.
def multiply(a: int, b: int) -> int:
    """Multiply two numbers."""
    product = a * b
    return product
async def main():
    """Run a two-agent ReAct workflow against a fixed arithmetic question.

    Builds one ``ReActAgent`` that owns the ``multiply`` tool and one that owns
    the ``add`` tool, wires both into an ``AgentWorkflow`` whose root agent is
    ``multiply_agent``, asks "Can you add 5 and 3?", and prints the response.

    The shared async HTTP client is closed exactly once, after the run
    finishes (successfully or not).
    """
    llm = HuggingFaceInferenceAPI(model_name="Qwen/Qwen2.5-Coder-32B-Instruct")

    # Workaround: llama_index's astream_chat calls _async_client.close()
    # after each streaming response, killing the httpx connection. ReAct
    # agents need multiple LLM calls per run, so subsequent steps fail.
    # The real close() is kept so it can be restored and awaited once the
    # workflow is done; otherwise the client would never be closed at all.
    async_client = llm._async_client
    real_close = async_client.close

    async def _noop():
        pass

    async_client.close = _noop

    multiply_agent = ReActAgent(
        name="multiply_agent",
        description="Is able to multiply two integers",
        system_prompt="A helpful assistant that can use a tool to multiply numbers.",
        tools=[multiply],
        llm=llm,
    )
    addition_agent = ReActAgent(
        name="add_agent",
        description="Is able to add two integers",
        system_prompt="A helpful assistant that can use a tool to add numbers.",
        tools=[add],
        llm=llm,
    )
    workflow = AgentWorkflow(
        agents=[multiply_agent, addition_agent],
        root_agent="multiply_agent",
    )

    try:
        response = await workflow.run(user_msg="Can you add 5 and 3?")
    finally:
        # Undo the monkey-patch and release the underlying connection(s).
        # close() is awaited because the no-op that stood in for it is a
        # coroutine function, i.e. callers already await it.
        async_client.close = real_close
        await real_close()

    print(response)
if __name__ == "__main__":
    # Script entry point: create the top-level coroutine and let asyncio.run
    # own the event loop for its whole lifetime.
    entry_coro = main()
    asyncio.run(entry_coro)