# Multi-agent workflow using LlamaIndex AgentWorkflow: a ReAct calculator
# agent (root) plus a ReAct RAG agent backed by a persistent Chroma store,
# both driven by a HuggingFace Inference API LLM.
import asyncio

import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.core.agent.workflow import AgentWorkflow, ReActAgent
from llama_index.core.tools import QueryEngineTool
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.vector_stores.chroma import ChromaVectorStore


def add(a: int, b: int) -> int:
    """Add two numbers together and return the result."""
    return a + b


def subtract(a: int, b: int) -> int:
    """Subtract the second number from the first and return the result."""
    return a - b


async def main() -> None:
    """Build a two-agent workflow (calculator + persona RAG) and run a demo query.

    Side effects: opens/creates a Chroma database at ./alfred_chroma_db,
    calls the HuggingFace Inference API, and prints the workflow response.
    """
    llm = HuggingFaceInferenceAPI(
        model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    )

    # Workaround: llama_index's astream_chat/astream_complete call
    # self._async_client.close() after each streaming response, which
    # permanently kills the httpx connection. ReAct agents make multiple
    # LLM calls per run, so subsequent steps hit a closed client.
    # Neutralize close() to keep the connection alive across steps.
    async def _noop_close():
        pass

    llm._async_client.close = _noop_close

    calculator_agent = ReActAgent(
        name="calculator_agent",
        description="A calculator agent that can add and subtract numbers.",
        # NOTE: originally a string literal broken across a physical line
        # (a syntax error); rejoined into one prompt.
        system_prompt=(
            "You are a calculator assistant. "
            "Use your tools for any math operation."
        ),
        tools=[add, subtract],
        llm=llm,
    )

    # Persistent Chroma vector store backing the RAG index.
    db = chromadb.PersistentClient(path="./alfred_chroma_db")
    chroma_collection = db.get_or_create_collection("alfred")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    # Query engine over the existing vector store; embeddings must match
    # whatever model originally populated the collection.
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store, embed_model=embed_model
    )
    query_engine = index.as_query_engine(llm=llm)

    query_engine_tool = QueryEngineTool.from_defaults(
        query_engine=query_engine,
        name="personas",
        description="descriptions for various types of personas",
        return_direct=False,
    )

    query_agent = ReActAgent(
        name="query_agent",
        # The description is read by the root agent when deciding handoffs,
        # so it must describe the tool accurately: this agent queries a
        # local persona RAG store, not the internet.
        description=(
            "A query agent that can look up persona descriptions "
            "from a local RAG store."
        ),
        system_prompt=(
            "Use your tool to query a RAG system to answer information "
            "about XYZ."
        ),
        tools=[query_engine_tool],
        llm=llm,
    )

    # calculator_agent is the root: it answers math directly and can hand
    # off to query_agent based on the agents' descriptions.
    agent = AgentWorkflow(
        agents=[calculator_agent, query_agent],
        root_agent="calculator_agent",
    )

    response = await agent.run(user_msg="What is 10 + 5?")
    print(response)


if __name__ == "__main__":
    asyncio.run(main())