# Provenance: jpuri's Hugging Face Space, commit 65d923f ("update").
# Chat agent using LlamaIndex SimpleChatEngine + Gradio
import asyncio
from llama_index.core.agent.workflow import (
AgentWorkflow,
FunctionAgent,
ReActAgent,
)
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.tools import QueryEngineTool
from llama_index.core import VectorStoreIndex
def add(a: int, b: int) -> int:
    """Return the sum of *a* and *b*.

    Exposed as a tool for the calculator agent.
    """
    total = a + b
    return total
def subtract(a: int, b: int) -> int:
    """Return *a* minus *b*.

    Exposed as a tool for the calculator agent.
    """
    difference = a - b
    return difference
async def main():
    """Run a two-agent LlamaIndex workflow backed by the HF Inference API.

    Builds a calculator ReAct agent (add/subtract tools) and a RAG ReAct
    agent over a persistent Chroma collection of persona descriptions,
    wires them into an ``AgentWorkflow`` rooted at the calculator agent,
    then runs a single arithmetic query and prints the response.

    Side effects: opens/creates ``./alfred_chroma_db`` on disk, downloads
    the BGE embedding model on first use, and makes remote LLM calls.
    """
    llm = HuggingFaceInferenceAPI(
        model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    )

    # Workaround: llama_index's astream_chat/astream_complete call
    # self._async_client.close() after each streaming response, which
    # permanently kills the httpx connection. ReAct agents make multiple
    # LLM calls per run, so subsequent steps hit a closed client.
    # Neutralize close() to keep the connection alive across steps.
    async def _noop_close():
        pass

    llm._async_client.close = _noop_close

    calculator_agent = ReActAgent(
        name="calculator_agent",
        description="A calculator agent that can add and subtract numbers.",
        system_prompt="You are a calculator assistant. Use your tools for any math operation.",
        tools=[add, subtract],
        llm=llm,
    )

    # Vector store: persistent on-disk Chroma collection.
    db = chromadb.PersistentClient(path="./alfred_chroma_db")
    chroma_collection = db.get_or_create_collection("alfred")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    # Query engine over the existing collection; embeddings must match
    # whatever model populated the store (assumed bge-small-en-v1.5).
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store, embed_model=embed_model
    )
    query_engine = index.as_query_engine(llm=llm)
    query_engine_tool = QueryEngineTool.from_defaults(
        query_engine=query_engine,
        name="personas",
        description="descriptions for various types of personas",
        return_direct=False,
    )

    # Fix: the original description claimed internet access; this agent
    # only queries the local persona RAG store. Accurate descriptions
    # matter because the workflow's routing LLM reads them.
    query_agent = ReActAgent(
        name="query_agent",
        description="A RAG agent that answers questions about personas from a local knowledge base.",
        system_prompt="Use your tool to query a RAG system to answer information about personas.",
        tools=[query_engine_tool],
        llm=llm,
    )

    # Multi-agent workflow; the root agent handles (or hands off) each turn.
    agent = AgentWorkflow(
        agents=[calculator_agent, query_agent],
        root_agent="calculator_agent",
    )

    response = await agent.run(user_msg="What is 10 + 5?")
    print(response)
# Script entry point: drive the async workflow with a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())