# Provenance: jpuri's Hugging Face Space, commit 65d923f ("update").
# Chat agent using LlamaIndex SimpleChatEngine + Gradio
import asyncio
from llama_index.core.agent.workflow import (
AgentWorkflow,
FunctionAgent,
ReActAgent,
)
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
import chromadb
from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.core.tools import QueryEngineTool
from llama_index.core import VectorStoreIndex
def add(a: int, b: int) -> int:
    """Return the sum of *a* and *b*.

    Exposed as a tool for the calculator agent.
    """
    total = a + b
    return total
def subtract(a: int, b: int) -> int:
    """Return *a* minus *b*.

    Exposed as a tool for the calculator agent.
    """
    difference = a - b
    return difference
async def main():
    """Run a two-agent LlamaIndex workflow backed by the HF Inference API.

    Builds a calculator ReAct agent (add/subtract tools) and a RAG ReAct
    agent over a persistent Chroma collection of persona descriptions,
    wires them into an ``AgentWorkflow`` rooted at the calculator agent,
    then runs a single arithmetic query and prints the response.

    Side effects: opens/creates ``./alfred_chroma_db`` on disk, downloads
    the BGE embedding model on first use, and makes remote LLM calls.
    """
    llm = HuggingFaceInferenceAPI(
        model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    )

    # Workaround: llama_index's astream_chat/astream_complete call
    # self._async_client.close() after each streaming response, which
    # permanently kills the httpx connection. ReAct agents make multiple
    # LLM calls per run, so subsequent steps hit a closed client.
    # Neutralize close() to keep the connection alive across steps.
    async def _noop_close():
        pass

    llm._async_client.close = _noop_close

    calculator_agent = ReActAgent(
        name="calculator_agent",
        description="A calculator agent that can add and subtract numbers.",
        system_prompt="You are a calculator assistant. Use your tools for any math operation.",
        tools=[add, subtract],
        llm=llm,
    )

    # Vector store: persistent on-disk Chroma collection.
    db = chromadb.PersistentClient(path="./alfred_chroma_db")
    chroma_collection = db.get_or_create_collection("alfred")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    # Query engine over the existing collection; embeddings must match
    # whatever model populated the store (assumed bge-small-en-v1.5).
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store, embed_model=embed_model
    )
    query_engine = index.as_query_engine(llm=llm)
    query_engine_tool = QueryEngineTool.from_defaults(
        query_engine=query_engine,
        name="personas",
        description="descriptions for various types of personas",
        return_direct=False,
    )

    # Fix: the original description claimed internet access; this agent
    # only queries the local persona RAG store. Accurate descriptions
    # matter because the workflow's routing LLM reads them.
    query_agent = ReActAgent(
        name="query_agent",
        description="A RAG agent that answers questions about personas from a local knowledge base.",
        system_prompt="Use your tool to query a RAG system to answer information about personas.",
        tools=[query_engine_tool],
        llm=llm,
    )

    # Multi-agent workflow; the root agent handles (or hands off) each turn.
    agent = AgentWorkflow(
        agents=[calculator_agent, query_agent],
        root_agent="calculator_agent",
    )

    response = await agent.run(user_msg="What is 10 + 5?")
    print(response)
# Script entry point: drive the async workflow with a fresh event loop.
if __name__ == "__main__":
    asyncio.run(main())