# Multi-agent workflow using LlamaIndex AgentWorkflow: a ReAct calculator
# agent (root) plus a ReAct RAG agent backed by a persistent Chroma store,
# both driven by a HuggingFace Inference API LLM.
import asyncio

import chromadb
from llama_index.core import VectorStoreIndex
from llama_index.core.agent.workflow import AgentWorkflow, ReActAgent
from llama_index.core.tools import QueryEngineTool
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
from llama_index.vector_stores.chroma import ChromaVectorStore


def add(a: int, b: int) -> int:
    """Add two numbers together and return the result."""
    return a + b


def subtract(a: int, b: int) -> int:
    """Subtract the second number from the first and return the result."""
    return a - b


async def main() -> None:
    """Build a two-agent workflow (calculator + persona RAG) and run a demo query.

    Side effects: opens/creates a Chroma database at ./alfred_chroma_db,
    calls the HuggingFace Inference API, and prints the workflow response.
    """
    llm = HuggingFaceInferenceAPI(
        model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    )

    # Workaround: llama_index's astream_chat/astream_complete call
    # self._async_client.close() after each streaming response, which
    # permanently kills the httpx connection. ReAct agents make multiple
    # LLM calls per run, so subsequent steps hit a closed client.
    # Neutralize close() to keep the connection alive across steps.
    async def _noop_close():
        pass

    llm._async_client.close = _noop_close

    calculator_agent = ReActAgent(
        name="calculator_agent",
        description="A calculator agent that can add and subtract numbers.",
        # NOTE: originally a string literal broken across a physical line
        # (a syntax error); rejoined into one prompt.
        system_prompt=(
            "You are a calculator assistant. "
            "Use your tools for any math operation."
        ),
        tools=[add, subtract],
        llm=llm,
    )

    # Persistent Chroma vector store backing the RAG index.
    db = chromadb.PersistentClient(path="./alfred_chroma_db")
    chroma_collection = db.get_or_create_collection("alfred")
    vector_store = ChromaVectorStore(chroma_collection=chroma_collection)

    # Query engine over the existing vector store; embeddings must match
    # whatever model originally populated the collection.
    embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
    index = VectorStoreIndex.from_vector_store(
        vector_store=vector_store, embed_model=embed_model
    )
    query_engine = index.as_query_engine(llm=llm)

    query_engine_tool = QueryEngineTool.from_defaults(
        query_engine=query_engine,
        name="personas",
        description="descriptions for various types of personas",
        return_direct=False,
    )

    query_agent = ReActAgent(
        name="query_agent",
        # The description is read by the root agent when deciding handoffs,
        # so it must describe the tool accurately: this agent queries a
        # local persona RAG store, not the internet.
        description=(
            "A query agent that can look up persona descriptions "
            "from a local RAG store."
        ),
        system_prompt=(
            "Use your tool to query a RAG system to answer information "
            "about XYZ."
        ),
        tools=[query_engine_tool],
        llm=llm,
    )

    # calculator_agent is the root: it answers math directly and can hand
    # off to query_agent based on the agents' descriptions.
    agent = AgentWorkflow(
        agents=[calculator_agent, query_agent],
        root_agent="calculator_agent",
    )

    response = await agent.run(user_msg="What is 10 + 5?")
    print(response)


if __name__ == "__main__":
    asyncio.run(main())