import asyncio
import logging
import os

import gradio as gr
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# ------------------
# 1. Tools
# ------------------
def add(a: int, b: int) -> int:
    """Adds two integers together."""
    # NOTE: this function is handed to the agent as a tool, and the tool's
    # description is presumably derived from the docstring above, so the
    # docstring text is kept exactly as it was.
    total = a + b
    return total

def subtract(a: int, b: int) -> int:
    """Subtracts one integer from another."""
    # NOTE: this function is handed to the agent as a tool, and the tool's
    # description is presumably derived from the docstring above, so the
    # docstring text is kept exactly as it was.
    # The result is a minus b (the second argument is taken away from the first).
    difference = a - b
    return difference

# ------------------
# 2. Hugging Face LLM (LlamaIndex Compatible)
# ------------------
# Remote LLM reached through the Hugging Face Inference API.
# The access token is read from the HF_TOKEN environment variable;
# os.environ.get returns None when that variable is not set.
llm = HuggingFaceInferenceAPI(
    model_name="katanemo/Arch-Router-1.5B",
    token=os.environ.get("HF_TOKEN"),
    temperature=0.7,
    # The output-length cap on HuggingFaceInferenceAPI is the `num_output`
    # field. The previous keyword, `max_token`, is not a declared field of
    # the class, so the intended 100-token limit was not being applied.
    num_output=100,
)

# ------------------
# 3. Agent Setup
# ------------------
# Build the agent workflow: the two arithmetic functions are registered as
# tools, and the system prompt instructs the model to use them for math.
agent = AgentWorkflow.from_tools_or_functions(
    [add, subtract],
    system_prompt="You are a calculator assistant. Use tools for math calculations.",
    llm=llm,
)

# ------------------
# 4. Streaming handler
# ------------------
async def chat_stream(user_msg):
    """Run the calculator agent on one user message and yield its reply.

    This is an async generator that yields exactly one string per call.

    Args:
        user_msg: Text entered by the user.

    Yields:
        The agent's final result converted to ``str``; a short prompt when
        the input is empty or whitespace only; or a
        ``"Connection Error: ..."`` message when the agent run raises.
    """
    # Guard clause: do not start an agent run (and an LLM call) for an
    # empty or blank message.
    if not user_msg or not user_msg.strip():
        yield "Please enter a question."
        return

    try:
        handler = agent.run(user_msg=user_msg)

        # Intermediate events are intentionally discarded; only the final
        # result is shown to the user.
        async for _event in handler.stream_events():
            pass

        final_response = str(await handler)
    except Exception as e:
        # Keep the traceback in the server log; the UI only receives the
        # short message below.
        logging.getLogger(__name__).exception("Agent run failed")
        yield f"Connection Error: {str(e)}. Try sending the message again."
    else:
        yield final_response

# ------------------
# 5. Gradio UI
# ------------------
# Single-textbox interface: the submitted text is passed to chat_stream and
# whatever it yields is displayed in the "Response" box.
demo = gr.Interface(
    title="HF LLM Calculator Agent",
    fn=chat_stream,
    inputs=gr.Textbox(placeholder="e.g. What is 50 minus 20?", label="Ask"),
    outputs=gr.Textbox(label="Response"),
)

# Start the Gradio server as soon as this file is executed.
demo.launch()