"""Gradio front end for a small calculator agent backed by a Hugging Face LLM.

The agent is a LlamaIndex ``AgentWorkflow`` that is given two arithmetic
tools (``add`` and ``subtract``) and asked to use them for math questions.
"""

import asyncio
import os
from collections.abc import AsyncIterator

import gradio as gr
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI


# ------------------
# 1. Tools
# ------------------
def add(a: int, b: int) -> int:
    """Adds two integers together."""
    return a + b


def subtract(a: int, b: int) -> int:
    """Subtracts one integer from another."""
    return a - b


# ------------------
# 2. Hugging Face LLM (LlamaIndex Compatible)
# ------------------
llm = HuggingFaceInferenceAPI(
    model_name="katanemo/Arch-Router-1.5B",
    # The token is read from the environment; None is passed when unset.
    token=os.environ.get("HF_TOKEN"),
    temperature=0.7,
    # NOTE(review): the original passed `max_token=100`, which is not a
    # field of HuggingFaceInferenceAPI; `num_output` is the documented
    # generation-length setting. Confirm against the installed version.
    num_output=100,
)

# ------------------
# 3. Agent Setup
# ------------------
agent = AgentWorkflow.from_tools_or_functions(
    [add, subtract],
    llm=llm,
    system_prompt="You are a calculator assistant. Use tools for math calculations.",
)


# ------------------
# 4. Streaming handler
# ------------------
async def chat_stream(user_msg: str) -> AsyncIterator[str]:
    """Run the agent on ``user_msg`` and yield its final answer as text.

    Exactly one string is yielded per call: the stringified agent result on
    success, or a "Connection Error" message describing the exception if
    anything raised while running the agent.

    Args:
        user_msg: The question typed into the Gradio textbox.

    Yields:
        The agent's final response, or an error message.
    """
    try:
        handler = agent.run(user_msg=user_msg)

        # Intermediate events are deliberately discarded; the stream is
        # drained so the run is fully consumed before awaiting the result.
        async for _ in handler.stream_events():
            pass

        result = await handler
        yield str(result)
    except Exception as e:
        # UI boundary: report the failure in the output box instead of
        # letting the exception propagate to Gradio.
        yield f"Connection Error: {e}. Try sending the message again."


# ------------------
# 5. Gradio UI
# ------------------
demo = gr.Interface(
    fn=chat_stream,
    inputs=gr.Textbox(label="Ask", placeholder="e.g. What is 50 minus 20?"),
    outputs=gr.Textbox(label="Response"),
    title="HF LLM Calculator Agent",
)

# Launch only when executed as a script so importing this module does not
# start a web server as a side effect.
if __name__ == "__main__":
    demo.launch()