"""Gradio front end for a small calculator agent backed by a Hugging Face LLM.

The agent is a LlamaIndex ``AgentWorkflow`` that can call two tools
(``add`` and ``subtract``). Responses are streamed to the UI as they arrive.
"""

import asyncio
import os
from collections.abc import AsyncIterator

import gradio as gr
from llama_index.core.agent.workflow import AgentWorkflow
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI


# ------------------
# 1. Tools
# ------------------
# NOTE: the docstrings below are passed to the LLM as the tool descriptions,
# so they are kept short and literal.
def add(a: int, b: int) -> int:
    """Adds two integers together."""
    return a + b


def subtract(a: int, b: int) -> int:
    """Subtracts one integer from another."""
    return a - b


# ------------------
# 2. Hugging Face LLM
# ------------------
llm = HuggingFaceInferenceAPI(
    model_name="zai-org/GLM-4.7",
    # Read from the environment so the token never lives in the source.
    token=os.getenv("HF_TOKEN"),
    context_window=4096,
    max_new_tokens=512,
    generate_kwargs={
        "temperature": 0.1,
    },
)


# ------------------
# 3. Agent setup
# ------------------
# from_tools_or_functions wraps the plain functions as tools and builds a
# single-agent workflow around them.
agent = AgentWorkflow.from_tools_or_functions(
    [add, subtract],
    llm=llm,
    system_prompt="You are a calculator assistant. Use tools for math calculations.",
)


# ------------------
# 4. Streaming handler
# ------------------
async def chat_stream(user_msg: str) -> AsyncIterator[str]:
    """Run the agent on ``user_msg`` and yield the growing response text.

    Gradio treats an async generator as a streaming function: each yielded
    value replaces the content of the output component, so the accumulated
    text (not just the newest chunk) is yielded every time.

    Args:
        user_msg: The question typed by the user.

    Yields:
        The response text received so far.
    """
    partial_text = ""

    # AgentWorkflow.run() returns a handler immediately; events are consumed
    # through handler.stream_events() while the workflow runs.
    handler = agent.run(user_msg=user_msg)

    async for event in handler.stream_events():
        # Only streaming events carry a text delta; duck-typing avoids a
        # hard dependency on the concrete event class.
        delta = getattr(event, "delta", None)
        if delta is not None:
            partial_text += delta
            yield partial_text

    # Awaiting the handler surfaces any exception raised inside the workflow
    # and provides the final result.
    response = await handler

    # If the backend produced no streamed deltas, still show the final answer.
    if not partial_text:
        yield str(response)


# ------------------
# 5. Gradio UI
# ------------------
demo = gr.Interface(
    fn=chat_stream,
    inputs=gr.Textbox(label="Ask", placeholder="e.g. What is 50 minus 20?"),
    outputs=gr.Textbox(label="Response"),
    title="HF LLM Calculator Agent",
)

# Launch only when executed as a script so the module can be imported
# (e.g. for testing the tools) without starting a web server.
if __name__ == "__main__":
    demo.launch()