Spaces:
Sleeping
Sleeping
| import gradio as gr | |
| import os | |
| import asyncio | |
| from llama_index.core.agent.workflow import AgentWorkflow | |
| from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI | |
| # ------------------ | |
| # 1. Tools | |
| # ------------------ | |
def add(a: int, b: int) -> int:
    """Adds two integers together."""
    # The docstring above is left untouched: this function is handed to the
    # agent as a tool, and the docstring is presumably what the LLM sees as
    # the tool description.
    total = a + b
    return total
def subtract(a: int, b: int) -> int:
    """Subtracts b from a and returns the difference (a - b).

    The operand order is stated explicitly because this function is handed
    to the agent as a tool: for "50 minus 20", call with a=50 and b=20.

    Args:
        a: The minuend (the number being subtracted from).
        b: The subtrahend (the number to subtract).

    Returns:
        The integer result of ``a - b``.
    """
    return a - b
| # ------------------ | |
| # 2. Hugging Face LLM | |
| # ------------------ | |
# Module-level LLM client shared by the agent below.
# NOTE(review): `max_new_tokens` and `generate_kwargs` look like the parameter
# names of the local `HuggingFaceLLM` class; confirm that
# `HuggingFaceInferenceAPI` actually honors them (it may expect `num_output`
# and `temperature` instead).
llm = HuggingFaceInferenceAPI(
    model_name="zai-org/GLM-4.7",
    # Token comes from the environment; this is None when HF_TOKEN is unset.
    token=os.environ.get("HF_TOKEN"),
    context_window=4096,
    max_new_tokens=512,
    # Low temperature requested for the calculator use case.
    generate_kwargs=dict(temperature=0.1),
)
| # ------------------ | |
| # 3. Agent Setup (The Fixed Part) | |
| # ------------------ | |
| # We use from_tools_or_functions to automatically create the agent logic | |
# Build the agent workflow from the two plain Python functions defined above,
# driven by the shared `llm` and a calculator-focused system prompt.
agent = AgentWorkflow.from_tools_or_functions(
    [add, subtract],
    system_prompt="You are a calculator assistant. Use tools for math calculations.",
    llm=llm,
)
| # ------------------ | |
| # 4. Streaming handler | |
| # ------------------ | |
async def chat_stream(user_msg):
    """Stream the agent's reply to ``user_msg`` as progressively longer text.

    This is an async generator. Each yielded value is the complete text
    accumulated so far (not just the newest fragment), so a consumer can
    overwrite its output with the latest value.

    Args:
        user_msg: The user's question, forwarded to the agent unchanged.

    Yields:
        The response text received so far, once per non-empty streamed delta.
        If the run finished without streaming any text, the string form of
        the final result is yielded once instead.
    """
    # AgentWorkflow has no `astream_run` method. `run` starts the workflow and
    # returns a handler; the event stream is exposed via `stream_events()`.
    handler = agent.run(user_msg=user_msg)

    partial_text = ""
    async for event in handler.stream_events():
        # Only some event types carry a text delta; ignore everything else
        # without needing to import the specific event classes.
        delta = getattr(event, "delta", None)
        if delta:
            partial_text += delta
            yield partial_text

    # Awaiting the handler waits for the run to finish and surfaces any error
    # raised inside the workflow instead of ending the stream silently.
    result = await handler

    # Fallback for backends that return the answer without streaming deltas.
    if not partial_text:
        final_text = str(result)
        if final_text:
            yield final_text
| # ------------------ | |
| # 5. Gradio UI | |
| # ------------------ | |
# Single-textbox UI: the question goes in, and the output textbox is updated
# with each value produced by the `chat_stream` async generator.
demo = gr.Interface(
    title="HF LLM Calculator Agent",
    fn=chat_stream,
    inputs=gr.Textbox(placeholder="e.g. What is 50 minus 20?", label="Ask"),
    outputs=gr.Textbox(label="Response"),
)

# Start the Gradio server when this module is executed.
demo.launch()