# Chat agent using LlamaIndex SimpleChatEngine + Gradio
import os

import gradio as gr
from dotenv import load_dotenv
from llama_index.core.chat_engine import SimpleChatEngine
from llama_index.core.llms import ChatMessage
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Load variables from a .env file into the environment before HF_TOKEN is
# read on the next line.
load_dotenv()
hf_token = os.getenv("HF_TOKEN")  # None when the variable is not set

llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    temperature=0.7,
    max_tokens=1024,
    token=hf_token,
    provider="auto",
)

# One engine (and one memory buffer) is created for the whole process.
# `respond` overwrites the buffer from the caller's own history on every
# request, so the buffer acts as per-request scratch space rather than a
# conversation shared by all sessions.
memory = ChatMemoryBuffer.from_defaults(token_limit=3000)
chat_engine = SimpleChatEngine.from_defaults(llm=llm, memory=memory)

# Roles forwarded to the engine; history entries with any other role are
# skipped.
_CHAT_ROLES = frozenset({"user", "assistant", "system"})


def _text_of(content):
    """Return the plain text carried by one history ``content`` value.

    Strings are returned unchanged. A list is treated as a sequence of
    content blocks: string items and the ``"text"`` field of dict items are
    joined with newlines. Anything else (``None``, file payloads, UI
    components) yields an empty string.
    """
    if isinstance(content, str):
        return content
    if isinstance(content, list):
        parts = []
        for block in content:
            if isinstance(block, str):
                parts.append(block)
            elif isinstance(block, dict) and isinstance(block.get("text"), str):
                parts.append(block["text"])
        return "\n".join(parts)
    return ""


def _history_to_messages(history):
    """Convert the history Gradio passes to ``respond`` into ChatMessages.

    Two entry shapes are accepted:

    * a dict with ``"role"`` and ``"content"`` keys;
    * a two-item list/tuple ``(user_text, assistant_text)``.

    Entries of any other shape, entries whose role is not in
    ``_CHAT_ROLES``, and entries without text are dropped.
    """
    messages = []
    for turn in history or ():
        if isinstance(turn, dict):
            pairs = [(turn.get("role"), turn.get("content"))]
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            pairs = [("user", turn[0]), ("assistant", turn[1])]
        else:
            continue
        for role, content in pairs:
            text = _text_of(content)
            if role in _CHAT_ROLES and text:
                messages.append(ChatMessage(role=role, content=text))
    return messages


def respond(message, history):
    """Answer one chat turn for the Gradio interface.

    Args:
        message: The text the user just submitted.
        history: The earlier turns of this session as supplied by Gradio.

    Returns:
        The engine's reply converted to ``str``.
    """
    # Passing chat_history (even an empty list) replaces whatever an earlier
    # request left in the shared memory buffer, so each session only sees its
    # own turns and a cleared chat really starts from scratch.
    # NOTE(review): the buffer is still a single shared object; if the app is
    # configured to run several `respond` calls at once they could interleave
    # -- confirm the queue's concurrency setting before raising it.
    response = chat_engine.chat(
        message,
        chat_history=_history_to_messages(history),
    )
    return str(response)


demo = gr.ChatInterface(respond)
# Launched at module level, exactly as in the original script.
demo.launch()