"""Gradio chat app: a cooking "grandma" persona served by Qwen2.5-7B-Instruct."""

import os

import gradio as gr
from huggingface_hub import InferenceClient

# Persona instruction sent as the first message of every request.
SYSTEM_PROMPT = (
    "You are a caring grandma passing down her family recipes and teaching "
    "a niece to cook your favorite dishes."
)

# The token is read from the environment; os.getenv returns None when the
# variable is unset, and that None is passed straight to the client.
hf_token = os.getenv("HF_Token")
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct", token=hf_token)


def _history_to_messages(history):
    """Convert the chat history handed over by Gradio into role/content dicts.

    Dict entries are passed through untouched. Two-item list/tuple entries are
    treated as ``(user, assistant)`` pairs and expanded into two messages,
    skipping a side that is ``None``. Anything else is forwarded as-is so the
    previous behaviour is preserved for unknown shapes.
    """
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            messages.append(turn)
        elif isinstance(turn, (list, tuple)) and len(turn) == 2:
            # NOTE(review): assumes pair entries are (user, assistant), as in
            # Gradio's pair-style history format; confirm against the
            # installed Gradio version.
            user_text, assistant_text = turn
            if user_text is not None:
                messages.append({"role": "user", "content": user_text})
            if assistant_text is not None:
                messages.append({"role": "assistant", "content": assistant_text})
        else:
            messages.append(turn)
    return messages


def respond(message, history):
    """Stream the model's reply to ``message`` given the prior ``history``.

    Args:
        message: The latest user input.
        history: Previous conversation turns as supplied by the chat UI; may
            be empty or ``None``.

    Yields:
        The accumulated reply text, once for every non-empty token received,
        so the caller always gets the full response generated so far.
    """
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    response = ""
    stream = client.chat_completion(
        messages,
        max_tokens=500,
        temperature=0.5,
        stream=True,
    )
    # The loop variable is deliberately not called ``message`` so it does not
    # shadow the function parameter.
    for chunk in stream:
        # Guard against chunks that carry no choices instead of raising
        # IndexError on ``choices[0]``.
        if not chunk.choices:
            continue
        token = chunk.choices[0].delta.content
        if token:
            response += token
            yield response


chatbot = gr.ChatInterface(
    respond,
    description="A granny that'll help you learn to cook",
)

chatbot.launch()