"""Gradio chat front-end for a travel-advisor assistant served by Qwen2.5-7B-Instruct."""

import gradio as gr
from huggingface_hub import InferenceClient

# Temperature set to 1.0 for balanced creativity and accuracy.
client = InferenceClient("Qwen/Qwen2.5-7B-Instruct")


def respond(message, history):
    """Stream the assistant's reply to ``message``.

    Builds the chat-completion request from a fixed system prompt, the prior
    conversation (when there is one) and the new user message, then yields
    the reply as it grows so the UI can render it incrementally.

    Args:
        message: The user's latest message text.
        history: Prior conversation turns supplied by ``gr.ChatInterface``;
            may be empty or ``None`` on the first turn.

    Yields:
        The accumulated response text after each chunk that carries content.
    """
    messages = [{"role": "system", "content": "You are a knowledgeable and friendly travel advisor. Suggest destinations, travel tips, and itineraries based on the user's interests and budget. Keep responses under 150 words and always end with a fun fact about the destination."}]

    # NOTE(review): history is forwarded as-is, which assumes it is already a
    # list of {"role": ..., "content": ...} dicts - confirm against the chat
    # history format of the installed Gradio version.
    if history:
        messages.extend(history)

    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages,
        max_tokens=200,
        temperature=1.0,
        stream=True,
    ):
        # Streamed chunks do not always carry text: the choices list can be
        # empty and delta.content can be None, so skip those instead of
        # failing on `str += None` halfway through a reply.
        choices = chunk.choices
        if not choices:
            continue
        token = choices[0].delta.content
        if not token:
            continue
        response += token
        yield response


chatbot = gr.ChatInterface(respond)
chatbot.launch()