import gradio as gr
from huggingface_hub import InferenceClient

# Shared inference client bound to the Qwen2.5-7B-Instruct chat model.
# Sampling settings (temperature=1.0, max_tokens=200) are passed per request
# in respond(), not configured here.
client = InferenceClient(model="Qwen/Qwen2.5-7B-Instruct")

_SYSTEM_PROMPT = (
    "You are a knowledgeable and friendly travel advisor. "
    "Suggest destinations, travel tips, and itineraries based on the user's "
    "interests and budget. "
    "Keep responses under 150 words and always end with a fun fact about the "
    "destination."
)


def _history_to_messages(history):
    """Normalize chat history into a list of role/content message dicts.

    Accepts either messages-style history (dicts with "role" and "content")
    or pair-style history (``[user_text, assistant_text]`` per turn).
    """
    normalized = []
    for entry in history or []:
        if isinstance(entry, dict):
            # Forward only the chat fields; UI-only keys are not part of the
            # chat-completion message schema.
            normalized.append(
                {key: entry[key] for key in ("role", "content") if key in entry}
            )
            continue
        user_text, assistant_text = entry
        if user_text:
            normalized.append({"role": "user", "content": user_text})
        if assistant_text:
            normalized.append({"role": "assistant", "content": assistant_text})
    return normalized


def respond(message, history):
    """Stream the travel advisor's reply to ``message``.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation, either as role/content
            dicts or as ``[user_text, assistant_text]`` pairs. May be empty
            or ``None``.

    Yields:
        The reply accumulated so far, once per streamed chunk.
    """
    messages = [{"role": "system", "content": _SYSTEM_PROMPT}]
    messages.extend(_history_to_messages(history))
    messages.append({"role": "user", "content": message})

    response = ""
    for chunk in client.chat_completion(
        messages, max_tokens=200, temperature=1.0, stream=True
    ):
        choices = chunk.choices
        # Some chunks carry no text (empty `choices`, or a delta whose content
        # is None); concatenating those would raise, so treat them as "".
        token = choices[0].delta.content if choices else None
        if token:
            response += token
        yield response

# Wrap the streaming handler in a chat UI and launch the app.
chatbot = gr.ChatInterface(fn=respond)
chatbot.launch()