"""Minimal Gradio chat UI for a Venice Biennale art guide backed by Gemma-2 on CPU."""

import gradio as gr
from transformers import pipeline

MODEL = "google/gemma-2-2b-it"  # <-- safe, open, ungated

# CPU text-generation pipeline; max_new_tokens caps the length of each reply.
pipe = pipeline(
    "text-generation",
    model=MODEL,
    device_map="cpu",
    max_new_tokens=300,
)

SYSTEM_PROMPT = (
    "You are a warm, friendly, knowledgeable art guide for the Venice Biennale. "
    "Give helpful, specific, conversational answers. Avoid repeating yourself. "
    "If the user asks something unrelated to art, still answer normally but in a "
    "kind, human, engaging way."
)


def format_prompt(user_input):
    """Build the raw chat-style prompt string fed to the model.

    NOTE(review): this is a hand-rolled approximation of a chat template;
    Gemma's official template uses <start_of_turn>/<end_of_turn> markers and
    has no system role — confirm against the model card if quality matters.
    """
    return f"system\n{SYSTEM_PROMPT}\nuser\n{user_input}\nmodel\n"


def predict(user_input):
    """Run one generation and return only the model's reply text.

    return_full_text=False makes the pipeline return just the newly generated
    continuation instead of prompt + continuation. This replaces the previous
    result.split("model")[-1] parsing, which broke whenever the reply itself
    contained the word "model".
    """
    prompt = format_prompt(user_input)
    result = pipe(prompt, return_full_text=False)[0]["generated_text"]
    return result.strip()


# Simple UI
def chat_fn(message, history):
    """Gradio ChatInterface callback; conversation history is ignored (stateless)."""
    return predict(message)


ui = gr.ChatInterface(chat_fn, title="Venice Biennale Art Guide")

if __name__ == "__main__":
    # Guarded so importing this module doesn't immediately start a server.
    ui.launch()