"""Minimal Gradio chat UI for a Venice Biennale art guide backed by Gemma-2 on CPU."""

import gradio as gr
from transformers import pipeline

MODEL = "google/gemma-2-2b-it"  # <-- safe, open, ungated

# CPU text-generation pipeline; max_new_tokens caps the length of each reply.
pipe = pipeline(
    "text-generation",
    model=MODEL,
    device_map="cpu",
    max_new_tokens=300,
)

SYSTEM_PROMPT = (
    "You are a warm, friendly, knowledgeable art guide for the Venice Biennale. "
    "Give helpful, specific, conversational answers. Avoid repeating yourself. "
    "If the user asks something unrelated to art, still answer normally but in a "
    "kind, human, engaging way."
)


def format_prompt(user_input):
    """Build the raw chat-style prompt string fed to the model.

    NOTE(review): this is a hand-rolled approximation of a chat template;
    Gemma's official template uses <start_of_turn>/<end_of_turn> markers and
    has no system role — confirm against the model card if quality matters.
    """
    return f"system\n{SYSTEM_PROMPT}\nuser\n{user_input}\nmodel\n"


def predict(user_input):
    """Run one generation and return only the model's reply text.

    return_full_text=False makes the pipeline return just the newly generated
    continuation instead of prompt + continuation. This replaces the previous
    result.split("model")[-1] parsing, which broke whenever the reply itself
    contained the word "model".
    """
    prompt = format_prompt(user_input)
    result = pipe(prompt, return_full_text=False)[0]["generated_text"]
    return result.strip()


# Simple UI
def chat_fn(message, history):
    """Gradio ChatInterface callback; conversation history is ignored (stateless)."""
    return predict(message)


ui = gr.ChatInterface(chat_fn, title="Venice Biennale Art Guide")

if __name__ == "__main__":
    # Guarded so importing this module doesn't immediately start a server.
    ui.launch()