# app.py  — local chat with FLAN-T5 (no token needed)
import gradio as gr
from transformers import pipeline

# Build the generation pipeline once at import time; chat() reuses this
# single module-level instance for every request.
pipe = pipeline(
    task="text2text-generation",
    model="google/flan-t5-base",
)

# How many of the most recent (user, assistant) exchanges are replayed in the
# prompt. Kept small so the prompt stays short.
_MAX_HISTORY_TURNS = 3

_ROLE_LABELS = {"user": "User", "assistant": "Assistant"}


def _history_lines(history, max_turns=_MAX_HISTORY_TURNS):
    """Render recent chat history as ``User: ...`` / ``Assistant: ...`` lines.

    Accepts either a list of ``(user, assistant)`` pairs or a list of
    ``{"role": ..., "content": ...}`` dicts. Entries whose text is not a
    non-empty string (e.g. ``None`` or non-text payloads) are skipped.
    """
    if max_turns <= 0:
        return []

    lines = []
    for turn in history or []:
        if isinstance(turn, dict):
            label = _ROLE_LABELS.get(turn.get("role"))
            pairs = [(label, turn.get("content"))]
        elif isinstance(turn, (list, tuple)):
            pairs = zip(("User", "Assistant"), turn)
        else:
            # Unknown history entry shape: ignore rather than crash the chat.
            continue
        lines.extend(
            f"{label}: {text}"
            for label, text in pairs
            if label and isinstance(text, str) and text
        )
    # One exchange contributes up to two lines (user + assistant).
    return lines[-2 * max_turns:]


def chat(message, history):
    """Generate the assistant's reply to ``message``.

    Args:
        message: The latest user message.
        history: Earlier turns of the conversation as supplied by the chat
            UI; only the last few turns are included in the prompt.

    Returns:
        The text generated by the module-level ``pipe`` for the prompt.
    """
    # Prepend a simple instruction; keep only a short history to avoid long
    # inputs. With no history this is the same prompt as a single-turn chat.
    prompt_lines = ["Respond helpfully to the user."]
    prompt_lines.extend(_history_lines(history))
    prompt_lines.append(f"User: {message}")
    prompt_lines.append("Assistant:")
    prompt = "\n".join(prompt_lines)

    # temperature only affects generation when sampling is enabled; without
    # do_sample=True decoding is greedy and the temperature is ignored.
    result = pipe(
        prompt,
        max_new_tokens=256,
        do_sample=True,
        temperature=0.7,
    )
    return result[0]["generated_text"]

# Wire the chat callback into Gradio's chat UI. The UI is only launched when
# this file is executed directly, not when it is imported.
demo = gr.ChatInterface(fn=chat, title="FLAN-T5 Chat (no token)")

if __name__ == "__main__":
    demo.launch()