# app.py — local chat with FLAN-T5 (no token needed)
"""Minimal Gradio chat UI backed by a FLAN-T5 text2text-generation pipeline."""

import gradio as gr
from transformers import pipeline

MODEL_NAME = "google/flan-t5-base"
MAX_NEW_TOKENS = 256  # upper bound on the length of each generated reply
TEMPERATURE = 0.7  # only takes effect because sampling is enabled in chat()

# Built once at import time so every chat turn reuses the same pipeline.
pipe = pipeline("text2text-generation", model=MODEL_NAME)


def chat(message, history):
    """Generate a reply to ``message`` with the FLAN-T5 pipeline.

    Args:
        message: The text the user just submitted.
        history: Earlier turns supplied by ``gr.ChatInterface``. It is not
            used: each reply is generated from the current message only,
            which keeps the model input short.

    Returns:
        The ``generated_text`` of the first candidate the pipeline returns.
    """
    # Prepend a simple instruction so the model answers as an assistant.
    prompt = f"Respond helpfully to the user.\nUser: {message}\nAssistant:"
    outputs = pipe(
        prompt,
        max_new_tokens=MAX_NEW_TOKENS,
        # Without do_sample=True decoding is greedy and `temperature` is
        # ignored, so sampling must be switched on explicitly.
        do_sample=True,
        temperature=TEMPERATURE,
    )
    return outputs[0]["generated_text"]


demo = gr.ChatInterface(chat, title="FLAN-T5 Chat (no token)")

if __name__ == "__main__":
    demo.launch()