"""Minimal CPU-only chat demo: a Gradio UI wrapped around google/flan-t5-small."""
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch
# only the real FLAN-T5 Small model repo
MODEL_REPO = "google/flan-t5-small"
print("Loading FLAN-T5 Small model...")
# Downloads (or reads from the local HF cache) the tokenizer and the
# seq2seq model weights at import time — this is the slow step on first run.
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_REPO)
# Inference-only: disables dropout etc.; no training happens in this script.
model.eval()
print("Model loaded")
def chat(prompt):
    """Generate a single FLAN-T5 reply for *prompt*.

    Returns a fixed nudge string when the prompt is empty/whitespace;
    otherwise returns the sampled model output as plain text.
    """
    # Guard clause: nothing to send to the model.
    if not prompt.strip():
        return "Type a message first"

    # Tokenize, capping the input at 256 tokens.
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=256,
    )

    # Nucleus sampling (top_p=0.9) with mild temperature; no gradients needed.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_length=128,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
        )

    first_sequence = generated[0]
    return tokenizer.decode(first_sequence, skip_special_tokens=True)
# Build the Gradio UI: one input box, one output box, a Send button.
# Both the button click and pressing Enter in the textbox route through chat().
with gr.Blocks(title="SmallGPT CPU") as demo:
    gr.Markdown("# SmallGPT CPU Chat \nPowered by google/flan-t5-small (CPU only)")
    user_input = gr.Textbox(label="Your message", lines=2)
    bot_output = gr.Textbox(label="SmallGPT says", lines=4)
    send_btn = gr.Button("Send")
    send_btn.click(chat, inputs=user_input, outputs=bot_output)
    user_input.submit(chat, inputs=user_input, outputs=bot_output)

# Fix: only start the web server when run as a script — the original called
# demo.launch() unconditionally, which blocks any `import` of this module.
if __name__ == "__main__":
    demo.launch()