File size: 1,284 Bytes
38d0bd1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import torch

# only the real FLAN-T5 Small model repo
# Pinned to the official Google checkpoint; small enough to run on CPU.
MODEL_REPO = "google/flan-t5-small"

print("Loading FLAN-T5 Small model...")
# Downloads (or reads from the local HF cache) the tokenizer and the
# seq2seq weights for the repo above. Both are module-level singletons
# shared by every call to chat() below.
tokenizer = AutoTokenizer.from_pretrained(MODEL_REPO)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_REPO)
# Inference only: disables dropout / batch-norm training behavior.
model.eval()
print("Model loaded")

def chat(prompt):
    """Generate a model reply for *prompt*.

    Returns the decoded FLAN-T5 output string, or a short hint when the
    prompt is blank/whitespace-only.
    """
    # Guard clause: nothing to generate from an empty message.
    if not prompt.strip():
        return "Type a message first"

    # Tokenize to PyTorch tensors; long prompts are truncated to 256 tokens.
    encoded = tokenizer(
        prompt,
        return_tensors="pt",
        truncation=True,
        padding=True,
        max_length=256,
    )

    # Sampling decode (nucleus top-p with mild temperature); no gradients
    # are needed for inference.
    with torch.no_grad():
        generated = model.generate(
            **encoded,
            max_length=128,
            do_sample=True,
            top_p=0.9,
            temperature=0.7,
        )

    # Single input -> single output sequence; drop special tokens like </s>.
    return tokenizer.decode(generated[0], skip_special_tokens=True)

# Minimal single-turn UI: one input box, one output box, one button.
with gr.Blocks(title="SmallGPT CPU") as demo:
    gr.Markdown("# SmallGPT CPU Chat \nPowered by google/flan-t5-small (CPU only)")

    prompt_box = gr.Textbox(label="Your message", lines=2)
    reply_box = gr.Textbox(label="SmallGPT says", lines=4)
    submit = gr.Button("Send")

    # Both the button click and pressing Enter in the textbox trigger chat().
    for trigger in (submit.click, prompt_box.submit):
        trigger(chat, inputs=prompt_box, outputs=reply_box)

demo.launch()