Spaces:
Runtime error
Runtime error
File size: 1,517 Bytes
e38d496 d1cdbeb 5d8146e 37f3507 7268f80 3820c26 7268f80 d48b5b3 5d8146e 7268f80 5d8146e 7268f80 5d8146e d48b5b3 5d8146e 40bf4c8 7268f80 4cd7316 5d8146e 40bf4c8 7268f80 5d8146e 37f3507 e38d496 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 | import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Identifier handed to from_pretrained() for both the tokenizer and the model.
MODEL_NAME = "togethercomputer/RedPajama-INCITE-3B-v1"

print("Loading model. This may take a few moments…")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
# Load the weights, then place the model on the CPU explicitly.
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
model = model.to("cpu")
print("Model loaded!")

# Module-level conversation log: the chat handler appends one
# [user_message, reply] pair per call and replays the most recent ones.
history = []
def chat_with_airi(user_msg):
    """Generate Airi's reply to ``user_msg`` and record the exchange.

    A plain-text prompt is assembled from the five most recent exchanges in
    the module-level ``history`` followed by the new user message. A
    continuation is sampled from ``model`` and the resulting
    ``[user_msg, reply]`` pair is appended to ``history``.

    Args:
        user_msg: Text submitted by the user.

    Returns:
        A ``(history, "")`` tuple: the conversation list and an empty string
        (the caller wires the latter to the input textbox, clearing it).
    """
    # Blank submissions are ignored rather than sent through generation.
    if not user_msg or not user_msg.strip():
        return history, ""

    # Build the conversation prompt from the last 5 exchanges.
    past_turns = "".join(f"User: {u}\nAiri: {a}\n" for u, a in history[-5:])
    prompt = f"{past_turns}User: {user_msg}\nAiri:"

    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")
    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=100,  # can adjust for longer replies
            do_sample=True,
            top_p=0.9,
            temperature=0.8,
            pad_token_id=tokenizer.eos_token_id,
        )

    # The generated sequence starts with the prompt tokens. Decode only what
    # follows them; splitting the full text on the first "Airi:" would return
    # the replayed history together with the new reply.
    prompt_len = inputs["input_ids"].shape[1]
    reply = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)

    # The model may keep going and invent the next user turn; keep only
    # Airi's own turn.
    reply = reply.split("\nUser:", 1)[0].strip()

    history.append([user_msg, reply])
    return history, ""
# Page layout: two HTML banners, the chat transcript, and a text input.
with gr.Blocks() as demo:
    gr.HTML("<h2 style='text-align:center'>Airi — Mini Chat AI</h2>")
    gr.HTML("<p style='text-align:center;color:#666;'>Small, Fast & Public Model</p>")
    chat = gr.Chatbot()
    msg = gr.Textbox(label="Talk to Airi…", placeholder="Write here…")
    # On submit, the textbox value goes to chat_with_airi; its two return
    # values are written to the transcript and back to the textbox.
    msg.submit(fn=chat_with_airi, inputs=msg, outputs=[chat, msg])

demo.launch()
|