Spaces:
Runtime error
Runtime error
| import gradio as gr | |
| from transformers import AutoModelForCausalLM, AutoTokenizer | |
| import torch | |
# Checkpoint identifier handed to `from_pretrained`; everything runs on CPU.
# NOTE(review): the name suggests a 3B-parameter checkpoint -- confirm the
# host has enough RAM to hold it before relying on this in production.
MODEL_NAME = "togethercomputer/RedPajama-INCITE-3B-v1"

print("Loading model. This may take a few moments…")
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(MODEL_NAME)
model = model.to("cpu")  # keep weights on CPU explicitly
print("Model loaded!")
# Shared chat transcript as [user_message, reply] pairs. It lives at module
# level, so every call to chat_with_airi reads and extends the same list.
history = []

# How many of the most recent exchanges are replayed to the model as context.
_CONTEXT_TURNS = 5


def _build_prompt(past_turns, user_msg):
    """Render the recent turns plus the new message as a User/Airi transcript."""
    parts = [f"User: {u}\nAiri: {a}\n" for u, a in past_turns[-_CONTEXT_TURNS:]]
    parts.append(f"User: {user_msg}\nAiri:")
    return "".join(parts)


def _extract_reply(generated_text):
    """Keep only Airi's answer, dropping any follow-up turn the model invented."""
    return generated_text.split("\nUser:", 1)[0].strip()


def chat_with_airi(user_msg):
    """Generate Airi's reply to ``user_msg`` and record the exchange.

    Args:
        user_msg: Text typed by the user. ``None``, empty, or whitespace-only
            input is ignored and no generation is run.

    Returns:
        A ``(history, "")`` tuple: the full transcript (list of
        ``[user_message, reply]`` pairs) and an empty string, which the UI
        wiring uses to refresh the chat view and clear the input box.
    """
    if not user_msg or not user_msg.strip():
        # Nothing to answer; avoid a costly generation for a blank submit.
        return history, ""

    prompt = _build_prompt(history, user_msg)
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_len = inputs["input_ids"].shape[-1]

    with torch.no_grad():
        output = model.generate(
            **inputs,
            max_new_tokens=100,  # can adjust for longer replies
            do_sample=True,
            top_p=0.9,
            temperature=0.8,
            pad_token_id=tokenizer.eos_token_id,
        )

    # generate() returns prompt + continuation for causal models. Decode only
    # the continuation so earlier turns (or a user message that itself
    # contains "Airi:") can never leak into the stored reply.
    new_tokens = output[0][prompt_len:]
    reply = _extract_reply(tokenizer.decode(new_tokens, skip_special_tokens=True))

    history.append([user_msg, reply])
    return history, ""
# Page layout: two centered HTML header lines, the chat transcript, and a
# single text box whose submit event drives the conversation.
with gr.Blocks() as demo:
    gr.HTML(value="<h2 style='text-align:center'>Airi — Mini Chat AI</h2>")
    gr.HTML(
        value="<p style='text-align:center;color:#666;'>Small, Fast & Public Model</p>"
    )
    chat = gr.Chatbot()
    msg = gr.Textbox(
        label="Talk to Airi…",
        placeholder="Write here…",
    )
    # On Enter: send the box contents to chat_with_airi; its two return
    # values update the chat view and the text box respectively.
    msg.submit(fn=chat_with_airi, inputs=msg, outputs=[chat, msg])

demo.launch()