# Splashdude's picture
# Upload folder using huggingface_hub
# e9c6c9e verified
import threading
import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer
# Hugging Face Hub repository id handed to from_pretrained() for both the
# tokenizer and the model weights.
MODEL_ID = "Splashdude/smollm-chatbot"

# System message placed at the front of every conversation sent to the model.
SYSTEM_PROMPT = " ".join(
    (
        "You are a helpful, friendly AI assistant.",
        "You give clear, accurate, and conversational answers.",
        "Remember what the user tells you in this conversation.",
    )
)

# Lazily initialised singletons: both stay None until load_model() fills them.
model = tokenizer = None
def load_model():
    """Load the tokenizer and model into the module globals on first use.

    Does nothing when ``model`` is already set. Otherwise the tokenizer and
    the causal-LM weights are fetched for ``MODEL_ID``, the model is created
    in float32, moved to the CPU and switched to eval mode. Any exception
    raised by the loaders propagates to the caller.
    """
    global model, tokenizer

    if model is None:
        print("Loading model...")
        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
        model = AutoModelForCausalLM.from_pretrained(
            MODEL_ID,
            torch_dtype=torch.float32,
        )
        model.to("cpu")
        model.eval()
        print("Model loaded!")
def generate_response(message, chat_history):
    """Stream an assistant reply to ``message``, yielding the growing history.

    This is a generator. Every item it yields is a ``(history, "")`` pair,
    where ``history`` is a list of ``{"role": ..., "content": ...}`` dicts and
    the empty string is a placeholder the callers ignore.

    Args:
        message: The text the user just submitted. Blank or ``None`` input
            yields the history unchanged and stops.
        chat_history: Earlier turns as a list of role/content dicts. It is
            copied, never mutated; any falsy value (``None``, ``[]``, ``""``)
            is treated as an empty conversation.

    Yields:
        ``(history, "")`` once as soon as the user turn is recorded, then
        again after every streamed chunk of the reply. If loading the model
        or generating fails, the assistant turn carries ``"Error: ..."``
        instead of raising.
    """
    # Work on a private copy: the caller's list is left untouched and a
    # non-list placeholder for "no history yet" cannot break .append().
    chat_history = list(chat_history) if chat_history else []

    if model is None:
        try:
            load_model()
        except Exception as e:
            # Report the load failure inside the chat instead of raising.
            chat_history.append({"role": "user", "content": message})
            chat_history.append({"role": "assistant", "content": f"Error: {e}"})
            yield chat_history, ""
            return

    if not message or not message.strip():
        yield chat_history, ""
        return

    chat_history.append({"role": "user", "content": message})
    # Empty assistant turn that is filled in while tokens stream in.
    chat_history.append({"role": "assistant", "content": ""})
    # Emit the user's turn right away so at least one update always goes out,
    # even if generation produces no text at all.
    yield chat_history, ""

    # Prompt = system message + every turn except the empty assistant slot.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(
        {"role": turn["role"], "content": turn["content"]}
        for turn in chat_history[:-1]
    )

    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt")
    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )

    failures = []

    def _run_generation():
        # Runs in the worker thread and feeds the streamer.
        try:
            model.generate(
                **inputs,
                max_new_tokens=512,
                do_sample=True,
                temperature=0.7,
                top_p=0.9,
                repetition_penalty=1.1,
                streamer=streamer,
            )
        except Exception as exc:
            failures.append(exc)
            # generate() aborted before closing the stream; close it here so
            # the consuming loop below terminates instead of blocking forever.
            streamer.end()

    thread = threading.Thread(target=_run_generation)
    thread.start()

    partial = ""
    for token in streamer:
        partial += token
        chat_history[-1]["content"] = partial
        yield chat_history, ""
    thread.join()

    if failures:
        # Keep whatever was streamed and append the error underneath it.
        error_text = f"Error: {failures[0]}"
        chat_history[-1]["content"] = (
            f"{partial}\n\n{error_text}" if partial else error_text
        )
        yield chat_history, ""
def clear_chat():
    """Return fresh empty values for the chat display and the stored history.

    The click handler routes the two return values to the ``chatbot`` and
    ``chat_state`` outputs, in that order. Both must be lists: the stored
    history is later extended with ``.append``, so resetting it to a string
    would make the next message fail.

    Returns:
        A ``(chatbot_value, state_value)`` tuple of two distinct empty lists.
    """
    return [], []
# Build the chat page: heading, chat transcript, per-session history state,
# an input row, example prompts and the event wiring.
with gr.Blocks(title="AI Chatbot", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# AI Chatbot\nFast conversational AI powered by SmolLM2-360M.")

    chatbot = gr.Chatbot(
        type="messages",
        height=500,
        show_copy_button=True,
        label="Chat",
    )
    chat_state = gr.State([])

    # NOTE(review): the row nesting is inferred from the scale= hints on the
    # three widgets - confirm against the intended layout.
    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message...",
            show_label=False,
            container=False,
            scale=8,
        )
        submit = gr.Button("Send", variant="primary", scale=1)
        clear = gr.Button("New Chat", scale=1)

    gr.Examples(
        examples=[
            "Hello! How are you?",
            "Tell me a joke.",
            "What is the capital of France?",
            "Explain gravity in simple terms.",
        ],
        inputs=msg,
        label="Examples",
    )

    def user_submit(message, history):
        """Stream a reply, yielding (chatbot value, cleared textbox, state)."""
        for snapshot, _ignored in generate_response(message, history):
            yield snapshot, "", snapshot

    def bot_response(message, history):
        """Stream a reply, yielding (chatbot value, state) only.

        Not attached to any event in the wiring below.
        """
        for snapshot, _ignored in generate_response(message, history):
            yield snapshot, snapshot

    # Pressing Enter in the textbox and clicking "Send" trigger the same
    # handler with the same inputs and outputs.
    send_inputs = [msg, chat_state]
    send_outputs = [chatbot, msg, chat_state]
    for trigger in (msg.submit, submit.click):
        trigger(
            user_submit,
            send_inputs,
            send_outputs,
            queue=True,
        )

    clear.click(clear_chat, None, [chatbot, chat_state])
if __name__ == "__main__":
    # Script entry point: enable the request queue, then start the app.
    demo.queue().launch()