"""Gradio chat UI that streams replies from a causal language model on CPU."""

import threading

import torch
import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

MODEL_ID = "Splashdude/smollm-chatbot"

SYSTEM_PROMPT = (
    "You are a helpful, friendly AI assistant. "
    "You give clear, accurate, and conversational answers. "
    "Remember what the user tells you in this conversation."
)

# Both are populated lazily by load_model(); generate_response() triggers the
# load the first time it is called while `model` is still None.
model = None
tokenizer = None


def load_model():
    """Load the tokenizer and model for MODEL_ID into the module globals.

    Does nothing when the model is already loaded. The model is loaded in
    float32, moved to the CPU and switched to eval mode.

    Raises:
        Exception: whatever ``from_pretrained`` raises when loading fails;
            the caller decides how to report it.
    """
    global model, tokenizer
    if model is not None:
        return

    print("Loading model...")
    tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
    model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float32)
    model.to("cpu")
    model.eval()
    print("Model loaded!")


def _run_generation(generation_kwargs, streamer, errors):
    """Run ``model.generate`` in a worker thread and report failures.

    Any exception is appended to ``errors`` and the streamer is ended, so the
    loop consuming the streamer terminates instead of waiting for tokens that
    will never arrive (the streamer is created without a timeout).
    """
    try:
        model.generate(**generation_kwargs)
    except Exception as exc:  # thread boundary: hand the error to the consumer
        errors.append(exc)
        streamer.end()


def generate_response(message, chat_history):
    """Stream the assistant's reply to ``message``.

    Args:
        message: Text typed by the user.
        chat_history: List of ``{"role": ..., "content": ...}`` dicts. It is
            mutated in place: the user turn and the assistant turn are
            appended, and the assistant turn grows as text is streamed.

    Yields:
        ``(chat_history, "")`` after every streamed chunk. For a blank message
        the history is yielded once, unchanged. If loading the model or
        generating fails, the assistant turn carries an ``Error: ...`` text.
    """
    # Validate first so a blank submit neither triggers a model load nor adds
    # an empty user turn when that load fails.
    if not message or not message.strip():
        yield chat_history, ""
        return

    if model is None:
        try:
            load_model()
        except Exception as e:
            chat_history.append({"role": "user", "content": message})
            chat_history.append({"role": "assistant", "content": f"Error: {e}"})
            yield chat_history, ""
            return

    chat_history.append({"role": "user", "content": message})
    chat_history.append({"role": "assistant", "content": ""})

    # The prompt is the system message plus every turn except the empty
    # assistant placeholder that was just appended.
    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
    messages.extend(
        {"role": turn["role"], "content": turn["content"]}
        for turn in chat_history[:-1]
    )

    text = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(text, return_tensors="pt")

    streamer = TextIteratorStreamer(
        tokenizer, skip_prompt=True, skip_special_tokens=True
    )
    generation_kwargs = {
        **inputs,
        "max_new_tokens": 512,
        "do_sample": True,
        "temperature": 0.7,
        "top_p": 0.9,
        "repetition_penalty": 1.1,
        "streamer": streamer,
    }

    # Generation runs in a background thread so this generator can forward
    # text to the UI while it is still being produced.
    errors = []
    thread = threading.Thread(
        target=_run_generation,
        args=(generation_kwargs, streamer, errors),
    )
    thread.start()

    partial = ""
    for token in streamer:
        partial += token
        chat_history[-1]["content"] = partial
        yield chat_history, ""

    thread.join()

    if errors:
        # Keep whatever was streamed before the failure and report the error
        # in the same "Error: ..." form used for load failures.
        error_text = f"Error: {errors[0]}"
        chat_history[-1]["content"] = (
            f"{partial}\n\n{error_text}" if partial else error_text
        )
        yield chat_history, ""


def clear_chat():
    """Return fresh values for the chatbot display and the history state.

    Both outputs are empty lists: the second value is written to the history
    state, which generate_response() appends to, so it must stay a list.
    """
    return [], []


with gr.Blocks(title="AI Chatbot", theme=gr.themes.Soft()) as demo:
    gr.Markdown("# AI Chatbot\nFast conversational AI powered by SmolLM2-360M.")

    chatbot = gr.Chatbot(
        type="messages", height=500, show_copy_button=True, label="Chat"
    )
    chat_state = gr.State([])

    with gr.Row():
        msg = gr.Textbox(
            placeholder="Type your message...",
            show_label=False,
            container=False,
            scale=8,
        )
        submit = gr.Button("Send", variant="primary", scale=1)
        clear = gr.Button("New Chat", scale=1)

    gr.Examples(
        examples=[
            "Hello! How are you?",
            "Tell me a joke.",
            "What is the capital of France?",
            "Explain gravity in simple terms.",
        ],
        inputs=msg,
        label="Examples",
    )

    def user_submit(message, history):
        """Stream a reply, yielding (chatbot value, cleared textbox, state)."""
        for updated_history, _ in generate_response(message, history):
            yield updated_history, "", updated_history

    def bot_response(message, history):
        """Stream a reply, yielding (chatbot value, state).

        Not connected to any event in this file.
        """
        for updated_history, _ in generate_response(message, history):
            yield updated_history, updated_history

    # Pressing Enter in the textbox and clicking "Send" share one handler.
    msg.submit(
        user_submit,
        [msg, chat_state],
        [chatbot, msg, chat_state],
        queue=True,
    )
    submit.click(
        user_submit,
        [msg, chat_state],
        [chatbot, msg, chat_state],
        queue=True,
    )
    clear.click(clear_chat, None, [chatbot, chat_state])


if __name__ == "__main__":
    demo.queue()
    demo.launch()