Spaces:

Splashdude
/

reasoning-chat-model-chat

Runtime error

App Files Files Community

reasoning-chat-model-chat / app.py

Splashdude

Upload folder using huggingface_hub

e9c6c9e verified 16 days ago

raw

history blame contribute delete

3.79 kB

	import threading
	import torch
	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer

	# Hub repository id used for both the tokenizer and the model weights.
	MODEL_ID = "Splashdude/smollm-chatbot"

	# System message placed at the start of every prompt sent to the model.
	SYSTEM_PROMPT = "".join(
	    [
	        "You are a helpful, friendly AI assistant. ",
	        "You give clear, accurate, and conversational answers. ",
	        "Remember what the user tells you in this conversation.",
	    ]
	)

	# Populated by load_model(); None means nothing has been loaded yet.
	model = tokenizer = None


	def load_model():
	    """Load the tokenizer and model into the module-level globals, once.

	    Returns immediately when ``model`` is already set. Otherwise the
	    tokenizer and model for ``MODEL_ID`` are loaded, the model is moved to
	    the CPU and switched to eval mode, and only then are the globals
	    assigned.

	    Raises:
	        Any exception raised while loading or preparing the model is
	        propagated; in that case ``model`` and ``tokenizer`` are left
	        unchanged so a later call can retry.
	    """
	    global model, tokenizer
	    if model is not None:
	        return

	    print("Loading model...")

	    # Work on locals first: ``model is not None`` is what callers use as the
	    # "ready" flag, so the global must not be set until every preparation
	    # step has succeeded.
	    loaded_tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	    loaded_model = AutoModelForCausalLM.from_pretrained(MODEL_ID, torch_dtype=torch.float32)
	    loaded_model.to("cpu")
	    loaded_model.eval()

	    # Publish the tokenizer before the model so that anything observing a
	    # non-None ``model`` also finds ``tokenizer`` in place.
	    tokenizer = loaded_tokenizer
	    model = loaded_model

	    print("Model loaded!")


	def generate_response(message, chat_history):
	    """Stream the assistant's reply to ``message``.

	    Args:
	        message: Text submitted by the user.
	        chat_history: List of ``{"role": ..., "content": ...}`` dicts for the
	            conversation so far. It is extended in place with the new user
	            message and the assistant reply. Any non-list value is treated
	            as an empty history.

	    Yields:
	        ``(chat_history, "")`` tuples. Blank input yields the unchanged
	        history once. Otherwise one tuple is yielded per streamed chunk of
	        the reply; if loading the model or generating fails, a final tuple
	        is yielded whose assistant message contains ``"Error: ..."``.
	    """
	    # The history is expected to be a list of messages; start a fresh one
	    # rather than crashing on ``.append`` if some other value arrives.
	    if not isinstance(chat_history, list):
	        chat_history = []

	    # Blank input is a no-op, so reject it before paying for a model load.
	    if not message or not message.strip():
	        yield chat_history, ""
	        return

	    if model is None:
	        try:
	            load_model()
	        except Exception as e:
	            chat_history.append({"role": "user", "content": message})
	            chat_history.append({"role": "assistant", "content": f"Error: {e}"})
	            yield chat_history, ""
	            return

	    chat_history.append({"role": "user", "content": message})
	    chat_history.append({"role": "assistant", "content": ""})

	    # Prompt = system message + every turn except the empty assistant
	    # placeholder appended just above.
	    messages = [{"role": "system", "content": SYSTEM_PROMPT}]
	    messages.extend(
	        {"role": turn["role"], "content": turn["content"]}
	        for turn in chat_history[:-1]
	    )

	    text = tokenizer.apply_chat_template(
	        messages, tokenize=False, add_generation_prompt=True
	    )
	    inputs = tokenizer(text, return_tensors="pt")

	    streamer = TextIteratorStreamer(
	        tokenizer, skip_prompt=True, skip_special_tokens=True
	    )

	    failures = []

	    def _run_generation():
	        """Run generation in the worker thread and never leave the streamer open."""
	        try:
	            model.generate(
	                **inputs,
	                max_new_tokens=512,
	                do_sample=True,
	                temperature=0.7,
	                top_p=0.9,
	                repetition_penalty=1.1,
	                streamer=streamer,
	            )
	        except Exception as exc:
	            failures.append(exc)
	            # After a failure nothing else signals end-of-stream, and the
	            # consumer loop below would block forever; close it ourselves.
	            streamer.end()

	    thread = threading.Thread(target=_run_generation)
	    thread.start()

	    partial = ""
	    for token in streamer:
	        partial += token
	        chat_history[-1]["content"] = partial
	        yield chat_history, ""

	    thread.join()

	    if failures:
	        # Report the failure the same way a load failure is reported,
	        # keeping whatever text was streamed before it happened.
	        error_text = f"Error: {failures[0]}"
	        chat_history[-1]["content"] = (
	            f"{partial}\n\n{error_text}" if partial else error_text
	        )
	        yield chat_history, ""


	def clear_chat():
	    """Reset the conversation for the "New Chat" button.

	    Returns:
	        Two separate empty lists, one for each output the button is wired
	        to: the chat display and the stored history.
	    """
	    # Both outputs hold message lists. Returning "" for the second one
	    # replaced the stored history with a string, so the next message failed
	    # on ``chat_history.append``.
	    return [], []


	# UI layout and event wiring for the chat app.
	with gr.Blocks(title="AI Chatbot", theme=gr.themes.Soft()) as demo:
	    gr.Markdown("# AI Chatbot\nFast conversational AI powered by SmolLM2-360M.")

	    chatbot = gr.Chatbot(type="messages", height=500, show_copy_button=True, label="Chat")
	    # Message history that is passed to and returned from the handlers.
	    chat_state = gr.State([])

	    with gr.Row():
	        msg = gr.Textbox(
	            placeholder="Type your message...",
	            show_label=False,
	            container=False,
	            scale=8,
	        )
	        submit = gr.Button("Send", variant="primary", scale=1)
	        clear = gr.Button("New Chat", scale=1)

	    gr.Examples(
	        examples=[
	            "Hello! How are you?",
	            "Tell me a joke.",
	            "What is the capital of France?",
	            "Explain gravity in simple terms.",
	        ],
	        inputs=msg,
	        label="Examples",
	    )

	    def user_submit(message, history):
	        """Relay each streamed update to the (chatbot, msg, chat_state) outputs.

	        The updated history goes to both the chat display and the stored
	        state; the textbox receives an empty string.
	        """
	        for updated_history, _ in generate_response(message, history):
	            yield updated_history, "", updated_history

	    # Pressing Enter in the textbox and clicking "Send" share one handler
	    # and one input/output mapping, bound in a loop so they cannot drift.
	    for trigger in (msg.submit, submit.click):
	        trigger(
	            user_submit,
	            [msg, chat_state],
	            [chatbot, msg, chat_state],
	            queue=True,
	        )

	    # clear_chat's two return values map onto (chatbot, chat_state).
	    clear.click(clear_chat, None, [chatbot, chat_state])

	if __name__ == "__main__":
	    # Enable request queuing, then start the server.
	    demo.queue().launch()