Spaces:

dispatchAI
/

mobile-chat-demo

Runtime error

Upload app.py with huggingface_hub

af88c48 verified 3 days ago

1.77 kB

	import gradio as gr
	import spaces
	import torch
	from transformers import AutoTokenizer, AutoModelForCausalLM

	# Hub repository id of the checkpoint this demo serves.
	MODEL_ID = "dispatchAI/SmolLM2-135M-Instruct-mobile"

	# Lazily-filled singletons: both stay None until load_model() populates them.
	tokenizer = model = None

	def load_model():
	    """Return the shared ``(tokenizer, model)`` pair, loading on first use.

	    Both objects are cached in the module-level ``tokenizer`` / ``model``
	    globals so that later calls reuse them instead of loading again.

	    Each global is checked on its own: if the tokenizer loaded but the model
	    load raised, the next call retries the model instead of handing back
	    ``(tokenizer, None)``.

	    Returns:
	        tuple: ``(tokenizer, model)`` loaded from ``MODEL_ID``.
	    """
	    global tokenizer, model
	    if tokenizer is None:
	        tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
	    if model is None:
	        model = AutoModelForCausalLM.from_pretrained(
	            MODEL_ID,
	            torch_dtype=torch.float16,
	            device_map="auto",
	        )
	    return tokenizer, model

	def _content_text(content):
	    """Return a history ``content`` value as plain text, or None if it has none.

	    Strings are returned unchanged. A dict is read through its ``"text"`` key
	    and a list is flattened by joining the text of its parts.
	    NOTE(review): the dict/list shapes are an assumption about structured
	    message content in some Gradio versions; confirm against the version
	    installed in the Space.
	    """
	    if isinstance(content, str):
	        return content
	    if isinstance(content, dict):
	        return _content_text(content.get("text"))
	    if isinstance(content, list):
	        texts = [text for text in map(_content_text, content) if text is not None]
	        return "".join(texts) if texts else None
	    return None


	def _history_to_messages(history):
	    """Convert chat history into a list of ``{"role", "content"}`` dicts.

	    Accepts both history layouts: legacy ``[user, assistant]`` pairs and
	    role/content dicts. Entries with no text or with a role other than
	    ``"user"`` / ``"assistant"`` are skipped.
	    """
	    messages = []
	    for entry in history or []:
	        if isinstance(entry, dict):
	            turns = [(entry.get("role"), entry.get("content"))]
	        else:
	            turns = [("user", entry[0]), ("assistant", entry[1])]
	        for role, content in turns:
	            text = _content_text(content)
	            if role in ("user", "assistant") and text is not None:
	                messages.append({"role": role, "content": text})
	    return messages


	@spaces.GPU
	def chat(message, history):
	    """Generate the assistant reply for ``message`` given prior ``history``.

	    Args:
	        message: The latest user message.
	        history: Earlier turns, either as ``[user, assistant]`` pairs or as
	            ``{"role": ..., "content": ...}`` dicts; may be empty.

	    Returns:
	        str: The decoded completion, without the prompt or special tokens.
	    """
	    tokenizer, model = load_model()

	    messages = [{"role": "system", "content": "You are a helpful assistant running on a mobile-optimized model."}]
	    messages.extend(_history_to_messages(history))
	    messages.append({"role": "user", "content": message})

	    input_text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
	    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
	    # generate() returns prompt + completion; remember where the prompt ends.
	    prompt_len = inputs["input_ids"].shape[1]

	    with torch.no_grad():
	        outputs = model.generate(
	            **inputs,
	            max_new_tokens=256,
	            temperature=0.7,
	            do_sample=True,
	            pad_token_id=tokenizer.eos_token_id,
	        )

	    return tokenizer.decode(outputs[0][prompt_len:], skip_special_tokens=True)

	# Chat UI: every submitted message is routed through chat().
	demo = gr.ChatInterface(
	    chat,
	    theme=gr.themes.Soft(primary_hue="blue"),
	    title="🚀 dispatchAI Mobile Chat",
	    description="Chat with dispatchAI/SmolLM2-135M-Instruct-mobile — a 135M parameter model optimized for mobile devices. This runs on ZeroGPU.",
	)

	# Start the app only when run as a script, not when imported.
	if __name__ == "__main__":
	    demo.launch()