# smol-ai / app.py
# Updated by rufatronics (commit 2fec00c, verified)
import gradio as gr
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Model ID for the stable Instruct version
MODEL_ID = "HuggingFaceTB/SmolLM2-135M-Instruct"
# Load tokenizer and model once at startup.
# NOTE: this runs at import time (downloads weights on first run), so
# importing this module has side effects.
print("System: Booting Stable-Lite Brain...")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_ID,
    device_map="cpu",           # pin to CPU — app targets 4GB-RAM devices with no GPU
    torch_dtype=torch.float32   # full precision; the safe default dtype on CPU
)
def chat(message, history):
    """Generate one assistant reply for the Gradio ChatInterface.

    Args:
        message: The latest user message (str).
        history: Prior turns supplied by Gradio — either ``[user, assistant]``
            pairs (tuples format) or ``{"role", "content"}`` dicts (messages
            format). Both are handled; the original implementation ignored
            this argument entirely, losing all multi-turn context.

    Returns:
        The model's decoded reply with surrounding whitespace stripped.
    """
    # Rebuild the full conversation so the model can see prior turns.
    messages = []
    for turn in history or []:
        if isinstance(turn, dict):
            # Gradio "messages" format.
            messages.append({"role": turn["role"], "content": turn["content"]})
        else:
            # Gradio "tuples" format: (user_text, assistant_text).
            user_text, bot_text = turn
            if user_text:
                messages.append({"role": "user", "content": user_text})
            if bot_text:
                messages.append({"role": "assistant", "content": bot_text})
    # 'Be helpful and precise' is the only instruction to save RAM/Attention.
    messages.append({"role": "user", "content": f"Be helpful and precise: {message}"})

    # Use the tokenizer's own chat template rather than the hand-rolled
    # "<|user|>...<|endoftext|>" prompt: SmolLM2-Instruct ships a ChatML
    # template, and the hard-coded markers do not match it.
    prompt = tokenizer.apply_chat_template(
        messages, tokenize=False, add_generation_prompt=True
    )
    inputs = tokenizer(prompt, return_tensors="pt").to("cpu")

    with torch.no_grad():
        outputs = model.generate(
            **inputs,
            max_new_tokens=150,
            temperature=0.1,        # near-greedy sampling for precise answers
            do_sample=True,
            repetition_penalty=1.2,
            eos_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (skip the echoed prompt).
    input_length = inputs.input_ids.shape[1]
    response_tokens = outputs[0][input_length:]
    response = tokenizer.decode(response_tokens, skip_special_tokens=True)
    return response.strip()
# Chat UI wired to the generation function above; example caching is
# disabled (the original author notes this avoids a Python 3.13 caching error).
demo = gr.ChatInterface(
    chat,
    title="Smol-AI Kano (Stable-Lite)",
    description="Optimized for local students and businesses on 4GB RAM devices.",
    cache_examples=False,
)
# Standard script guard: start the web server only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    demo.launch()