Spaces:

Maximofn
/

SmolLM2_localModel

Sleeping

App Files Files Community

SmolLM2_localModel / app.py

Maximofn

Update system message to encourage multilingual responses

e68c545 11 months ago

raw

history blame contribute delete

2.36 kB

	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch

	"""
	For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
	"""

	# Cargar el modelo y el tokenizer
	model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	model = AutoModelForCausalLM.from_pretrained(
	model_name,
	torch_dtype=torch.float16,
	device_map="auto"
	)

	def respond(
	message,
	history: list[tuple[str, str]],
	system_message,
	max_tokens,
	temperature,
	top_p,
	):
	# Construir el prompt con el formato correcto
	prompt = f"<\|system\|>\n{system_message}</s>\n"

	for val in history:
	if val[0]:
	prompt += f"<\|user\|>\n{val[0]}</s>\n"
	if val[1]:
	prompt += f"<\|assistant\|>\n{val[1]}</s>\n"

	prompt += f"<\|user\|>\n{message}</s>\n<\|assistant\|>\n"

	# Tokenizar el prompt
	inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

	# Generar la respuesta
	outputs = model.generate(
	**inputs,
	max_new_tokens=max_tokens,
	temperature=temperature,
	top_p=top_p,
	do_sample=True,
	pad_token_id=tokenizer.eos_token_id
	)

	# Decodificar la respuesta
	response = tokenizer.decode(outputs[0], skip_special_tokens=True)

	# Extraer solo la parte de la respuesta del asistente
	response = response.split("<\|assistant\|>\n")[-1].strip()

	yield response


	"""
	For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
	"""
	demo = gr.ChatInterface(
	respond,
	additional_inputs=[
	gr.Textbox(
	value="You are a friendly Chatbot. Always reply in the language in which the user is writing to you.",
	label="System message"
	),
	gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
	gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
	gr.Slider(
	minimum=0.1,
	maximum=1.0,
	value=0.95,
	step=0.05,
	label="Top-p (nucleus sampling)",
	),
	],
	)


	if __name__ == "__main__":
	demo.launch()