import gradio as gr
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

"""
For more information on `huggingface_hub` Inference API support, please check the docs: https://huggingface.co/docs/huggingface_hub/v0.22.2/en/guides/inference
"""

# Load the model and tokenizer
model_name = "HuggingFaceTB/SmolLM2-1.7B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16,
    device_map="auto"
)
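# Note: device_map="auto" requires the `accelerate` package, and float16 is
# intended for GPU; on a CPU-only machine a reasonable fallback (an assumption,
# adjust to your hardware) is:
#   model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)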

def respond(
    message,
    history: list[tuple[str, str]],
    system_message,
    max_tokens,
    temperature,
    top_p,
):
    # Build the conversation as role-tagged messages
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    # Tokenize with the tokenizer's built-in chat template; SmolLM2 expects
    # ChatML-style markers (<|im_start|>/<|im_end|>), not the <|system|>/</s>
    # format, so a hand-rolled prompt would silently degrade output quality
    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    # Generate the response
    outputs = model.generate(
        input_ids,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
    )

    # Decode only the newly generated tokens, i.e. everything after the prompt
    response = tokenizer.decode(
        outputs[0][input_ids.shape[-1]:], skip_special_tokens=True
    ).strip()

    yield response
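
# Optional streaming variant (a sketch, not wired into the UI below): wrap
# generation in TextIteratorStreamer so partial text is yielded as tokens
# arrive and the chat window fills in incrementally.
from threading import Thread
from transformers import TextIteratorStreamer

def respond_streaming(message, history, system_message, max_tokens, temperature, top_p):
    messages = [{"role": "system", "content": system_message}]
    for user_msg, assistant_msg in history:
        if user_msg:
            messages.append({"role": "user", "content": user_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})
    messages.append({"role": "user", "content": message})

    input_ids = tokenizer.apply_chat_template(
        messages, add_generation_prompt=True, return_tensors="pt"
    ).to(model.device)

    streamer = TextIteratorStreamer(tokenizer, skip_prompt=True, skip_special_tokens=True)
    generation_kwargs = dict(
        input_ids=input_ids,
        max_new_tokens=max_tokens,
        temperature=temperature,
        top_p=top_p,
        do_sample=True,
        pad_token_id=tokenizer.eos_token_id,
        streamer=streamer,
    )
    # generate() blocks, so it runs in a background thread while the streamer
    # is consumed in the foreground
    Thread(target=model.generate, kwargs=generation_kwargs).start()

    partial = ""
    for new_text in streamer:
        partial += new_text
        yield partial

# To try it, pass respond_streaming instead of respond to gr.ChatInterface below.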


"""
For information on how to customize the ChatInterface, peruse the gradio docs: https://www.gradio.app/docs/chatinterface
"""
demo = gr.ChatInterface(
    respond,
    additional_inputs=[
        gr.Textbox(
            value="You are a friendly Chatbot. Always reply in the language in which the user is writing to you.", 
            label="System message"
        ),
        gr.Slider(minimum=1, maximum=2048, value=512, step=1, label="Max new tokens"),
        gr.Slider(minimum=0.1, maximum=4.0, value=0.7, step=0.1, label="Temperature"),
        gr.Slider(
            minimum=0.1,
            maximum=1.0,
            value=0.95,
            step=0.05,
            label="Top-p (nucleus sampling)",
        ),
    ],
)


if __name__ == "__main__":
    demo.launch()