# Repository page header captured along with the file (not part of the program):
#   author: S1mp1eXXX
#   commit: 3b1a202 (verified) - "Update app.py"
#   size:   832 bytes
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch
# Checkpoint identifier; the same value is used to fetch the tokenizer and
# the model weights so the two always come from the same repository.
model_name = "S1mp1eXXX/Nimi-1b-thinking"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Weights are requested in float16, and device placement is delegated to
# the library via device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    device_map="auto",
    torch_dtype=torch.float16,
)
def respond(message, history, system_message, max_tokens, temperature, top_p):
    """Generate the assistant's reply to ``message`` and yield it once.

    The prompt is a plain-text transcript: the system message on the first
    line, then one ``"<role>: <content>"`` line per history entry, then the
    new user message followed by an open ``"assistant:"`` cue.

    Args:
        message: The new user message.
        history: Iterable of mappings with ``"role"`` and ``"content"`` keys
            describing earlier turns. ``None`` is treated as no history.
        system_message: Text placed on the first line of the prompt.
        max_tokens: Upper bound on newly generated tokens (coerced to int).
        temperature: Sampling temperature. A value of 0 (or ``None``)
            selects greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.

    Yields:
        The decoded reply text, without the prompt.
    """
    # Build the transcript with a single join instead of repeated `+=`.
    parts = [system_message, "\n"]
    parts.extend(f"{h['role']}: {h['content']}\n" for h in history or ())
    parts.append(f"user: {message}\nassistant:")
    prompt = "".join(parts)

    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
    prompt_length = inputs["input_ids"].shape[-1]

    # `generate` only honours temperature/top_p when sampling is enabled, and
    # rejects a temperature of 0 while sampling, so fall back to greedy then.
    sampling = temperature is not None and temperature > 0
    generation_kwargs = {
        # UI controls may deliver the token budget as a float.
        "max_new_tokens": int(max_tokens),
        "do_sample": sampling,
    }
    if sampling:
        generation_kwargs["temperature"] = temperature
        generation_kwargs["top_p"] = top_p

    output = model.generate(**inputs, **generation_kwargs)

    # A causal LM returns prompt + continuation; decode only the continuation
    # so the reply does not echo the system message and earlier turns.
    new_tokens = output[0][prompt_length:]
    decoded = tokenizer.decode(new_tokens, skip_special_tokens=True)
    yield decoded