# File size: 1,022 Bytes
# 3b1d293
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
# Minimal inference example: load the checkpoint, build a chat-formatted
# prompt, sample a completion, and print the decoded text.

# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float32

model_name = "doubleblind/DeepSeek-R1-Distill-QweNSA-1.5B"

# The tokenizer is loaded from a different repository than the model weights.
# NOTE(review): trust_remote_code=True lets the library execute Python code
# shipped in the referenced repositories; only use it with sources you trust.
tok = AutoTokenizer.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B",
    trust_remote_code=True,
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=DTYPE,
    # Automatic device placement is only requested when CUDA is available.
    device_map="auto" if DEVICE == "cuda" else None,
    trust_remote_code=True,
)

messages = [
    {
        "role": "system",
        "content": r"You are a helpful assistant. Place your final answer in \boxed{}.",
    },
    {"role": "user", "content": "What is 1 + 1?"},
]

# Ask explicitly for a dict so both input_ids and attention_mask are available
# and the result type does not depend on the library's default for return_dict.
encoded = tok.apply_chat_template(
    messages,
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
    return_dict=True,
)
# `prompt` stays bound to the input-id tensor, as in the original script.
prompt = encoded["input_ids"]

out = model.generate(
    input_ids=prompt.to(model.device),
    # Pass the mask explicitly instead of letting generate() infer it.
    attention_mask=encoded["attention_mask"].to(model.device),
    max_new_tokens=128,
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
)

# The whole returned sequence is decoded, so the printed text includes the
# prompt as well as the newly generated tokens.
print(tok.decode(out[0], skip_special_tokens=True))