from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float32

# Model checkpoint; the tokenizer is loaded from the base
# DeepSeek-R1-Distill-Qwen-1.5B repo.
model_name = "doubleblind/DeepSeek-R1-Distill-QweNSA-1.5B"
tok = AutoTokenizer.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", trust_remote_code=True
)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=DTYPE,
    device_map="auto" if DEVICE == "cuda" else None,
    trust_remote_code=True,
)

# Build the chat-formatted prompt and return it as a tensor of input IDs.
prompt = tok.apply_chat_template(
    [
        {"role": "system", "content": r"You are a helpful assistant. Place your final answer in \boxed{}."},
        {"role": "user", "content": "What is 1 + 1?"},
    ],
    tokenize=True,
    add_generation_prompt=True,
    return_tensors="pt",
)

# Sample a completion (temperature 0.6, top_p 0.95) and print the decoded text.
out = model.generate(
    input_ids=prompt.to(model.device),
    max_new_tokens=128,
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
)
print(tok.decode(out[0], skip_special_tokens=True))
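
# Optional follow-up: a minimal sketch (not part of the original script) for
# pulling out the final \boxed{...} answer that the system prompt asks for.
# It decodes only the newly generated tokens and takes the last \boxed match,
# since a reasoning trace may mention \boxed more than once. The regex is an
# assumption about the output format, not something the model guarantees.
import re

generated = tok.decode(out[0][prompt.shape[-1]:], skip_special_tokens=True)
boxed = re.findall(r"\\boxed\{([^{}]*)\}", generated)
if boxed:
    print("Final answer:", boxed[-1])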