"""Minimal chat-generation demo: load a causal LM and answer a math prompt.

Requires `transformers` and `torch`; downloads model weights on first run.
"""
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DTYPE = torch.float32
model_name = "doubleblind/DeepSeek-R1-Distill-QweNSA-1.5B"

# NOTE(review): the tokenizer is loaded from the base DeepSeek repo while the
# weights come from the anonymized "doubleblind" checkpoint — presumably the
# anonymized repo ships without a tokenizer; confirm the vocabularies match.
tok = AutoTokenizer.from_pretrained(
    "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B", trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=DTYPE,
    device_map="auto" if DEVICE == "cuda" else None,
    trust_remote_code=True)

# return_dict=True yields both input_ids AND attention_mask, so generate()
# does not have to infer the mask (fixes the "attention mask is not set"
# warning and incorrect behavior whenever padding is present).
inputs = tok.apply_chat_template(
    [
        {"role": "system",
         "content": r"You are a helpful assistant. Place your final answer in \boxed{}."},
        {"role": "user", "content": "What is 1 + 1?"},
    ],
    tokenize=True,
    add_generation_prompt=True,
    return_dict=True,
    return_tensors="pt").to(model.device)

out = model.generate(
    **inputs,
    max_new_tokens=128,
    do_sample=True,
    temperature=0.6,
    top_p=0.95,
    # Qwen-style tokenizers define no pad token; fall back to EOS to silence
    # the per-call warning and keep the call safe for batched inputs.
    pad_token_id=tok.pad_token_id if tok.pad_token_id is not None else tok.eos_token_id)
print(tok.decode(out[0], skip_special_tokens=True))