"""Minimal chat-inference demo: load a local causal LM and answer one question."""
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load tokenizer and model from the current directory.
# device_map="auto" lets accelerate place the weights (GPU when available);
# fp16 halves memory versus the default fp32.
tokenizer = AutoTokenizer.from_pretrained("./")
model = AutoModelForCausalLM.from_pretrained(
    "./", device_map="auto", torch_dtype=torch.float16
)

messages = [
    {"role": "user", "content": "9.9 和 9.11 哪个大?"},
]

# Render the conversation with the model's chat template and tokenize it.
# add_generation_prompt=True appends the assistant-turn marker so the model
# starts generating a reply instead of continuing the user turn.
tokenized_chat = tokenizer.apply_chat_template(
    messages, tokenize=True, add_generation_prompt=True, return_tensors="pt"
)
# Bug fix: apply_chat_template returns a CPU tensor, but device_map="auto"
# may have placed the model on GPU — generate() needs the input on the
# model's device.
tokenized_chat = tokenized_chat.to(model.device)

# Show the exact prompt the model sees (useful for debugging the template).
print(tokenizer.decode(tokenized_chat[0]))

outputs = model.generate(tokenized_chat, max_new_tokens=4096)
# outputs[0] contains the prompt followed by the generated continuation.
print(tokenizer.decode(outputs[0]))