"""Print one sampled continuation of a fixed prompt from a local Llama checkpoint.

The tokenizer is read from ``tokenizer.json`` and the model weights from the
current directory (``"."``). The model and the prompt are both moved to
``"cuda"``, so a CUDA device is required.
"""

import torch
from tokenizers import Tokenizer
from transformers import LlamaForCausalLM, PreTrainedTokenizerFast

# Wrap the raw `tokenizers` object in the transformers fast-tokenizer API and
# tell the wrapper which strings act as the special tokens.
tk = PreTrainedTokenizerFast(
    tokenizer_object=Tokenizer.from_file("tokenizer.json"),
    bos_token="<|bos|>",
    eos_token="<|eos|>",
    unk_token="<|unk|>",
    pad_token="<|pad|>",
)

# Load the checkpoint from the working directory in bfloat16 and move it to GPU.
md = LlamaForCausalLM.from_pretrained(".", torch_dtype=torch.bfloat16).to("cuda")

# The prompt spells out the BOS marker literally.
# NOTE(review): if tokenizer.json's post-processor also prepends BOS, the
# encoded prompt will start with two BOS tokens -- confirm.
ids = tk.encode("<|bos|>The future of AI is", return_tensors="pt").to("cuda")

gen = md.generate(
    ids,
    # Single unpadded prompt: every position is a real token. Passing the mask
    # explicitly avoids generate() having to infer it from pad_token_id.
    attention_mask=torch.ones_like(ids),
    max_new_tokens=150,
    do_sample=True,
    temperature=0.7,
    repetition_penalty=1.2,
    no_repeat_ngram_size=3,
    # NOTE(review): presumably the id of "<|pad|>" in tokenizer.json -- confirm.
    pad_token_id=0,
)

# gen[0] is the prompt followed by the sampled tokens; special tokens are
# dropped from the printed text.
print(tk.decode(gen[0], skip_special_tokens=True))