# pip install git+https://github.com/huggingface/transformers.git@main accelerate
"""Smoke-test generation: load a checkpoint from the current directory and
decode a short continuation of a fixed prompt."""
from transformers import LlamaTokenizer, AutoModelForCausalLM

# Load the tokenizer and model from "./" — assumes the working directory is a
# saved checkpoint (contains tokenizer files and model weights/config).
tokenizer = LlamaTokenizer.from_pretrained("./")
model = AutoModelForCausalLM.from_pretrained("./")

# Tokenize the prompt to a PyTorch tensor of token ids, generate at most
# 5 new tokens, and print the decoded first (and only) sequence.
inputs = tokenizer("A cat sat", return_tensors="pt")["input_ids"]
outputs = model.generate(inputs, max_new_tokens=5)
print(tokenizer.decode(outputs[0]))