print("[*] Loading libraries...")
import torch
from transformers import LlamaForCausalLM, PreTrainedTokenizerFast
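
# Optional: fix the RNG seed so repeated runs sample the same continuation
# (an assumption for demo reproducibility; remove if varied output is wanted).
torch.manual_seed(0)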

# Path to the locally saved checkpoint and tokenizer files.
model_path = "./llama-sub-1m-final"

print("[*] Loading tokenizer...")
tokenizer = PreTrainedTokenizerFast.from_pretrained(model_path)
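
# Llama-style tokenizers often ship without a pad token; fall back to EOS so
# generate() has a valid pad_token_id (an assumption about this checkpoint;
# this is a no-op if the tokenizer already defines one).
if tokenizer.pad_token_id is None:
    tokenizer.pad_token = tokenizer.eos_token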

print("[*] Loading model...")
model = LlamaForCausalLM.from_pretrained(model_path)
model.eval()  # inference mode: disables dropout and other train-time behavior
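
# Optional: run on a GPU when one is available (assumes the checkpoint fits
# in device memory; the inputs are moved to the same device below).
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)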

prompt = "Artificial intelligence is "
print(f"[*] Prompt: {prompt!r}")

# Tokenize the prompt and move the tensors to wherever the model lives.
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

with torch.no_grad():
    outputs = model.generate(
        input_ids=inputs["input_ids"],
        attention_mask=inputs["attention_mask"],
        max_new_tokens=150,
        do_sample=True,
        temperature=0.35,        # low temperature: sharper, more focused sampling
        top_p=0.85,              # nucleus sampling: restrict to top 85% probability mass
        repetition_penalty=1.2,  # >1.0 penalizes already-seen tokens, reducing loops
        pad_token_id=tokenizer.pad_token_id,
        eos_token_id=tokenizer.eos_token_id,
    )

print("[*] Output:", tokenizer.decode(outputs[0], skip_special_tokens=True))