---
license: apache-2.0
---
## Usage

First, define a helper that converts an `instruction` and an optional `input` into a single Alpaca-style prompt to feed to `model.generate`:

```python
def generate_prompt(instruction, input=None):
    # Templates used by Stanford Alpaca: https://github.com/tatsu-lab/stanford_alpaca
    if input is not None:
        prompt = f"Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Input:\n{input}\n\n### Response:"
    else:
        prompt = f"Below is an instruction that describes a task. Write a response that appropriately completes the request.\n\n### Instruction:\n{instruction}\n\n### Response:"
    return prompt
```
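As a quick sanity check, calling the helper with only an instruction yields the no-`input` variant of the template (the instruction string here is just an illustrative example):

```python
# Instruction-only case: the "### Input:" section is omitted
print(generate_prompt("Sort a list of integers in ascending order"))
# Below is an instruction that describes a task. Write a response that
# appropriately completes the request.
#
# ### Instruction:
# Sort a list of integers in ascending order
#
# ### Response:
```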
Next, load the base LLaMA model, attach the CodeCapybara LoRA weights, and generate a prediction:
```python
import sys
import torch
from transformers import LlamaTokenizer, LlamaForCausalLM, GenerationConfig
from peft import PeftModel

# Load the base model in 8-bit, then wrap it with the LoRA adapter weights
tokenizer = LlamaTokenizer.from_pretrained("decapoda-research/llama-7b-hf")
model = LlamaForCausalLM.from_pretrained("decapoda-research/llama-7b-hf",
                                         load_in_8bit=True,
                                         torch_dtype=torch.float16,
                                         device_map="auto")
model = PeftModel.from_pretrained(model,
                                  "Fsoft-AIC/CodeCapybara-LoRA",
                                  torch_dtype=torch.float16)

model.config.pad_token_id = tokenizer.pad_token_id = 0
model.config.bos_token_id = 1
model.config.eos_token_id = 2

model.eval()
if torch.__version__ >= "2" and sys.platform != "win32":
    model = torch.compile(model)

instruction = "Write a Python program that prints the first 10 Fibonacci numbers"
prompt = generate_prompt(instruction)

input_ids = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)

generation_config = GenerationConfig(temperature=0.1,
                                     top_k=40,
                                     top_p=0.75)
with torch.no_grad():
    output_ids = model.generate(input_ids=input_ids,
                                generation_config=generation_config,
                                max_new_tokens=128)
output = tokenizer.decode(output_ids[0], skip_special_tokens=True)
print(output)
```
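Note that `model.generate` returns the prompt tokens followed by the completion, so the decoded string still contains the full template. A minimal sketch for pulling out just the model's answer, assuming the Alpaca-style template defined in `generate_prompt` above:

```python
# Keep only the text after the "### Response:" marker; this relies on the
# prompt template used by generate_prompt above.
response = output.split("### Response:")[-1].strip()
print(response)
```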