"""Generate Python code with the `edumunozsala/llama-2-7b-int4-python-code-20k`
fine-tuned Llama-2 model, using an Alpaca-style instruction prompt.

Requires a CUDA GPU (the input tensor is moved with `.cuda()`) and the
`bitsandbytes` backend for 4-bit loading.
"""

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Hugging Face repo id of the 4-bit fine-tuned Llama-2 Python-code model.
model_id = "edumunozsala/llama-2-7b-int4-python-code-20k"

# Tokenizer comes from the same repo as the model.
# (Fixes original NameError: `hf_model_repo` was never defined.)
tokenizer = AutoTokenizer.from_pretrained(model_id)

# Load the model quantized to 4 bits and let Accelerate place the weights
# automatically. (Fixes original NameError: `device_map` was never defined.)
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    load_in_4bit=True,
    torch_dtype=torch.float16,
    device_map="auto",
)

instruction = "Write a Python function to display the first and last elements of a list."
# Optional extra input for the task; empty here.
# Renamed from `input` to avoid shadowing the builtin.
input_text = ""

# Alpaca-style prompt format this fine-tune was trained on.
prompt = f"""### Instruction:
Use the Task below and the Input given to write the Response, which is a programming code that can solve the Task.

### Task:
{instruction}

### Input:
{input_text}

### Response:
"""

# Tokenize and move the ids to the GPU where the (quantized) model runs.
input_ids = tokenizer(prompt, return_tensors="pt", truncation=True).input_ids.cuda()

# Sampled decoding: top_p/temperature trade determinism for variety.
outputs = model.generate(
    input_ids=input_ids,
    max_new_tokens=100,
    do_sample=True,
    top_p=0.9,
    temperature=0.5,
)

print(f"Prompt:\n{prompt}\n")
# Decode the full sequence, then strip the echoed prompt prefix so only the
# newly generated code is printed.
decoded = tokenizer.batch_decode(
    outputs.detach().cpu().numpy(), skip_special_tokens=True
)[0]
print(f"Generated instruction:\n{decoded[len(prompt):]}")