"""Llama 2 Inference.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1tS9ep-M5slbxKrGP2btamFUhMM00QkKt

# Fine-tune Llama 2 in Google Colab
> 🗣️ Large Language Model Course

❤️ Created by [@maximelabonne](https://twitter.com/maximelabonne), based on Younes Belkada's [GitHub Gist](https://gist.github.com/younesbelkada/9f7f75c94bdc1981c8ca5cc937d4a4da). Special thanks to Tolga HOŞGÖR for his solution to empty the VRAM.

This notebook runs on a T4 GPU. (Last update: 24 Aug 2023)
"""
|
|
# NOTE(review): `!` is IPython/Colab shell magic — this line only works inside a
# notebook kernel, not as plain Python. Versions are pinned to the stack the
# notebook was last validated against (24 Aug 2023).
!pip install -q accelerate==0.21.0 peft==0.4.0 bitsandbytes==0.40.2 transformers==4.31.0 trl==0.4.7
|
|
import os
import torch
from datasets import load_dataset
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    HfArgumentParser,
    TrainingArguments,
    pipeline,
    logging,
)
from peft import LoraConfig, PeftModel
from trl import SFTTrainer
# NOTE(review): several imports (load_dataset, BitsAndBytesConfig,
# HfArgumentParser, TrainingArguments, LoraConfig, PeftModel, SFTTrainer,
# os, torch) are not used in the inference code visible below — presumably
# leftovers from the fine-tuning notebook this was exported from. Kept as-is
# in case later cells (not shown here) rely on them.
|
|
# Load the fine-tuned checkpoint used for inference.
model = AutoModelForCausalLM.from_pretrained("tminh/llama-2-7b-glenda")

# NOTE(review): the tokenizer is pulled from a different repo than the model —
# presumably the sharded base model the checkpoint was fine-tuned from; confirm
# the vocabularies actually match.
model_name = "TinyPixel/Llama-2-7B-bf16-sharded"
tokenizer = AutoTokenizer.from_pretrained(
    model_name,
    trust_remote_code=True,
)
|
|
| |
# Silence transformers' warning/progress chatter during generation.
logging.set_verbosity(logging.CRITICAL)

# Ask a question using Llama 2's [INST] chat template and print the output.
prompt = "What can drug D07OAC do?"
pipe = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=200,
)
result = pipe("<s>[INST] " + prompt + " [/INST]")
# The pipeline returns a list of dicts; "generated_text" includes the prompt
# followed by the model's completion.
print(result[0]["generated_text"])
|
|
|
|