---
license: cc-by-nc-4.0
base_model:
- unsloth/llama-3-8b-bnb-4bit
pipeline_tag: text-generation
tags:
- conversational
- llama
- ollama
- unsloth
- gguf
- 8b
---

# Training Process

## Model + LoRA Loading

```python
from unsloth import FastLanguageModel
import torch

max_seq_length = 2048
dtype = None  # None for auto detection. Float16 for Tesla T4/V100, Bfloat16 for Ampere+
load_in_4bit = True  # Use 4bit quantization to reduce memory usage. Can be False.

model, tokenizer = FastLanguageModel.from_pretrained(
    model_name = "unsloth/llama-3-8b-bnb-4bit",
    max_seq_length = max_seq_length,
    dtype = dtype,
    load_in_4bit = load_in_4bit,
    # token = "hf_...", # use one if using gated models like meta-llama/Llama-2-7b-hf
)

model = FastLanguageModel.get_peft_model(
    model,
    r = 16,  # Choose any number > 0! Suggested 8, 16, 32, 64, 128
    target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
                      "gate_proj", "up_proj", "down_proj",],
    lora_alpha = 16,
    lora_dropout = 0,  # Supports any, but = 0 is optimized
    bias = "none",  # Supports any, but = "none" is optimized
    use_gradient_checkpointing = "unsloth",  # True or "unsloth" for very long context
    random_state = 3407,
    use_rslora = False,  # We support rank stabilized LoRA
    loftq_config = None,  # And LoftQ
)
```
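
With the adapters attached, it can help to confirm that only the LoRA weights are trainable before launching a run. A minimal sanity check, assuming the standard PEFT `print_trainable_parameters` helper is exposed on the returned model (an assumption about the environment, not part of the original notebook):

```python
# Optional sanity check (assumes the PEFT API is exposed on the Unsloth model):
# with r = 16 on the seven projection modules above, only a small fraction
# of the 8B base parameters should show up as trainable.
model.print_trainable_parameters()
```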

## Dataset Preparation

```python
from datasets import load_dataset

dataset = load_dataset(
    "csv",
    data_files = "/content/synth_data.csv",
    split = "train",
)

from unsloth import to_sharegpt

dataset = to_sharegpt(
    dataset,
    merged_prompt = "Labels: {available_entities}\n\nText: {text}\n",
    conversation_extension = 5,  # Randomly combines conversations into 1
    output_column_name = "label",
)

from unsloth import standardize_sharegpt

dataset = standardize_sharegpt(dataset)

chat_template = """{SYSTEM}
USER: {INPUT}
ASSISTANT: {OUTPUT}"""

from unsloth import apply_chat_template

dataset = apply_chat_template(
    dataset,
    tokenizer = tokenizer,
    chat_template = chat_template,
    default_system_message = "NER Task: Label the text based on the available Labels.",
)
```
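
The `merged_prompt` above expects the CSV to provide `available_entities` and `text` columns, with the gold annotation in the `label` column. For orientation, here is a hypothetical row of `synth_data.csv`: the column names follow from the code above, but the row contents and the label format are invented for illustration.

```python
import pandas as pd

# Hypothetical example of the schema synth_data.csv must follow.
# Column names come from merged_prompt / output_column_name above;
# the row values and the label format shown here are invented.
pd.DataFrame([{
    "available_entities": "ATTR, CITY, COUNTRY, O, ORG, PER, WORK_P",
    "text": '"doctors in berlin"',
    "label": "doctors -> WORK_P, in -> O, berlin -> CITY",
}]).to_csv("/content/synth_data.csv", index=False)
```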

## Training Configuration

```python
from trl import SFTTrainer
from transformers import TrainingArguments
from unsloth import is_bfloat16_supported

trainer = SFTTrainer(
    model = model,
    tokenizer = tokenizer,
    train_dataset = dataset,
    dataset_text_field = "text",
    max_seq_length = max_seq_length,
    dataset_num_proc = 2,
    packing = True,  # Can make training 5x faster for short sequences.
    args = TrainingArguments(
        per_device_train_batch_size = 2,
        gradient_accumulation_steps = 4,
        warmup_steps = 5,
        # max_steps = None,
        num_train_epochs = 1,
        learning_rate = 2e-4,
        fp16 = not is_bfloat16_supported(),
        bf16 = is_bfloat16_supported(),
        logging_steps = 1,
        optim = "adamw_8bit",
        weight_decay = 0.01,
        lr_scheduler_type = "linear",
        seed = 3407,
        output_dir = "outputs",
    ),
)

trainer_stats = trainer.train()

# Save to 8bit Q8_0
if False: model.save_pretrained_gguf("model", tokenizer)
```
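
Given the `gguf` and `ollama` tags on this card, the commented-out save above is the relevant export path. A sketch of a full export, assuming Unsloth's `save_pretrained_gguf` helper; the `q4_k_m` quantization choice here is an assumption for illustration, not necessarily what was used for this model:

```python
# Merge the LoRA adapters and export to GGUF for llama.cpp / Ollama.
# quantization_method is an assumed example; Unsloth also accepts e.g. "f16", "q8_0".
model.save_pretrained_gguf("model", tokenizer, quantization_method = "q4_k_m")
```

The exported `.gguf` file can then be loaded by Ollama via a `Modelfile` whose `FROM` line points at it.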

## Training Results



- Steps Trained: 26
- Final Loss: 0.1870
- Total Time: 21:04 min
- A full epoch would have been 261 steps

# Sample Inference

```python
FastLanguageModel.for_inference(model)  # Enable native 2x faster inference

messages = [  # Change below!
    {"role": "user", "content": 'Labels: ATTR, CITY, CITY_PART, COUNTRY, O, ORG, PER, PHONE, REGION, REL, STREET, WORK_P, WORK_S\n\n'
                                'Text: "doctors in berlin"'},
]
input_ids = tokenizer.apply_chat_template(
    messages,
    add_generation_prompt = True,
    return_tensors = "pt",
).to("cuda")

from transformers import TextStreamer

text_streamer = TextStreamer(tokenizer, skip_prompt = True)
_ = model.generate(input_ids, streamer = text_streamer, max_new_tokens = 128, pad_token_id = tokenizer.eos_token_id)
```
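
For programmatic use, e.g. parsing the predicted labels, the streamer can be dropped in favor of a plain decode. A minimal sketch reusing the `input_ids` from above (standard Transformers calls, not taken from the original notebook):

```python
outputs = model.generate(
    input_ids,
    max_new_tokens = 128,
    pad_token_id = tokenizer.eos_token_id,
)
# Decode only the newly generated tokens, skipping the echoed prompt.
response = tokenizer.decode(outputs[0][input_ids.shape[-1]:], skip_special_tokens = True)
print(response)
```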