# Import unsloth before transformers/trl so its performance patches apply first.
from unsloth import FastLanguageModel, is_bfloat16_supported
import torch
from trl import SFTTrainer
from transformers import TrainingArguments


def load_model(model_name, max_seq_length):
    """Load a base model and tokenizer with 4-bit quantization via Unsloth."""
    dtype = None         # None lets Unsloth auto-detect (float16 or bfloat16)
    load_in_4bit = True  # quantize to 4-bit to cut GPU memory use

    model, tokenizer = FastLanguageModel.from_pretrained(
        model_name=model_name,
        max_seq_length=max_seq_length,
        dtype=dtype,
        load_in_4bit=load_in_4bit,
    )
    return model, tokenizer
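
# Example call (illustrative model id, not fixed by this script; any
# Unsloth-compatible 4-bit checkpoint works):
#   model, tokenizer = load_model("unsloth/llama-3-8b-bnb-4bit", max_seq_length=2048)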


def get_peft(model, peft, max_seq_length, random_seed):
    """Attach LoRA adapters to the quantized base model."""
    model = FastLanguageModel.get_peft_model(
        model,
        r=peft['r'],  # LoRA rank
        target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                        "gate_proj", "up_proj", "down_proj"],
        lora_alpha=peft['alpha'],
        lora_dropout=peft['dropout'],
        bias=peft['bias'],
        use_gradient_checkpointing="unsloth",  # Unsloth's memory-saving checkpointing
        random_state=random_seed,
        use_rslora=peft['rslora'],             # rank-stabilized LoRA
        loftq_config=peft['loftq_config'],
    )
    return model
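

# Illustrative LoRA settings. The keys match what get_peft() reads; the values
# are common starting points from Unsloth's examples, not tuned recommendations.
EXAMPLE_PEFT = {
    'r': 16,               # LoRA rank
    'alpha': 16,           # LoRA scaling factor
    'dropout': 0.0,        # 0 is the fastest setting under Unsloth
    'bias': "none",
    'rslora': False,       # rank-stabilized LoRA off
    'loftq_config': None,  # no LoftQ initialization
}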


def get_trainer(model, tokenizer, dataset, sft,
                data_field, max_seq_length, random_seed,
                num_epochs, max_steps):
    """Build an SFTTrainer over the given dataset and hyperparameters."""
    trainer = SFTTrainer(
        model=model,
        tokenizer=tokenizer,
        train_dataset=dataset,
        dataset_text_field=data_field,
        max_seq_length=max_seq_length,
        dataset_num_proc=2,
        packing=False,  # packing can speed up training on many short sequences
        args=TrainingArguments(
            per_device_train_batch_size=sft['per_device_train_batch_size'],
            gradient_accumulation_steps=sft['gradient_accumulation_steps'],
            warmup_steps=sft['warmup_steps'],
            num_train_epochs=num_epochs,
            max_steps=max_steps,
            learning_rate=sft['learning_rate'],
            # Use bf16 where the GPU supports it, otherwise fall back to fp16.
            fp16=not is_bfloat16_supported(),
            bf16=is_bfloat16_supported(),
            logging_steps=sft['logging_steps'],
            optim=sft['optim'],
            weight_decay=sft['weight_decay'],
            lr_scheduler_type=sft['lr_scheduler_type'],
            seed=random_seed,
            output_dir="outputs",
        ),
    )
    return trainer
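

# Illustrative training hyperparameters. The keys match what get_trainer()
# reads; the values are typical demo settings, not tuned recommendations.
EXAMPLE_SFT = {
    'per_device_train_batch_size': 2,
    'gradient_accumulation_steps': 4,  # effective batch size of 8
    'warmup_steps': 5,
    'learning_rate': 2e-4,
    'logging_steps': 1,
    'optim': "adamw_8bit",             # 8-bit AdamW to save optimizer memory
    'weight_decay': 0.01,
    'lr_scheduler_type': "linear",
}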


def prepare_trainer(model_name, max_seq_length, random_seed,
                    num_epochs, max_steps,
                    peft, sft, dataset, data_field):
    """Load the model, attach LoRA adapters, and return a ready SFTTrainer."""
    print("Loading Model")
    model, tokenizer = load_model(model_name, max_seq_length)

    print("Preparing for PEFT")
    model = get_peft(model, peft, max_seq_length, random_seed)

    print("Getting Trainer Model")
    trainer = get_trainer(model, tokenizer, dataset, sft,
                          data_field, max_seq_length, random_seed,
                          num_epochs, max_steps)

    return trainer
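

# --- Example configuration --------------------------------------------------
# Illustrative values only: the model id, sequence length, and step counts
# below are assumptions for a demo run, not requirements of this pipeline.
MODEL_NAME = "unsloth/llama-3-8b-bnb-4bit"  # any Unsloth-compatible HF checkpoint
MAX_SEQ_LENGTH = 2048
RANDOM_SEED = 3407
NUM_EPOCHS = 1
MAX_STEPS = 60       # overrides num_train_epochs when > 0; keeps the demo short
DATA_FIELD = "text"  # dataset column that holds the raw training text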


if __name__ == "__main__":
    # Demo wiring only: the dataset below is a placeholder corpus with a plain
    # "text" column so the script runs end to end; substitute your own data.
    from datasets import load_dataset

    dataset = load_dataset("imdb", split="train")

    trainer = prepare_trainer(
        model_name=MODEL_NAME,
        max_seq_length=MAX_SEQ_LENGTH,
        random_seed=RANDOM_SEED,
        num_epochs=NUM_EPOCHS,
        max_steps=MAX_STEPS,
        peft=EXAMPLE_PEFT,
        sft=EXAMPLE_SFT,
        dataset=dataset,
        data_field=DATA_FIELD,
    )