| | from unsloth import FastLanguageModel |
| | import torch,sys |
| |
|
| | model = sys.argv[1] |
| | steps = int(sys.argv[2]) |
| | training_data = sys.argv[3] |
| |
|
| | max_seq_length = 4096 |
| | dtype = None |
| | load_in_4bit = True |
| |
|
| | |
| | fourbit_models = [ |
| | "unsloth/mistral-7b-bnb-4bit", |
| | "unsloth/mistral-7b-instruct-v0.2-bnb-4bit", |
| | "unsloth/llama-2-7b-bnb-4bit", |
| | "unsloth/llama-2-13b-bnb-4bit", |
| | "unsloth/codellama-34b-bnb-4bit", |
| | "unsloth/tinyllama-bnb-4bit", |
| | ] |
| |
|
| | model, tokenizer = FastLanguageModel.from_pretrained( |
| | |
| | model_name = model, |
| | max_seq_length = max_seq_length, |
| | dtype = dtype, |
| | load_in_4bit = load_in_4bit, |
| | ) |
| |
|
| | model = FastLanguageModel.get_peft_model( |
| | model, |
| | r = 32, |
| | target_modules = ["q_proj", "k_proj", "v_proj", "o_proj", |
| | "gate_proj", "up_proj", "down_proj",], |
| | lora_alpha = 16, |
| | lora_dropout = 0, |
| | bias = "none", |
| | use_gradient_checkpointing = True, |
| | random_state = 3407, |
| | use_rslora = False, |
| | loftq_config = None, |
| | ) |
| |
|
| | alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request. |
| | |
| | ### Instruction: |
| | {} |
| | |
| | ### Input: |
| | {} |
| | |
| | ### Response: |
| | {}""" |
| |
|
| | EOS_TOKEN = tokenizer.eos_token |
| | def formatting_prompts_func(examples): |
| | instructions = examples["instruction"] |
| | inputs = examples["input"] |
| | outputs = examples["output"] |
| | texts = [] |
| | for instruction, input, output in zip(instructions, inputs, outputs): |
| | |
| | text = alpaca_prompt.format(instruction, input, output) + EOS_TOKEN |
| | texts.append(text) |
| | return { "text" : texts, } |
| | pass |
| |
|
| | |
| | from datasets import load_dataset |
| | dataset = load_dataset("json", data_files=training_data, split = "train") |
| | dataset = dataset.map(formatting_prompts_func, batched = True,) |
| |
|
| |
|
| | from trl import SFTTrainer |
| | from transformers import TrainingArguments |
| |
|
| | trainer = SFTTrainer( |
| | model = model, |
| | tokenizer = tokenizer, |
| | train_dataset = dataset, |
| | dataset_text_field = "text", |
| | max_seq_length = max_seq_length, |
| | dataset_num_proc = 2, |
| | packing = False, |
| | args = TrainingArguments( |
| | per_device_train_batch_size = 4, |
| | gradient_accumulation_steps = 4, |
| | warmup_steps = 5, |
| | max_steps = steps, |
| | learning_rate = 2e-4, |
| | fp16 = not torch.cuda.is_bf16_supported(), |
| | bf16 = torch.cuda.is_bf16_supported(), |
| | logging_steps = 1, |
| | optim = "adamw_8bit", |
| | weight_decay = 0.01, |
| | lr_scheduler_type = "linear", |
| | seed = 3407, |
| | output_dir = "outputs", |
| | save_strategy= "steps", |
| | save_steps=50 |
| | ), |
| | ) |
| |
|
| | gpu_stats = torch.cuda.get_device_properties(0) |
| | start_gpu_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3) |
| | max_memory = round(gpu_stats.total_memory / 1024 / 1024 / 1024, 3) |
| | print(f"GPU = {gpu_stats.name}. Max memory = {max_memory} GB.") |
| | print(f"{start_gpu_memory} GB of memory reserved.") |
| |
|
| | |
| | trainer_stats = trainer.train() |
| |
|
| | used_memory = round(torch.cuda.max_memory_reserved() / 1024 / 1024 / 1024, 3) |
| | used_memory_for_lora = round(used_memory - start_gpu_memory, 3) |
| | used_percentage = round(used_memory /max_memory*100, 3) |
| | lora_percentage = round(used_memory_for_lora/max_memory*100, 3) |
| | print(f"{trainer_stats.metrics['train_runtime']} seconds used for training.") |
| | print(f"{round(trainer_stats.metrics['train_runtime']/60, 2)} minutes used for training.") |
| | print(f"Peak reserved memory = {used_memory} GB.") |
| | print(f"Peak reserved memory for training = {used_memory_for_lora} GB.") |
| | print(f"Peak reserved memory % of max memory = {used_percentage} %.") |
| | print(f"Peak reserved memory for training % of max memory = {lora_percentage} %.") |
| |
|
| |
|
| | model.save_pretrained(f"lora_model_{steps}") |
| |
|
| | |
| | if True: model.save_pretrained_merged(f"model_{steps}", tokenizer, save_method = "lora",) |
| |
|
| | |
| | if True: model.save_pretrained_gguf(f"model_{steps}", tokenizer, quantization_method = "q4_k_m") |
| |
|