# Maximum tokenized sequence length the trainer will pack/truncate to.
max_seq_length = 500
def fmt(examples):
    """Identity formatting hook for SFTTrainer.

    Logs how many examples arrived in the batch, then returns the batch
    unchanged. Useful for debugging the dataset pipeline.
    """
    batch_size = len(examples)
    print(batch_size)
    return examples
# --- LoRA hyper-parameters ---

# Rank (dimension) of the LoRA update matrices.
lora_r = 32
# Alpha scaling factor applied to the LoRA updates.
lora_alpha = 16
# Dropout probability for the LoRA layers.
lora_dropout = 0.05
# Module names that LoRA should target (attention and MLP projections).
target_modules = ["k_proj", "q_proj", "v_proj", "o_proj", "gate_proj", "down_proj", "up_proj"]
# LoRA adapter configuration for causal-LM fine-tuning, built from the
# hyper-parameters declared above.
peft_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    target_modules=target_modules,
    r=lora_r,
    lora_alpha=lora_alpha,
    lora_dropout=lora_dropout,
)
# Training configuration, built separately for readability before being
# handed to TRL's supervised fine-tuning trainer.
training_args = TrainingArguments(
    output_dir="llama_3b_step2_batch_v2",
    # Batch sizing.
    per_device_train_batch_size=2,
    gradient_accumulation_steps=4,
    per_device_eval_batch_size=40,
    gradient_checkpointing=True,
    # Schedule and optimizer.
    num_train_epochs=2,
    # Select a 2 to 10x smaller learning rate for the embedding matrices!
    learning_rate=3e-5,
    # embedding_learning_rate = 1e-6,
    weight_decay=0.00,
    lr_scheduler_type="linear",
    optim="adamw_torch",
    # Use num_train_epochs and warmup_ratio for longer runs!
    # max_steps = 70,
    # warmup_steps = 10,
    # warmup_ratio = 0.1,
    # Precision.
    bf16=True,
    # fp16 = not is_bfloat16_supported(),
    # Evaluation, checkpointing, and logging cadence.
    do_eval=True,
    eval_strategy="steps",
    eval_steps=50,
    # save_strategy = 'steps',
    save_steps=1000,
    logging_steps=1,
    # seed = 3407,
)

# Supervised fine-tuning over the "text" field of the QA dataset.
trainer = SFTTrainer(
    model=model,
    tokenizer=tokenizer,
    train_dataset=qa_dataset["train"],
    eval_dataset=qa_dataset["test"],
    dataset_text_field="text",
    max_seq_length=max_seq_length,
    dataset_num_proc=4,
    data_collator=collator,
    # formatting_func = fmt,
    # peft_config=peft_config,
    args=training_args,
)