entropy-v1-lora / train_config.json
ysong21's picture
Overwrite adapter with checkpoint-125 (r64 epoch-1)
2227164 verified
raw
history blame contribute delete
889 Bytes
{
  "trainer": "transformers.Trainer",
  "peft": "LoRA",
  "base_model": "google/gemma-3-27b-it",
  "dataset": "N8Programs/unslop-good",
  "objective": "PPL_cond on assistant tokens only; prompt masked up to and including <start_of_turn>model",
  "max_length": 8704,
  "lora": {
    "r": 64,
    "alpha": 128,
    "dropout": 0.05,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
  },
  "optim": {
    "optimizer": "adamw_torch_fused",
    "learning_rate": 0.0001,
    "lr_scheduler": "cosine",
    "warmup_ratio": 0.03,
    "weight_decay": 0.0
  },
  "batching": {
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8
  },
  "precision": {
    "bf16": true,
    "tf32": true,
    "gradient_checkpointing": true
  },
  "epochs": 5,
  "selected_checkpoint": {
    "checkpoint": "checkpoint-125",
    "epoch": 1
  }
}