{ "trainer": "transformers.Trainer", "peft": "LoRA", "base_model": "google/gemma-3-27b-it", "dataset": "N8Programs/unslop-good", "objective": "PPL_cond on assistant tokens only; prompt masked up to and including model", "max_length": 8704, "lora": { "r": 64, "alpha": 128, "dropout": 0.05, "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"] }, "optim": { "optimizer": "adamw_torch_fused", "learning_rate": 0.0001, "lr_scheduler": "cosine", "warmup_ratio": 0.03, "weight_decay": 0.0 }, "batching": { "per_device_train_batch_size": 1, "gradient_accumulation_steps": 8 }, "precision": { "bf16": true, "tf32": true, "gradient_checkpointing": true }, "epochs": 5, "selected_checkpoint": { "checkpoint": "checkpoint-125", "epoch": 1 } }