{
  "trainer": "transformers.Trainer",
  "peft": "LoRA",
  "base_model": "google/gemma-3-27b-it",
  "dataset": "N8Programs/unslop-good",
  "objective": "PPL_cond on assistant tokens only; prompt masked up to and including <start_of_turn>model",
  "max_length": 8704,
  "lora": {
    "r": 64,
    "alpha": 128,
    "dropout": 0.05,
    "target_modules": ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"]
  },
  "optim": {
    "optimizer": "adamw_torch_fused",
    "learning_rate": 0.0001,
    "lr_scheduler": "cosine",
    "warmup_ratio": 0.03,
    "weight_decay": 0.0
  },
  "batching": {
    "per_device_train_batch_size": 1,
    "gradient_accumulation_steps": 8
  },
  "precision": {
    "bf16": true,
    "tf32": true,
    "gradient_checkpointing": true
  },
  "epochs": 5,
  "selected_checkpoint": {
    "checkpoint": "checkpoint-125",
    "epoch": 1
  }
}