{
  "model_name": "google/gemma-2-9b-it",
  "display_name": "Gemma 2 9B (BF16, Batch16 MaxSafe)",
  "timestamp": "2026-02-04T13:14:38.929340",
  "training_config": {
    "num_train_epochs": 1,
    "per_device_train_batch_size": 16,
    "gradient_accumulation_steps": 6,
    "learning_rate": 5e-05,
    "warmup_ratio": 0.03,
    "lr_scheduler_type": "cosine",
    "weight_decay": 0.01,
    "max_seq_length": 2048,
    "logging_steps": 25,
    "eval_steps": 100,
    "save_steps": 200,
    "seed": 42,
    "bf16": true,
    "optim": "adamw_torch_fused",
    "dataloader_num_workers": 8,
    "torch_compile": false
  },
  "lora_config": {
    "r": 16,
    "lora_alpha": 32,
    "lora_dropout": 0.05,
    "target_modules": [
      "q_proj",
      "k_proj",
      "v_proj",
      "o_proj",
      "gate_proj",
      "up_proj",
      "down_proj"
    ],
    "bias": "none",
    "task_type": "CAUSAL_LM"
  },
  "train_loss": 0.6173567452229245,
  "train_samples": 170305,
  "val_samples": 8965,
  "train_time_minutes": 666.9830995202065,
  "max_memory_gb": 77.72561597824097,
  "fix_applied": "YAML normalization via PyYAML (2 spaces), packing=False, Native BF16 Training, Batch 96"
}