| { | |
| "model_name": "microsoft/Phi-3.5-mini-instruct", | |
| "display_name": "Phi-3.5 (BF16, Liger, Batch96, CkptON)", | |
| "timestamp": "2026-02-03T19:35:19.489805", | |
| "training_config": { | |
| "num_train_epochs": 1, | |
| "per_device_train_batch_size": 96, | |
| "gradient_accumulation_steps": 1, | |
| "learning_rate": 0.0002, | |
| "warmup_ratio": 0.03, | |
| "lr_scheduler_type": "cosine", | |
| "weight_decay": 0.01, | |
| "max_seq_length": 4096, | |
| "logging_steps": 1, | |
| "eval_steps": 50, | |
| "save_steps": 200, | |
| "seed": 42, | |
| "bf16": true, | |
| "optim": "adamw_torch_fused", | |
| "dataloader_num_workers": 8, | |
| "torch_compile": true | |
| }, | |
| "lora_config": { | |
| "r": 16, | |
| "lora_alpha": 32, | |
| "lora_dropout": 0.05, | |
| "target_modules": [ | |
| "q_proj", | |
| "k_proj", | |
| "v_proj", | |
| "o_proj", | |
| "gate_proj", | |
| "up_proj", | |
| "down_proj" | |
| ], | |
| "bias": "none", | |
| "task_type": "CAUSAL_LM" | |
| }, | |
| "train_loss": 0.5981301681586066, | |
| "train_samples": 172145, | |
| "val_samples": 9066, | |
| "train_time_minutes": 147.3416652202606, | |
| "max_memory_gb": 54.91591787338257, | |
| "fix_applied": "YAML normalization via PyYAML (2 spaces), packing=False, Native BF16 Training, Batch 96" | |
| } |